From d8c3d03a0b7f10977dd508a5a965a417b7f1b065 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 28 Jun 2007 22:54:37 -0700 Subject: diffcore_count_changes: pass diffcore_filespec We may want to use richer information on the data we are dealing with in this function, so instead of passing a buffer address and length, just pass the diffcore_filespec structure. Existing callers always call this function with parameters taken from a filespec anyway, so there is no functionality changes. Signed-off-by: Junio C Hamano --- diffcore-break.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'diffcore-break.c') diff --git a/diffcore-break.c b/diffcore-break.c index 9c19b8cab7..ae8a7d03e2 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -66,8 +66,7 @@ static int should_break(struct diff_filespec *src, if (base_size < MINIMUM_BREAK_SIZE) return 0; /* we do not break too small filepair */ - if (diffcore_count_changes(src->data, src->size, - dst->data, dst->size, + if (diffcore_count_changes(src, dst, NULL, NULL, 0, &src_copied, &literal_added)) -- cgit v1.3 From 6dd4b66fdecc2ffdc68758b6c4e059fcaaca512b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 20 Oct 2007 12:31:31 -0700 Subject: Fix diffcore-break total breakage Ok, so on the kernel list, some people noticed that "git log --follow" doesn't work too well with some files in the x86 merge, because a lot of files got renamed in very special ways. In particular, there was a pattern of doing single commits with renames that looked basically like - rename "filename.h" -> "filename_64.h" - create new "filename.c" that includes "filename_32.h" or "filename_64.h" depending on whether we're 32-bit or 64-bit. which was preparatory for smushing the two trees together. Now, there's two issues here: - "filename.c" *remained*. Yes, it was a rename, but there was a new file created with the old name in the same commit. This was important, because we wanted each commit to compile properly, so that it was bisectable, so splitting the rename into one commit and the "create helper file" into another was *not* an option. So we need to break associations where the contents change too much. Fine. We have the -B flag for that. When we break things up, then the rename detection will be able to figure out whether there are better alternatives. - "git log --follow" didn't with with -B. Now, the second case was really simple: we use a different "diffopt" structure for the rename detection than the basic one (which we use for showing the diffs). So that second case is trivially fixed by a trivial one-liner that just copies the break_opt values from the "real" diffopts to the one used for rename following. So now "git log -B --follow" works fine: diff --git a/tree-diff.c b/tree-diff.c index 26bdbdd..7c261fd 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -319,6 +319,7 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co diff_opts.detect_rename = DIFF_DETECT_RENAME; diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; diff_opts.single_follow = opt->paths[0]; + diff_opts.break_opt = opt->break_opt; paths[0] = NULL; diff_tree_setup_paths(paths, &diff_opts); if (diff_setup_done(&diff_opts) < 0) however, the end result does *not* work. Because our diffcore-break.c logic is totally bogus! In particular: - it used to do if (base_size < MINIMUM_BREAK_SIZE) return 0; /* we do not break too small filepair */ which basically says "don't bother to break small files". But that "base_size" is the *smaller* of the two sizes, which means that if some large file was rewritten into one that just includes another file, we would look at the (small) result, and decide that it's smaller than the break size, so it cannot be worth it to break it up! Even if the other side was ten times bigger and looked *nothing* like the samell file! That's clearly bogus. I replaced "base_size" with "max_size", so that we compare the *bigger* of the filepair with the break size. - It calculated a "merge_score", which was the score needed to merge it back together if nothing else wanted it. But even if it was *so* different that we would never want to merge it back, we wouldn't consider it a break! That makes no sense. So I added if (*merge_score_p > break_score) return 1; to make it clear that if we wouldn't want to merge it at the end, it was *definitely* a break. - It compared the whole "extent of damage", counting all inserts and deletes, but it based this score on the "base_size", and generated the damage score with delta_size = src_removed + literal_added; damage_score = delta_size * MAX_SCORE / base_size; but that makes no sense either, since quite often, this will result in a number that is *bigger* than MAX_SCORE! Why? Because base_size is (again) the smaller of the two files we compare, and when you start out from a small file and add a lot (or start out from a large file and remove a lot), the base_size is going to be much smaller than the damage! Again, the fix was to replace "base_size" with "max_size", at which point the damage actually becomes a sane percentage of the whole. With these changes in place, not only does "git log -B --follow" work for the case that triggered this in the first place, ie now git log -B --follow arch/x86/kernel/vmlinux_64.lds.S actually gives reasonable results. But I also wanted to verify it in general, by doing a full-history git log --stat -B -C on my kernel tree with the old code and the new code. There's some tweaking to be done, but generally, the new code generates much better results wrt breaking up files (and then finding better rename candidates). Here's a few examples of the "--stat" output: - This: include/asm-x86/Kbuild | 2 - include/asm-x86/debugreg.h | 79 +++++++++++++++++++++++++++++++++++------ include/asm-x86/debugreg_32.h | 64 --------------------------------- include/asm-x86/debugreg_64.h | 65 --------------------------------- 4 files changed, 68 insertions(+), 142 deletions(-) Becomes: include/asm-x86/Kbuild | 2 - include/asm-x86/{debugreg_64.h => debugreg.h} | 9 +++- include/asm-x86/debugreg_32.h | 64 ------------------------- 3 files changed, 7 insertions(+), 68 deletions(-) - This: include/asm-x86/bug.h | 41 +++++++++++++++++++++++++++++++++++++++-- include/asm-x86/bug_32.h | 37 ------------------------------------- include/asm-x86/bug_64.h | 34 ---------------------------------- 3 files changed, 39 insertions(+), 73 deletions(-) Becomes include/asm-x86/{bug_64.h => bug.h} | 20 +++++++++++++----- include/asm-x86/bug_32.h | 37 ----------------------------------- 2 files changed, 14 insertions(+), 43 deletions(-) Now, in some other cases, it does actually turn a rename into a real "delete+create" pair, and then the diff is usually bigger, so truth in advertizing: it doesn't always generate a nicer diff. But for what -B was meant for, I think this is a big improvement, and I suspect those cases where it generates a bigger diff are tweakable. So I think this diff fixes a real bug, but we might still want to tweak the default values and perhaps the exact rules for when a break happens. Signed-off-by: Linus Torvalds Signed-off-by: Shawn O. Pearce --- diffcore-break.c | 11 +++++++---- tree-diff.c | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'diffcore-break.c') diff --git a/diffcore-break.c b/diffcore-break.c index ae8a7d03e2..c71a22621a 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src, * The value we return is 1 if we want the pair to be broken, * or 0 if we do not. */ - unsigned long delta_size, base_size, src_copied, literal_added, - src_removed; + unsigned long delta_size, base_size, max_size; + unsigned long src_copied, literal_added, src_removed; *merge_score_p = 0; /* assume no deletion --- "do not break" * is the default. @@ -63,7 +63,8 @@ static int should_break(struct diff_filespec *src, return 0; /* error but caught downstream */ base_size = ((src->size < dst->size) ? src->size : dst->size); - if (base_size < MINIMUM_BREAK_SIZE) + max_size = ((src->size > dst->size) ? src->size : dst->size); + if (max_size < MINIMUM_BREAK_SIZE) return 0; /* we do not break too small filepair */ if (diffcore_count_changes(src, dst, @@ -89,12 +90,14 @@ static int should_break(struct diff_filespec *src, * less than the minimum, after rename/copy runs. */ *merge_score_p = (int)(src_removed * MAX_SCORE / src->size); + if (*merge_score_p > break_score) + return 1; /* Extent of damage, which counts both inserts and * deletes. */ delta_size = src_removed + literal_added; - if (delta_size * MAX_SCORE / base_size < break_score) + if (delta_size * MAX_SCORE / max_size < break_score) return 0; /* If you removed a lot without adding new material, that is diff --git a/tree-diff.c b/tree-diff.c index 26bdbdd2bf..7c261fd7c3 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -319,6 +319,7 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co diff_opts.detect_rename = DIFF_DETECT_RENAME; diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; diff_opts.single_follow = opt->paths[0]; + diff_opts.break_opt = opt->break_opt; paths[0] = NULL; diff_tree_setup_paths(paths, &diff_opts); if (diff_setup_done(&diff_opts) < 0) -- cgit v1.3 From b45563a229f5150271837cf487a91ddd8224fbd3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 30 Nov 2007 22:22:38 -0800 Subject: rename: Break filepairs with different types. When we consider if a path has been totally rewritten, we did not touch changes from symlinks to files or vice versa. But a change that modifies even the type of a blob surely should count as a complete rewrite. While we are at it, modernise diffcore-break to be aware of gitlinks (we do not want to touch them). Signed-off-by: Junio C Hamano --- cache.h | 7 ++++ diffcore-break.c | 12 ++++-- t/t4008-diff-break-rewrite.sh | 6 +-- t/t4023-diff-rename-typechange.sh | 86 +++++++++++++++++++++++++++++++++++++++ tree-walk.h | 7 ---- 5 files changed, 104 insertions(+), 14 deletions(-) create mode 100755 t/t4023-diff-rename-typechange.sh (limited to 'diffcore-break.c') diff --git a/cache.h b/cache.h index aaa135bfde..d0e7a71c6e 100644 --- a/cache.h +++ b/cache.h @@ -192,6 +192,13 @@ enum object_type { OBJ_MAX, }; +static inline enum object_type object_type(unsigned int mode) +{ + return S_ISDIR(mode) ? OBJ_TREE : + S_ISGITLINK(mode) ? OBJ_COMMIT : + OBJ_BLOB; +} + #define GIT_DIR_ENVIRONMENT "GIT_DIR" #define GIT_WORK_TREE_ENVIRONMENT "GIT_WORK_TREE" #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" diff --git a/diffcore-break.c b/diffcore-break.c index c71a22621a..31cdcfe8bc 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -52,8 +52,10 @@ static int should_break(struct diff_filespec *src, * is the default. */ - if (!S_ISREG(src->mode) || !S_ISREG(dst->mode)) - return 0; /* leave symlink rename alone */ + if (S_ISREG(src->mode) != S_ISREG(dst->mode)) { + *merge_score_p = (int)MAX_SCORE; + return 1; /* even their types are different */ + } if (src->sha1_valid && dst->sha1_valid && !hashcmp(src->sha1, dst->sha1)) @@ -168,11 +170,13 @@ void diffcore_break(int break_score) struct diff_filepair *p = q->queue[i]; int score; - /* We deal only with in-place edit of non directory. + /* + * We deal only with in-place edit of blobs. * We do not break anything else. */ if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) && - !S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) && + object_type(p->one->mode) == OBJ_BLOB && + object_type(p->two->mode) == OBJ_BLOB && !strcmp(p->one->path, p->two->path)) { if (should_break(p->one, p->two, break_score, &score)) { diff --git a/t/t4008-diff-break-rewrite.sh b/t/t4008-diff-break-rewrite.sh index 1287d2ad17..26c2e4aa65 100755 --- a/t/t4008-diff-break-rewrite.sh +++ b/t/t4008-diff-break-rewrite.sh @@ -122,11 +122,11 @@ test_expect_success \ 'run diff with -B -M' \ 'git diff-index -B -M "$tree" >current' -# This should not mistake file0 as the copy source of new file1 -# due to type differences. +# file0 changed from regular to symlink. file1 is very close to the preimage of file0. +# because we break file0, file1 can become a rename of it. cat >expected <<\EOF :100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0 -:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 M100 file1 +:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 R file0 file1 EOF test_expect_success \ diff --git a/t/t4023-diff-rename-typechange.sh b/t/t4023-diff-rename-typechange.sh new file mode 100755 index 0000000000..255604effd --- /dev/null +++ b/t/t4023-diff-rename-typechange.sh @@ -0,0 +1,86 @@ +#!/bin/sh + +test_description='typechange rename detection' + +. ./test-lib.sh + +test_expect_success setup ' + + rm -f foo bar && + cat ../../COPYING >foo && + ln -s linklink bar && + git add foo bar && + git commit -a -m Initial && + git tag one && + + rm -f foo bar && + cat ../../COPYING >bar && + ln -s linklink foo && + git add foo bar && + git commit -a -m Second && + git tag two && + + rm -f foo bar && + cat ../../COPYING >foo && + git add foo && + git commit -a -m Third && + git tag three && + + mv foo bar && + ln -s linklink foo && + git add foo bar && + git commit -a -m Fourth && + git tag four && + + # This is purely for sanity check + + rm -f foo bar && + cat ../../COPYING >foo && + cat ../../Makefile >bar && + git add foo bar && + git commit -a -m Fifth && + git tag five && + + rm -f foo bar && + cat ../../Makefile >foo && + cat ../../COPYING >bar && + git add foo bar && + git commit -a -m Sixth && + git tag six + +' + +test_expect_success 'cross renames to be detected for regular files' ' + + git diff-tree five six -r --name-status -B -M | sort >actual && + { + echo "R100 foo bar" + echo "R100 bar foo" + } | sort >expect && + diff -u expect actual + +' + +test_expect_success 'cross renames to be detected for typechange' ' + + git diff-tree one two -r --name-status -B -M | sort >actual && + { + echo "R100 foo bar" + echo "R100 bar foo" + } | sort >expect && + diff -u expect actual + +' + +test_expect_success 'moves and renames' ' + + git diff-tree three four -r --name-status -B -M | sort >actual && + { + echo "R100 foo bar" + echo "T100 foo" + } | sort >expect && + diff -u expect actual + +' + +test_done diff --git a/tree-walk.h b/tree-walk.h index 903a7b0f48..db0fbdc701 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -7,13 +7,6 @@ struct name_entry { unsigned int mode; }; -static inline enum object_type object_type(unsigned int mode) -{ - return S_ISDIR(mode) ? OBJ_TREE : - S_ISGITLINK(mode) ? OBJ_COMMIT : - OBJ_BLOB; -} - struct tree_desc { const void *buffer; struct name_entry entry; -- cgit v1.3 From eb3a9dd3279fe4b05f286665986ebf6d43a6ccc0 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 7 Mar 2009 21:02:10 +0100 Subject: Remove unused function scope local variables These variables were unused and can be removed safely: builtin-clone.c::cmd_clone(): use_local_hardlinks, use_separate_remote builtin-fetch-pack.c::find_common(): len builtin-remote.c::mv(): symref diff.c::show_stats():show_stats(): total diffcore-break.c::should_break(): base_size fast-import.c::validate_raw_date(): date, sign fsck.c::fsck_tree(): o_sha1, sha1 xdiff-interface.c::parse_num(): read_some Signed-off-by: Benjamin Kramer Signed-off-by: Junio C Hamano --- builtin-clone.c | 6 ------ builtin-fetch-pack.c | 3 +-- builtin-remote.c | 3 +-- diff.c | 4 +--- diffcore-break.c | 3 +-- fast-import.c | 8 +++----- fsck.c | 6 +----- xdiff-interface.c | 3 +-- 8 files changed, 9 insertions(+), 27 deletions(-) (limited to 'diffcore-break.c') diff --git a/builtin-clone.c b/builtin-clone.c index c338910b1c..92826cd14c 100644 --- a/builtin-clone.c +++ b/builtin-clone.c @@ -365,8 +365,6 @@ static void install_branch_config(const char *local, int cmd_clone(int argc, const char **argv, const char *prefix) { - int use_local_hardlinks = 1; - int use_separate_remote = 1; int is_bundle = 0; struct stat buf; const char *repo_name, *repo, *work_tree, *git_dir; @@ -388,9 +386,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (argc == 0) die("You must specify a repository to clone."); - if (option_no_hardlinks) - use_local_hardlinks = 0; - if (option_mirror) option_bare = 1; @@ -399,7 +394,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) die("--bare and --origin %s options are incompatible.", option_origin); option_no_checkout = 1; - use_separate_remote = 0; } if (!option_origin) diff --git a/builtin-fetch-pack.c b/builtin-fetch-pack.c index 67fb80ec48..c2e5adc884 100644 --- a/builtin-fetch-pack.c +++ b/builtin-fetch-pack.c @@ -216,9 +216,8 @@ static int find_common(int fd[2], unsigned char *result_sha1, if (args.depth > 0) { char line[1024]; unsigned char sha1[20]; - int len; - while ((len = packet_read_line(fd[0], line, sizeof(line)))) { + while (packet_read_line(fd[0], line, sizeof(line))) { if (!prefixcmp(line, "shallow ")) { if (get_sha1_hex(line + 8, sha1)) die("invalid shallow line: %s", line); diff --git a/builtin-remote.c b/builtin-remote.c index ac69d37c8a..e171096ece 100644 --- a/builtin-remote.c +++ b/builtin-remote.c @@ -484,9 +484,8 @@ static int mv(int argc, const char **argv) struct string_list_item *item = remote_branches.items + i; int flag = 0; unsigned char sha1[20]; - const char *symref; - symref = resolve_ref(item->string, sha1, 1, &flag); + resolve_ref(item->string, sha1, 1, &flag); if (!(flag & REF_ISSYMREF)) continue; if (delete_ref(item->string, NULL, REF_NODEREF)) diff --git a/diff.c b/diff.c index 3feca1b173..e06c93707f 100644 --- a/diff.c +++ b/diff.c @@ -875,7 +875,7 @@ static void fill_print_name(struct diffstat_file *file) static void show_stats(struct diffstat_t* data, struct diff_options *options) { - int i, len, add, del, total, adds = 0, dels = 0; + int i, len, add, del, adds = 0, dels = 0; int max_change = 0, max_len = 0; int total_files = data->nr; int width, name_width; @@ -978,14 +978,12 @@ static void show_stats(struct diffstat_t* data, struct diff_options *options) */ add = added; del = deleted; - total = add + del; adds += add; dels += del; if (width <= max_change) { add = scale_linear(add, width, max_change); del = scale_linear(del, width, max_change); - total = add + del; } show_name(options->file, prefix, name, len, reset, set); fprintf(options->file, "%5d%s", added + deleted, diff --git a/diffcore-break.c b/diffcore-break.c index 31cdcfe8bc..d7097bb576 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -45,7 +45,7 @@ static int should_break(struct diff_filespec *src, * The value we return is 1 if we want the pair to be broken, * or 0 if we do not. */ - unsigned long delta_size, base_size, max_size; + unsigned long delta_size, max_size; unsigned long src_copied, literal_added, src_removed; *merge_score_p = 0; /* assume no deletion --- "do not break" @@ -64,7 +64,6 @@ static int should_break(struct diff_filespec *src, if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0)) return 0; /* error but caught downstream */ - base_size = ((src->size < dst->size) ? src->size : dst->size); max_size = ((src->size > dst->size) ? src->size : dst->size); if (max_size < MINIMUM_BREAK_SIZE) return 0; /* we do not break too small filepair */ diff --git a/fast-import.c b/fast-import.c index 3748ddf48d..beeac0d004 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1745,21 +1745,19 @@ static void parse_data(struct strbuf *sb) static int validate_raw_date(const char *src, char *result, int maxlen) { const char *orig_src = src; - char *endp, sign; - unsigned long date; + char *endp; errno = 0; - date = strtoul(src, &endp, 10); + strtoul(src, &endp, 10); if (errno || endp == src || *endp != ' ') return -1; src = endp + 1; if (*src != '-' && *src != '+') return -1; - sign = *src; - date = strtoul(src + 1, &endp, 10); + strtoul(src + 1, &endp, 10); if (errno || endp == src || *endp || (endp - orig_src) >= maxlen) return -1; diff --git a/fsck.c b/fsck.c index 97f76c5815..511b82cba9 100644 --- a/fsck.c +++ b/fsck.c @@ -148,20 +148,17 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) struct tree_desc desc; unsigned o_mode; const char *o_name; - const unsigned char *o_sha1; init_tree_desc(&desc, item->buffer, item->size); o_mode = 0; o_name = NULL; - o_sha1 = NULL; while (desc.size) { unsigned mode; const char *name; - const unsigned char *sha1; - sha1 = tree_entry_extract(&desc, &name, &mode); + tree_entry_extract(&desc, &name, &mode); if (strchr(name, '/')) has_full_path = 1; @@ -207,7 +204,6 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) o_mode = mode; o_name = name; - o_sha1 = sha1; } retval = 0; diff --git a/xdiff-interface.c b/xdiff-interface.c index d782f06d99..b9b0db8d86 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -15,11 +15,10 @@ static int parse_num(char **cp_p, int *num_p) { char *cp = *cp_p; int num = 0; - int read_some; while ('0' <= *cp && *cp <= '9') num = num * 10 + *cp++ - '0'; - if (!(read_some = cp - *cp_p)) + if (!(cp - *cp_p)) return -1; *cp_p = cp; *num_p = num; -- cgit v1.3