From e0173ad9fcabfcabe54d65be2c8b6602f61079e6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 28 Apr 2007 23:38:52 -0700 Subject: Make macros to prevent double-inclusion in headers consistent. Signed-off-by: Junio C Hamano --- diffcore.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'diffcore.h') diff --git a/diffcore.h b/diffcore.h index 1ea80671e3..7b9294eab2 100644 --- a/diffcore.h +++ b/diffcore.h @@ -1,8 +1,8 @@ /* * Copyright (C) 2005 Junio C Hamano */ -#ifndef _DIFFCORE_H_ -#define _DIFFCORE_H_ +#ifndef DIFFCORE_H +#define DIFFCORE_H /* This header file is internal between diff.c and its diff transformers * (e.g. diffcore-rename, diffcore-pickaxe). Never include this header -- cgit v1.3 From d8c3d03a0b7f10977dd508a5a965a417b7f1b065 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 28 Jun 2007 22:54:37 -0700 Subject: diffcore_count_changes: pass diffcore_filespec We may want to use richer information on the data we are dealing with in this function, so instead of passing a buffer address and length, just pass the diffcore_filespec structure. Existing callers always call this function with parameters taken from a filespec anyway, so there is no functionality changes. Signed-off-by: Junio C Hamano --- diffcore-break.c | 3 +-- diffcore-delta.c | 8 ++++---- diffcore-rename.c | 3 +-- diffcore.h | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) (limited to 'diffcore.h') diff --git a/diffcore-break.c b/diffcore-break.c index 9c19b8cab7..ae8a7d03e2 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -66,8 +66,7 @@ static int should_break(struct diff_filespec *src, if (base_size < MINIMUM_BREAK_SIZE) return 0; /* we do not break too small filepair */ - if (diffcore_count_changes(src->data, src->size, - dst->data, dst->size, + if (diffcore_count_changes(src, dst, NULL, NULL, 0, &src_copied, &literal_added)) diff --git a/diffcore-delta.c b/diffcore-delta.c index 7338a40c59..0e1fae79de 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -156,8 +156,8 @@ static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz) return hash; } -int diffcore_count_changes(void *src, unsigned long src_size, - void *dst, unsigned long dst_size, +int diffcore_count_changes(struct diff_filespec *src, + struct diff_filespec *dst, void **src_count_p, void **dst_count_p, unsigned long delta_limit, @@ -172,14 +172,14 @@ int diffcore_count_changes(void *src, unsigned long src_size, if (src_count_p) src_count = *src_count_p; if (!src_count) { - src_count = hash_chars(src, src_size); + src_count = hash_chars(src->data, src->size); if (src_count_p) *src_count_p = src_count; } if (dst_count_p) dst_count = *dst_count_p; if (!dst_count) { - dst_count = hash_chars(dst, dst_size); + dst_count = hash_chars(dst->data, dst->size); if (dst_count_p) *dst_count_p = dst_count; } diff --git a/diffcore-rename.c b/diffcore-rename.c index 79c984c9cf..cb227366b8 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -189,8 +189,7 @@ static int estimate_similarity(struct diff_filespec *src, delta_limit = (unsigned long) (base_size * (MAX_SCORE-minimum_score) / MAX_SCORE); - if (diffcore_count_changes(src->data, src->size, - dst->data, dst->size, + if (diffcore_count_changes(src, dst, &src->cnt_data, &dst->cnt_data, delta_limit, &src_copied, &literal_added)) diff --git a/diffcore.h b/diffcore.h index 7b9294eab2..990dec50f1 100644 --- a/diffcore.h +++ b/diffcore.h @@ -103,8 +103,8 @@ void diff_debug_queue(const char *, struct diff_queue_struct *); #define diff_debug_queue(a,b) do {} while(0) #endif -extern int diffcore_count_changes(void *src, unsigned long src_size, - void *dst, unsigned long dst_size, +extern int diffcore_count_changes(struct diff_filespec *src, + struct diff_filespec *dst, void **src_count_p, void **dst_count_p, unsigned long delta_limit, -- cgit v1.3 From 706098af6b53403b5ea3db9216fb99afbe06f52d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 28 Jun 2007 22:56:07 -0700 Subject: diffcore_filespec: add is_binary diffcore-break and diffcore-rename would want to behave slightly differently depending on the binary-ness of the data, so add one bit to the filespec, as the structure is now passed down to diffcore_count_changes() function. Signed-off-by: Junio C Hamano --- diff.c | 16 ++++++++++++++++ diffcore.h | 1 + 2 files changed, 17 insertions(+) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index 9938969fa5..74c1198e67 100644 --- a/diff.c +++ b/diff.c @@ -3005,6 +3005,22 @@ void diffcore_std(struct diff_options *options) { if (options->quiet) return; + + /* + * break/rename count similarity differently depending on + * the binary-ness. + */ + if ((options->break_opt != -1) || (options->detect_rename)) { + struct diff_queue_struct *q = &diff_queued_diff; + int i; + + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + p->one->is_binary = file_is_binary(p->one); + p->two->is_binary = file_is_binary(p->two); + } + } + if (options->break_opt != -1) diffcore_break(options->break_opt); if (options->detect_rename) diff --git a/diffcore.h b/diffcore.h index 990dec50f1..0c8abb5b94 100644 --- a/diffcore.h +++ b/diffcore.h @@ -37,6 +37,7 @@ struct diff_filespec { #define DIFF_FILE_VALID(spec) (((spec)->mode) != 0) unsigned should_free : 1; /* data should be free()'ed */ unsigned should_munmap : 1; /* data should be munmap()'ed */ + unsigned is_binary : 1; /* data should be considered "binary" */ }; extern struct diff_filespec *alloc_filespec(const char *); -- cgit v1.3 From 29a3eefde111f6a24292163c4308f00ab3572627 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 6 Jul 2007 00:18:54 -0700 Subject: Introduce diff_filespec_is_binary() This replaces an explicit initialization of filespec->is_binary field used for rename/break followed by direct access to that field with a wrapper function that lazily iniaitlizes and accesses the field. We would add more attribute accesses for the use of diff routines, and it would be better to make this abstraction earlier. Signed-off-by: Junio C Hamano --- diff.c | 71 ++++++++++++++++++++++++++++---------------------------- diffcore-delta.c | 2 +- diffcore.h | 2 ++ 3 files changed, 39 insertions(+), 36 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index 19589707c4..16ea7100df 100644 --- a/diff.c +++ b/diff.c @@ -1102,30 +1102,45 @@ static void setup_diff_attr_check(struct git_attr_check *check) { static struct git_attr *attr_diff; - if (!attr_diff) + if (!attr_diff) { attr_diff = git_attr("diff", 4); - check->attr = attr_diff; + } + check[0].attr = attr_diff; } -static int file_is_binary(struct diff_filespec *one) +static void diff_filespec_check_attr(struct diff_filespec *one) { - struct git_attr_check attr_diff_check; + struct git_attr_check attr_diff_check[1]; - setup_diff_attr_check(&attr_diff_check); - if (!git_checkattr(one->path, 1, &attr_diff_check)) { - const char *value = attr_diff_check.value; + if (one->checked_attr) + return; + + setup_diff_attr_check(attr_diff_check); + one->is_binary = 0; + + if (!git_checkattr(one->path, ARRAY_SIZE(attr_diff_check), attr_diff_check)) { + const char *value; + + /* binaryness */ + value = attr_diff_check[0].value; if (ATTR_TRUE(value)) - return 0; + ; else if (ATTR_FALSE(value)) - return 1; + one->is_binary = 1; } - if (!one->data) { - if (!DIFF_FILE_VALID(one)) - return 0; + if (!one->data && DIFF_FILE_VALID(one)) diff_populate_filespec(one, 0); - } - return buffer_is_binary(one->data, one->size); + + if (one->data) + one->is_binary = buffer_is_binary(one->data, one->size); + +} + +int diff_filespec_is_binary(struct diff_filespec *one) +{ + diff_filespec_check_attr(one); + return one->is_binary; } static void builtin_diff(const char *name_a, @@ -1182,7 +1197,8 @@ static void builtin_diff(const char *name_a, if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); - if (!o->text && (file_is_binary(one) || file_is_binary(two))) { + if (!o->text && + (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) { /* Quite common confusing case */ if (mf1.size == mf2.size && !memcmp(mf1.ptr, mf2.ptr, mf1.size)) @@ -1260,7 +1276,7 @@ static void builtin_diffstat(const char *name_a, const char *name_b, if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); - if (file_is_binary(one) || file_is_binary(two)) { + if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) { data->is_binary = 1; data->added = mf2.size; data->deleted = mf1.size; @@ -1302,7 +1318,7 @@ static void builtin_checkdiff(const char *name_a, const char *name_b, if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); - if (file_is_binary(two)) + if (diff_filespec_is_binary(two)) goto free_and_return; else { /* Crazy xdl interfaces.. */ @@ -1880,8 +1896,8 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) if (o->binary) { mmfile_t mf; - if ((!fill_mmfile(&mf, one) && file_is_binary(one)) || - (!fill_mmfile(&mf, two) && file_is_binary(two))) + if ((!fill_mmfile(&mf, one) && diff_filespec_is_binary(one)) || + (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two))) abbrev = 40; } len += snprintf(msg + len, sizeof(msg) - len, @@ -2783,7 +2799,7 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1) return error("unable to read files to diff"); /* Maybe hash p->two? into the patch id? */ - if (file_is_binary(p->two)) + if (diff_filespec_is_binary(p->two)) continue; len1 = remove_space(p->one->path, strlen(p->one->path)); @@ -3011,21 +3027,6 @@ void diffcore_std(struct diff_options *options) if (options->quiet) return; - /* - * break/rename count similarity differently depending on - * the binary-ness. - */ - if ((options->break_opt != -1) || (options->detect_rename)) { - struct diff_queue_struct *q = &diff_queued_diff; - int i; - - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - p->one->is_binary = file_is_binary(p->one); - p->two->is_binary = file_is_binary(p->two); - } - } - if (options->break_opt != -1) diffcore_break(options->break_opt); if (options->detect_rename) diff --git a/diffcore-delta.c b/diffcore-delta.c index a038b166c5..d9729e5ec2 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -129,7 +129,7 @@ static struct spanhash_top *hash_chars(struct diff_filespec *one) struct spanhash_top *hash; unsigned char *buf = one->data; unsigned int sz = one->size; - int is_text = !one->is_binary; + int is_text = !diff_filespec_is_binary(one); i = INITIAL_HASH_SIZE; hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<mode) != 0) unsigned should_free : 1; /* data should be free()'ed */ unsigned should_munmap : 1; /* data should be munmap()'ed */ + unsigned checked_attr : 1; unsigned is_binary : 1; /* data should be considered "binary" */ }; @@ -46,6 +47,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *, extern int diff_populate_filespec(struct diff_filespec *, int); extern void diff_free_filespec_data(struct diff_filespec *); +extern int diff_filespec_is_binary(struct diff_filespec *); struct diff_filepair { struct diff_filespec *one; -- cgit v1.3 From f258475a6ede3617ae768b69e33f78cbab8312de Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 6 Jul 2007 00:45:10 -0700 Subject: Per-path attribute based hunk header selection. This makes"diff -p" hunk headers customizable via gitattributes mechanism. It is based on Johannes's earlier patch that allowed to define a single regexp to be used for everything. The mechanism to arrive at the regexp that is used to define hunk header is the same as other use of gitattributes. You assign an attribute, funcname (because "diff -p" typically uses the name of the function the patch is about as the hunk header), a simple string value. This can be one of the names of built-in pattern (currently, "java" is defined) or a custom pattern name, to be looked up from the configuration file. (in .gitattributes) *.java funcname=java *.perl funcname=perl (in .git/config) [funcname] java = ... # ugly and complicated regexp to override the built-in one. perl = ... # another ugly and complicated regexp to define a new one. Signed-off-by: Junio C Hamano --- diff.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++- diffcore.h | 1 + t/t4018-diff-funcname.sh | 60 ++++++++++++++++++++++++++++++ xdiff-interface.c | 71 +++++++++++++++++++++++++++++++++++ xdiff-interface.h | 2 + xdiff/xdiff.h | 4 ++ xdiff/xemit.c | 37 ++++++++++++------- 7 files changed, 256 insertions(+), 15 deletions(-) create mode 100644 t/t4018-diff-funcname.sh (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index d10e848c79..04e7e91adf 100644 --- a/diff.c +++ b/diff.c @@ -1101,22 +1101,26 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two) static void setup_diff_attr_check(struct git_attr_check *check) { static struct git_attr *attr_diff; + static struct git_attr *attr_diff_func_name; if (!attr_diff) { attr_diff = git_attr("diff", 4); + attr_diff_func_name = git_attr("funcname", 8); } check[0].attr = attr_diff; + check[1].attr = attr_diff_func_name; } static void diff_filespec_check_attr(struct diff_filespec *one) { - struct git_attr_check attr_diff_check[1]; + struct git_attr_check attr_diff_check[2]; if (one->checked_attr) return; setup_diff_attr_check(attr_diff_check); one->is_binary = 0; + one->hunk_header_ident = NULL; if (!git_checkattr(one->path, ARRAY_SIZE(attr_diff_check), attr_diff_check)) { const char *value; @@ -1127,6 +1131,13 @@ static void diff_filespec_check_attr(struct diff_filespec *one) ; else if (ATTR_FALSE(value)) one->is_binary = 1; + + /* hunk header ident */ + value = attr_diff_check[1].value; + if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value)) + ; + else + one->hunk_header_ident = value; } if (!one->data && DIFF_FILE_VALID(one)) @@ -1143,6 +1154,82 @@ int diff_filespec_is_binary(struct diff_filespec *one) return one->is_binary; } +static struct hunk_header_regexp { + char *name; + char *regexp; + struct hunk_header_regexp *next; +} *hunk_header_regexp_list, **hunk_header_regexp_tail; + +static int hunk_header_config(const char *var, const char *value) +{ + static const char funcname[] = "funcname."; + struct hunk_header_regexp *hh; + + if (prefixcmp(var, funcname)) + return 0; + var += strlen(funcname); + for (hh = hunk_header_regexp_list; hh; hh = hh->next) + if (!strcmp(var, hh->name)) { + free(hh->regexp); + hh->regexp = xstrdup(value); + return 0; + } + hh = xcalloc(1, sizeof(*hh)); + hh->name = xstrdup(var); + hh->regexp = xstrdup(value); + hh->next = NULL; + *hunk_header_regexp_tail = hh; + return 0; +} + +static const char *hunk_header_regexp(const char *ident) +{ + struct hunk_header_regexp *hh; + + if (!hunk_header_regexp_tail) { + hunk_header_regexp_tail = &hunk_header_regexp_list; + git_config(hunk_header_config); + } + for (hh = hunk_header_regexp_list; hh; hh = hh->next) + if (!strcmp(ident, hh->name)) + return hh->regexp; + return NULL; +} + +static const char *diff_hunk_header_regexp(struct diff_filespec *one) +{ + const char *ident, *regexp; + + diff_filespec_check_attr(one); + ident = one->hunk_header_ident; + + if (!ident) + /* + * If the config file has "funcname.default" defined, that + * regexp is used; otherwise NULL is returned and xemit uses + * the built-in default. + */ + return hunk_header_regexp("default"); + + /* Look up custom "funcname.$ident" regexp from config. */ + regexp = hunk_header_regexp(ident); + if (regexp) + return regexp; + + /* + * And define built-in fallback patterns here. Note that + * these can be overriden by the user's config settings. + */ + if (!strcmp(ident, "java")) + return "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|" + "new\\|return\\|switch\\|throw\\|while\\)\n" + "^[ ]*\\(\\([ ]*" + "[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}" + "[ ]*([^;]*$\\)"; + + return NULL; +} + static void builtin_diff(const char *name_a, const char *name_b, struct diff_filespec *one, @@ -1217,6 +1304,11 @@ static void builtin_diff(const char *name_a, xdemitconf_t xecfg; xdemitcb_t ecb; struct emit_callback ecbdata; + const char *hunk_header_regexp; + + hunk_header_regexp = diff_hunk_header_regexp(one); + if (!hunk_header_regexp) + hunk_header_regexp = diff_hunk_header_regexp(two); memset(&xecfg, 0, sizeof(xecfg)); memset(&ecbdata, 0, sizeof(ecbdata)); @@ -1226,6 +1318,8 @@ static void builtin_diff(const char *name_a, xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts; xecfg.ctxlen = o->context; xecfg.flags = XDL_EMIT_FUNCNAMES; + if (hunk_header_regexp) + xdiff_set_find_func(&xecfg, hunk_header_regexp); if (!diffopts) ; else if (!prefixcmp(diffopts, "--unified=")) diff --git a/diffcore.h b/diffcore.h index dcab7e20bf..05985147ea 100644 --- a/diffcore.h +++ b/diffcore.h @@ -27,6 +27,7 @@ struct diff_filespec { char *path; void *data; void *cnt_data; + const void *hunk_header_ident; unsigned long size; int xfrm_flags; /* for use by the xfrm */ unsigned short mode; /* file mode */ diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh new file mode 100644 index 0000000000..dc7a47b3f1 --- /dev/null +++ b/t/t4018-diff-funcname.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# +# Copyright (c) 2007 Johannes E. Schindelin +# + +test_description='Test custom diff function name patterns' + +. ./test-lib.sh + +LF=' +' + +cat > Beer.java << EOF +public class Beer +{ + int special; + public static void main(String args[]) + { + String s=" "; + for(int x = 99; x > 0; x--) + { + System.out.print(x + " bottles of beer on the wall " + + x + " bottles of beer\n" + + "Take one down, pass it around, " + (x - 1) + + " bottles of beer on the wall.\n"); + } + System.out.print("Go to the store, buy some more,\n" + + "99 bottles of beer on the wall.\n"); + } +} +EOF + +sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java + +test_expect_success 'default behaviour' ' + git diff Beer.java Beer-correct.java | + grep "^@@.*@@ public class Beer" +' + +test_expect_success 'preset java pattern' ' + echo "*.java funcname=java" >.gitattributes && + git diff Beer.java Beer-correct.java | + grep "^@@.*@@ public static void main(" +' + +git config funcname.java '!static +!String +[^ ].*s.*' + +test_expect_success 'custom pattern' ' + git diff Beer.java Beer-correct.java | + grep "^@@.*@@ int special;$" +' + +test_expect_success 'last regexp must not be negated' ' + git config diff.functionnameregexp "!static" && + ! git diff Beer.java Beer-correct.java +' + +test_done diff --git a/xdiff-interface.c b/xdiff-interface.c index e407cf11b1..be866d12d3 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -129,3 +129,74 @@ int buffer_is_binary(const char *ptr, unsigned long size) size = FIRST_FEW_BYTES; return !!memchr(ptr, 0, size); } + +struct ff_regs { + int nr; + struct ff_reg { + regex_t re; + int negate; + } *array; +}; + +static long ff_regexp(const char *line, long len, + char *buffer, long buffer_size, void *priv) +{ + char *line_buffer = xstrndup(line, len); /* make NUL terminated */ + struct ff_regs *regs = priv; + regmatch_t pmatch[2]; + int result = 0, i; + + for (i = 0; i < regs->nr; i++) { + struct ff_reg *reg = regs->array + i; + if (reg->negate ^ !!regexec(®->re, + line_buffer, 2, pmatch, 0)) { + free(line_buffer); + return -1; + } + } + i = pmatch[1].rm_so >= 0 ? 1 : 0; + line += pmatch[i].rm_so; + result = pmatch[i].rm_eo - pmatch[i].rm_so; + if (result > buffer_size) + result = buffer_size; + else + while (result > 0 && (isspace(line[result - 1]) || + line[result - 1] == '\n')) + result--; + memcpy(buffer, line, result); + free(line_buffer); + return result; +} + +void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value) +{ + int i; + struct ff_regs *regs; + + xecfg->find_func = ff_regexp; + regs = xecfg->find_func_priv = xmalloc(sizeof(struct ff_regs)); + for (i = 0, regs->nr = 1; value[i]; i++) + if (value[i] == '\n') + regs->nr++; + regs->array = xmalloc(regs->nr * sizeof(struct ff_reg)); + for (i = 0; i < regs->nr; i++) { + struct ff_reg *reg = regs->array + i; + const char *ep = strchr(value, '\n'), *expression; + char *buffer = NULL; + + reg->negate = (*value == '!'); + if (reg->negate && i == regs->nr - 1) + die("Last expression must not be negated: %s", value); + if (*value == '!') + value++; + if (ep) + expression = buffer = xstrndup(value, ep - value); + else + expression = value; + if (regcomp(®->re, expression, 0)) + die("Invalid regexp to look for hunk header: %s", expression); + if (buffer) + free(buffer); + value = ep + 1; + } +} diff --git a/xdiff-interface.h b/xdiff-interface.h index 536f4e4d97..fb742dbb6b 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -20,4 +20,6 @@ int parse_hunk_header(char *line, int len, int read_mmfile(mmfile_t *ptr, const char *filename); int buffer_is_binary(const char *ptr, unsigned long size); +extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line); + #endif diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 9402bb0799..c00ddaa6e9 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -73,9 +73,13 @@ typedef struct s_xdemitcb { int (*outf)(void *, mmbuffer_t *, int); } xdemitcb_t; +typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv); + typedef struct s_xdemitconf { long ctxlen; unsigned long flags; + find_func_t find_func; + void *find_func_priv; } xdemitconf_t; typedef struct s_bdiffparam { diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 4b6e639112..d3d9c845c6 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -69,7 +69,24 @@ static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) { } -static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll) { +static long def_ff(const char *rec, long len, char *buf, long sz, void *priv) +{ + if (len > 0 && + (isalpha((unsigned char)*rec) || /* identifier? */ + *rec == '_' || /* also identifier? */ + *rec == '$')) { /* identifiers from VMS and other esoterico */ + if (len > sz) + len = sz; + while (0 < len && isspace((unsigned char)rec[len - 1])) + len--; + memcpy(buf, rec, len); + return len; + } + return -1; +} + +static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll, + find_func_t ff, void *ff_priv) { /* * Be quite stupid about this for now. Find a line in the old file @@ -80,22 +97,12 @@ static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll) { const char *rec; long len; - *ll = 0; while (i-- > 0) { len = xdl_get_rec(xf, i, &rec); - if (len > 0 && - (isalpha((unsigned char)*rec) || /* identifier? */ - *rec == '_' || /* also identifier? */ - *rec == '$')) { /* mysterious GNU diff's invention */ - if (len > sz) - len = sz; - while (0 < len && isspace((unsigned char)rec[len - 1])) - len--; - memcpy(buf, rec, len); - *ll = len; + if ((*ll = ff(rec, len, buf, sz, ff_priv)) >= 0) return; - } } + *ll = 0; } @@ -120,6 +127,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, xdchange_t *xch, *xche; char funcbuf[80]; long funclen = 0; + find_func_t ff = xecfg->find_func ? xecfg->find_func : def_ff; if (xecfg->flags & XDL_EMIT_COMMON) return xdl_emit_common(xe, xscr, ecb, xecfg); @@ -143,7 +151,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, if (xecfg->flags & XDL_EMIT_FUNCNAMES) { xdl_find_func(&xe->xdf1, s1, funcbuf, - sizeof(funcbuf), &funclen); + sizeof(funcbuf), &funclen, + ff, xecfg->find_func_priv); } if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, funcbuf, funclen, ecb) < 0) -- cgit v1.3 From e0e324a4dc18a4341e1320a7cfac9733d81f8b0b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 7 Jul 2007 01:49:58 -0700 Subject: Fix configuration syntax to specify customized hunk header patterns. This updates the hunk header customization syntax. The special case 'funcname' attribute is gone. You assign the name of the type of contents to path's "diff" attribute as a string value in .gitattributes like this: *.java diff=java *.perl diff=perl *.doc diff=doc If you supply "diff..funcname" variable via the configuration mechanism (e.g. in $HOME/.gitconfig), the value is used as the regexp set to find the line to use for the hunk header (the variable is called "funcname" because such a line typically is the one that has the name of the function in programming language source text). If there is no such configuration, built-in default is used, if any. Currently there are two default patterns: default and java. Signed-off-by: Junio C Hamano --- diff.c | 147 ++++++++++++++++++++++++++--------------------- diffcore.h | 2 +- t/t4018-diff-funcname.sh | 6 +- 3 files changed, 84 insertions(+), 71 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index 04e7e91adf..21e61af265 100644 --- a/diff.c +++ b/diff.c @@ -56,6 +56,14 @@ static struct ll_diff_driver { char *cmd; } *user_diff, **user_diff_tail; +static void read_config_if_needed(void) +{ + if (!user_diff_tail) { + user_diff_tail = &user_diff; + git_config(git_diff_ui_config); + } +} + /* * Currently there is only "diff..command" variable; * because there are "diff.color." variables, we are parsing @@ -93,6 +101,45 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val return 0; } +/* + * 'diff..funcname' attribute can be specified in the configuration + * to define a customized regexp to find the beginning of a function to + * be used for hunk header lines of "diff -p" style output. + */ +static struct funcname_pattern { + char *name; + char *pattern; + struct funcname_pattern *next; +} *funcname_pattern_list; + +static int parse_funcname_pattern(const char *var, const char *ep, const char *value) +{ + const char *name; + int namelen; + struct funcname_pattern *pp; + + name = var + 5; /* "diff." */ + namelen = ep - name; + + for (pp = funcname_pattern_list; pp; pp = pp->next) + if (!strncmp(pp->name, name, namelen) && !pp->name[namelen]) + break; + if (!pp) { + char *namebuf; + pp = xcalloc(1, sizeof(*pp)); + namebuf = xmalloc(namelen + 1); + memcpy(namebuf, name, namelen); + namebuf[namelen] = 0; + pp->name = namebuf; + pp->next = funcname_pattern_list; + funcname_pattern_list = pp; + } + if (pp->pattern) + free(pp->pattern); + pp->pattern = xstrdup(value); + return 0; +} + /* * These are to give UI layer defaults. * The core-level commands such as git-diff-files should @@ -122,8 +169,12 @@ int git_diff_ui_config(const char *var, const char *value) if (!prefixcmp(var, "diff.")) { const char *ep = strrchr(var, '.'); - if (ep != var + 4 && !strcmp(ep, ".command")) - return parse_lldiff_command(var, ep, value); + if (ep != var + 4) { + if (!strcmp(ep, ".command")) + return parse_lldiff_command(var, ep, value); + if (!strcmp(ep, ".funcname")) + return parse_funcname_pattern(var, ep, value); + } } if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) { int slot = parse_diff_color_slot(var, 11); @@ -1101,43 +1152,39 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two) static void setup_diff_attr_check(struct git_attr_check *check) { static struct git_attr *attr_diff; - static struct git_attr *attr_diff_func_name; if (!attr_diff) { attr_diff = git_attr("diff", 4); - attr_diff_func_name = git_attr("funcname", 8); } check[0].attr = attr_diff; - check[1].attr = attr_diff_func_name; } static void diff_filespec_check_attr(struct diff_filespec *one) { - struct git_attr_check attr_diff_check[2]; + struct git_attr_check attr_diff_check; if (one->checked_attr) return; - setup_diff_attr_check(attr_diff_check); + setup_diff_attr_check(&attr_diff_check); one->is_binary = 0; - one->hunk_header_ident = NULL; + one->funcname_pattern_ident = NULL; - if (!git_checkattr(one->path, ARRAY_SIZE(attr_diff_check), attr_diff_check)) { + if (!git_checkattr(one->path, 1, &attr_diff_check)) { const char *value; /* binaryness */ - value = attr_diff_check[0].value; + value = attr_diff_check.value; if (ATTR_TRUE(value)) ; else if (ATTR_FALSE(value)) one->is_binary = 1; - /* hunk header ident */ - value = attr_diff_check[1].value; + /* funcname pattern ident */ if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value)) ; else - one->hunk_header_ident = value; + one->funcname_pattern_ident = value; } if (!one->data && DIFF_FILE_VALID(one)) @@ -1154,54 +1201,23 @@ int diff_filespec_is_binary(struct diff_filespec *one) return one->is_binary; } -static struct hunk_header_regexp { - char *name; - char *regexp; - struct hunk_header_regexp *next; -} *hunk_header_regexp_list, **hunk_header_regexp_tail; - -static int hunk_header_config(const char *var, const char *value) +static const char *funcname_pattern(const char *ident) { - static const char funcname[] = "funcname."; - struct hunk_header_regexp *hh; + struct funcname_pattern *pp; - if (prefixcmp(var, funcname)) - return 0; - var += strlen(funcname); - for (hh = hunk_header_regexp_list; hh; hh = hh->next) - if (!strcmp(var, hh->name)) { - free(hh->regexp); - hh->regexp = xstrdup(value); - return 0; - } - hh = xcalloc(1, sizeof(*hh)); - hh->name = xstrdup(var); - hh->regexp = xstrdup(value); - hh->next = NULL; - *hunk_header_regexp_tail = hh; - return 0; -} - -static const char *hunk_header_regexp(const char *ident) -{ - struct hunk_header_regexp *hh; - - if (!hunk_header_regexp_tail) { - hunk_header_regexp_tail = &hunk_header_regexp_list; - git_config(hunk_header_config); - } - for (hh = hunk_header_regexp_list; hh; hh = hh->next) - if (!strcmp(ident, hh->name)) - return hh->regexp; + read_config_if_needed(); + for (pp = funcname_pattern_list; pp; pp = pp->next) + if (!strcmp(ident, pp->name)) + return pp->pattern; return NULL; } -static const char *diff_hunk_header_regexp(struct diff_filespec *one) +static const char *diff_funcname_pattern(struct diff_filespec *one) { - const char *ident, *regexp; + const char *ident, *pattern; diff_filespec_check_attr(one); - ident = one->hunk_header_ident; + ident = one->funcname_pattern_ident; if (!ident) /* @@ -1209,12 +1225,12 @@ static const char *diff_hunk_header_regexp(struct diff_filespec *one) * regexp is used; otherwise NULL is returned and xemit uses * the built-in default. */ - return hunk_header_regexp("default"); + return funcname_pattern("default"); /* Look up custom "funcname.$ident" regexp from config. */ - regexp = hunk_header_regexp(ident); - if (regexp) - return regexp; + pattern = funcname_pattern(ident); + if (pattern) + return pattern; /* * And define built-in fallback patterns here. Note that @@ -1304,11 +1320,11 @@ static void builtin_diff(const char *name_a, xdemitconf_t xecfg; xdemitcb_t ecb; struct emit_callback ecbdata; - const char *hunk_header_regexp; + const char *funcname_pattern; - hunk_header_regexp = diff_hunk_header_regexp(one); - if (!hunk_header_regexp) - hunk_header_regexp = diff_hunk_header_regexp(two); + funcname_pattern = diff_funcname_pattern(one); + if (!funcname_pattern) + funcname_pattern = diff_funcname_pattern(two); memset(&xecfg, 0, sizeof(xecfg)); memset(&ecbdata, 0, sizeof(ecbdata)); @@ -1318,8 +1334,8 @@ static void builtin_diff(const char *name_a, xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts; xecfg.ctxlen = o->context; xecfg.flags = XDL_EMIT_FUNCNAMES; - if (hunk_header_regexp) - xdiff_set_find_func(&xecfg, hunk_header_regexp); + if (funcname_pattern) + xdiff_set_find_func(&xecfg, funcname_pattern); if (!diffopts) ; else if (!prefixcmp(diffopts, "--unified=")) @@ -1862,10 +1878,7 @@ static const char *external_diff_attr(const char *name) !ATTR_UNSET(value)) { struct ll_diff_driver *drv; - if (!user_diff_tail) { - user_diff_tail = &user_diff; - git_config(git_diff_ui_config); - } + read_config_if_needed(); for (drv = user_diff; drv; drv = drv->next) if (!strcmp(drv->name, value)) return drv->cmd; diff --git a/diffcore.h b/diffcore.h index 05985147ea..eef17c4ca2 100644 --- a/diffcore.h +++ b/diffcore.h @@ -27,7 +27,7 @@ struct diff_filespec { char *path; void *data; void *cnt_data; - const void *hunk_header_ident; + const char *funcname_pattern_ident; unsigned long size; int xfrm_flags; /* for use by the xfrm */ unsigned short mode; /* file mode */ diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index dc7a47b3f1..f9db81d3ab 100644 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -38,12 +38,12 @@ test_expect_success 'default behaviour' ' ' test_expect_success 'preset java pattern' ' - echo "*.java funcname=java" >.gitattributes && + echo "*.java diff=java" >.gitattributes && git diff Beer.java Beer-correct.java | grep "^@@.*@@ public static void main(" ' -git config funcname.java '!static +git config diff.java.funcname '!static !String [^ ].*s.*' @@ -53,7 +53,7 @@ test_expect_success 'custom pattern' ' ' test_expect_success 'last regexp must not be negated' ' - git config diff.functionnameregexp "!static" && + git config diff.java.funcname "!static" && ! git diff Beer.java Beer-correct.java ' -- cgit v1.3 From eede7b7d110e2c354235d7a3f6c8f1644b5120e5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Sep 2007 15:29:42 -0400 Subject: diffcore-rename: cache file deltas We find rename candidates by computing a fingerprint hash of each file, and then comparing those fingerprints. There are inherently O(n^2) comparisons, so it pays in CPU time to hoist the (rather expensive) computation of the fingerprint out of that loop (or to cache it once we have computed it once). Previously, we didn't keep the filespec information around because then we had the potential to consume a great deal of memory. However, instead of keeping all of the filespec data, we can instead just keep the fingerprint. This patch implements and uses diff_free_filespec_data_large to accomplish that goal. We also have to change estimate_similarity not to needlessly repopulate the filespec data when we already have the hash. Practical tests showed 4.5x speedup for a 10% memory usage increase. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.c | 7 ++++++- diffcore-rename.c | 7 ++++--- diffcore.h | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index 0ee9ea1c1b..35e3c61986 100644 --- a/diff.c +++ b/diff.c @@ -1675,7 +1675,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) return 0; } -void diff_free_filespec_data(struct diff_filespec *s) +void diff_free_filespec_data_large(struct diff_filespec *s) { if (s->should_free) free(s->data); @@ -1686,6 +1686,11 @@ void diff_free_filespec_data(struct diff_filespec *s) s->should_free = s->should_munmap = 0; s->data = NULL; } +} + +void diff_free_filespec_data(struct diff_filespec *s) +{ + diff_free_filespec_data_large(s); free(s->cnt_data); s->cnt_data = NULL; } diff --git a/diffcore-rename.c b/diffcore-rename.c index 41b35c3a9e..4fc200064a 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -184,7 +184,8 @@ static int estimate_similarity(struct diff_filespec *src, if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) return 0; - if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0)) + if ((!src->cnt_data && diff_populate_filespec(src, 0)) + || (!dst->cnt_data && diff_populate_filespec(dst, 0))) return 0; /* error but caught downstream */ @@ -377,10 +378,10 @@ void diffcore_rename(struct diff_options *options) m->score = estimate_similarity(one, two, minimum_score); m->name_score = basename_same(one, two); - diff_free_filespec_data(one); + diff_free_filespec_data_large(one); } /* We do not need the text anymore */ - diff_free_filespec_data(two); + diff_free_filespec_data_large(two); dst_cnt++; } /* cost matrix sorted by most to least similar pair */ diff --git a/diffcore.h b/diffcore.h index eef17c4ca2..4bf175bda9 100644 --- a/diffcore.h +++ b/diffcore.h @@ -48,6 +48,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *, extern int diff_populate_filespec(struct diff_filespec *, int); extern void diff_free_filespec_data(struct diff_filespec *); +extern void diff_free_filespec_data_large(struct diff_filespec *); extern int diff_filespec_is_binary(struct diff_filespec *); struct diff_filepair { -- cgit v1.3 From 8ae92e63895000ff9b12046325ae381f3c17d414 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 2 Oct 2007 21:01:03 -0700 Subject: rename diff_free_filespec_data_large() to diff_free_filespec_blob() Signed-off-by: Junio C Hamano --- diff.c | 4 ++-- diffcore-rename.c | 4 ++-- diffcore.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index 35e3c61986..71b340c536 100644 --- a/diff.c +++ b/diff.c @@ -1675,7 +1675,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) return 0; } -void diff_free_filespec_data_large(struct diff_filespec *s) +void diff_free_filespec_blob(struct diff_filespec *s) { if (s->should_free) free(s->data); @@ -1690,7 +1690,7 @@ void diff_free_filespec_data_large(struct diff_filespec *s) void diff_free_filespec_data(struct diff_filespec *s) { - diff_free_filespec_data_large(s); + diff_free_filespec_blob(s); free(s->cnt_data); s->cnt_data = NULL; } diff --git a/diffcore-rename.c b/diffcore-rename.c index 4fc200064a..142e5376dd 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -378,10 +378,10 @@ void diffcore_rename(struct diff_options *options) m->score = estimate_similarity(one, two, minimum_score); m->name_score = basename_same(one, two); - diff_free_filespec_data_large(one); + diff_free_filespec_blob(one); } /* We do not need the text anymore */ - diff_free_filespec_data_large(two); + diff_free_filespec_blob(two); dst_cnt++; } /* cost matrix sorted by most to least similar pair */ diff --git a/diffcore.h b/diffcore.h index 4bf175bda9..eb618b1ec0 100644 --- a/diffcore.h +++ b/diffcore.h @@ -48,7 +48,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *, extern int diff_populate_filespec(struct diff_filespec *, int); extern void diff_free_filespec_data(struct diff_filespec *); -extern void diff_free_filespec_data_large(struct diff_filespec *); +extern void diff_free_filespec_blob(struct diff_filespec *); extern int diff_filespec_is_binary(struct diff_filespec *); struct diff_filepair { -- cgit v1.3 From 9fb88419ba85e641006c80db53620423f37f1c93 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Oct 2007 11:19:10 -0700 Subject: Ref-count the filespecs used by diffcore Rather than copy the filespecs when introducing new versions of them (for rename or copy detection), use a refcount and increment the count when reusing the diff_filespec. This avoids unnecessary allocations, but the real reason behind this is a future enhancement: we will want to track shared data across the copy/rename detection. In order to efficiently notice when a filespec is used by a rename, the rename machinery wants to keep track of a rename usage count which is shared across all different users of the filespec. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 15 +++++++++++---- diffcore-rename.c | 16 ++++++---------- diffcore.h | 2 ++ 3 files changed, 19 insertions(+), 14 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index dfb8595b70..0b320f6b96 100644 --- a/diff.c +++ b/diff.c @@ -1440,9 +1440,18 @@ struct diff_filespec *alloc_filespec(const char *path) memset(spec, 0, sizeof(*spec)); spec->path = (char *)(spec + 1); memcpy(spec->path, path, namelen+1); + spec->count = 1; return spec; } +void free_filespec(struct diff_filespec *spec) +{ + if (!--spec->count) { + diff_free_filespec_data(spec); + free(spec); + } +} + void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1, unsigned short mode) { @@ -2435,10 +2444,8 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue, void diff_free_filepair(struct diff_filepair *p) { - diff_free_filespec_data(p->one); - diff_free_filespec_data(p->two); - free(p->one); - free(p->two); + free_filespec(p->one); + free_filespec(p->two); free(p); } diff --git a/diffcore-rename.c b/diffcore-rename.c index 2077a9b981..3da06b702b 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -209,21 +209,19 @@ static int estimate_similarity(struct diff_filespec *src, static void record_rename_pair(int dst_index, int src_index, int score) { - struct diff_filespec *one, *two, *src, *dst; + struct diff_filespec *src, *dst; struct diff_filepair *dp; if (rename_dst[dst_index].pair) die("internal error: dst already matched."); src = rename_src[src_index].one; - one = alloc_filespec(src->path); - fill_filespec(one, src->sha1, src->mode); + src->count++; dst = rename_dst[dst_index].two; - two = alloc_filespec(dst->path); - fill_filespec(two, dst->sha1, dst->mode); + dst->count++; - dp = diff_queue(NULL, one, two); + dp = diff_queue(NULL, src, dst); dp->renamed_pair = 1; if (!strcmp(src->path, dst->path)) dp->score = rename_src[src_index].score; @@ -526,10 +524,8 @@ void diffcore_rename(struct diff_options *options) } } - for (i = 0; i < rename_dst_nr; i++) { - diff_free_filespec_data(rename_dst[i].two); - free(rename_dst[i].two); - } + for (i = 0; i < rename_dst_nr; i++) + free_filespec(rename_dst[i].two); free(rename_dst); rename_dst = NULL; diff --git a/diffcore.h b/diffcore.h index eb618b1ec0..30055ac5a9 100644 --- a/diffcore.h +++ b/diffcore.h @@ -29,6 +29,7 @@ struct diff_filespec { void *cnt_data; const char *funcname_pattern_ident; unsigned long size; + int count; /* Reference count */ int xfrm_flags; /* for use by the xfrm */ unsigned short mode; /* file mode */ unsigned sha1_valid : 1; /* if true, use sha1 and trust mode; @@ -43,6 +44,7 @@ struct diff_filespec { }; extern struct diff_filespec *alloc_filespec(const char *); +extern void free_filespec(struct diff_filespec *); extern void fill_filespec(struct diff_filespec *, const unsigned char *, unsigned short); -- cgit v1.3 From 644797119d7a3b7a043a51a9cccd8758f8451f91 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Oct 2007 11:20:56 -0700 Subject: copy vs rename detection: avoid unnecessary O(n*m) loops The core rename detection had some rather stupid code to check if a pathname was used by a later modification or rename, which basically walked the whole pathname space for all renames for each rename, in order to tell whether it was a pure rename (no remaining users) or should be considered a copy (other users of the source file remaining). That's really silly, since we can just keep a count of users around, and replace all those complex and expensive loops with just testing that simple counter (but this all depends on the previous commit that shared the diff_filespec data structure by using a separate reference count). Note that the reference count is not the same as the rename count: they behave otherwise rather similarly, but the reference count is tied to the allocation (and decremented at de-allocation, so that when it turns zero we can get rid of the memory), while the rename count is tied to the renames and is decremented when we find a rename (so that when it turns zero we know that it was a rename, not a copy). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 40 ++++++++++++++------------------ diffcore-rename.c | 68 ++++++++++++++----------------------------------------- diffcore.h | 2 +- 3 files changed, 35 insertions(+), 75 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index 0b320f6b96..af85b94d1b 100644 --- a/diff.c +++ b/diff.c @@ -2597,9 +2597,9 @@ void diff_debug_filepair(const struct diff_filepair *p, int i) { diff_debug_filespec(p->one, i, "one"); diff_debug_filespec(p->two, i, "two"); - fprintf(stderr, "score %d, status %c stays %d broken %d\n", + fprintf(stderr, "score %d, status %c rename_used %d broken %d\n", p->score, p->status ? p->status : '?', - p->source_stays, p->broken_pair); + p->one->rename_used, p->broken_pair); } void diff_debug_queue(const char *msg, struct diff_queue_struct *q) @@ -2617,8 +2617,8 @@ void diff_debug_queue(const char *msg, struct diff_queue_struct *q) static void diff_resolve_rename_copy(void) { - int i, j; - struct diff_filepair *p, *pp; + int i; + struct diff_filepair *p; struct diff_queue_struct *q = &diff_queued_diff; diff_debug_queue("resolve-rename-copy", q); @@ -2640,27 +2640,21 @@ static void diff_resolve_rename_copy(void) * either in-place edit or rename/copy edit. */ else if (DIFF_PAIR_RENAME(p)) { - if (p->source_stays) { - p->status = DIFF_STATUS_COPIED; - continue; - } - /* See if there is some other filepair that - * copies from the same source as us. If so - * we are a copy. Otherwise we are either a - * copy if the path stays, or a rename if it - * does not, but we already handled "stays" case. + /* + * A rename might have re-connected a broken + * pair up, causing the pathnames to be the + * same again. If so, that's not a rename at + * all, just a modification.. + * + * Otherwise, see if this source was used for + * multiple renames, in which case we decrement + * the count, and call it a copy. */ - for (j = i + 1; j < q->nr; j++) { - pp = q->queue[j]; - if (strcmp(pp->one->path, p->one->path)) - continue; /* not us */ - if (!DIFF_PAIR_RENAME(pp)) - continue; /* not a rename/copy */ - /* pp is a rename/copy from the same source */ + if (!strcmp(p->one->path, p->two->path)) + p->status = DIFF_STATUS_MODIFIED; + else if (--p->one->rename_used > 0) p->status = DIFF_STATUS_COPIED; - break; - } - if (!p->status) + else p->status = DIFF_STATUS_RENAMED; } else if (hashcmp(p->one->sha1, p->two->sha1) || diff --git a/diffcore-rename.c b/diffcore-rename.c index 3da06b702b..edb2424d13 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -55,12 +55,10 @@ static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two, static struct diff_rename_src { struct diff_filespec *one; unsigned short score; /* to remember the break score */ - unsigned src_path_left : 1; } *rename_src; static int rename_src_nr, rename_src_alloc; static struct diff_rename_src *register_rename_src(struct diff_filespec *one, - int src_path_left, unsigned short score) { int first, last; @@ -92,7 +90,6 @@ static struct diff_rename_src *register_rename_src(struct diff_filespec *one, (rename_src_nr - first - 1) * sizeof(*rename_src)); rename_src[first].one = one; rename_src[first].score = score; - rename_src[first].src_path_left = src_path_left; return &(rename_src[first]); } @@ -216,6 +213,7 @@ static void record_rename_pair(int dst_index, int src_index, int score) die("internal error: dst already matched."); src = rename_src[src_index].one; + src->rename_used++; src->count++; dst = rename_dst[dst_index].two; @@ -227,7 +225,6 @@ static void record_rename_pair(int dst_index, int src_index, int score) dp->score = rename_src[src_index].score; else dp->score = score; - dp->source_stays = rename_src[src_index].src_path_left; rename_dst[dst_index].pair = dp; } @@ -245,21 +242,6 @@ static int score_compare(const void *a_, const void *b_) return b->score - a->score; } -static int compute_stays(struct diff_queue_struct *q, - struct diff_filespec *one) -{ - int i; - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - if (strcmp(one->path, p->two->path)) - continue; - if (DIFF_PAIR_RENAME(p)) { - return 0; /* something else is renamed into this */ - } - } - return 1; -} - /* * Find exact renames first. * @@ -338,15 +320,25 @@ void diffcore_rename(struct diff_options *options) locate_rename_dst(p->two, 1); } else if (!DIFF_FILE_VALID(p->two)) { - /* If the source is a broken "delete", and + /* + * If the source is a broken "delete", and * they did not really want to get broken, * that means the source actually stays. + * So we increment the "rename_used" score + * by one, to indicate ourselves as a user + */ + if (p->broken_pair && !p->score) + p->one->rename_used++; + register_rename_src(p->one, p->score); + } + else if (detect_rename == DIFF_DETECT_COPY) { + /* + * Increment the "rename_used" score by + * one, to indicate ourselves as a user. */ - int stays = (p->broken_pair && !p->score); - register_rename_src(p->one, stays, p->score); + p->one->rename_used++; + register_rename_src(p->one, p->score); } - else if (detect_rename == DIFF_DETECT_COPY) - register_rename_src(p->one, 1, p->score); } if (rename_dst_nr == 0 || rename_src_nr == 0) goto cleanup; /* nothing to do */ @@ -472,16 +464,7 @@ void diffcore_rename(struct diff_options *options) pair_to_free = p; } else { - for (j = 0; j < rename_dst_nr; j++) { - if (!rename_dst[j].pair) - continue; - if (strcmp(rename_dst[j].pair-> - one->path, - p->one->path)) - continue; - break; - } - if (j < rename_dst_nr) + if (p->one->rename_used) /* this path remains */ pair_to_free = p; } @@ -507,23 +490,6 @@ void diffcore_rename(struct diff_options *options) *q = outq; diff_debug_queue("done collapsing", q); - /* We need to see which rename source really stays here; - * earlier we only checked if the path is left in the result, - * but even if a path remains in the result, if that is coming - * from copying something else on top of it, then the original - * source is lost and does not stay. - */ - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - if (DIFF_PAIR_RENAME(p) && p->source_stays) { - /* If one appears as the target of a rename-copy, - * then mark p->source_stays = 0; otherwise - * leave it as is. - */ - p->source_stays = compute_stays(q, p->one); - } - } - for (i = 0; i < rename_dst_nr; i++) free_filespec(rename_dst[i].two); diff --git a/diffcore.h b/diffcore.h index 30055ac5a9..cc96c20734 100644 --- a/diffcore.h +++ b/diffcore.h @@ -31,6 +31,7 @@ struct diff_filespec { unsigned long size; int count; /* Reference count */ int xfrm_flags; /* for use by the xfrm */ + int rename_used; /* Count of rename users */ unsigned short mode; /* file mode */ unsigned sha1_valid : 1; /* if true, use sha1 and trust mode; * if false, use the name and read from @@ -58,7 +59,6 @@ struct diff_filepair { struct diff_filespec *two; unsigned short int score; char status; /* M C R N D U (see Documentation/diff-format.txt) */ - unsigned source_stays : 1; /* all of R/C are copies */ unsigned broken_pair : 1; unsigned renamed_pair : 1; unsigned is_unmerged : 1; -- cgit v1.3 From 264e0b9a3c090930eec8dd16ecacf551dc685ac1 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Fri, 19 Sep 2008 22:12:53 +0200 Subject: Bust the ghost of long-defunct diffcore-pathspec. This concept was retired by 77882f6 (Retire diffcore-pathspec., 2006-04-10), more than 2 years ago. Signed-off-by: Yann Dirson Signed-off-by: Junio C Hamano --- Documentation/gitdiffcore.txt | 55 +++++++++++++++++-------------------------- diffcore.h | 1 - 2 files changed, 22 insertions(+), 34 deletions(-) (limited to 'diffcore.h') diff --git a/Documentation/gitdiffcore.txt b/Documentation/gitdiffcore.txt index 2bdbc3d4f6..e8041bc08f 100644 --- a/Documentation/gitdiffcore.txt +++ b/Documentation/gitdiffcore.txt @@ -36,11 +36,25 @@ files: - 'git-diff-tree' compares contents of two "tree" objects; -In all of these cases, the commands themselves compare -corresponding paths in the two sets of files. The result of -comparison is passed from these commands to what is internally -called "diffcore", in a format similar to what is output when -the -p option is not used. E.g. +In all of these cases, the commands themselves first optionally limit +the two sets of files by any pathspecs given on their command-lines, +and compare corresponding paths in the two resulting sets of files. + +The pathspecs are used to limit the world diff operates in. They remove +the filepairs outside the specified sets of pathnames. E.g. If the +input set of filepairs included: + +------------------------------------------------ +:100644 100644 bcd1234... 0123456... M junkfile +------------------------------------------------ + +but the command invocation was `git diff-files myfile`, then the +junkfile entry would be removed from the list because only "myfile" +is under consideration. + +The result of comparison is passed from these commands to what is +internally called "diffcore", in a format similar to what is output +when the -p option is not used. E.g. ------------------------------------------------ in-place edit :100644 100644 bcd1234... 0123456... M file0 @@ -52,9 +66,8 @@ unmerged :000000 000000 0000000... 0000000... U file6 The diffcore mechanism is fed a list of such comparison results (each of which is called "filepair", although at this point each of them talks about a single file), and transforms such a list -into another list. There are currently 6 such transformations: +into another list. There are currently 5 such transformations: -- diffcore-pathspec - diffcore-break - diffcore-rename - diffcore-merge-broken @@ -62,38 +75,14 @@ into another list. There are currently 6 such transformations: - diffcore-order These are applied in sequence. The set of filepairs 'git-diff-{asterisk}' -commands find are used as the input to diffcore-pathspec, and -the output from diffcore-pathspec is used as the input to the +commands find are used as the input to diffcore-break, and +the output from diffcore-break is used as the input to the next transformation. The final result is then passed to the output routine and generates either diff-raw format (see Output format sections of the manual for 'git-diff-{asterisk}' commands) or diff-patch format. -diffcore-pathspec: For Ignoring Files Outside Our Consideration ---------------------------------------------------------------- - -The first transformation in the chain is diffcore-pathspec, and -is controlled by giving the pathname parameters to the -'git-diff-{asterisk}' commands on the command line. The pathspec is used -to limit the world diff operates in. It removes the filepairs -outside the specified set of pathnames. E.g. If the input set -of filepairs included: - ------------------------------------------------- -:100644 100644 bcd1234... 0123456... M junkfile ------------------------------------------------- - -but the command invocation was `git diff-files myfile`, then the -junkfile entry would be removed from the list because only "myfile" -is under consideration. - -Implementation note. For performance reasons, 'git-diff-tree' -uses the pathname parameters on the command line to cull set of -filepairs it feeds the diffcore mechanism itself, and does not -use diffcore-pathspec, but the end result is the same. - - diffcore-break: For Splitting Up "Complete Rewrites" ---------------------------------------------------- diff --git a/diffcore.h b/diffcore.h index cc96c20734..8ae35785fd 100644 --- a/diffcore.h +++ b/diffcore.h @@ -92,7 +92,6 @@ extern struct diff_filepair *diff_queue(struct diff_queue_struct *, struct diff_filespec *); extern void diff_q(struct diff_queue_struct *, struct diff_filepair *); -extern void diffcore_pathspec(const char **pathspec); extern void diffcore_break(int); extern void diffcore_rename(struct diff_options *); extern void diffcore_merge_broken(void); -- cgit v1.3 From 122aa6f9c000d0d286898e2eb7b3504ac6cb9ebd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 5 Oct 2008 17:43:36 -0400 Subject: diff: introduce diff..binary The "diff" gitattribute is somewhat overloaded right now. It can say one of three things: 1. this file is definitely binary, or definitely not (i.e., diff or !diff) 2. this file should use an external diff engine (i.e., diff=foo, diff.foo.command = custom-script) 3. this file should use particular funcname patterns (i.e., diff=foo, diff.foo.(x?)funcname = some-regex) Most of the time, there is no conflict between these uses, since using one implies that the other is irrelevant (e.g., an external diff engine will decide for itself whether the file is binary). However, there is at least one conflicting situation: there is no way to say "use the regular rules to determine whether this file is binary, but if we do diff it textually, use this funcname pattern." That is, currently setting diff=foo indicates that the file is definitely text. This patch introduces a "binary" config option for a diff driver, so that one can explicitly set diff.foo.binary. We default this value to "don't know". That is, setting a diff attribute to "foo" and using "diff.foo.funcname" will have no effect on the binaryness of a file. To get the current behavior, one can set diff.foo.binary to true. This patch also has one additional advantage: it cleans up the interface to the userdiff code a bit. Before, calling code had to know more about whether attributes were false, true, or unset to determine binaryness. Now that binaryness is a property of a driver, we can represent these situations just by passing back a driver struct. Signed-off-by: Jeff King Signed-off-by: Shawn O. Pearce --- diff.c | 52 ++++++++++++++++++++++------------------------------ diffcore.h | 8 ++++++-- userdiff.c | 19 ++++++++++++++++--- userdiff.h | 4 +--- 4 files changed, 45 insertions(+), 38 deletions(-) (limited to 'diffcore.h') diff --git a/diff.c b/diff.c index d50355e842..dabd7f50ec 100644 --- a/diff.c +++ b/diff.c @@ -1268,46 +1268,37 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two) emit_binary_diff_body(file, two, one); } -static void diff_filespec_check_attr(struct diff_filespec *one) +void diff_filespec_load_driver(struct diff_filespec *one) { - struct userdiff_driver *drv; - int check_from_data = 0; - - if (one->checked_attr) - return; - - drv = userdiff_find_by_path(one->path); - one->is_binary = 0; - - /* binaryness */ - if (drv == USERDIFF_ATTR_TRUE) - ; - else if (drv == USERDIFF_ATTR_FALSE) - one->is_binary = 1; - else - check_from_data = 1; - - if (check_from_data) { - if (!one->data && DIFF_FILE_VALID(one)) - diff_populate_filespec(one, 0); - - if (one->data) - one->is_binary = buffer_is_binary(one->data, one->size); - } + if (!one->driver) + one->driver = userdiff_find_by_path(one->path); + if (!one->driver) + one->driver = userdiff_find_by_name("default"); } int diff_filespec_is_binary(struct diff_filespec *one) { - diff_filespec_check_attr(one); + if (one->is_binary == -1) { + diff_filespec_load_driver(one); + if (one->driver->binary != -1) + one->is_binary = one->driver->binary; + else { + if (!one->data && DIFF_FILE_VALID(one)) + diff_populate_filespec(one, 0); + if (one->data) + one->is_binary = buffer_is_binary(one->data, + one->size); + if (one->is_binary == -1) + one->is_binary = 0; + } + } return one->is_binary; } static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one) { - struct userdiff_driver *drv = userdiff_find_by_path(one->path); - if (!drv) - drv = userdiff_find_by_name("default"); - return drv && drv->funcname.pattern ? &drv->funcname : NULL; + diff_filespec_load_driver(one); + return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) @@ -1559,6 +1550,7 @@ struct diff_filespec *alloc_filespec(const char *path) spec->path = (char *)(spec + 1); memcpy(spec->path, path, namelen+1); spec->count = 1; + spec->is_binary = -1; return spec; } diff --git a/diffcore.h b/diffcore.h index 8ae35785fd..713cca785c 100644 --- a/diffcore.h +++ b/diffcore.h @@ -22,6 +22,8 @@ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ +struct userdiff_driver; + struct diff_filespec { unsigned char sha1[20]; char *path; @@ -40,8 +42,10 @@ struct diff_filespec { #define DIFF_FILE_VALID(spec) (((spec)->mode) != 0) unsigned should_free : 1; /* data should be free()'ed */ unsigned should_munmap : 1; /* data should be munmap()'ed */ - unsigned checked_attr : 1; - unsigned is_binary : 1; /* data should be considered "binary" */ + + struct userdiff_driver *driver; + /* data should be considered "binary"; -1 means "don't know yet" */ + int is_binary; }; extern struct diff_filespec *alloc_filespec(const char *); diff --git a/userdiff.c b/userdiff.c index 80e2857abb..58478a6912 100644 --- a/userdiff.c +++ b/userdiff.c @@ -7,7 +7,7 @@ static int ndrivers; static int drivers_alloc; #define FUNCNAME(name, pattern) \ - { name, NULL, { pattern, REG_EXTENDED } } + { name, NULL, -1, { pattern, REG_EXTENDED } } static struct userdiff_driver builtin_drivers[] = { FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), FUNCNAME("java", @@ -32,22 +32,23 @@ FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), +{ "default", NULL, -1, { NULL, 0 } }, }; #undef FUNCNAME static struct userdiff_driver driver_true = { "diff=true", NULL, + 0, { NULL, 0 } }; -struct userdiff_driver *USERDIFF_ATTR_TRUE = &driver_true; static struct userdiff_driver driver_false = { "!diff", NULL, + 1, { NULL, 0 } }; -struct userdiff_driver *USERDIFF_ATTR_FALSE = &driver_false; static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len) { @@ -89,6 +90,7 @@ static struct userdiff_driver *parse_driver(const char *var, drv = &drivers[ndrivers++]; memset(drv, 0, sizeof(*drv)); drv->name = xmemdupz(name, namelen); + drv->binary = -1; } return drv; } @@ -109,6 +111,15 @@ static int parse_string(const char **d, const char *k, const char *v) return 1; } +static int parse_tristate(int *b, const char *k, const char *v) +{ + if (v && !strcasecmp(v, "auto")) + *b = -1; + else + *b = git_config_bool(k, v); + return 1; +} + int userdiff_config_basic(const char *k, const char *v) { struct userdiff_driver *drv; @@ -117,6 +128,8 @@ int userdiff_config_basic(const char *k, const char *v) return parse_funcname(&drv->funcname, k, v, 0); if ((drv = parse_driver(k, v, "xfuncname"))) return parse_funcname(&drv->funcname, k, v, REG_EXTENDED); + if ((drv = parse_driver(k, v, "binary"))) + return parse_tristate(&drv->binary, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index c64c5f5669..1c1eb042b4 100644 --- a/userdiff.h +++ b/userdiff.h @@ -9,12 +9,10 @@ struct userdiff_funcname { struct userdiff_driver { const char *name; const char *external; + int binary; struct userdiff_funcname funcname; }; -extern struct userdiff_driver *USERDIFF_ATTR_TRUE; -extern struct userdiff_driver *USERDIFF_ATTR_FALSE; - int userdiff_config_basic(const char *k, const char *v); int userdiff_config_porcelain(const char *k, const char *v); struct userdiff_driver *userdiff_find_by_name(const char *name); -- cgit v1.3 From a5a323f33cd25829e0dde3939b196cf743d7d9d8 Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Sun, 2 Nov 2008 14:37:28 +0100 Subject: Add reference for status letters in documentation. Also fix error in diff_filepair::status documentation, and point to the in-code reference as well as the doc. Signed-off-by: Yann Dirson Signed-off-by: Junio C Hamano --- Documentation/diff-format.txt | 16 ++++++++++++++++ diffcore.h | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'diffcore.h') diff --git a/Documentation/diff-format.txt b/Documentation/diff-format.txt index 400cbb3b1c..aafd3a3941 100644 --- a/Documentation/diff-format.txt +++ b/Documentation/diff-format.txt @@ -46,6 +46,22 @@ That is, from the left to the right: . path for "dst"; only exists for C or R. . an LF or a NUL when '-z' option is used, to terminate the record. +Possible status letters are: + +- A: addition of a file +- C: copy of a file into a new one +- D: deletion of a file +- M: modification of the contents or mode of a file +- R: renaming of a file +- T: change in the type of the file +- U: file is unmerged (you must complete the merge before it can +be committed) +- X: "unknown" change type (most probably a bug, please report it) + +Status letters C and M are always followed by a score (denoting the +percentage of similarity between the source and target of the move or +copy), and are the only ones to be so. + is shown as all 0's if a file is new on the filesystem and it is out of sync with the index. diff --git a/diffcore.h b/diffcore.h index 8ae35785fd..1ebfdae8b8 100644 --- a/diffcore.h +++ b/diffcore.h @@ -58,7 +58,7 @@ struct diff_filepair { struct diff_filespec *one; struct diff_filespec *two; unsigned short int score; - char status; /* M C R N D U (see Documentation/diff-format.txt) */ + char status; /* M C R A D U etc. (see Documentation/diff-format.txt or DIFF_STATUS_* in diff.h) */ unsigned broken_pair : 1; unsigned renamed_pair : 1; unsigned is_unmerged : 1; -- cgit v1.3