From 051308f6e9cebeb76b8fb4f52b7e9e7ce064445c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 4 May 2006 16:51:44 -0700 Subject: binary patch. This adds "binary patch" to the diff output and teaches apply what to do with them. On the diff generation side, traditionally, we said "Binary files differ\n" without giving anything other than the preimage and postimage object name on the index line. This was good enough for applying a patch generated from your own repository (very useful while rebasing), because the postimage would be available in such a case. However, this was not useful when the recipient of such a patch via e-mail were to apply it, even if the preimage was available. This patch allows the diff to generate "binary" patch when operating under --full-index option. The binary patch follows the usual extended git diff headers, and looks like this: "GIT binary patch\n" "\n" ... "\n" Each line is prefixed with a "length-byte", whose value is upper or lowercase alphabet that encodes number of bytes that the data on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ..., 'Z' means 26, 'a' means 27, ...). is 1 or more groups of 5-byte sequence, each of which encodes up to 4 bytes in base85 encoding. Because 52 / 4 * 5 = 65 and we have the length byte, an output line is capped to 66 characters. The payload is the same diff-delta as we use in the packfiles. On the consumption side, git-apply now can decode and apply the binary patch when --allow-binary-replacement is given, the diff was generated with --full-index, and the receiving repository has the preimage blob, which is the same condition as it always required when accepting an "Binary files differ\n" patch. Signed-off-by: Junio C Hamano --- diff.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 99 insertions(+), 7 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index c845c87113..b14d897f19 100644 --- a/diff.c +++ b/diff.c @@ -8,6 +8,7 @@ #include "quote.h" #include "diff.h" #include "diffcore.h" +#include "delta.h" #include "xdiff-interface.h" static int use_size_cache; @@ -391,6 +392,90 @@ static void show_stats(struct diffstat_t* data) total_files, adds, dels); } +static void *encode_delta_size(void *data, unsigned long size) +{ + unsigned char *cp = data; + *cp++ = size; + size >>= 7; + while (size) { + cp[-1] |= 0x80; + *cp++ = size; + size >>= 7; + } + return cp; +} + +static void *safe_diff_delta(const unsigned char *src, unsigned long src_size, + const unsigned char *dst, unsigned long dst_size, + unsigned long *delta_size) +{ + unsigned long bufsize; + unsigned char *data; + unsigned char *cp; + + if (src_size && dst_size) + return diff_delta(src, src_size, dst, dst_size, delta_size, 0); + + /* diff-delta does not like to do delta with empty, so + * we do that by hand here. Sigh... + */ + + if (!src_size) + /* literal copy can be done only 127-byte at a time. + */ + bufsize = dst_size + (dst_size / 127) + 40; + else + bufsize = 40; + data = xmalloc(bufsize); + cp = encode_delta_size(data, src_size); + cp = encode_delta_size(cp, dst_size); + + if (dst_size) { + /* copy out literally */ + while (dst_size) { + int sz = (127 < dst_size) ? 127 : dst_size; + *cp++ = sz; + dst_size -= sz; + while (sz) { + *cp++ = *dst++; + sz--; + } + } + } + *delta_size = (cp - data); + return data; +} + +static void emit_binary_diff(mmfile_t *one, mmfile_t *two) +{ + void *delta, *cp; + unsigned long delta_size; + + printf("GIT binary patch\n"); + delta = safe_diff_delta(one->ptr, one->size, + two->ptr, two->size, + &delta_size); + if (!delta) + die("unable to generate binary diff"); + + /* emit delta encoded in base85 */ + cp = delta; + while (delta_size) { + int bytes = (52 < delta_size) ? 52 : delta_size; + char line[70]; + delta_size -= bytes; + if (bytes <= 26) + line[0] = bytes + 'A' - 1; + else + line[0] = bytes - 26 + 'a' - 1; + encode_85(line + 1, cp, bytes); + cp += bytes; + puts(line); + } + printf("\n"); + free(delta); +} + #define FIRST_FEW_BYTES 8000 static int mmfile_is_binary(mmfile_t *mf) { @@ -407,6 +492,7 @@ static void builtin_diff(const char *name_a, struct diff_filespec *one, struct diff_filespec *two, const char *xfrm_msg, + struct diff_options *o, int complete_rewrite) { mmfile_t mf1, mf2; @@ -451,8 +537,13 @@ static void builtin_diff(const char *name_a, if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); - if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) - printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) { + if (o->full_index) + emit_binary_diff(&mf1, &mf2); + else + printf("Binary files %s and %s differ\n", + lbl[0], lbl[1]); + } else { /* Crazy xdl interfaces.. */ const char *diffopts = getenv("GIT_DIFF_OPTS"); @@ -928,6 +1019,7 @@ static void run_diff_cmd(const char *pgm, struct diff_filespec *one, struct diff_filespec *two, const char *xfrm_msg, + struct diff_options *o, int complete_rewrite) { if (pgm) { @@ -937,7 +1029,7 @@ static void run_diff_cmd(const char *pgm, } if (one && two) builtin_diff(name, other ? other : name, - one, two, xfrm_msg, complete_rewrite); + one, two, xfrm_msg, o, complete_rewrite); else printf("* Unmerged path %s\n", name); } @@ -971,7 +1063,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) if (DIFF_PAIR_UNMERGED(p)) { /* unmerged */ - run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); + run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, o, 0); return; } @@ -1041,14 +1133,14 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) * needs to be split into deletion and creation. */ struct diff_filespec *null = alloc_filespec(two->path); - run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, one, null, xfrm_msg, o, 0); free(null); null = alloc_filespec(one->path); - run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, null, two, xfrm_msg, o, 0); free(null); } else - run_diff_cmd(pgm, name, other, one, two, xfrm_msg, + run_diff_cmd(pgm, name, other, one, two, xfrm_msg, o, complete_rewrite); free(name_munged); -- cgit v1.3 From 0660626caff6ac3745cd7b7908a2ca540141a2ec Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 5 May 2006 02:41:53 -0700 Subject: binary diff: further updates. This updates the user interface and generated diff data format. * "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p. * "apply --allow-binary-replacement" acquired a short synonym "apply --binary". * After the "GIT binary patch\n" header line there is a token to record which binary patch mechanism was used, so that we can extend it later. Currently there are two mechanisms defined: "literal" and "delta". The former records the deflated postimage and the latter records the deflated delta from the preimage to postimage. For purely implementation convenience, I added the deflated length after these "literal/delta" tokens (otherwise the decoding side needs to guess and reallocate the buffer while inflating). Improvement patches are very welcomed. Signed-off-by: Junio C Hamano --- apply.c | 130 +++++++++++++++++++++++++++++++++++++++++++++---------------- base85.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cache.h | 1 + diff.c | 134 +++++++++++++++++++++++++++++++++------------------------------ diff.h | 1 + 5 files changed, 304 insertions(+), 96 deletions(-) create mode 100644 base85.c (limited to 'diff.c') diff --git a/apply.c b/apply.c index e37c4ebf52..1b93aab8af 100644 --- a/apply.c +++ b/apply.c @@ -114,6 +114,9 @@ struct patch { char *new_name, *old_name, *def_name; unsigned int old_mode, new_mode; int is_rename, is_copy, is_new, is_delete, is_binary; +#define BINARY_DELTA_DEFLATED 1 +#define BINARY_LITERAL_DEFLATED 2 + unsigned long deflate_origlen; int lines_added, lines_deleted; int score; struct fragment *fragments; @@ -969,9 +972,11 @@ static inline int metadata_changes(struct patch *patch) static int parse_binary(char *buffer, unsigned long size, struct patch *patch) { - /* We have read "GIT binary patch\n"; what follows is a - * sequence of 'length-byte' followed by base-85 encoded - * delta data. + /* We have read "GIT binary patch\n"; what follows is a line + * that says the patch method (currently, either "deflated + * literal" or "deflated delta") and the length of data before + * deflating; a sequence of 'length-byte' followed by base-85 + * encoded data follows. * * Each 5-byte sequence of base-85 encodes up to 4 bytes, * and we would limit the patch line to 66 characters, @@ -982,11 +987,27 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) */ int llen, used; struct fragment *fragment; - char *delta = NULL; + char *data = NULL; - patch->is_binary = 1; patch->fragments = fragment = xcalloc(1, sizeof(*fragment)); - used = 0; + + /* Grab the type of patch */ + llen = linelen(buffer, size); + used = llen; + linenr++; + + if (!strncmp(buffer, "delta ", 6)) { + patch->is_binary = BINARY_DELTA_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 6, NULL, 10); + } + else if (!strncmp(buffer, "literal ", 8)) { + patch->is_binary = BINARY_LITERAL_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 8, NULL, 10); + } + else + return error("unrecognized binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); + buffer += llen; while (1) { int byte_length, max_byte_length, newsize; llen = linelen(buffer, size); @@ -1015,8 +1036,8 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) byte_length <= max_byte_length - 4) goto corrupt; newsize = fragment->size + byte_length; - delta = xrealloc(delta, newsize); - if (decode_85(delta + fragment->size, + data = xrealloc(data, newsize); + if (decode_85(data + fragment->size, buffer + 1, byte_length)) goto corrupt; @@ -1024,7 +1045,7 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) buffer += llen; size -= llen; } - fragment->patch = delta; + fragment->patch = data; return used; corrupt: return error("corrupt binary patch at line %d: %.*s", @@ -1425,6 +1446,61 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) return offset; } +static char *inflate_it(const void *data, unsigned long size, + unsigned long inflated_size) +{ + z_stream stream; + void *out; + int st; + + memset(&stream, 0, sizeof(stream)); + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + stream.next_out = out = xmalloc(inflated_size); + stream.avail_out = inflated_size; + inflateInit(&stream); + st = inflate(&stream, Z_FINISH); + if ((st != Z_STREAM_END) || stream.total_out != inflated_size) { + free(out); + return NULL; + } + return out; +} + +static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch) +{ + unsigned long dst_size; + struct fragment *fragment = patch->fragments; + void *data; + void *result; + + data = inflate_it(fragment->patch, fragment->size, + patch->deflate_origlen); + if (!data) + return error("corrupt patch data"); + switch (patch->is_binary) { + case BINARY_DELTA_DEFLATED: + result = patch_delta(desc->buffer, desc->size, + data, + patch->deflate_origlen, + &dst_size); + free(desc->buffer); + desc->buffer = result; + free(data); + break; + case BINARY_LITERAL_DEFLATED: + free(desc->buffer); + desc->buffer = data; + dst_size = patch->deflate_origlen; + break; + } + if (!desc->buffer) + return -1; + desc->size = desc->alloc = dst_size; + return 0; +} + static int apply_binary(struct buffer_desc *desc, struct patch *patch) { const char *name = patch->old_name ? patch->old_name : patch->new_name; @@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch) "'%s' but it is not empty", name); } - if (desc->buffer) { + get_sha1_hex(patch->new_sha1_prefix, sha1); + if (!memcmp(sha1, null_sha1, 20)) { free(desc->buffer); desc->alloc = desc->size = 0; - } - get_sha1_hex(patch->new_sha1_prefix, sha1); - if (!memcmp(sha1, null_sha1, 20)) + desc->buffer = NULL; return 0; /* deletion patch */ + } if (has_sha1_file(sha1)) { + /* We already have the postimage */ char type[10]; unsigned long size; + free(desc->buffer); desc->buffer = read_sha1_file(sha1, type, &size); if (!desc->buffer) return error("the necessary postimage %s for " @@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch) desc->alloc = desc->size = size; } else { - char type[10]; - unsigned long src_size, dst_size; - void *src; - - get_sha1_hex(patch->old_sha1_prefix, sha1); - src = read_sha1_file(sha1, type, &src_size); - if (!src) - return error("the necessary preimage %s for " - "'%s' cannot be read", - patch->old_sha1_prefix, name); - - /* patch->fragment->patch has the delta data and - * we should apply it to the preimage. + /* We have verified desc matches the preimage; + * apply the patch data to it, which is stored + * in the patch->fragments->{patch,size}. */ - desc->buffer = patch_delta(src, src_size, - (void*) patch->fragments->patch, - patch->fragments->size, - &dst_size); - if (!desc->buffer) + if (apply_binary_fragment(desc, patch)) return error("binary patch does not apply to '%s'", name); - desc->size = desc->alloc = dst_size; /* verify that the result matches */ write_sha1_file_prepare(desc->buffer, desc->size, blob_type, @@ -2102,7 +2165,8 @@ int main(int argc, char **argv) diffstat = 1; continue; } - if (!strcmp(arg, "--allow-binary-replacement")) { + if (!strcmp(arg, "--allow-binary-replacement") || + !strcmp(arg, "--binary")) { allow_binary_replacement = 1; continue; } diff --git a/base85.c b/base85.c new file mode 100644 index 0000000000..b97f7f933a --- /dev/null +++ b/base85.c @@ -0,0 +1,134 @@ +#include "cache.h" + +#undef DEBUG_85 + +#ifdef DEBUG_85 +#define say(a) fprintf(stderr, a) +#define say1(a,b) fprintf(stderr, a, b) +#define say2(a,b,c) fprintf(stderr, a, b, c) +#else +#define say(a) do {} while(0) +#define say1(a,b) do {} while(0) +#define say2(a,b,c) do {} while(0) +#endif + +static const char en85[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z', + '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', + ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', + '|', '}', '~' +}; + +static char de85[256]; +static void prep_base85(void) +{ + int i; + if (de85['Z']) + return; + for (i = 0; i < ARRAY_SIZE(en85); i++) { + int ch = en85[i]; + de85[ch] = i + 1; + } +} + +int decode_85(char *dst, char *buffer, int len) +{ + prep_base85(); + + say2("decode 85 <%.*s>", len/4*5, buffer); + while (len) { + unsigned acc = 0; + int cnt; + for (cnt = 0; cnt < 5; cnt++, buffer++) { + int ch = *((unsigned char *)buffer); + int de = de85[ch]; + if (!de) + return error("invalid base85 alphabet %c", ch); + de--; + if (cnt == 4) { + /* + * Detect overflow. The largest + * 5-letter possible is "|NsC0" to + * encode 0xffffffff, and "|NsC" gives + * 0x03030303 at this point (i.e. + * 0xffffffff = 0x03030303 * 85). + */ + if (0x03030303 < acc || + (0x03030303 == acc && de)) + error("invalid base85 sequence %.5s", + buffer-3); + } + acc = acc * 85 + de; + say1(" <%08x>", acc); + } + say1(" %08x", acc); + for (cnt = 0; cnt < 4 && len; cnt++, len--) { + *dst++ = (acc >> 24) & 0xff; + acc = acc << 8; + } + } + say("\n"); + + return 0; +} + +void encode_85(char *buf, unsigned char *data, int bytes) +{ + prep_base85(); + + say("encode 85"); + while (bytes) { + unsigned acc = 0; + int cnt; + for (cnt = 0; cnt < 4 && bytes; cnt++, bytes--) { + int ch = *data++; + acc |= ch << ((3-cnt)*8); + } + say1(" %08x", acc); + for (cnt = 0; cnt < 5; cnt++) { + int val = acc % 85; + acc /= 85; + buf[4-cnt] = en85[val]; + } + buf += 5; + } + say("\n"); + + *buf = 0; +} + +#ifdef DEBUG_85 +int main(int ac, char **av) +{ + char buf[1024]; + + if (!strcmp(av[1], "-e")) { + int len = strlen(av[2]); + encode_85(buf, av[2], len); + if (len <= 26) len = len + 'A' - 1; + else len = len + 'a' - 26 + 1; + printf("encoded: %c%s\n", len, buf); + return 0; + } + if (!strcmp(av[1], "-d")) { + int len = *av[2]; + if ('A' <= len && len <= 'Z') len = len - 'A' + 1; + else len = len - 'a' + 26 + 1; + decode_85(buf, av[2]+1, len); + printf("decoded: %.*s\n", len, buf); + return 0; + } + if (!strcmp(av[1], "-t")) { + char t[4] = { -1,-1,-1,-1 }; + encode_85(buf, t, 4); + printf("encoded: D%s\n", buf); + return 0; + } +} +#endif diff --git a/cache.h b/cache.h index 2f32f3d62a..4b7a439253 100644 --- a/cache.h +++ b/cache.h @@ -365,5 +365,6 @@ extern void setup_pager(void); /* base85 */ int decode_85(char *dst, char *line, int linelen); +void encode_85(char *buf, unsigned char *data, int bytes); #endif /* CACHE_H */ diff --git a/diff.c b/diff.c index b14d897f19..bfe54c3e09 100644 --- a/diff.c +++ b/diff.c @@ -392,78 +392,78 @@ static void show_stats(struct diffstat_t* data) total_files, adds, dels); } -static void *encode_delta_size(void *data, unsigned long size) +static unsigned char *deflate_it(char *data, + unsigned long size, + unsigned long *result_size) { - unsigned char *cp = data; - *cp++ = size; - size >>= 7; - while (size) { - cp[-1] |= 0x80; - *cp++ = size; - size >>= 7; - } - return cp; + int bound; + unsigned char *deflated; + z_stream stream; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, Z_BEST_COMPRESSION); + bound = deflateBound(&stream, size); + deflated = xmalloc(bound); + stream.next_out = deflated; + stream.avail_out = bound; + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + while (deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + deflateEnd(&stream); + *result_size = stream.total_out; + return deflated; } -static void *safe_diff_delta(const unsigned char *src, unsigned long src_size, - const unsigned char *dst, unsigned long dst_size, - unsigned long *delta_size) +static void emit_binary_diff(mmfile_t *one, mmfile_t *two) { - unsigned long bufsize; - unsigned char *data; - unsigned char *cp; - - if (src_size && dst_size) - return diff_delta(src, src_size, dst, dst_size, delta_size, 0); + void *cp; + void *delta; + void *deflated; + void *data; + unsigned long orig_size; + unsigned long delta_size; + unsigned long deflate_size; + unsigned long data_size; - /* diff-delta does not like to do delta with empty, so - * we do that by hand here. Sigh... + printf("GIT binary patch\n"); + /* We could do deflated delta, or we could do just deflated two, + * whichever is smaller. */ - - if (!src_size) - /* literal copy can be done only 127-byte at a time. - */ - bufsize = dst_size + (dst_size / 127) + 40; - else - bufsize = 40; - data = xmalloc(bufsize); - cp = encode_delta_size(data, src_size); - cp = encode_delta_size(cp, dst_size); - - if (dst_size) { - /* copy out literally */ - while (dst_size) { - int sz = (127 < dst_size) ? 127 : dst_size; - *cp++ = sz; - dst_size -= sz; - while (sz) { - *cp++ = *dst++; - sz--; - } + delta = NULL; + deflated = deflate_it(two->ptr, two->size, &deflate_size); + if (one->size && two->size) { + delta = diff_delta(one->ptr, one->size, + two->ptr, two->size, + &delta_size, deflate_size); + if (delta) { + void *to_free = delta; + orig_size = delta_size; + delta = deflate_it(delta, delta_size, &delta_size); + free(to_free); } } - *delta_size = (cp - data); - return data; -} -static void emit_binary_diff(mmfile_t *one, mmfile_t *two) -{ - void *delta, *cp; - unsigned long delta_size; + if (delta && delta_size < deflate_size) { + printf("delta %lu\n", orig_size); + free(deflated); + data = delta; + data_size = delta_size; + } + else { + printf("literal %lu\n", two->size); + free(delta); + data = deflated; + data_size = deflate_size; + } - printf("GIT binary patch\n"); - delta = safe_diff_delta(one->ptr, one->size, - two->ptr, two->size, - &delta_size); - if (!delta) - die("unable to generate binary diff"); - - /* emit delta encoded in base85 */ - cp = delta; - while (delta_size) { - int bytes = (52 < delta_size) ? 52 : delta_size; + /* emit data encoded in base85 */ + cp = data; + while (data_size) { + int bytes = (52 < data_size) ? 52 : data_size; char line[70]; - delta_size -= bytes; + data_size -= bytes; if (bytes <= 26) line[0] = bytes + 'A' - 1; else @@ -473,7 +473,7 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two) puts(line); } printf("\n"); - free(delta); + free(data); } #define FIRST_FEW_BYTES 8000 @@ -538,7 +538,11 @@ static void builtin_diff(const char *name_a, die("unable to read files to diff"); if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) { - if (o->full_index) + /* Quite common confusing case */ + if (mf1.size == mf2.size && + !memcmp(mf1.ptr, mf2.ptr, mf1.size)) + goto free_ab_and_return; + if (o->binary) emit_binary_diff(&mf1, &mf2); else printf("Binary files %s and %s differ\n", @@ -1239,6 +1243,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) options->rename_limit = strtoul(arg+2, NULL, 10); else if (!strcmp(arg, "--full-index")) options->full_index = 1; + else if (!strcmp(arg, "--binary")) { + options->output_format = DIFF_FORMAT_PATCH; + options->full_index = options->binary = 1; + } else if (!strcmp(arg, "--name-only")) options->output_format = DIFF_FORMAT_NAME; else if (!strcmp(arg, "--name-status")) diff --git a/diff.h b/diff.h index b3b2c4dd28..d052608404 100644 --- a/diff.h +++ b/diff.h @@ -28,6 +28,7 @@ struct diff_options { with_raw:1, with_stat:1, tree_in_recursive:1, + binary:1, full_index:1, silent_on_remove:1, find_copies_harder:1; -- cgit v1.3 From 5d6a9f45e1ba78693cb0fbd5e9724b05d4e30e37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 May 2006 09:46:53 -0700 Subject: Fix "git diff --stat" with long filenames When we cut off the front of a filename to make it fit on the line, we add a "..." in front. However, the way the "git diff" code was written, we will never reset the prefix back to the empty string, so every single filename afterwards will have the "..." prefix, whether appropriate or not. You can see this with "git diff v2.6.16.." on the current kernel tree, since there are filenames with long names that changed there: [ snip snip ] Documentation/filesystems/vfs.txt | 229 .../firmware_class/firmware_sample_driver.c | 3 .../firmware_sample_firmware_class.c | 1 ...Documentation/fujitsu/frv/kernel-ABI.txt | 192 ...Documentation/hwmon/w83627hf | 4 [ snip snip ] notice how the two Documentation/firmware** filenames caused the "..." to be added, but then the later filenames don't want it, and it also screws up the alignment of the line numbering afterwards. Trivially fixed by moving the declaration (and initial setting) of the "prefix" variable into the for-loop where it is used. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 903afa1689..afaa648dbb 100644 --- a/diff.c +++ b/diff.c @@ -239,7 +239,6 @@ static const char minuses[]= "-------------------------------------------------- static void show_stats(struct diffstat_t* data) { - char *prefix = ""; int i, len, add, del, total, adds = 0, dels = 0; int max, max_change = 0, max_len = 0; int total_files = data->nr; @@ -261,6 +260,7 @@ static void show_stats(struct diffstat_t* data) } for (i = 0; i < data->nr; i++) { + char *prefix = ""; char *name = data->files[i]->name; int added = data->files[i]->added; int deleted = data->files[i]->deleted; -- cgit v1.3 From ee1e5412a702674d4da95a4aef73a73d2834dcc3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 May 2006 13:23:48 -0700 Subject: git diff: support "-U" and "--unified" options properly We used to parse "-U" and "--unified" as part of the GIT_DIFF_OPTS environment variable, but strangely enough we would _not_ parse them as part of the normal diff command line (where we only accepted "-u"). This adds parsing of -U and --unified, both with an optional numeric argument. So now you can just say git diff --unified=5 to get a unified diff with a five-line context, instead of having to do something silly like GIT_DIFF_OPTS="--unified=5" git diff -u (that silly format does continue to still work, of course). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- combine-diff.c | 1 + diff.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- diff.h | 1 + 3 files changed, 53 insertions(+), 1 deletion(-) (limited to 'diff.c') diff --git a/combine-diff.c b/combine-diff.c index 8a8fe3863a..64b20cce24 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -608,6 +608,7 @@ static int show_patch_diff(struct combine_diff_path *elem, int num_parent, int abbrev = opt->full_index ? 40 : DEFAULT_ABBREV; mmfile_t result_file; + context = opt->context; /* Read the result of merge first */ if (!working_tree_file) result = grab_blob(elem->sha1, &result_size); diff --git a/diff.c b/diff.c index 7a7b839e56..be925a3a39 100644 --- a/diff.c +++ b/diff.c @@ -558,7 +558,7 @@ static void builtin_diff(const char *name_a, ecbdata.label_path = lbl; xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = 3; + xecfg.ctxlen = o->context; xecfg.flags = XDL_EMIT_FUNCNAMES; if (!diffopts) ; @@ -1182,6 +1182,7 @@ void diff_setup(struct diff_options *options) options->line_termination = '\n'; options->break_opt = -1; options->rename_limit = -1; + options->context = 3; options->change = diff_change; options->add_remove = diff_addremove; @@ -1222,11 +1223,60 @@ int diff_setup_done(struct diff_options *options) return 0; } +int opt_arg(const char *arg, int arg_short, const char *arg_long, int *val) +{ + char c, *eq; + int len; + + if (*arg != '-') + return 0; + c = *++arg; + if (!c) + return 0; + if (c == arg_short) { + c = *++arg; + if (!c) + return 1; + if (val && isdigit(c)) { + char *end; + int n = strtoul(arg, &end, 10); + if (*end) + return 0; + *val = n; + return 1; + } + return 0; + } + if (c != '-') + return 0; + arg++; + eq = strchr(arg, '='); + if (eq) + len = eq - arg; + else + len = strlen(arg); + if (!len || strncmp(arg, arg_long, len)) + return 0; + if (eq) { + int n; + char *end; + if (!isdigit(*++eq)) + return 0; + n = strtoul(eq, &end, 10); + if (*end) + return 0; + *val = n; + } + return 1; +} + int diff_opt_parse(struct diff_options *options, const char **av, int ac) { const char *arg = av[0]; if (!strcmp(arg, "-p") || !strcmp(arg, "-u")) options->output_format = DIFF_FORMAT_PATCH; + else if (opt_arg(arg, 'U', "unified", &options->context)) + options->output_format = DIFF_FORMAT_PATCH; else if (!strcmp(arg, "--patch-with-raw")) { options->output_format = DIFF_FORMAT_PATCH; options->with_raw = 1; diff --git a/diff.h b/diff.h index d052608404..bef586d850 100644 --- a/diff.h +++ b/diff.h @@ -32,6 +32,7 @@ struct diff_options { full_index:1, silent_on_remove:1, find_copies_harder:1; + int context; int break_opt; int detect_rename; int line_termination; -- cgit v1.3 From 4bbd261bbd941066735e41dff0ade9f50e6fe84d Mon Sep 17 00:00:00 2001 From: Sean Date: Sun, 14 May 2006 08:13:49 -0400 Subject: Add "--summary" option to git diff. Remove the need to pipe git diff through git apply to get the extended headers summary. Signed-off-by: Sean Estabrooks Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 ++ diff.c | 88 +++++++++++++++++++++++++++++++++++++++++- diff.h | 3 +- 3 files changed, 93 insertions(+), 2 deletions(-) (limited to 'diff.c') diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index c183dc9da0..f523ec2fbe 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -10,6 +10,10 @@ --stat:: Generate a diffstat instead of a patch. +--summary:: + Output a condensed summary of extended header information + such as creations, renames and mode changes. + --patch-with-stat:: Generate patch and prepend its diffstat. diff --git a/diff.c b/diff.c index 7a7b839e56..00b1044d3a 100644 --- a/diff.c +++ b/diff.c @@ -1233,6 +1233,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) } else if (!strcmp(arg, "--stat")) options->output_format = DIFF_FORMAT_DIFFSTAT; + else if (!strcmp(arg, "--summary")) + options->summary = 1; else if (!strcmp(arg, "--patch-with-stat")) { options->output_format = DIFF_FORMAT_PATCH; options->with_stat = 1; @@ -1703,6 +1705,85 @@ static void flush_one_pair(struct diff_filepair *p, } } +static void show_file_mode_name(const char *newdelete, struct diff_filespec *fs) +{ + if (fs->mode) + printf(" %s mode %06o %s\n", newdelete, fs->mode, fs->path); + else + printf(" %s %s\n", newdelete, fs->path); +} + + +static void show_mode_change(struct diff_filepair *p, int show_name) +{ + if (p->one->mode && p->two->mode && p->one->mode != p->two->mode) { + if (show_name) + printf(" mode change %06o => %06o %s\n", + p->one->mode, p->two->mode, p->two->path); + else + printf(" mode change %06o => %06o\n", + p->one->mode, p->two->mode); + } +} + +static void show_rename_copy(const char *renamecopy, struct diff_filepair *p) +{ + const char *old, *new; + + /* Find common prefix */ + old = p->one->path; + new = p->two->path; + while (1) { + const char *slash_old, *slash_new; + slash_old = strchr(old, '/'); + slash_new = strchr(new, '/'); + if (!slash_old || + !slash_new || + slash_old - old != slash_new - new || + memcmp(old, new, slash_new - new)) + break; + old = slash_old + 1; + new = slash_new + 1; + } + /* p->one->path thru old is the common prefix, and old and new + * through the end of names are renames + */ + if (old != p->one->path) + printf(" %s %.*s{%s => %s} (%d%%)\n", renamecopy, + (int)(old - p->one->path), p->one->path, + old, new, (int)(0.5 + p->score * 100.0/MAX_SCORE)); + else + printf(" %s %s => %s (%d%%)\n", renamecopy, + p->one->path, p->two->path, + (int)(0.5 + p->score * 100.0/MAX_SCORE)); + show_mode_change(p, 0); +} + +static void diff_summary(struct diff_filepair *p) +{ + switch(p->status) { + case DIFF_STATUS_DELETED: + show_file_mode_name("delete", p->one); + break; + case DIFF_STATUS_ADDED: + show_file_mode_name("create", p->two); + break; + case DIFF_STATUS_COPIED: + show_rename_copy("copy", p); + break; + case DIFF_STATUS_RENAMED: + show_rename_copy("rename", p); + break; + default: + if (p->score) { + printf(" rewrite %s (%d%%)\n", p->two->path, + (int)(0.5 + p->score * 100.0/MAX_SCORE)); + show_mode_change(p, 0); + } else show_mode_change(p, 1); + break; + } +} + void diff_flush(struct diff_options *options) { struct diff_queue_struct *q = &diff_queued_diff; @@ -1736,7 +1817,6 @@ void diff_flush(struct diff_options *options) for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; flush_one_pair(p, diff_output_format, options, diffstat); - diff_free_filepair(p); } if (diffstat) { @@ -1744,6 +1824,12 @@ void diff_flush(struct diff_options *options) free(diffstat); } + for (i = 0; i < q->nr; i++) { + if (options->summary) + diff_summary(q->queue[i]); + diff_free_filepair(q->queue[i]); + } + free(q->queue); q->queue = NULL; q->nr = q->alloc = 0; diff --git a/diff.h b/diff.h index d052608404..70077c6ed9 100644 --- a/diff.h +++ b/diff.h @@ -31,7 +31,8 @@ struct diff_options { binary:1, full_index:1, silent_on_remove:1, - find_copies_harder:1; + find_copies_harder:1, + summary:1; int break_opt; int detect_rename; int line_termination; -- cgit v1.3 From cc908b82a4a09df555fbc4c32cfbe8b8cffae865 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 14 May 2006 22:07:28 -0700 Subject: diffstat rename squashing fix. When renaming leading/a/filename to leading/b/filename (and "filename" is sufficiently long), we tried to squash the rename to "leading/{a => b}/filename". However, when "/a" or "/b" part is empty, we underflowed and tried to print a substring of length -1. Signed-off-by: Junio C Hamano --- diff.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 7a7b839e56..5285c03667 100644 --- a/diff.c +++ b/diff.c @@ -232,11 +232,16 @@ static char *pprint_rename(const char *a, const char *b) * name-a => name-b */ if (pfx_length + sfx_length) { + int a_midlen = len_a - pfx_length - sfx_length; + int b_midlen = len_b - pfx_length - sfx_length; + if (a_midlen < 0) a_midlen = 0; + if (b_midlen < 0) b_midlen = 0; + name = xmalloc(len_a + len_b - pfx_length - sfx_length + 7); sprintf(name, "%.*s{%.*s => %.*s}%s", pfx_length, a, - len_a - pfx_length - sfx_length, a + pfx_length, - len_b - pfx_length - sfx_length, b + pfx_length, + a_midlen, a + pfx_length, + b_midlen, b + pfx_length, a + len_a - sfx_length); } else { -- cgit v1.3 From 8824689884a5673f803d77cda499f83b84145a06 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 20 May 2006 23:43:13 +0200 Subject: diff family: add --check option Actually, it is a diff option now, so you can say git diff --check to ask if what you are about to commit is a good patch. [jc: this also would work for fmt-patch, but the point is that the check is done before making a commit. format-patch is run from an already created commit, and that is too late to catch whitespace damaged change.] Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- diff.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- diff.h | 1 + 2 files changed, 115 insertions(+), 1 deletion(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index e16e0bfc0a..af5db0ec12 100644 --- a/diff.c +++ b/diff.c @@ -397,6 +397,46 @@ static void show_stats(struct diffstat_t* data) total_files, adds, dels); } +struct checkdiff_t { + struct xdiff_emit_state xm; + const char *filename; + int lineno; +}; + +static void checkdiff_consume(void *priv, char *line, unsigned long len) +{ + struct checkdiff_t *data = priv; + + if (line[0] == '+') { + int i, spaces = 0; + + data->lineno++; + + /* check space before tab */ + for (i = 1; i < len && (line[i] == ' ' || line[i] == '\t'); i++) + if (line[i] == ' ') + spaces++; + if (line[i - 1] == '\t' && spaces) + printf("%s:%d: space before tab:%.*s\n", + data->filename, data->lineno, (int)len, line); + + /* check white space at line end */ + if (line[len - 1] == '\n') + len--; + if (isspace(line[len - 1])) + printf("%s:%d: white space at end: %.*s\n", + data->filename, data->lineno, (int)len, line); + } else if (line[0] == ' ') + data->lineno++; + else if (line[0] == '@') { + char *plus = strchr(line, '+'); + if (plus) + data->lineno = strtol(plus, line + len, 10); + else + die("invalid diff"); + } +} + static unsigned char *deflate_it(char *data, unsigned long size, unsigned long *result_size) @@ -624,6 +664,41 @@ static void builtin_diffstat(const char *name_a, const char *name_b, } } +static void builtin_checkdiff(const char *name_a, const char *name_b, + struct diff_filespec *one, + struct diff_filespec *two) +{ + mmfile_t mf1, mf2; + struct checkdiff_t data; + + if (!two) + return; + + memset(&data, 0, sizeof(data)); + data.xm.consume = checkdiff_consume; + data.filename = name_b ? name_b : name_a; + data.lineno = 0; + + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + + if (mmfile_is_binary(&mf2)) + return; + else { + /* Crazy xdl interfaces.. */ + xpparam_t xpp; + xdemitconf_t xecfg; + xdemitcb_t ecb; + + xpp.flags = XDF_NEED_MINIMAL; + xecfg.ctxlen = 0; + xecfg.flags = 0; + ecb.outf = xdiff_outf; + ecb.priv = &data; + xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + } +} + struct diff_filespec *alloc_filespec(const char *path) { int namelen = strlen(path); @@ -1180,6 +1255,25 @@ static void run_diffstat(struct diff_filepair *p, struct diff_options *o, builtin_diffstat(name, other, p->one, p->two, diffstat, complete_rewrite); } +static void run_checkdiff(struct diff_filepair *p, struct diff_options *o) +{ + const char *name; + const char *other; + + if (DIFF_PAIR_UNMERGED(p)) { + /* unmerged */ + return; + } + + name = p->one->path; + other = (strcmp(name, p->two->path) ? p->two->path : NULL); + + diff_fill_sha1_info(p->one); + diff_fill_sha1_info(p->two); + + builtin_checkdiff(name, other, p->one, p->two); +} + void diff_setup(struct diff_options *options) { memset(options, 0, sizeof(*options)); @@ -1205,7 +1299,8 @@ int diff_setup_done(struct diff_options *options) * recursive bits for other formats here. */ if ((options->output_format == DIFF_FORMAT_PATCH) || - (options->output_format == DIFF_FORMAT_DIFFSTAT)) + (options->output_format == DIFF_FORMAT_DIFFSTAT) || + (options->output_format == DIFF_FORMAT_CHECKDIFF)) options->recursive = 1; if (options->detect_rename && options->rename_limit < 0) @@ -1288,6 +1383,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) } else if (!strcmp(arg, "--stat")) options->output_format = DIFF_FORMAT_DIFFSTAT; + else if (!strcmp(arg, "--check")) + options->output_format = DIFF_FORMAT_CHECKDIFF; else if (!strcmp(arg, "--summary")) options->summary = 1; else if (!strcmp(arg, "--patch-with-stat")) { @@ -1610,6 +1707,19 @@ static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o, run_diffstat(p, o, diffstat); } +static void diff_flush_checkdiff(struct diff_filepair *p, + struct diff_options *o) +{ + if (diff_unmodified_pair(p)) + return; + + if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || + (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) + return; /* no tree diffs in patch format */ + + run_checkdiff(p, o); +} + int diff_queue_is_empty(void) { struct diff_queue_struct *q = &diff_queued_diff; @@ -1740,6 +1850,9 @@ static void flush_one_pair(struct diff_filepair *p, case DIFF_FORMAT_DIFFSTAT: diff_flush_stat(p, options, diffstat); break; + case DIFF_FORMAT_CHECKDIFF: + diff_flush_checkdiff(p, options); + break; case DIFF_FORMAT_PATCH: diff_flush_patch(p, options); break; diff --git a/diff.h b/diff.h index 3027974c1e..c672277df2 100644 --- a/diff.h +++ b/diff.h @@ -153,6 +153,7 @@ extern int diff_queue_is_empty(void); #define DIFF_FORMAT_NAME 4 #define DIFF_FORMAT_NAME_STATUS 5 #define DIFF_FORMAT_DIFFSTAT 6 +#define DIFF_FORMAT_CHECKDIFF 7 extern void diff_flush(struct diff_options*); -- cgit v1.3