From ebcfb3791a53e0455bf8361046e3310993697a8e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 25 Feb 2011 15:43:25 -0800 Subject: write_idx_file: introduce a struct to hold idx customization options Remove two globals, pack_idx_default_version and pack_idx_off32_limit, and place them in a pack_idx_option structure. Allow callers to pass it to write_idx_file() as a parameter. Adjust all callers to the API change. Signed-off-by: Junio C Hamano --- pack-write.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'pack-write.c') diff --git a/pack-write.c b/pack-write.c index a905ca4486..f739a0f39b 100644 --- a/pack-write.c +++ b/pack-write.c @@ -2,8 +2,12 @@ #include "pack.h" #include "csum-file.h" -uint32_t pack_idx_default_version = 2; -uint32_t pack_idx_off32_limit = 0x7fffffff; +void reset_pack_idx_option(struct pack_idx_option *opts) +{ + memset(opts, 0, sizeof(*opts)); + opts->version = 2; + opts->off32_limit = 0x7fffffff; +} static int sha1_compare(const void *_a, const void *_b) { @@ -18,7 +22,8 @@ static int sha1_compare(const void *_a, const void *_b) * will be sorted by SHA1 on exit. */ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, - int nr_objects, unsigned char *sha1) + int nr_objects, const struct pack_idx_option *opts, + unsigned char *sha1) { struct sha1file *f; struct pack_idx_entry **sorted_by_sha, **list, **last; @@ -55,7 +60,7 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec f = sha1fd(fd, index_name); /* if last object's offset is >= 2^31 we should use index V2 */ - index_version = (last_obj_offset >> 31) ? 2 : pack_idx_default_version; + index_version = (last_obj_offset >> 31) ?
2 : opts->version; /* index versions 2 and above need a header */ if (index_version >= 2) { @@ -115,7 +120,7 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec list = sorted_by_sha; for (i = 0; i < nr_objects; i++) { struct pack_idx_entry *obj = *list++; - uint32_t offset = (obj->offset <= pack_idx_off32_limit) ? + uint32_t offset = (obj->offset <= opts->off32_limit) ? obj->offset : (0x80000000 | nr_large_offset++); offset = htonl(offset); sha1write(f, &offset, 4); @@ -126,7 +131,7 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec while (nr_large_offset) { struct pack_idx_entry *obj = *list++; uint64_t offset = obj->offset; - if (offset > pack_idx_off32_limit) { + if (offset > opts->off32_limit) { uint32_t split[2]; split[0] = htonl(offset >> 32); split[1] = htonl(offset & 0xffffffff); -- cgit v1.3 From e337a04de298f8c3e64ee1a187423203406b9bae Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 2 Feb 2011 17:29:01 -0800 Subject: index-pack: --verify Given an existing .pack file and the .idx file that describes it, this new mode of operation reads and re-indexes the packfile and makes sure the existing .idx file matches the result byte-for-byte. All the objects in the .pack file are validated during this operation as well. Unlike verify-pack, which visits each object described in the .idx file in the SHA-1 order, index-pack efficiently exploits the delta-chain to avoid rebuilding the objects that are used as the base of deltified objects over and over again while validating the objects, resulting in much quicker verification of the .pack file and its .idx file. This version however cannot verify a .pack/.idx pair with a handcrafted v2 index that uses 64-bit offset representation for offsets that would fit within 31 bits. You can create such an .idx file by giving a custom offset to the --index-version option of the command.
Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 46 ++++++++++++++++++++++++++++++++++++++++------ csum-file.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- csum-file.h | 2 ++ pack-write.c | 26 ++++++++++++++++---------- pack.h | 4 ++++ t/t5302-pack-index.sh | 18 ++++++++++++++++++ 6 files changed, 125 insertions(+), 17 deletions(-) (limited to 'pack-write.c') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 4df681885e..24a9a16220 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -11,7 +11,7 @@ #include "exec_cmd.h" static const char index_pack_usage[] = -"git index-pack [-v] [-o ] [ --keep | --keep= ] [--strict] ( | --stdin [--fix-thin] [])"; +"git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; struct object_entry { @@ -891,9 +891,32 @@ static int git_index_pack_config(const char *k, const char *v, void *cb) return git_default_config(k, v, cb); } +static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) +{ + struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); + + if (!p) + die("Cannot open existing pack file '%s'", pack_name); + if (open_pack_index(p)) + die("Cannot open existing pack idx file for '%s'", pack_name); + + /* Read the attributes from the existing idx file */ + opts->version = p->index_version; + + /* + * Get rid of the idx file as we do not need it anymore. + * NEEDSWORK: extract this bit from free_pack_by_name() in + * sha1_file.c, perhaps? It shouldn't matter very much as we + * know we haven't installed this pack (hence we never have + * read anything from it). 
+ */ + close_pack_index(p); + free(p); +} + int cmd_index_pack(int argc, const char **argv, const char *prefix) { - int i, fix_thin_pack = 0; + int i, fix_thin_pack = 0, verify = 0; const char *curr_pack, *curr_index; const char *index_name = NULL, *pack_name = NULL; const char *keep_name = NULL, *keep_msg = NULL; @@ -922,6 +945,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) fix_thin_pack = 1; } else if (!strcmp(arg, "--strict")) { strict = 1; + } else if (!strcmp(arg, "--verify")) { + verify = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; } else if (!prefixcmp(arg, "--keep=")) { @@ -988,6 +1013,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) strcpy(keep_name_buf + len - 5, ".keep"); keep_name = keep_name_buf; } + if (verify) { + if (!index_name) + die("--verify with no packfile name given"); + read_idx_option(&opts, index_name); + opts.flags |= WRITE_IDX_VERIFY; + } curr_pack = open_pack_file(pack_name); parse_pack_header(); @@ -1038,10 +1069,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1); free(idx_objects); - final(pack_name, curr_pack, - index_name, curr_index, - keep_name, keep_msg, - pack_sha1); + if (!verify) + final(pack_name, curr_pack, + index_name, curr_index, + keep_name, keep_msg, + pack_sha1); + else + close(input_fd); free(objects); free(index_name_buf); free(keep_name_buf); diff --git a/csum-file.c b/csum-file.c index 4d50cc5ce1..f70e3dd7b5 100644 --- a/csum-file.c +++ b/csum-file.c @@ -11,8 +11,20 @@ #include "progress.h" #include "csum-file.h" -static void flush(struct sha1file *f, void * buf, unsigned int count) +static void flush(struct sha1file *f, void *buf, unsigned int count) { + if (0 <= f->check_fd && count) { + unsigned char check_buffer[8192]; + ssize_t ret = read_in_full(f->check_fd, check_buffer, count); + + if (ret < 0) + die_errno("%s: sha1 file read error", 
f->name); + if (ret < count) + die("%s: sha1 file truncated", f->name); + if (memcmp(buf, check_buffer, count)) + die("sha1 file '%s' validation error", f->name); + } + for (;;) { int ret = xwrite(f->fd, buf, count); if (ret > 0) { @@ -59,6 +71,17 @@ int sha1close(struct sha1file *f, unsigned char *result, unsigned int flags) fd = 0; } else fd = f->fd; + if (0 <= f->check_fd) { + char discard; + int cnt = read_in_full(f->check_fd, &discard, 1); + if (cnt < 0) + die_errno("%s: error when reading the tail of sha1 file", + f->name); + if (cnt) + die("%s: sha1 file has trailing garbage", f->name); + if (close(f->check_fd)) + die_errno("%s: sha1 file error on close", f->name); + } free(f); return fd; } @@ -101,10 +124,31 @@ struct sha1file *sha1fd(int fd, const char *name) return sha1fd_throughput(fd, name, NULL); } +struct sha1file *sha1fd_check(const char *name) +{ + int sink, check; + struct sha1file *f; + + sink = open("/dev/null", O_WRONLY); + if (sink < 0) + return NULL; + check = open(name, O_RDONLY); + if (check < 0) { + int saved_errno = errno; + close(sink); + errno = saved_errno; + return NULL; + } + f = sha1fd(sink, name); + f->check_fd = check; + return f; +} + struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp) { struct sha1file *f = xmalloc(sizeof(*f)); f->fd = fd; + f->check_fd = -1; f->offset = 0; f->total = 0; f->tp = tp; diff --git a/csum-file.h b/csum-file.h index 294add2a91..6a7967c6bf 100644 --- a/csum-file.h +++ b/csum-file.h @@ -6,6 +6,7 @@ struct progress; /* A SHA1-protected file */ struct sha1file { int fd; + int check_fd; unsigned int offset; git_SHA_CTX ctx; off_t total; @@ -21,6 +22,7 @@ struct sha1file { #define CSUM_FSYNC 2 extern struct sha1file *sha1fd(int fd, const char *name); +extern struct sha1file *sha1fd_check(const char *name); extern struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp); extern int sha1close(struct sha1file *, unsigned char *, unsigned int); extern int 
sha1write(struct sha1file *, void *, unsigned int); diff --git a/pack-write.c b/pack-write.c index f739a0f39b..16529c39a9 100644 --- a/pack-write.c +++ b/pack-write.c @@ -47,17 +47,22 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec else sorted_by_sha = list = last = NULL; - if (!index_name) { - static char tmpfile[PATH_MAX]; - fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX"); - index_name = xstrdup(tmpfile); + if (opts->flags & WRITE_IDX_VERIFY) { + assert(index_name); + f = sha1fd_check(index_name); } else { - unlink(index_name); - fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + if (!index_name) { + static char tmpfile[PATH_MAX]; + fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX"); + index_name = xstrdup(tmpfile); + } else { + unlink(index_name); + fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + } + if (fd < 0) + die_errno("unable to create '%s'", index_name); + f = sha1fd(fd, index_name); } - if (fd < 0) - die_errno("unable to create '%s'", index_name); - f = sha1fd(fd, index_name); /* if last object's offset is >= 2^31 we should use index V2 */ index_version = (last_obj_offset >> 31) ? 2 : opts->version; @@ -142,7 +147,8 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec } sha1write(f, sha1, 20); - sha1close(f, NULL, CSUM_FSYNC); + sha1close(f, NULL, ((opts->flags & WRITE_IDX_VERIFY) + ? 
CSUM_CLOSE : CSUM_FSYNC)); git_SHA1_Final(sha1, &ctx); return index_name; } diff --git a/pack.h b/pack.h index 953f57e8b2..dddafdd160 100644 --- a/pack.h +++ b/pack.h @@ -35,6 +35,10 @@ struct pack_header { #define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */ struct pack_idx_option { + unsigned flags; + /* flag bits */ +#define WRITE_IDX_VERIFY 01 + uint32_t version; uint32_t off32_limit; }; diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index b34ea93a80..7c5fa03920 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -65,6 +65,14 @@ test_expect_success \ 'cmp "test-1-${pack1}.idx" "1.idx" && cmp "test-2-${pack2}.idx" "2.idx"' +test_expect_success 'index-pack --verify on index version 1' ' + git index-pack --verify "test-1-${pack1}.pack" +' + +test_expect_success 'index-pack --verify on index version 2' ' + git index-pack --verify "test-2-${pack2}.pack" +' + test_expect_success \ 'index v2: force some 64-bit offsets with pack-objects' \ 'pack3=$(git pack-objects --index-version=2,0x40000 test-3 Date: Fri, 25 Feb 2011 16:54:00 -0800 Subject: write_idx_file: need_large_offset() helper function Signed-off-by: Junio C Hamano --- pack-write.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'pack-write.c') diff --git a/pack-write.c b/pack-write.c index 16529c39a9..92e7eefb40 100644 --- a/pack-write.c +++ b/pack-write.c @@ -16,6 +16,11 @@ static int sha1_compare(const void *_a, const void *_b) return hashcmp(a->sha1, b->sha1); } +static int need_large_offset(off_t offset, const struct pack_idx_option *opts) +{ + return (offset >> 31) || (opts->off32_limit < offset); +} + /* * On entry *sha1 contains the pack content SHA1 hash, on exit it is * the SHA1 hash of sorted object names. 
The objects array passed in @@ -65,7 +70,7 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec } /* if last object's offset is >= 2^31 we should use index V2 */ - index_version = (last_obj_offset >> 31) ? 2 : opts->version; + index_version = need_large_offset(last_obj_offset, opts) ? 2 : opts->version; /* index versions 2 and above need a header */ if (index_version >= 2) { @@ -125,8 +130,11 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec list = sorted_by_sha; for (i = 0; i < nr_objects; i++) { struct pack_idx_entry *obj = *list++; - uint32_t offset = (obj->offset <= opts->off32_limit) ? - obj->offset : (0x80000000 | nr_large_offset++); + uint32_t offset; + + offset = (need_large_offset(obj->offset, opts) + ? (0x80000000 | nr_large_offset++) + : obj->offset); offset = htonl(offset); sha1write(f, &offset, 4); } @@ -136,13 +144,14 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec while (nr_large_offset) { struct pack_idx_entry *obj = *list++; uint64_t offset = obj->offset; - if (offset > opts->off32_limit) { - uint32_t split[2]; - split[0] = htonl(offset >> 32); - split[1] = htonl(offset & 0xffffffff); - sha1write(f, split, 8); - nr_large_offset--; - } + uint32_t split[2]; + + if (!need_large_offset(offset, opts)) + continue; + split[0] = htonl(offset >> 32); + split[1] = htonl(offset & 0xffffffff); + sha1write(f, split, 8); + nr_large_offset--; } } -- cgit v1.3 From 3c9fc074c220d5d1d2173c84cc6ae57d750e2a2c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 25 Feb 2011 16:55:26 -0800 Subject: index-pack --verify: read anomalous offsets from v2 idx file A pack v2 .idx file usually records an offset using the 64-bit representation only when the offset does not fit within 31 bits, but you can handcraft your .idx file to record a smaller offset using 64 bits, storing all zeros in the upper 4 bytes.
By inspecting the original idx file when running index-pack --verify, encode such low offsets that do not need to be in 64-bit but are encoded using 64-bit just like the original idx file so that we can still validate the pack/idx pair by comparing the idx file recomputed with the original. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ pack-write.c | 18 +++++++++++++++++- pack.h | 8 ++++++++ t/t5302-pack-index.sh | 2 +- 4 files changed, 74 insertions(+), 2 deletions(-) (limited to 'pack-write.c') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 24a9a16220..513fbbc55f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -891,6 +891,51 @@ static int git_index_pack_config(const char *k, const char *v, void *cb) return git_default_config(k, v, cb); } +static int cmp_uint32(const void *a_, const void *b_) +{ + uint32_t a = *((uint32_t *)a_); + uint32_t b = *((uint32_t *)b_); + + return (a < b) ? -1 : (a != b); +} + +static void read_v2_anomalous_offsets(struct packed_git *p, + struct pack_idx_option *opts) +{ + const uint32_t *idx1, *idx2; + uint32_t i; + + /* The address of the 4-byte offset table */ + idx1 = (((const uint32_t *)p->index_data) + + 2 /* 8-byte header */ + + 256 /* fan out */ + + 5 * p->num_objects /* 20-byte SHA-1 table */ + + p->num_objects /* CRC32 table */ + ); + + /* The address of the 8-byte offset table */ + idx2 = idx1 + p->num_objects; + + for (i = 0; i < p->num_objects; i++) { + uint32_t off = ntohl(idx1[i]); + if (!(off & 0x80000000)) + continue; + off = off & 0x7fffffff; + if (idx2[off * 2]) + continue; + /* + * The real offset is ntohl(idx2[off * 2]) in high 4 + * octets, and ntohl(idx2[off * 2 + 1]) in low 4 + * octets. But idx2[off * 2] is Zero!!! 
+ */ + ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc); + opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]); + } + + if (1 < opts->anomaly_nr) + qsort(opts->anomaly, opts->anomaly_nr, sizeof(uint32_t), cmp_uint32); +} + static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) { struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); @@ -903,6 +948,9 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) /* Read the attributes from the existing idx file */ opts->version = p->index_version; + if (opts->version == 2) + read_v2_anomalous_offsets(p, opts); + /* * Get rid of the idx file as we do not need it anymore. * NEEDSWORK: extract this bit from free_pack_by_name() in diff --git a/pack-write.c b/pack-write.c index 92e7eefb40..9cd3bfbb4b 100644 --- a/pack-write.c +++ b/pack-write.c @@ -16,9 +16,25 @@ static int sha1_compare(const void *_a, const void *_b) return hashcmp(a->sha1, b->sha1); } +static int cmp_uint32(const void *a_, const void *b_) +{ + uint32_t a = *((uint32_t *)a_); + uint32_t b = *((uint32_t *)b_); + + return (a < b) ? -1 : (a != b); +} + static int need_large_offset(off_t offset, const struct pack_idx_option *opts) { - return (offset >> 31) || (opts->off32_limit < offset); + uint32_t ofsval; + + if ((offset >> 31) || (opts->off32_limit < offset)) + return 1; + if (!opts->anomaly_nr) + return 0; + ofsval = offset; + return !!bsearch(&ofsval, opts->anomaly, opts->anomaly_nr, + sizeof(ofsval), cmp_uint32); } /* diff --git a/pack.h b/pack.h index dddafdd160..722a54e00a 100644 --- a/pack.h +++ b/pack.h @@ -41,6 +41,14 @@ struct pack_idx_option { uint32_t version; uint32_t off32_limit; + + /* + * List of offsets that would fit within off32_limit but + * need to be written out as 64-bit entity for byte-for-byte + * verification. 
+ */ + int anomaly_alloc, anomaly_nr; + uint32_t *anomaly; }; extern void reset_pack_idx_option(struct pack_idx_option *); diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 7c5fa03920..76bcaca988 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -107,7 +107,7 @@ test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2 (cheat)' ' git index-pack --verify --index-version=2,0x40000 "test-3-${pack3}.pack" ' -test_expect_failure OFF64_T 'index-pack --verify on 64-bit offset v2' ' +test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2' ' git index-pack --verify "test-3-${pack3}.pack" ' -- cgit v1.3