From c41ee586dc95b757cdff4deae10a30a691ba758b Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 23 Dec 2006 02:33:44 -0500 Subject: Refactor packed_git to prepare for sliding mmap windows. The idea behind the sliding mmap window pack reader implementation is to have multiple mmap regions active against the same pack file, thereby allowing the process to mmap in only the active/hot sections of the pack and reduce overall virtual address space usage. To implement this we need to refactor the mmap related data (pack_base, pack_use_cnt) out of struct packed_git and move them into a new struct pack_window. We are refactoring the code to support a single struct pack_window per packfile, thereby emulating the prior behavior of mmap'ing the entire pack file. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin-pack-objects.c') diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 9e15beb3ba..4a00a1206f 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -438,7 +438,7 @@ static unsigned long write_object(struct sha1file *f, } use_packed_git(p); - buf = (char *) p->pack_base + buf = p->windows->base + entry->in_pack_offset + entry->in_pack_header_size; datalen = find_packed_object_size(p, entry->in_pack_offset) @@ -943,7 +943,7 @@ static void check_object(struct object_entry *entry) struct object_entry *base_entry = NULL; use_packed_git(p); - buf = p->pack_base; + buf = p->windows->base; buf += entry->in_pack_offset; /* We want in_pack_type even if we do not reuse delta. -- cgit v1.3 From 03e79c88aa34ce188eb4fb7509e6d127c79c507d Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 23 Dec 2006 02:34:08 -0500 Subject: Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 16 ++++--- cache.h | 4 +- pack-check.c | 22 +++++----- sha1_file.c | 110 ++++++++++++++++++++++++++++--------------------- 4 files changed, 85 insertions(+), 67 deletions(-) (limited to 'builtin-pack-objects.c') diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 4a00a1206f..6d7ae7f1ae 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -416,6 +416,7 @@ static unsigned long write_object(struct sha1file *f, } else { struct packed_git *p = entry->in_pack; + struct pack_window *w_curs = NULL; if (entry->delta) { obj_type = (allow_ofs_delta && entry->delta->offset) ? @@ -437,16 +438,14 @@ static unsigned long write_object(struct sha1file *f, hdrlen += 20; } - use_packed_git(p); - buf = p->windows->base - + entry->in_pack_offset - + entry->in_pack_header_size; + buf = use_pack(p, &w_curs, entry->in_pack_offset + + entry->in_pack_header_size, NULL); datalen = find_packed_object_size(p, entry->in_pack_offset) - entry->in_pack_header_size; if (!pack_to_stdout && check_inflate(buf, datalen, entry->size)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); sha1write(f, buf, datalen); - unuse_packed_git(p); + unuse_pack(&w_curs); reused++; } if (entry->delta) @@ -937,14 +936,13 @@ static void check_object(struct object_entry *entry) if (entry->in_pack && !entry->preferred_base) { struct packed_git *p = entry->in_pack; + struct pack_window *w_curs = NULL; unsigned long left = p->pack_size - entry->in_pack_offset; unsigned long size, used; unsigned char *buf; struct object_entry *base_entry = NULL; - use_packed_git(p); - buf = p->windows->base; - buf += entry->in_pack_offset; + buf = use_pack(p, &w_curs, entry->in_pack_offset, NULL); /* We want in_pack_type even if we do not reuse delta. * There is no point not reusing non-delta representations. @@ -990,7 +988,7 @@ static void check_object(struct object_entry *entry) if (base_name) base_entry = locate_object_entry(base_name); } - unuse_packed_git(p); + unuse_pack(&w_curs); entry->in_pack_header_size = used; if (base_entry) { diff --git a/cache.h b/cache.h index f8a89280fd..c927f29d31 100644 --- a/cache.h +++ b/cache.h @@ -397,8 +397,8 @@ extern void install_packed_git(struct packed_git *pack); extern struct packed_git *find_sha1_pack(const unsigned char *sha1, struct packed_git *packs); -extern int use_packed_git(struct packed_git *); -extern void unuse_packed_git(struct packed_git *); +extern unsigned char* use_pack(struct packed_git *, struct pack_window **, unsigned long, unsigned int *); +extern void unuse_pack(struct pack_window **); extern struct packed_git *add_packed_git(char *, int, int); extern int num_packed_objects(const struct packed_git *p); extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*); diff --git a/pack-check.c b/pack-check.c index 761cc852e9..972916f406 100644 --- a/pack-check.c +++ b/pack-check.c @@ -1,7 +1,8 @@ #include "cache.h" #include "pack.h" -static int verify_packfile(struct packed_git *p) +static int verify_packfile(struct packed_git *p, + struct pack_window **w_curs) { unsigned long index_size = p->index_size; void *index_base = p->index_base; @@ -13,7 +14,7 @@ static int verify_packfile(struct packed_git *p) int nr_objects, err, i; /* Header consistency check */ - pack_base = p->windows->base; + pack_base = use_pack(p, w_curs, 0, NULL); hdr = (struct pack_header*)pack_base; if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) return error("Packfile %s signature mismatch", p->pack_name); @@ -72,13 +73,14 @@ static int verify_packfile(struct packed_git *p) #define MAX_CHAIN 40 -static void show_pack_info(struct packed_git *p) +static void show_pack_info(struct packed_git *p, + struct pack_window **w_curs) { struct pack_header *hdr; int nr_objects, i; unsigned int chain_histogram[MAX_CHAIN]; - hdr = (struct pack_header*)p->windows->base; + hdr = (struct pack_header*)use_pack(p, w_curs, 0, NULL); nr_objects = ntohl(hdr->hdr_entries); memset(chain_histogram, 0, sizeof(chain_histogram)); @@ -142,18 +144,18 @@ int verify_pack(struct packed_git *p, int verbose) if (!ret) { /* Verify pack file */ - use_packed_git(p); - ret = verify_packfile(p); - unuse_packed_git(p); + struct pack_window *w_curs = NULL; + ret = verify_packfile(p, &w_curs); + unuse_pack(&w_curs); } if (verbose) { if (ret) printf("%s: bad\n", p->pack_name); else { - use_packed_git(p); - show_pack_info(p); - unuse_packed_git(p); + struct pack_window *w_curs = NULL; + show_pack_info(p, &w_curs); + unuse_pack(&w_curs); printf("%s: ok\n", p->pack_name); } } diff --git a/sha1_file.c b/sha1_file.c index 79d2d9dacd..4d80527baf 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -470,10 +470,13 @@ static int unuse_one_packed_git(void) return 1; } -void unuse_packed_git(struct packed_git *p) +void unuse_pack(struct pack_window **w_cursor) { - if (p->windows) - p->windows->inuse_cnt--; + struct pack_window *w = *w_cursor; + if (w) { + w->inuse_cnt--; + *w_cursor = NULL; + } } static void open_packed_git(struct packed_git *p) @@ -518,13 +521,16 @@ static void open_packed_git(struct packed_git *p) die("packfile %s does not match index", p->pack_name); } -int use_packed_git(struct packed_git *p) +unsigned char* use_pack(struct packed_git *p, + struct pack_window **w_cursor, + unsigned long offset, + unsigned int *left) { + struct pack_window *win = p->windows; + if (p->pack_fd == -1) open_packed_git(p); - if (!p->windows) { - struct pack_window *win; - + if (!win) { pack_mapped += p->pack_size; while (packed_git_limit < pack_mapped && unuse_one_packed_git()) ; /* nothing */ @@ -535,9 +541,14 @@ int use_packed_git(struct packed_git *p) die("packfile %s cannot be mapped.", p->pack_name); p->windows = win; } - p->windows->last_used = pack_used_ctr++; - p->windows->inuse_cnt++; - return 0; + if (win != *w_cursor) { + win->last_used = pack_used_ctr++; + win->inuse_cnt++; + *w_cursor = win; + } + if (left) + *left = win->len - offset; + return win->base + offset; } struct packed_git *add_packed_git(char *path, int path_len, int local) @@ -883,12 +894,13 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l } static unsigned long get_delta_base(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, enum object_type kind, unsigned long delta_obj_offset, unsigned long *base_obj_offset) { - unsigned char *base_info = p->windows->base + offset; + unsigned char *base_info = use_pack(p, w_curs, offset, NULL); unsigned long base_offset; /* there must be at least 20 bytes left regardless of delta type */ @@ -928,6 +940,7 @@ static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep); static int packed_delta_info(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, enum object_type kind, unsigned long obj_offset, @@ -936,7 +949,8 @@ static int packed_delta_info(struct packed_git *p, { unsigned long base_offset; - offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); + offset = get_delta_base(p, w_curs, offset, kind, + obj_offset, &base_offset); /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta @@ -955,8 +969,7 @@ static int packed_delta_info(struct packed_git *p, memset(&stream, 0, sizeof(stream)); - stream.next_in = p->windows->base + offset; - stream.avail_in = p->pack_size - offset; + stream.next_in = use_pack(p, w_curs, offset, &stream.avail_in); stream.next_out = delta_head; stream.avail_out = sizeof(delta_head); @@ -982,16 +995,18 @@ static int packed_delta_info(struct packed_git *p, return 0; } -static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset, - enum object_type *type, unsigned long *sizep) +static unsigned long unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + unsigned long offset, + enum object_type *type, + unsigned long *sizep) { + unsigned char *base; + unsigned int left; unsigned long used; - if (p->pack_size <= offset) - die("object offset outside of pack file"); - - used = unpack_object_header_gently(p->windows->base + offset, - p->pack_size - offset, type, sizep); + base = use_pack(p, w_curs, offset, &left); + used = unpack_object_header_gently(base, left, type, sizep); if (!used) die("object offset outside of pack file"); @@ -1006,13 +1021,14 @@ void packed_object_info_detail(struct packed_git *p, unsigned int *delta_chain_length, unsigned char *base_sha1) { + struct pack_window *w_curs = NULL; unsigned long obj_offset, val; unsigned char *next_sha1; enum object_type kind; *delta_chain_length = 0; obj_offset = offset; - offset = unpack_object_header(p, offset, &kind, size); + offset = unpack_object_header(p, &w_curs, offset, &kind, size); for (;;) { switch (kind) { @@ -1025,25 +1041,24 @@ void packed_object_info_detail(struct packed_git *p, case OBJ_TAG: strcpy(type, type_names[kind]); *store_size = 0; /* notyet */ + unuse_pack(&w_curs); return; case OBJ_OFS_DELTA: - get_delta_base(p, offset, kind, obj_offset, &offset); + get_delta_base(p, &w_curs, offset, kind, + obj_offset, &offset); if (*delta_chain_length == 0) { /* TODO: find base_sha1 as pointed by offset */ } break; case OBJ_REF_DELTA: - if (p->pack_size <= offset + 20) - die("pack file %s records an incomplete delta base", - p->pack_name); - next_sha1 = p->windows->base + offset; + next_sha1 = use_pack(p, &w_curs, offset, NULL); if (*delta_chain_length == 0) hashcpy(base_sha1, next_sha1); offset = find_pack_entry_one(next_sha1, p); break; } obj_offset = offset; - offset = unpack_object_header(p, offset, &kind, &val); + offset = unpack_object_header(p, &w_curs, offset, &kind, &val); (*delta_chain_length)++; } } @@ -1051,20 +1066,26 @@ void packed_object_info_detail(struct packed_git *p, static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { + struct pack_window *w_curs = NULL; unsigned long size, obj_offset = offset; enum object_type kind; + int r; - offset = unpack_object_header(p, offset, &kind, &size); + offset = unpack_object_header(p, &w_curs, offset, &kind, &size); switch (kind) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - return packed_delta_info(p, offset, kind, obj_offset, type, sizep); + r = packed_delta_info(p, &w_curs, offset, kind, + obj_offset, type, sizep); + unuse_pack(&w_curs); + return r; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: strcpy(type, type_names[kind]); + unuse_pack(&w_curs); break; default: die("pack %s contains unknown object type %d", @@ -1076,6 +1097,7 @@ static int packed_object_info(struct packed_git *p, unsigned long offset, } static void *unpack_compressed_entry(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, unsigned long size) { @@ -1086,8 +1108,7 @@ static void *unpack_compressed_entry(struct packed_git *p, buffer = xmalloc(size + 1); buffer[size] = 0; memset(&stream, 0, sizeof(stream)); - stream.next_in = p->windows->base + offset; - stream.avail_in = p->pack_size - offset; + stream.next_in = use_pack(p, w_curs, offset, &stream.avail_in); stream.next_out = buffer; stream.avail_out = size; @@ -1103,6 +1124,7 @@ static void *unpack_compressed_entry(struct packed_git *p, } static void *unpack_delta_entry(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, unsigned long delta_size, enum object_type kind, @@ -1113,13 +1135,14 @@ static void *unpack_delta_entry(struct packed_git *p, void *delta_data, *result, *base; unsigned long result_size, base_size, base_offset; - offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); + offset = get_delta_base(p, w_curs, offset, kind, + obj_offset, &base_offset); base = unpack_entry(p, base_offset, type, &base_size); if (!base) die("failed to read delta base object at %lu from %s", base_offset, p->pack_name); - delta_data = unpack_compressed_entry(p, offset, delta_size); + delta_data = unpack_compressed_entry(p, w_curs, offset, delta_size); result = patch_delta(base, base_size, delta_data, delta_size, &result_size); @@ -1134,17 +1157,17 @@ static void *unpack_delta_entry(struct packed_git *p, void *unpack_entry(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { + struct pack_window *w_curs = NULL; unsigned long size, obj_offset = offset; enum object_type kind; void *retval; - if (use_packed_git(p)) - die("cannot map packed file"); - offset = unpack_object_header(p, offset, &kind, &size); + offset = unpack_object_header(p, &w_curs, offset, &kind, &size); switch (kind) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - retval = unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep); + retval = unpack_delta_entry(p, &w_curs, offset, size, + kind, obj_offset, type, sizep); break; case OBJ_COMMIT: case OBJ_TREE: @@ -1152,12 +1175,12 @@ void *unpack_entry(struct packed_git *p, unsigned long offset, case OBJ_TAG: strcpy(type, type_names[kind]); *sizep = size; - retval = unpack_compressed_entry(p, offset, size); + retval = unpack_compressed_entry(p, &w_curs, offset, size); break; default: die("unknown object type %i in %s", kind, p->pack_name); } - unuse_packed_git(p); + unuse_pack(&w_curs); return retval; } @@ -1284,7 +1307,6 @@ static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigne int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep) { - int status; struct pack_entry e; if (!find_pack_entry(sha1, &e, NULL)) { @@ -1292,11 +1314,7 @@ int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep if (!find_pack_entry(sha1, &e, NULL)) return sha1_loose_object_info(sha1, type, sizep); } - if (use_packed_git(e.p)) - die("cannot map packed file"); - status = packed_object_info(e.p, e.offset, type, sizep); - unuse_packed_git(e.p); - return status; + return packed_object_info(e.p, e.offset, type, sizep); } static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned long *size) -- cgit v1.3 From 079afb18fed078af01bd9ab02e2ebbe5d31893b1 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 23 Dec 2006 02:34:13 -0500 Subject: Loop over pack_windows when inflating/accessing data. When multiple mmaps start getting used for all pack file access it is not possible to get all data associated with a specific object in one contiguous memory region. This limitation prevents simply passing a single address and length to SHA1_Update or to inflate. Instead we need to loop until we have processed all data of interest. As we loop over the data we are always interested in reusing the same window 'cursor', as the prior window will no longer be of any use to us. This allows the use_pack() call to automatically decrement the use count of the prior window before setting up access for us to the next window. Within each loop we need to make use of the available length output parameter of use_pack() to tell us how many bytes are available in the current memory region, as we cannot tell otherwise. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------ pack-check.c | 46 +++++++++++++++++---------------------- sha1_file.c | 22 +++++++++++++------ 3 files changed, 87 insertions(+), 39 deletions(-) (limited to 'builtin-pack-objects.c') diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 6d7ae7f1ae..afb926a34c 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -276,7 +276,52 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha * we are going to reuse the existing object data as is. make * sure it is not corrupt. */ -static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect) +static int check_pack_inflate(struct packed_git *p, + struct pack_window **w_curs, + unsigned long offset, + unsigned long len, + unsigned long expect) +{ + z_stream stream; + unsigned char fakebuf[4096], *in; + int st; + + memset(&stream, 0, sizeof(stream)); + inflateInit(&stream); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + stream.next_out = fakebuf; + stream.avail_out = sizeof(fakebuf); + st = inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); + inflateEnd(&stream); + return (st == Z_STREAM_END && + stream.total_out == expect && + stream.total_in == len) ? 0 : -1; +} + +static void copy_pack_data(struct sha1file *f, + struct packed_git *p, + struct pack_window **w_curs, + unsigned long offset, + unsigned long len) +{ + unsigned char *in; + unsigned int avail; + + while (len) { + in = use_pack(p, w_curs, offset, &avail); + if (avail > len) + avail = len; + sha1write(f, in, avail); + offset += avail; + len -= avail; + } +} + +static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect) { z_stream stream; unsigned char fakebuf[4096]; @@ -323,7 +368,7 @@ static int revalidate_loose_object(struct object_entry *entry, return -1; map += used; mapsize -= used; - return check_inflate(map, mapsize, size); + return check_loose_inflate(map, mapsize, size); } static unsigned long write_object(struct sha1file *f, @@ -417,6 +462,7 @@ static unsigned long write_object(struct sha1file *f, else { struct packed_git *p = entry->in_pack; struct pack_window *w_curs = NULL; + unsigned long offset; if (entry->delta) { obj_type = (allow_ofs_delta && entry->delta->offset) ? @@ -438,13 +484,13 @@ static unsigned long write_object(struct sha1file *f, hdrlen += 20; } - buf = use_pack(p, &w_curs, entry->in_pack_offset - + entry->in_pack_header_size, NULL); + offset = entry->in_pack_offset + entry->in_pack_header_size; datalen = find_packed_object_size(p, entry->in_pack_offset) - entry->in_pack_header_size; - if (!pack_to_stdout && check_inflate(buf, datalen, entry->size)) + if (!pack_to_stdout && check_pack_inflate(p, &w_curs, + offset, datalen, entry->size)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); - sha1write(f, buf, datalen); + copy_pack_data(f, p, &w_curs, offset, datalen); unuse_pack(&w_curs); reused++; } diff --git a/pack-check.c b/pack-check.c index 972916f406..08a9fd8dc0 100644 --- a/pack-check.c +++ b/pack-check.c @@ -8,39 +8,38 @@ static int verify_packfile(struct packed_git *p, void *index_base = p->index_base; SHA_CTX ctx; unsigned char sha1[20]; - unsigned long pack_size = p->pack_size; - void *pack_base; - struct pack_header *hdr; + unsigned long offset = 0, pack_sig = p->pack_size - 20; int nr_objects, err, i; - /* Header consistency check */ - pack_base = use_pack(p, w_curs, 0, NULL); - hdr = (struct pack_header*)pack_base; - if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) - return error("Packfile %s signature mismatch", p->pack_name); - if (!pack_version_ok(hdr->hdr_version)) - return error("Packfile version %d unsupported", - ntohl(hdr->hdr_version)); - nr_objects = ntohl(hdr->hdr_entries); - if (num_packed_objects(p) != nr_objects) - return error("Packfile claims to have %d objects, " - "while idx size expects %d", nr_objects, - num_packed_objects(p)); + /* Note that the pack header checks are actually performed by + * use_pack when it first opens the pack file. If anything + * goes wrong during those checks then the call will die out + * immediately. + */ SHA1_Init(&ctx); - SHA1_Update(&ctx, pack_base, pack_size - 20); + while (offset < pack_sig) { + unsigned int remaining; + unsigned char *in = use_pack(p, w_curs, offset, &remaining); + offset += remaining; + if (offset > pack_sig) + remaining -= offset - pack_sig; + SHA1_Update(&ctx, in, remaining); + } SHA1_Final(sha1, &ctx); - if (hashcmp(sha1, (unsigned char *)pack_base + pack_size - 20)) + if (hashcmp(sha1, use_pack(p, w_curs, pack_sig, NULL))) return error("Packfile %s SHA1 mismatch with itself", p->pack_name); if (hashcmp(sha1, (unsigned char *)index_base + index_size - 40)) return error("Packfile %s SHA1 mismatch with idx", p->pack_name); + unuse_pack(w_curs); /* Make sure everything reachable from idx is valid. Since we * have verified that nr_objects matches between idx and pack, * we do not do scan-streaming check on the pack file. */ + nr_objects = num_packed_objects(p); for (i = err = 0; i < nr_objects; i++) { unsigned char sha1[20]; void *data; @@ -73,15 +72,12 @@ static int verify_packfile(struct packed_git *p, #define MAX_CHAIN 40 -static void show_pack_info(struct packed_git *p, - struct pack_window **w_curs) +static void show_pack_info(struct packed_git *p) { - struct pack_header *hdr; int nr_objects, i; unsigned int chain_histogram[MAX_CHAIN]; - hdr = (struct pack_header*)use_pack(p, w_curs, 0, NULL); - nr_objects = ntohl(hdr->hdr_entries); + nr_objects = num_packed_objects(p); memset(chain_histogram, 0, sizeof(chain_histogram)); for (i = 0; i < nr_objects; i++) { @@ -153,9 +149,7 @@ int verify_pack(struct packed_git *p, int verbose) if (ret) printf("%s: bad\n", p->pack_name); else { - struct pack_window *w_curs = NULL; - show_pack_info(p, &w_curs); - unuse_pack(&w_curs); + show_pack_info(p); printf("%s: ok\n", p->pack_name); } } diff --git a/sha1_file.c b/sha1_file.c index 4d80527baf..c304522519 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -962,19 +962,23 @@ static int packed_delta_info(struct packed_git *p, if (sizep) { const unsigned char *data; - unsigned char delta_head[20]; + unsigned char delta_head[20], *in; unsigned long result_size; z_stream stream; int st; memset(&stream, 0, sizeof(stream)); - - stream.next_in = use_pack(p, w_curs, offset, &stream.avail_in); stream.next_out = delta_head; stream.avail_out = sizeof(delta_head); inflateInit(&stream); - st = inflate(&stream, Z_FINISH); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + st = inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while ((st == Z_OK || st == Z_BUF_ERROR) + && stream.total_out < sizeof(delta_head)); inflateEnd(&stream); if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) @@ -1103,17 +1107,21 @@ static void *unpack_compressed_entry(struct packed_git *p, { int st; z_stream stream; - unsigned char *buffer; + unsigned char *buffer, *in; buffer = xmalloc(size + 1); buffer[size] = 0; memset(&stream, 0, sizeof(stream)); - stream.next_in = use_pack(p, w_curs, offset, &stream.avail_in); stream.next_out = buffer; stream.avail_out = size; inflateInit(&stream); - st = inflate(&stream, Z_FINISH); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + st = inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); inflateEnd(&stream); if ((st != Z_STREAM_END) || stream.total_out != size) { free(buffer); -- cgit v1.3 From f5b1b5a07e2d715c5ee7c098e95bd3dbc8ee745d Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 27 Dec 2006 02:46:23 -0500 Subject: Fix random segfaults in pack-objects. Junio noticed that 'non-trivial' pushes were failing if executed using the sliding window mmap changes. This was somewhat difficult to track down as the failure was appearing randomly. It turns out this was a failure caused by the delta base reference (either ref or offset format) spanning over the end of a mmap window. The error in pack-objects was we were not recalling use_pack after the object header was unpacked, and therefore we did not get the promise of at least 20 bytes in the buffer for the delta base parsing. This would case later memcmp() calls to walk into unassigned address space at the end of the window. The reason Junio and I had hard time tracking this down in current Git repositories is we were both probably packing with offset deltas, which minimized the odds of the delta base reference spanning over the end of the mmap window. Stepping back and repacking with version 1.3.3 (which only supported reference deltas) increased the likelyhood of seeing the bug. The correct technique (as used in sha1_file.c) is to invoke use_pack() after unpack_object_header_gently to ensure we have enough data available for the delta base decoding. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'builtin-pack-objects.c') diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index afb926a34c..e9a1804fa8 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -995,8 +995,6 @@ static void check_object(struct object_entry *entry) */ used = unpack_object_header_gently(buf, left, &entry->in_pack_type, &size); - if (!used || left - used <= 20) - die("corrupt pack for %s", sha1_to_hex(entry->sha1)); /* Check if it is delta, and the base is also an object * we are going to pack. If so we will reuse the existing @@ -1008,10 +1006,13 @@ static void check_object(struct object_entry *entry) /* there is at least 20 bytes left in the pack */ switch (entry->in_pack_type) { case OBJ_REF_DELTA: - base_name = buf + used; + base_name = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); used += 20; break; case OBJ_OFS_DELTA: + buf = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); c = buf[used++]; ofs = c & 127; while (c & 128) { -- cgit v1.3 From 9c18df19073bad62e16d6b6b8e1939fd15424612 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 28 Dec 2006 22:29:06 -0800 Subject: pack-objects: fix use of use_pack(). The code calls use_pack() to make that the variably encoded offset fits in the mmap'ed window, but it forgot that the operation gives the pointer to the beginning of the asked region. Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'builtin-pack-objects.c') diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index e9a1804fa8..42dd8c87a2 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1003,6 +1003,7 @@ static void check_object(struct object_entry *entry) if (!no_reuse_delta) { unsigned char c, *base_name; unsigned long ofs; + unsigned long used_0; /* there is at least 20 bytes left in the pack */ switch (entry->in_pack_type) { case OBJ_REF_DELTA: @@ -1013,14 +1014,15 @@ static void check_object(struct object_entry *entry) case OBJ_OFS_DELTA: buf = use_pack(p, &w_curs, entry->in_pack_offset + used, NULL); - c = buf[used++]; + used_0 = 0; + c = buf[used_0++]; ofs = c & 127; while (c & 128) { ofs += 1; if (!ofs || ofs & ~(~0UL >> 7)) die("delta base offset overflow in pack for %s", sha1_to_hex(entry->sha1)); - c = buf[used++]; + c = buf[used_0++]; ofs = (ofs << 7) + (c & 127); } if (ofs >= entry->in_pack_offset) @@ -1028,6 +1030,7 @@ static void check_object(struct object_entry *entry) sha1_to_hex(entry->sha1)); ofs = entry->in_pack_offset - ofs; base_name = find_packed_object_name(p, ofs); + used += used_0; break; default: base_name = NULL; -- cgit v1.3