From a96015a517060e5b69c6dd428f7276f1078ba507 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:23:45 -0500 Subject: pack-bitmap: plug leak in find_objects() The `find_objects()` function creates an object_list for any tips of the reachability query which do not have corresponding bitmaps. The object_list is not used outside of `find_objects()`, but we never free it with `object_list_free()`, resulting in a leak. Let's plug that leak by calling `object_list_free()`, which results in t6113 becoming leak-free. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pack-bitmap.c') diff --git a/pack-bitmap.c b/pack-bitmap.c index 0260890341..d2f1306960 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1280,6 +1280,8 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, base = fill_in_bitmap(bitmap_git, revs, base, seen); } + object_list_free(¬_mapped); + return base; } -- cgit v1.3 From dab60934e3057642453bcc6153af076cbcac0d47 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:23:56 -0500 Subject: pack-bitmap: pass `bitmapped_pack` struct to pack-reuse functions When trying to assemble a pack with bitmaps using `--use-bitmap-index`, `pack-objects` asks the pack-bitmap machinery for a bitmap which indicates the set of objects we can "reuse" verbatim from on-disk. This set is roughly comprised of: a prefix of objects in the bitmapped pack (or preferred pack, in the case of a multi-pack reachability bitmap), plus any other objects not included in the prefix, excluding any deltas whose base we are not sending in the resulting pack. The pack-bitmap machinery is responsible for computing this bitmap, and does so with the following functions: - reuse_partial_packfile_from_bitmap() - try_partial_reuse() In the existing implementation, the first function is responsible for (a) marking the prefix of objects in the reusable pack, and then (b) calling try_partial_reuse() on any remaining objects to ensure that they are also reusable (and removing them from the bitmapped set if they are not). Likewise, the `try_partial_reuse()` function is responsible for checking whether an isolated object (that is, an object from the bitmapped pack/preferred pack not contained in the prefix from earlier) may be reused, i.e. that it isn't a delta of an object that we are not sending in the resulting pack. These functions are based on two core assumptions, which we will unwind in this and the following commits: 1. There is only a single pack from the bitmap which is eligible for verbatim pack-reuse. For single-pack bitmaps, this is trivially the bitmapped pack. For multi-pack bitmaps, this is (currently) the MIDX's preferred pack. 2. The pack eligible for reuse has its first object in bit position 0, and all objects from that pack follow in pack-order from that first bit position. In order to perform verbatim pack reuse over multiple packs, we must unwind these two assumptions. Most notably, in order to reuse bits from a given packfile, we need to know the first bit position occupied by an object form that packfile. To propagate this information around, pass a `struct bitmapped_pack *` anywhere we previously passed a `struct packed_git *`, since the former contains the bitmap position we're interested in (as well as a pointer to the latter). As an additional step, factor out a sub-routine from the main `reuse_partial_packfile_from_bitmap()` function, called `reuse_partial_packfile_from_bitmap_1()`. This new function will be responsible for figuring out which objects may be reused from a single pack, and the existing function will dispatch multiple calls to its new helper function for each reusable pack. Consequently, `reuse_partial_packfile_from_bitmap()` will now maintain an array of reusable packs instead of a single such pack. We currently expect that array to have only a single element, so this awkward state is short-lived. It will serve as useful scaffolding in subsequent commits as we begin to work towards enabling multi-pack reuse. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 118 +++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 31 deletions(-) (limited to 'pack-bitmap.c') diff --git a/pack-bitmap.c b/pack-bitmap.c index d2f1306960..d64a80c30c 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1836,7 +1836,7 @@ cleanup: * -1 means "stop trying further objects"; 0 means we may or may not have * reused, but you can keep feeding bits. */ -static int try_partial_reuse(struct packed_git *pack, +static int try_partial_reuse(struct bitmapped_pack *pack, size_t pos, struct bitmap *reuse, struct pack_window **w_curs) @@ -1868,11 +1868,11 @@ static int try_partial_reuse(struct packed_git *pack, * preferred pack precede all bits from other packs. */ - if (pos >= pack->num_objects) + if (pos >= pack->p->num_objects) return -1; /* not actually in the pack or MIDX preferred pack */ - offset = delta_obj_offset = pack_pos_to_offset(pack, pos); - type = unpack_object_header(pack, w_curs, &offset, &size); + offset = delta_obj_offset = pack_pos_to_offset(pack->p, pos); + type = unpack_object_header(pack->p, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ @@ -1888,11 +1888,11 @@ static int try_partial_reuse(struct packed_git *pack, * and the normal slow path will complain about it in * more detail. */ - base_offset = get_delta_base(pack, w_curs, &offset, type, + base_offset = get_delta_base(pack->p, w_curs, &offset, type, delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0) + if (offset_to_pack_pos(pack->p, base_offset, &base_pos) < 0) return 0; /* @@ -1915,14 +1915,14 @@ static int try_partial_reuse(struct packed_git *pack, * to REF_DELTA on the fly. Better to just let the normal * object_entry code path handle it. */ - if (!bitmap_get(reuse, base_pos)) + if (!bitmap_get(reuse, pack->bitmap_pos + base_pos)) return 0; } /* * If we got here, then the object is OK to reuse. Mark it. */ - bitmap_set(reuse, pos); + bitmap_set(reuse, pack->bitmap_pos + pos); return 0; } @@ -1934,29 +1934,13 @@ uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); } -int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, - uint32_t *entries, - struct bitmap **reuse_out) +static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + struct bitmap *reuse) { - struct repository *r = the_repository; - struct packed_git *pack; struct bitmap *result = bitmap_git->result; - struct bitmap *reuse; struct pack_window *w_curs = NULL; size_t i = 0; - uint32_t offset; - uint32_t objects_nr; - - assert(result); - - load_reverse_index(r, bitmap_git); - - if (bitmap_is_midx(bitmap_git)) - pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; - else - pack = bitmap_git->pack; - objects_nr = pack->num_objects; while (i < result->word_alloc && result->words[i] == (eword_t)~0) i++; @@ -1969,15 +1953,15 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, * we use it instead of another pack. In single-pack bitmaps, the choice * is made for us. */ - if (i > objects_nr / BITS_IN_EWORD) - i = objects_nr / BITS_IN_EWORD; + if (i > pack->p->num_objects / BITS_IN_EWORD) + i = pack->p->num_objects / BITS_IN_EWORD; - reuse = bitmap_word_alloc(i); memset(reuse->words, 0xFF, i * sizeof(eword_t)); for (; i < result->word_alloc; ++i) { eword_t word = result->words[i]; size_t pos = (i * BITS_IN_EWORD); + size_t offset; for (offset = 0; offset < BITS_IN_EWORD; ++offset) { if ((word >> offset) == 0) @@ -2002,6 +1986,78 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, done: unuse_pack(&w_curs); +} + +static int bitmapped_pack_cmp(const void *va, const void *vb) +{ + const struct bitmapped_pack *a = va; + const struct bitmapped_pack *b = vb; + + if (a->bitmap_pos < b->bitmap_pos) + return -1; + if (a->bitmap_pos > b->bitmap_pos) + return 1; + return 0; +} + +int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct packed_git **packfile_out, + uint32_t *entries, + struct bitmap **reuse_out) +{ + struct repository *r = the_repository; + struct bitmapped_pack *packs = NULL; + struct bitmap *result = bitmap_git->result; + struct bitmap *reuse; + size_t i; + size_t packs_nr = 0, packs_alloc = 0; + size_t word_alloc; + uint32_t objects_nr = 0; + + assert(result); + + load_reverse_index(r, bitmap_git); + + if (bitmap_is_midx(bitmap_git)) { + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + struct bitmapped_pack pack; + if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) { + warning(_("unable to load pack: '%s', disabling pack-reuse"), + bitmap_git->midx->pack_names[i]); + free(packs); + return -1; + } + if (!pack.bitmap_nr) + continue; /* no objects from this pack */ + if (pack.bitmap_pos) + continue; /* not preferred pack */ + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + memcpy(&packs[packs_nr++], &pack, sizeof(pack)); + + objects_nr += pack.p->num_objects; + } + + QSORT(packs, packs_nr, bitmapped_pack_cmp); + } else { + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + + packs[packs_nr].p = bitmap_git->pack; + packs[packs_nr].bitmap_pos = 0; + packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects; + + objects_nr = packs[packs_nr++].p->num_objects; + } + + word_alloc = objects_nr / BITS_IN_EWORD; + if (objects_nr % BITS_IN_EWORD) + word_alloc++; + reuse = bitmap_word_alloc(word_alloc); + + if (packs_nr != 1) + BUG("pack reuse not yet implemented for multiple packs"); + + reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse); *entries = bitmap_popcount(reuse); if (!*entries) { @@ -2014,7 +2070,7 @@ done: * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = pack; + *packfile_out = packs[0].p; *reuse_out = reuse; return 0; } -- cgit v1.3 From 35e156b9de1dcc43673c6050cdb65735a7457c1a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:01 -0500 Subject: pack-bitmap: simplify `reuse_partial_packfile_from_bitmap()` signature The signature of `reuse_partial_packfile_from_bitmap()` currently takes in a bitmap, as well as three output parameters (filled through pointers, and passed as arguments), and also returns an integer result. The output parameters are filled out with: (a) the packfile used for pack-reuse, (b) the number of objects from that pack that we can reuse, and (c) a bitmap indicating which objects we can reuse. The return value is either -1 (when there are no objects to reuse), or 0 (when there is at least one object to reuse). Some of these parameters are redundant. Notably, we can infer from the bitmap how many objects are reused by calling bitmap_popcount(). And we can similar compute the return value based on that number as well. As such, clean up the signature of this function to drop the "*entries" parameter, as well as the int return value, since the single caller of this function can infer these values themself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 16 +++++++++------- pack-bitmap.c | 16 +++++++--------- pack-bitmap.h | 7 +++---- 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'pack-bitmap.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 321d7effb0..c3df6d9657 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3943,13 +3943,15 @@ static int get_object_list_from_bitmap(struct rev_info *revs) if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; - if (pack_options_allow_reuse() && - !reuse_partial_packfile_from_bitmap( - bitmap_git, - &reuse_packfile, - &reuse_packfile_objects, - &reuse_packfile_bitmap)) { - assert(reuse_packfile_objects); + if (pack_options_allow_reuse()) + reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfile, + &reuse_packfile_bitmap); + + if (reuse_packfile) { + reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); + if (!reuse_packfile_objects) + BUG("expected non-empty reuse bitmap"); + nr_result += reuse_packfile_objects; nr_seen += reuse_packfile_objects; display_progress(progress_state, nr_seen); diff --git a/pack-bitmap.c b/pack-bitmap.c index d64a80c30c..c75a83e9cc 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2000,10 +2000,9 @@ static int bitmapped_pack_cmp(const void *va, const void *vb) return 0; } -int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, - uint32_t *entries, - struct bitmap **reuse_out) +void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct packed_git **packfile_out, + struct bitmap **reuse_out) { struct repository *r = the_repository; struct bitmapped_pack *packs = NULL; @@ -2025,7 +2024,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, warning(_("unable to load pack: '%s', disabling pack-reuse"), bitmap_git->midx->pack_names[i]); free(packs); - return -1; + return; } if (!pack.bitmap_nr) continue; /* no objects from this pack */ @@ -2059,10 +2058,10 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse); - *entries = bitmap_popcount(reuse); - if (!*entries) { + if (bitmap_is_empty(reuse)) { + free(packs); bitmap_free(reuse); - return -1; + return; } /* @@ -2072,7 +2071,6 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, bitmap_and_not(result, reuse); *packfile_out = packs[0].p; *reuse_out = reuse; - return 0; } int bitmap_walk_contains(struct bitmap_index *bitmap_git, diff --git a/pack-bitmap.h b/pack-bitmap.h index b68b213388..ab3fdcde6b 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -78,10 +78,9 @@ int test_bitmap_hashes(struct repository *r); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects); uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); -int reuse_partial_packfile_from_bitmap(struct bitmap_index *, - struct packed_git **packfile, - uint32_t *entries, - struct bitmap **reuse_out); +void reuse_partial_packfile_from_bitmap(struct bitmap_index *, + struct packed_git **packfile, + struct bitmap **reuse_out); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); -- cgit v1.3 From 83296d20e84e248ea539fe1332fca2139cfcfb8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:04 -0500 Subject: pack-bitmap: return multiple packs via `reuse_partial_packfile_from_bitmap()` Further prepare for enabling verbatim pack-reuse over multiple packfiles by changing the signature of reuse_partial_packfile_from_bitmap() to populate an array of `struct bitmapped_pack *`'s instead of a pointer to a single packfile. Since the array we're filling out is sized dynamically[^1], add an additional `size_t *` parameter which will hold the number of reusable packs (equal to the number of elements in the array). Note that since we still have not implemented true multi-pack reuse, these changes aren't propagated out to the rest of the caller in builtin/pack-objects.c. In the interim state, we expect that the array has a single element, and we use that element to fill out the static `reuse_packfile` variable (which is a bog-standard `struct packed_git *`). Future commits will continue to push this change further out through the pack-objects code. [^1]: That is, even though we know the number of packs which are candidates for pack-reuse, we do not know how many of those candidates we can actually reuse. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 9 +++++++-- pack-bitmap.c | 6 ++++-- pack-bitmap.h | 5 +++-- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'pack-bitmap.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c3df6d9657..87e16636a8 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3940,14 +3940,19 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { + struct bitmapped_pack *packs = NULL; + size_t packs_nr = 0; + if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; if (pack_options_allow_reuse()) - reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfile, + reuse_partial_packfile_from_bitmap(bitmap_git, &packs, + &packs_nr, &reuse_packfile_bitmap); - if (reuse_packfile) { + if (packs) { + reuse_packfile = packs[0].p; reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); if (!reuse_packfile_objects) BUG("expected non-empty reuse bitmap"); diff --git a/pack-bitmap.c b/pack-bitmap.c index c75a83e9cc..4d5a484678 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2001,7 +2001,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb) } void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, + struct bitmapped_pack **packs_out, + size_t *packs_nr_out, struct bitmap **reuse_out) { struct repository *r = the_repository; @@ -2069,7 +2070,8 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = packs[0].p; + *packs_out = packs; + *packs_nr_out = packs_nr; *reuse_out = reuse; } diff --git a/pack-bitmap.h b/pack-bitmap.h index ab3fdcde6b..7a12a2ce81 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -78,8 +78,9 @@ int test_bitmap_hashes(struct repository *r); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects); uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); -void reuse_partial_packfile_from_bitmap(struct bitmap_index *, - struct packed_git **packfile, +void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct bitmapped_pack **packs_out, + size_t *packs_nr_out, struct bitmap **reuse_out); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); -- cgit v1.3 From b1e3333068247ddd44021a0b69457c249ddee7a1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:25 -0500 Subject: midx: implement `midx_preferred_pack()` When performing a binary search over the objects in a MIDX's bitmap (i.e. in pseudo-pack order), the reader reconstructs the pseudo-pack ordering using a combination of (a) the preferred pack, (b) the pack's lexical position in the MIDX based on pack names, and (c) the object offset within the pack. In order to perform this binary search, the reader must know the identity of the preferred pack. This could be stored in the MIDX, but isn't for historical reasons, mostly because it can easily be inferred at read-time by looking at the object in the first bit position and finding out which pack it was selected from in the MIDX, like so: nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0)); In midx_to_pack_pos() which performs this binary search, we look up the identity of the preferred pack before each search. This is relatively quick, since it involves two table-driven lookups (one in the MIDX's revindex for `pack_pos_to_midx()`, and another in the MIDX's object table for `nth_midxed_pack_int_id()`). But since the preferred pack does not change after the MIDX is written, it is safe to cache this value on the MIDX itself. Write a helper to do just that, and rewrite all of the existing call-sites that care about the identity of the preferred pack in terms of this new helper. This will prepare us for a subsequent patch where we will need to binary search through the MIDX's pseudo-pack order multiple times. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 20 ++++++++++++++++++++ midx.h | 2 ++ pack-bitmap.c | 17 +++++++---------- pack-bitmap.h | 1 - pack-revindex.c | 4 +++- t/helper/test-read-midx.c | 13 +++++-------- 6 files changed, 37 insertions(+), 20 deletions(-) (limited to 'pack-bitmap.c') diff --git a/midx.c b/midx.c index beaf0c0de4..85e1c2cd12 100644 --- a/midx.c +++ b/midx.c @@ -21,6 +21,7 @@ #include "refs.h" #include "revision.h" #include "list-objects.h" +#include "pack-revindex.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -177,6 +178,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); + m->preferred_pack_idx = -1; + cf = init_chunkfile(NULL); if (read_table_of_contents(cf, m->data, midx_size, @@ -460,6 +463,23 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) return midx_locate_pack(m, idx_or_pack_name, NULL); } +int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) +{ + if (m->preferred_pack_idx == -1) { + if (load_midx_revindex(m) < 0) { + m->preferred_pack_idx = -2; + return -1; + } + + m->preferred_pack_idx = + nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0)); + } else if (m->preferred_pack_idx == -2) + return -1; /* no revindex */ + + *pack_int_id = m->preferred_pack_idx; + return 0; +} + int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) { struct multi_pack_index *m; diff --git a/midx.h b/midx.h index 89c5aa637e..f87a8fff26 100644 --- a/midx.h +++ b/midx.h @@ -29,6 +29,7 @@ struct multi_pack_index { unsigned char num_chunks; uint32_t num_packs; uint32_t num_objects; + int preferred_pack_idx; int local; @@ -74,6 +75,7 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); int midx_locate_pack(struct multi_pack_index *m, const char *idx_or_pack_name, uint32_t *pos); +int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); /* diff --git a/pack-bitmap.c b/pack-bitmap.c index 4d5a484678..1682f99596 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -338,7 +338,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, struct stat st; char *bitmap_name = midx_bitmap_filename(midx); int fd = git_open(bitmap_name); - uint32_t i; + uint32_t i, preferred_pack; struct packed_git *preferred; if (fd < 0) { @@ -393,7 +393,12 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, } } - preferred = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; + if (midx_preferred_pack(bitmap_git->midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); + goto cleanup; + } + + preferred = bitmap_git->midx->packs[preferred_pack]; if (!is_pack_valid(preferred)) { warning(_("preferred pack (%s) is invalid"), preferred->pack_name); @@ -1926,14 +1931,6 @@ static int try_partial_reuse(struct bitmapped_pack *pack, return 0; } -uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) -{ - struct multi_pack_index *m = bitmap_git->midx; - if (!m) - BUG("midx_preferred_pack: requires non-empty MIDX"); - return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); -} - static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git, struct bitmapped_pack *pack, struct bitmap *reuse) diff --git a/pack-bitmap.h b/pack-bitmap.h index 7a12a2ce81..179b343912 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -77,7 +77,6 @@ int test_bitmap_hashes(struct repository *r); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects); -uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, diff --git a/pack-revindex.c b/pack-revindex.c index acf1dd9786..7dc6c776d5 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -542,7 +542,9 @@ int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos) * implicitly is preferred (and includes all its objects, since ties are * broken first by pack identifier). */ - key.preferred_pack = nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0)); + if (midx_preferred_pack(key.midx, &key.preferred_pack) < 0) + return error(_("could not determine preferred pack")); + found = bsearch(&key, m->revindex_data, m->num_objects, sizeof(*m->revindex_data), midx_pack_order_cmp); diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index e48557aba1..4acae41bb9 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -6,6 +6,7 @@ #include "pack-bitmap.h" #include "packfile.h" #include "setup.h" +#include "gettext.h" static int read_midx_file(const char *object_dir, int show_objects) { @@ -79,7 +80,7 @@ static int read_midx_checksum(const char *object_dir) static int read_midx_preferred_pack(const char *object_dir) { struct multi_pack_index *midx = NULL; - struct bitmap_index *bitmap = NULL; + uint32_t preferred_pack; setup_git_directory(); @@ -87,16 +88,12 @@ static int read_midx_preferred_pack(const char *object_dir) if (!midx) return 1; - bitmap = prepare_bitmap_git(the_repository); - if (!bitmap) - return 1; - if (!bitmap_is_midx(bitmap)) { - free_bitmap_index(bitmap); + if (midx_preferred_pack(midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); return 1; } - printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]); - free_bitmap_index(bitmap); + printf("%s\n", midx->pack_names[preferred_pack]); return 0; } -- cgit v1.3 From 519e17ff75180c3e94a7c120c3028b926f2a781e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:34 -0500 Subject: pack-bitmap: prepare to mark objects from multiple packs for reuse Now that the pack-objects code is equipped to handle reusing objects from multiple packs, prepare the pack-bitmap code to mark objects from multiple packs as reuse candidates. In order to prepare the pack-bitmap code for this change, remove the same set of assumptions we unwound in previous commits from the helper function `reuse_partial_packfile_from_bitmap_1()`, in preparation for it to be called in a loop over the set of bitmapped packs in a following commit. Most importantly, we can no longer assume that the bit position corresponding to the first object in a given reuse pack candidate is at the beginning of the bitmap itself. For the single pack that this assumption is still true for (in MIDX bitmaps, this is the preferred pack, in single-pack bitmaps it is the pack the bitmap is tied to), we can still use our whole-words optimization. But for all subsequent packs, we can not make use of this optimization, since it assumes that all delta bases are being sent from the same pack, which would break if we are sending OFS_DELTAs down to the client. To understand why, consider two packs, P1 and P2 where: - P1 has object A which is a delta on base B - P2 has its own copy of B, in addition to other objects Suppose that the MIDX which covers P1 and P2 selected its copy of A from P1, but selected its copy of B from P2. Since A is a delta of B, but the base was selected from a different pack, sending the bytes corresponding to A as an OFS_DELTA verbatim from P1 would be incorrect, since we don't guarantee that B is in the same place relative to A in the generated pack as in P1. For now, we detect and reject these cross-pack deltas by searching for the (pack_id, offset) pair for the delta's base object (using the same pack_id as the pack containing the delta'd object) in the MIDX. If we find a match, that means that the MIDX did indeed pick the base object from the same pack, and we are OK to reuse the delta. If we don't find a match, however, that means that the base object was selected from a different pack in the MIDX, and we can let the slower path handle re-delta'ing our candidate object. In the future, there are a couple of other things we could do, namely: - Turn any cross-pack deltas (which are stored as OFS_DELTAs) into REF_DELTAs. We already do this today when reusing an OFS_DELTA without `--delta-base-offset` enabled, so it's not a huge stretch to do the same for cross-pack deltas even when `--delta-base-offset` is enabled. This would work, but would obviously result in larger-than-necessary packs, as we in theory *could* represent these cross-pack deltas by patching an existing OFS_DELTA. But it's not clear how much that would matter in practice. I suspect it would have a lot to do with how you pack your repository in the first place. - Finally, we could patch OFS_DELTAs across packs in a similar fashion as we do today for OFS_DELTAs within a single pack on either side of a gap. This would result in the smallest packs of the three options here, but implementing this would be more involved. At minimum, you'd have to keep the reusable chunks list for all reused packs, not just the one we're currently processing. And you'd have to ensure that any bases which are a part of cross-pack deltas appear before the delta. I think this is possible to do, but would require assembling the reusable chunks list potentially in a different order than they appear in the source packs. For now, let's pursue the simplest approach and reject any cross-pack deltas. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 172 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 66 deletions(-) (limited to 'pack-bitmap.c') diff --git a/pack-bitmap.c b/pack-bitmap.c index 1682f99596..242a5908f7 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1841,8 +1841,10 @@ cleanup: * -1 means "stop trying further objects"; 0 means we may or may not have * reused, but you can keep feeding bits. */ -static int try_partial_reuse(struct bitmapped_pack *pack, - size_t pos, +static int try_partial_reuse(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + size_t bitmap_pos, + uint32_t pack_pos, struct bitmap *reuse, struct pack_window **w_curs) { @@ -1850,33 +1852,10 @@ static int try_partial_reuse(struct bitmapped_pack *pack, enum object_type type; unsigned long size; - /* - * try_partial_reuse() is called either on (a) objects in the - * bitmapped pack (in the case of a single-pack bitmap) or (b) - * objects in the preferred pack of a multi-pack bitmap. - * Importantly, the latter can pretend as if only a single pack - * exists because: - * - * - The first pack->num_objects bits of a MIDX bitmap are - * reserved for the preferred pack, and - * - * - Ties due to duplicate objects are always resolved in - * favor of the preferred pack. - * - * Therefore we do not need to ever ask the MIDX for its copy of - * an object by OID, since it will always select it from the - * preferred pack. Likewise, the selected copy of the base - * object for any deltas will reside in the same pack. - * - * This means that we can reuse pos when looking up the bit in - * the reuse bitmap, too, since bits corresponding to the - * preferred pack precede all bits from other packs. - */ + if (pack_pos >= pack->p->num_objects) + return -1; /* not actually in the pack */ - if (pos >= pack->p->num_objects) - return -1; /* not actually in the pack or MIDX preferred pack */ - - offset = delta_obj_offset = pack_pos_to_offset(pack->p, pos); + offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos); type = unpack_object_header(pack->p, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ @@ -1884,6 +1863,7 @@ static int try_partial_reuse(struct bitmapped_pack *pack, if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; uint32_t base_pos; + uint32_t base_bitmap_pos; /* * Find the position of the base object so we can look it up @@ -1897,20 +1877,44 @@ static int try_partial_reuse(struct bitmapped_pack *pack, delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(pack->p, base_offset, &base_pos) < 0) - return 0; - /* - * We assume delta dependencies always point backwards. This - * lets us do a single pass, and is basically always true - * due to the way OFS_DELTAs work. You would not typically - * find REF_DELTA in a bitmapped pack, since we only bitmap - * packs we write fresh, and OFS_DELTA is the default). But - * let's double check to make sure the pack wasn't written with - * odd parameters. - */ - if (base_pos >= pos) - return 0; + offset_to_pack_pos(pack->p, base_offset, &base_pos); + + if (bitmap_is_midx(bitmap_git)) { + /* + * Cross-pack deltas are rejected for now, but could + * theoretically be supported in the future. + * + * We would need to ensure that we're sending both + * halves of the delta/base pair, regardless of whether + * or not the two cross a pack boundary. If they do, + * then we must convert the delta to an REF_DELTA to + * refer back to the base in the other pack. + * */ + if (midx_pair_to_pack_pos(bitmap_git->midx, + pack->pack_int_id, + base_offset, + &base_bitmap_pos) < 0) { + return 0; + } + } else { + if (offset_to_pack_pos(pack->p, base_offset, + &base_pos) < 0) + return 0; + /* + * We assume delta dependencies always point backwards. + * This lets us do a single pass, and is basically + * always true due to the way OFS_DELTAs work. You would + * not typically find REF_DELTA in a bitmapped pack, + * since we only bitmap packs we write fresh, and + * OFS_DELTA is the default). But let's double check to + * make sure the pack wasn't written with odd + * parameters. + */ + if (base_pos >= pack_pos) + return 0; + base_bitmap_pos = pack->bitmap_pos + base_pos; + } /* * And finally, if we're not sending the base as part of our @@ -1920,14 +1924,14 @@ static int try_partial_reuse(struct bitmapped_pack *pack, * to REF_DELTA on the fly. Better to just let the normal * object_entry code path handle it. */ - if (!bitmap_get(reuse, pack->bitmap_pos + base_pos)) + if (!bitmap_get(reuse, base_bitmap_pos)) return 0; } /* * If we got here, then the object is OK to reuse. Mark it. */ - bitmap_set(reuse, pack->bitmap_pos + pos); + bitmap_set(reuse, bitmap_pos); return 0; } @@ -1937,36 +1941,72 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git { struct bitmap *result = bitmap_git->result; struct pack_window *w_curs = NULL; - size_t i = 0; - - while (i < result->word_alloc && result->words[i] == (eword_t)~0) - i++; - - /* - * Don't mark objects not in the packfile or preferred pack. This bitmap - * marks objects eligible for reuse, but the pack-reuse code only - * understands how to reuse a single pack. Since the preferred pack is - * guaranteed to have all bases for its deltas (in a multi-pack bitmap), - * we use it instead of another pack. In single-pack bitmaps, the choice - * is made for us. - */ - if (i > pack->p->num_objects / BITS_IN_EWORD) - i = pack->p->num_objects / BITS_IN_EWORD; + size_t pos = pack->bitmap_pos / BITS_IN_EWORD; - memset(reuse->words, 0xFF, i * sizeof(eword_t)); + if (!pack->bitmap_pos) { + /* + * If we're processing the first (in the case of a MIDX, the + * preferred pack) or the only (in the case of single-pack + * bitmaps) pack, then we can reuse whole words at a time. + * + * This is because we know that any deltas in this range *must* + * have their bases chosen from the same pack, since: + * + * - In the single pack case, there is no other pack to choose + * them from. + * + * - In the MIDX case, the first pack is the preferred pack, so + * all ties are broken in favor of that pack (i.e. the one + * we're currently processing). So any duplicate bases will be + * resolved in favor of the pack we're processing. + */ + while (pos < result->word_alloc && + pos < pack->bitmap_nr / BITS_IN_EWORD && + result->words[pos] == (eword_t)~0) + pos++; + memset(reuse->words, 0xFF, pos * sizeof(eword_t)); + } - for (; i < result->word_alloc; ++i) { - eword_t word = result->words[i]; - size_t pos = (i * BITS_IN_EWORD); + for (; pos < result->word_alloc; pos++) { + eword_t word = result->words[pos]; size_t offset; - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + size_t bit_pos; + uint32_t pack_pos; + + if (word >> offset == 0) break; offset += ewah_bit_ctz64(word >> offset); - if (try_partial_reuse(pack, pos + offset, - reuse, &w_curs) < 0) { + + bit_pos = pos * BITS_IN_EWORD + offset; + if (bit_pos < pack->bitmap_pos) + continue; + if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr) + goto done; + + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos; + off_t ofs; + + midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); + + if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0) + BUG("could not find object in pack %s " + "at offset %"PRIuMAX" in MIDX", + pack_basename(pack->p), (uintmax_t)ofs); + } else { + pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos)); + if (pack_pos >= pack->p->num_objects) + BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")", + pack_basename(pack->p), (uintmax_t)pack_pos, + pack->p->num_objects); + } + + if (try_partial_reuse(bitmap_git, pack, bit_pos, + pack_pos, reuse, &w_curs) < 0) { /* * try_partial_reuse indicated we couldn't reuse * any bits, so there is no point in trying more -- cgit v1.3 From af626ac0e02570e3afac8b4238199157181d43c2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:44 -0500 Subject: pack-bitmap: enable reuse from all bitmapped packs Now that both the pack-bitmap and pack-objects code are prepared to handle marking and using objects from multiple bitmapped packs for verbatim reuse, allow marking objects from all bitmapped packs as eligible for reuse. Within the `reuse_partial_packfile_from_bitmap()` function, we no longer only mark the pack whose first object is at bit position zero for reuse, and instead mark any pack contained in the MIDX as a reuse candidate. Provide a handful of test cases in a new script (t5332) exercising interesting behavior for multi-pack reuse to ensure that we performed all of the previous steps correctly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/pack.txt | 16 ++-- builtin/pack-objects.c | 6 +- pack-bitmap.c | 34 ++++--- pack-bitmap.h | 3 +- t/t5332-multi-pack-reuse.sh | 203 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 245 insertions(+), 17 deletions(-) create mode 100755 t/t5332-multi-pack-reuse.sh (limited to 'pack-bitmap.c') diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index fe100d0fb7..9c630863e6 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option to linkgit:git-repack[1]. pack.allowPackReuse:: - When true or "single", and when reachability bitmaps are enabled, - pack-objects will try to send parts of the bitmapped packfile - verbatim. This can reduce memory and CPU usage to serve fetches, - but might result in sending a slightly larger pack. Defaults to - true. + When true or "single", and when reachability bitmaps are + enabled, pack-objects will try to send parts of the bitmapped + packfile verbatim. When "multi", and when a multi-pack + reachability bitmap is available, pack-objects will try to send + parts of all packs in the MIDX. ++ + If only a single pack bitmap is available, and + `pack.allowPackReuse` is set to "multi", reuse parts of just the + bitmapped packfile. This can reduce memory and CPU usage to + serve fetches, but might result in sending a slightly larger + pack. Defaults to true. pack.island:: An extended regular expression configuring a set of delta diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 684698f679..5d3c42035b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -232,6 +232,7 @@ static int use_bitmap_index = -1; static enum { NO_PACK_REUSE = 0, SINGLE_PACK_REUSE, + MULTI_PACK_REUSE, } allow_pack_reuse = SINGLE_PACK_REUSE; static enum { WRITE_BITMAP_FALSE = 0, @@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v, if (res < 0) { if (!strcasecmp(v, "single")) allow_pack_reuse = SINGLE_PACK_REUSE; + else if (!strcasecmp(v, "multi")) + allow_pack_reuse = MULTI_PACK_REUSE; else die(_("invalid pack.allowPackReuse value: '%s'"), v); } else if (res) { @@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs) reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfiles, &reuse_packfiles_nr, - &reuse_packfile_bitmap); + &reuse_packfile_bitmap, + allow_pack_reuse == MULTI_PACK_REUSE); if (reuse_packfiles) { reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); diff --git a/pack-bitmap.c b/pack-bitmap.c index 242a5908f7..229a11fb00 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2040,7 +2040,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb) void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, - struct bitmap **reuse_out) + struct bitmap **reuse_out, + int multi_pack_reuse) { struct repository *r = the_repository; struct bitmapped_pack *packs = NULL; @@ -2064,15 +2065,30 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, free(packs); return; } + if (!pack.bitmap_nr) - continue; /* no objects from this pack */ - if (pack.bitmap_pos) - continue; /* not preferred pack */ + continue; + + if (!multi_pack_reuse && pack.bitmap_pos) { + /* + * If we're only reusing a single pack, skip + * over any packs which are not positioned at + * the beginning of the MIDX bitmap. + * + * This is consistent with the existing + * single-pack reuse behavior, which only reuses + * parts of the MIDX's preferred pack. + */ + continue; + } ALLOC_GROW(packs, packs_nr + 1, packs_alloc); memcpy(&packs[packs_nr++], &pack, sizeof(pack)); objects_nr += pack.p->num_objects; + + if (!multi_pack_reuse) + break; } QSORT(packs, packs_nr, bitmapped_pack_cmp); @@ -2080,10 +2096,10 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, ALLOC_GROW(packs, packs_nr + 1, packs_alloc); packs[packs_nr].p = bitmap_git->pack; - packs[packs_nr].bitmap_pos = 0; packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects; + packs[packs_nr].bitmap_pos = 0; - objects_nr = packs[packs_nr++].p->num_objects; + objects_nr = packs[packs_nr++].bitmap_nr; } word_alloc = objects_nr / BITS_IN_EWORD; @@ -2091,10 +2107,8 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, word_alloc++; reuse = bitmap_word_alloc(word_alloc); - if (packs_nr != 1) - BUG("pack reuse not yet implemented for multiple packs"); - - reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse); + for (i = 0; i < packs_nr; i++) + reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); if (bitmap_is_empty(reuse)) { free(packs); diff --git a/pack-bitmap.h b/pack-bitmap.h index 179b343912..c7dea13217 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -80,7 +80,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, - struct bitmap **reuse_out); + struct bitmap **reuse_out, + int multi_pack_reuse); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh new file mode 100755 index 0000000000..2ba788b042 --- /dev/null +++ b/t/t5332-multi-pack-reuse.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='pack-objects multi-pack reuse' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-bitmap.sh + +objdir=.git/objects +packdir=$objdir/pack + +test_pack_reused () { + test_trace2_data pack-objects pack-reused "$1" +} + +test_packs_reused () { + test_trace2_data pack-objects packs-reused "$1" +} + + +# pack_position objects && + grep "$1" objects | cut -d" " -f1 +} + +test_expect_success 'preferred pack is reused for single-pack reuse' ' + test_config pack.allowPackReuse single && + + for i in A B + do + test_commit "$i" && + git repack -d || return 1 + done && + + git multi-pack-index write --bitmap && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 3 in <<-EOF && + $(git rev-parse C) + ^$(git rev-parse A) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs /dev/null && + + test_pack_reused 6 trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 9 in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + test_pack_reused 3 in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + test_pack_reused 3 f && + git add f && + test_tick && + git commit -m "delta" && + delta="$(git rev-parse HEAD)" && + + test_seq 64 >f && + test_tick && + git commit -a -m "base" && + base="$(git rev-parse HEAD)" && + + test_commit other && + + git repack -d && + + have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" && + + git multi-pack-index write --bitmap && + + cat >in <<-EOF && + $(git rev-parse other) + ^$base + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + # We can only reuse the 3 objects corresponding to "other" from + # the latest pack. + # + # This is because even though we want "delta", we do not want + # "base", meaning that we have to inflate the delta/base-pair + # corresponding to the blob in commit "delta", which bypasses + # the pack-reuse mechanism. + # + # The remaining objects from the other pack are similarly not + # reused because their objects are on the uninteresting side of + # the query. + test_pack_reused 3 in <<-EOF && + $(git rev-parse $base) + ^$(git rev-parse $delta) + EOF + + P="$(git pack-objects --revs $packdir/pack trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --all >/dev/null && + + packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" && + objects_nr="$(git rev-list --count --all --objects)" && + + test_pack_reused $(($objects_nr - 1))