From 3f7c0e722e2733aede32b1e531caf83e7043d1bd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:51 -0400 Subject: pack-objects: support excluded-open packs with --stdin-packs In cd846bacc7d (pack-objects: introduce '--stdin-packs=follow', 2025-06-23), pack-objects learned to traverse through commits in included packs when using '--stdin-packs=follow', rescuing reachable objects from unlisted packs into the output. When we encounter a commit in an excluded pack during this rescuing phase we will traverse through its parents. But because we set `revs.no_kept_objects = 1`, commit simplification will prevent us from showing it via `get_revision()`. (In practice, `--stdin-packs=follow` walks commits down to the roots, but only opens up trees for ones that do not appear in an excluded pack.) But there are certain cases where we *do* need to see the parents of an object in an excluded pack. Namely, if an object is rescue-able, but only reachable from object(s) which appear in excluded packs, then commit simplification will exclude those commits from the object traversal, and we will never see a copy of that object, and thus not rescue it. This is what causes the failure in the previous commit during repacking. When performing a geometric repack, packs above the geometric split that weren't part of the previous MIDX (e.g., packs pushed directly into `$GIT_DIR/objects/pack`) may not have full object closure. When those packs are listed as excluded via the '^' marker, the reachability traversal encounters the sequence described above, and may miss objects which we expect to rescue with `--stdin-packs=follow`. Introduce a new "excluded-open" pack prefix, '!'. Like '^'-prefixed packs, objects from '!'-prefixed packs are excluded from the resulting pack. But unlike '^', commits in '!'-prefixed packs *are* used as starting points for the follow traversal, and the traversal does not treat them as a closure boundary. 
In order to distinguish excluded-closed from excluded-open packs during the traversal, introduce a new `pack_keep_in_core_open` bit on `struct packed_git`, along with a corresponding `KEPT_PACK_IN_CORE_OPEN` flag for the kept-pack cache. In `add_object_entry_from_pack()`, move the `want_object_in_pack()` check to *after* `add_pending_oid()`. This is necessary so that commits from excluded-open packs are added as traversal tips even though their objects won't appear in the output. As a consequence, the caller `for_each_object_in_pack()` will always provide a non-NULL 'p', hence we are able to drop the "if (p)" conditional. The `include_check` and `include_check_obj` callbacks on `rev_info` are used to halt the walk at excluded-closed packs, since objects behind a '^' boundary are guaranteed to have closure and need not be rescued. The following commit will make use of this new functionality within the repack layer to resolve the test failure demonstrated in the previous commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 116 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 25 deletions(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 945100b405..7b97784d6c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -217,6 +217,7 @@ static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_open; static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; @@ -1618,7 +1619,8 @@ static int want_found_object(const struct object_id *oid, int exclude, /* * Then handle .keep first, as we have a fast(er) path there. 
*/ - if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { + if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core || + ignore_packed_keep_in_core_open) { /* * Set the flags for the kept-pack cache to be the ones we want * to ignore. @@ -1632,6 +1634,8 @@ static int want_found_object(const struct object_id *oid, int exclude, flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) flags |= KEPT_PACK_IN_CORE; + if (ignore_packed_keep_in_core_open) + flags |= KEPT_PACK_IN_CORE_OPEN; /* * If the object is in a pack that we want to ignore, *and* we @@ -1643,6 +1647,8 @@ static int want_found_object(const struct object_id *oid, int exclude, return 0; if (ignore_packed_keep_in_core && p->pack_keep_in_core) return 0; + if (ignore_packed_keep_in_core_open && p->pack_keep_in_core_open) + return 0; if (has_object_kept_pack(p->repo, oid, flags)) return 0; } else { @@ -3742,6 +3748,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, void *_data) { off_t ofs; + struct object_info oi = OBJECT_INFO_INIT; enum object_type type = OBJ_NONE; display_progress(progress_state, ++nr_seen); @@ -3749,29 +3756,34 @@ static int add_object_entry_from_pack(const struct object_id *oid, if (have_duplicate_entry(oid, 0)) return 0; - ofs = nth_packed_object_offset(p, pos); - if (!want_object_in_pack(oid, 0, &p, &ofs)) - return 0; + stdin_packs_found_nr++; - if (p) { - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = &type; - if (packed_object_info(p, ofs, &oi) < 0) { - die(_("could not get type of object %s in pack %s"), - oid_to_hex(oid), p->pack_name); - } else if (type == OBJ_COMMIT) { - struct rev_info *revs = _data; - /* - * commits in included packs are used as starting points for the - * subsequent revision walk - */ - add_pending_oid(revs, NULL, oid, 0); - } + ofs = nth_packed_object_offset(p, pos); - stdin_packs_found_nr++; + oi.typep = &type; + if (packed_object_info(p, ofs, &oi) < 0) { + die(_("could not get type of object %s in pack %s"), 
+ oid_to_hex(oid), p->pack_name); + } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; + /* + * commits in included packs are used as starting points + * for the subsequent revision walk + * + * Note that we do want to walk through commits that are + * present in excluded-open ('!') packs to pick up any + * objects reachable from them not present in the + * excluded-closed ('^') packs. + * + * However, we'll only add those objects to the packing + * list after checking `want_object_in_pack()` below. + */ + add_pending_oid(revs, NULL, oid, 0); } + if (!want_object_in_pack(oid, 0, &p, &ofs)) + return 0; + create_object_entry(oid, type, 0, 0, 0, p, ofs); return 0; @@ -3832,12 +3844,18 @@ static void show_commit_pack_hint(struct commit *commit, void *data) * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag * bit set should be excluded from the output pack. * - * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are - * used as traversal tips when invoked with --stdin-packs=follow. + * - STDIN_PACK_EXCLUDE_OPEN: objects in any packs with this flag + * bit set should be excluded from the output pack, but are not + * guaranteed to be closed under reachability. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE or + * STDIN_PACK_EXCLUDE_OPEN are used as traversal tips when invoked + * with --stdin-packs=follow. 
*/ enum stdin_pack_info_kind { STDIN_PACK_INCLUDE = (1<<0), STDIN_PACK_EXCLUDE_CLOSED = (1<<1), + STDIN_PACK_EXCLUDE_OPEN = (1<<2), }; struct stdin_pack_info { @@ -3862,6 +3880,17 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } +static int stdin_packs_include_check_obj(struct object *obj, void *data UNUSED) +{ + return !has_object_kept_pack(to_pack.repo, &obj->oid, + KEPT_PACK_IN_CORE); +} + +static int stdin_packs_include_check(struct commit *commit, void *data) +{ + return stdin_packs_include_check_obj((struct object *)commit, data); +} + static void stdin_packs_add_pack_entries(struct strmap *packs, struct rev_info *revs) { @@ -3888,7 +3917,19 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, for_each_string_list_item(item, &keys) { struct stdin_pack_info *info = item->util; - if (info->kind & STDIN_PACK_INCLUDE) + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * When open-excluded packs ("!") are present, stop + * the parent walk at closed-excluded ("^") packs. + * Objects behind a "^" boundary are guaranteed to + * have closure and should not be rescued. + */ + revs->include_check = stdin_packs_include_check; + revs->include_check_obj = stdin_packs_include_check_obj; + } + + if ((info->kind & STDIN_PACK_INCLUDE) || + (info->kind & STDIN_PACK_EXCLUDE_OPEN)) for_each_object_in_pack(info->p, add_object_entry_from_pack, revs, @@ -3898,7 +3939,8 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, string_list_clear(&keys, 0); } -static void stdin_packs_read_input(struct rev_info *revs) +static void stdin_packs_read_input(struct rev_info *revs, + enum stdin_packs_mode mode) { struct strbuf buf = STRBUF_INIT; struct strmap packs = STRMAP_INIT; @@ -3913,6 +3955,8 @@ static void stdin_packs_read_input(struct rev_info *revs) continue; else if (*key == '^') kind = STDIN_PACK_EXCLUDE_CLOSED; + else if (*key == '!' 
&& mode == STDIN_PACKS_MODE_FOLLOW) + kind = STDIN_PACK_EXCLUDE_OPEN; if (kind != STDIN_PACK_INCLUDE) key++; @@ -3959,6 +4003,20 @@ static void stdin_packs_read_input(struct rev_info *revs) p->pack_keep_in_core = 1; } + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * Marking excluded open packs as kept in-core + * (open) for the same reason as we marked + * exclude closed packs as kept in-core. + * + * Use a separate flag here to ensure we don't + * halt our traversal at these packs, since they + * are not guaranteed to have closure. + * + */ + p->pack_keep_in_core_open = 1; + } + info->p = p; } @@ -4002,7 +4060,15 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - stdin_packs_read_input(&revs); + if (mode == STDIN_PACKS_MODE_FOLLOW) { + /* + * In '--stdin-packs=follow' mode, additionally ignore + * objects in excluded-open packs to prevent them from + * appearing in the resulting pack. + */ + ignore_packed_keep_in_core_open = 1; + } + stdin_packs_read_input(&revs, mode); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); -- cgit v1.3