From 2cebca05824057493f4b2ef9cd86333a04ed4a7e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Oct 2025 17:56:01 -0400 Subject: builtin/cat-file.c: simplify calling `report_object_status()` In b0b910e052 (cat-file.c: add batch handling for submodules, 2025-06-02), we began handling submodule entries specially when batching cat-file like so: $ echo :sha1collisiondetection | git.compile cat-file --batch-check 855827c583bc30645ba427885caa40c5b81764d2 submodule Commit b0b910e052 notes that submodules are handled differently than non-existent objects, which print "<object> missing", since there is (a) no object to resolve the OID of in the first place, and as commit b0b910e052 notes, (b) for submodules in particular, it is useful to know what commit it points at without having to spawn another Git process. That commit does so by calling report_object_status() and passing in "oid_to_hex(&data->oid)" for the "obj_name" parameter. This is unnecessary, however, since report_object_status() will do the same automatically if given a NULL "obj_name" argument. That behavior dates back to 6a951937ae (cat-file: add --batch-all-objects option, 2015-06-22), so rely on that instead of having the caller open-code that part of report_object_status(). 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 4b23fcecbd..71b94c8b3f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -497,7 +497,7 @@ static void batch_object_write(const char *obj_name, OBJECT_INFO_LOOKUP_REPLACE); if (ret < 0) { if (data->mode == S_IFGITLINK) - report_object_status(opt, oid_to_hex(&data->oid), &data->oid, "submodule"); + report_object_status(opt, NULL, &data->oid, "submodule"); else report_object_status(opt, obj_name, &data->oid, "missing"); return; -- cgit v1.3-5-g9baa From 20b4eeddce165f11d7c5bffb1ecb69017df4a05e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:18 -0400 Subject: builtin/repack.c: avoid "the_repository" in `cmd_repack()` Reduce builtin/repack.c's reliance on `the_repository` by using the currently-UNUSED "repo" parameter within cmd_repack(). The following commits will continue to reduce the usage of the_repository in other places within builtin/repack.c. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c5..305782b2c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1247,7 +1247,7 @@ static const char *find_pack_prefix(const char *packdir, const char *packtmp) int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1344,7 +1344,7 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + repo_config(repo, repack_config, &cruft_po_args); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +1354,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1376,7 +1376,7 @@ int cmd_repack(int argc, die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,7 +1391,8 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = 
xmks_tempfile(path.buf); @@ -1400,7 +1401,7 @@ int cmd_repack(int argc, strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); @@ -1439,7 +1440,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1535,7 +1536,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } @@ -1618,9 +1619,9 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { + if (get_multi_pack_index(repo->objects->sources)) { struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); + get_multi_pack_index(repo->objects->sources); ALLOC_ARRAY(midx_pack_names, m->num_packs + m->num_packs_in_base); @@ -1631,7 +1632,7 @@ int cmd_repack(int argc, xstrdup(m->pack_names[i]); } - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. 
@@ -1688,7 +1689,7 @@ int cmd_repack(int argc, goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; @@ -1704,18 +1705,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } -- cgit v1.3-5-g9baa From df3a499bd6b951a9e23894793afec11f0850834a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:21 -0400 Subject: builtin/repack.c: avoid "the_repository" in existing packs API There are a number of spots within builtin/repack.c which refer to "the_repository", and either make use of the "existing packs" API or otherwise have a 'struct existing_packs *' in scope. Add a "repo" member to "struct existing_packs" and use that instead of "the_repository" in such locations. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 305782b2c9..7223553bed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -126,6 +126,7 @@ static void pack_objects_args_release(struct pack_objects_args *args) } struct existing_packs { + struct repository *repo; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; @@ -265,7 +266,7 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -498,7 +499,7 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -1139,7 +1140,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; @@ -1405,6 +1406,7 @@ int cmd_repack(int argc, packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); + existing.repo = repo; collect_pack_filenames(&existing, &keep_pack_list); if (geometry.split_factor) { -- cgit v1.3-5-g9baa From 
94d99de7724bce0325de8293fa1c2312d5960d7c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:24 -0400 Subject: builtin/repack.c: avoid "the_repository" when taking a ref snapshot Avoid using "the_repository" in various MIDX-related ref snapshotting functions. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 7223553bed..113f5fc67f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -771,6 +771,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, } struct midx_snapshot_ref_data { + struct repository *repo; struct tempfile *f; struct oidset seen; int preferred; @@ -784,13 +785,13 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, struct midx_snapshot_ref_data *data = _data; struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(data->repo, oid, &peeled)) oid = &peeled; if (oidset_insert(&data->seen, oid)) return 0; /* already seen */ - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? 
"+" : "", @@ -799,11 +800,12 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, return 0; } -static void midx_snapshot_refs(struct tempfile *f) +static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) { struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); + const struct string_list *preferred = bitmap_preferred_tips(repo); + data.repo = repo; data.f = f; data.preferred = 0; oidset_init(&data.seen, 0); @@ -817,13 +819,13 @@ static void midx_snapshot_refs(struct tempfile *f) data.preferred = 1; for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), + refs_for_each_ref_in(get_main_ref_store(repo), item->string, midx_snapshot_ref_one, &data); data.preferred = 0; } - refs_for_each_ref(get_main_ref_store(the_repository), + refs_for_each_ref(get_main_ref_store(repo), midx_snapshot_ref_one, &data); if (close_tempfile_gently(f)) { @@ -1397,7 +1399,7 @@ int cmd_repack(int argc, "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } -- cgit v1.3-5-g9baa From 03015747584e9f96c7ad6b57ecd99aa694312333 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:27 -0400 Subject: builtin/repack.c: avoid "the_repository" when removing packs The 'remove_redundant_pack()' function uses "the_repository" to obtain, and optionally remove, the repository's MIDX. Instead of relying on "the_repository", pass around a "struct repository *" parameter through its callers, and use that instead. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 113f5fc67f..93802531e1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,33 +221,35 @@ static void mark_packs_for_deletion(struct existing_packs *existing, mark_packs_for_deletion_1(names, &existing->cruft_packs); } -static void remove_redundant_pack(const char *dir_name, const char *base_name) +static void remove_redundant_pack(struct repository *repo, + const char *dir_name, const char *base_name) { struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; + struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); + clear_midx_file(repo); strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } -static void remove_redundant_packs_1(struct string_list *packs) +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(packdir, item->string); + remove_redundant_pack(repo, packdir, item->string); } } static void remove_redundant_existing_packs(struct existing_packs *existing) { - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs); } static void existing_packs_release(struct existing_packs *existing) @@ -685,7 +687,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, 
(string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(packdir, buf.buf); + remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); -- cgit v1.3-5-g9baa From cae9e2abbd8fb2fd483e101275cee15ef27d5953 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:30 -0400 Subject: builtin/repack.c: avoid "the_repository" when repacking promisor objects Pass a "struct repository" pointer to the 'repack_promisor_objects()' function to avoid using "the_repository". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 93802531e1..4f08b57ddb 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -407,7 +407,8 @@ static int has_pack_ext(const struct generated_pack_data *data, BUG("unknown pack extension: '%s'", ext); } -static void repack_promisor_objects(const struct pack_objects_args *args, +static void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, struct string_list *names) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -424,7 +425,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. 
*/ - for_each_packed_object(the_repository, write_oid, &cmd, + for_each_packed_object(repo, write_oid, &cmd, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { @@ -1458,7 +1459,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names); if (has_existing_non_kept_packs(&existing) && delete_redundant && -- cgit v1.3-5-g9baa From 3758052c0f43fd01d25fc7381c7939daba66c015 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:33 -0400 Subject: builtin/repack.c: avoid "the_hash_algo" when deleting packs The "mark_packs_for_deletion_1" function uses "the_hash_algo->hexsz" to isolate a pack's checksum before deleting it to avoid deleting a newly written pack having the same checksum (that is, some generated pack wound up identical to an existing pack). Avoid this by passing down a "struct git_hash_algo" pointer, and refer to the hash algorithm through it instead. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 4f08b57ddb..094f5a0cc2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -168,11 +168,12 @@ static int pack_is_retained(struct string_list_item *item) return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(struct string_list *names, +static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, struct string_list *list) { struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; + const int hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -217,8 +218,9 @@ static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) { - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); + const struct git_hash_algo *algop = existing->repo->hash_algo; + mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); + mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, -- cgit v1.3-5-g9baa From 9a53583b77c35576f87b7e29cb109b46d29ad803 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:36 -0400 Subject: builtin/repack.c: avoid "the_hash_algo" in `write_oid()` In a similar spirit as the previous commit, avoid referring directly to "the_hash_algo" within builtin/repack.c::write_oid(). Unlike the previous commit, we are within a callback function, so must introduce a new struct to pass additional data through its "data" pointer. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 094f5a0cc2..7d62959dc2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -339,6 +339,11 @@ static void prepare_pack_objects(struct child_process *cmd, cmd->out = -1; } +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + /* * Write oid to the given struct child_process's stdin, starting it first if * necessary. @@ -347,14 +352,15 @@ static int write_oid(const struct object_id *oid, struct packed_git *pack UNUSED, uint32_t pos UNUSED, void *data) { - struct child_process *cmd = data; + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; if (cmd->in == -1) { if (start_command(cmd)) die(_("could not start pack-objects to repack promisor objects")); } - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || write_in_full(cmd->in, "\n", 1) < 0) die(_("failed to feed promisor objects to pack-objects")); return 0; @@ -413,6 +419,7 @@ static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) { + struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; FILE *out; struct strbuf line = STRBUF_INIT; @@ -427,7 +434,9 @@ static void repack_promisor_objects(struct repository *repo, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. 
*/ - for_each_packed_object(repo, write_oid, &cmd, + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { -- cgit v1.3-5-g9baa From a7a5a607b9c21c7988782cf8ed04078ca320c784 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:38 -0400 Subject: builtin/repack: avoid "the_hash_algo" in `repack_promisor_objects()` In a similar spirit as the previous commits, avoid referring directly to "the_hash_algo" within builtin/repack.c::repack_promisor_objects(). Since there is already a repository pointer in scope, use its hash_algo value instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 7d62959dc2..a7e94ed03c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -452,7 +452,7 @@ static void repack_promisor_objects(struct repository *repo, struct string_list_item *item; char *promisor_name; - if (line.len != the_hash_algo->hexsz) + if (line.len != repo->hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only from pack-objects.")); item = string_list_append(names, line.buf); -- cgit v1.3-5-g9baa From c660b0dbcbb70647f5103a4573963397522a1f0f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:41 -0400 Subject: builtin/repack.c: avoid "the_hash_algo" in `finish_pack_objects_cmd()` In a similar spirit as previous commits, avoid referring directly to "the_hash_algo" in builtin/repack.c::finish_pack_objects_cmd() and instead accept one as a parameter to the function. Since this function has a number of callers throughout the builtin, the diff is a little noisier than previous commits. However, each hunk is limited to passing the hash_algo parameter from a repository pointer that is already in scope. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a7e94ed03c..a043704aa8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1073,7 +1073,8 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } -static int finish_pack_objects_cmd(struct child_process *cmd, +static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct child_process *cmd, struct string_list *names, int local) { @@ -1084,7 +1085,7 @@ static int finish_pack_objects_cmd(struct child_process *cmd, while (strbuf_getline_lf(&line, out) != EOF) { struct string_list_item *item; - if (line.len != the_hash_algo->hexsz) + if (line.len != algop->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); /* @@ -1150,7 +1151,8 @@ static int write_filtered_pack(const struct pack_objects_args *args, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -1247,7 +1249,8 @@ static int write_cruft_pack(const struct pack_objects_args *args, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static const char *find_pack_prefix(const char *packdir, const char *packtmp) @@ -1534,7 +1537,7 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); if (ret) goto cleanup; -- cgit v1.3-5-g9baa From 8a5d4bd87d3fa8e9de9bc3b2ddb7ca527fcfeb68 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 
18:27:50 -0400 Subject: builtin/repack.c: avoid using `hash_to_hex()` in pack geometry In previous commits, we started passing either repository or git_hash_algo pointers around to various spots within builtin/repack.c to reduce our dependency on the_repository in the hope of undef'ing USE_THE_REPOSITORY_VARIABLE. This commit takes us as far as we can (easily) go in that direction by removing the only use of a convenience function that only exists when USE_THE_REPOSITORY_VARIABLE is defined. Unfortunately, the only other such function is "is_bare_repository()", which is less than straightforward to convert into, say, "repo_is_bare()", the latter of the two accepting a repository pointer. Punt on that for now, and declare this commit as the stopping point for our efforts in the direction of undef'ing USE_THE_REPOSITORY_VARIABLE. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a043704aa8..0d35f15b4b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -683,12 +683,14 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing) { + const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; uint32_t i; for (i = 0; i < geometry->split; i++) { struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) continue; strbuf_reset(&buf); -- cgit v1.3-5-g9baa From c7a120722ed60c07fa6a32f43b56f8361bfe38af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:53 -0400 Subject: repack: introduce new compilation unit Over the years, builtin/repack.c has turned into a grab-bag of functionality powering the 'git repack' builtin. 
Among its many capabilities, it: - can build and spawn 'git pack-objects' commands, which in turn generate new packs - has infrastructure to manage the set of existing packs in a repository - has infrastructure to split a sequence of packs into a geometric progression based on object size - can manage both generating and combining cruft packs together - can write new MIDXs to name a few. As a result, this builtin has accumulated a lot of code, making adding new functionality difficult. In the future, 'repack' will learn how to manage a chain of incremental MIDXs, adding yet more functionality into the builtin. As a prerequisite step, let's first move some of the functionality in the builtin into its own repack.[ch]. This will be done over the course of many steps, since there are many individual components, some of which will end up in other, yet-to-exist compilation units of their own. Some of the code movement here is also non-trivial, so performing it in individual steps will make it easier to verify. Let's start by migrating 'struct pack_objects_args' (and the related corresponding pack_objects_args_release() function) into repack.h, and teach both the Makefile and Meson how to build the new compilation unit. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 25 +------------------------ meson.build | 1 + repack.c | 11 +++++++++++ repack.h | 23 +++++++++++++++++++++++ 5 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 repack.c create mode 100644 repack.h (limited to 'builtin') diff --git a/Makefile b/Makefile index 4c95affadb..c0df6da237 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,7 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 0d35f15b4b..6dfcb3327e 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -19,6 +19,7 @@ #include "prune-packed.h" #include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" #include "pack.h" #include "pack-bitmap.h" @@ -53,21 +54,6 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." 
); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; -}; - static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { @@ -116,15 +102,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - struct existing_packs { struct repository *repo; struct string_list kept_packs; diff --git a/meson.build b/meson.build index b3dfcc0497..993e8f368f 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,7 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack.c b/repack.c new file mode 100644 index 0000000000..a1f5b796fb --- /dev/null +++ b/repack.c @@ -0,0 +1,11 @@ +#include "git-compat-util.h" +#include "repack.h" + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} diff --git a/repack.h b/repack.h new file mode 100644 index 0000000000..421d439d5a --- /dev/null +++ b/repack.h @@ -0,0 +1,23 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + struct 
list_objects_filter_options filter_options; +}; + +void pack_objects_args_release(struct pack_objects_args *args); + +#endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 19f6e8d023057113fe8c5890349593e70541bec2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:56 -0400 Subject: builtin/repack.c: pass both pack_objects args to repack_config A subsequent commit will remove 'delta_base_offset' as a static variable within builtin/repack.c, and reintroduce it as a member of the 'struct pack_objects_args'. As a result, the repack_config callback will need to have both the cruft- and non-cruft 'struct pack_objects_args's in scope. Introduce a new 'struct repack_config_ctx' to allow the caller to provide both pointers to the callback. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 6dfcb3327e..af6de8d77a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -54,10 +54,16 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." 
); +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; +}; + static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { delta_base_offset = git_config_bool(var, value); return 0; @@ -1260,6 +1266,7 @@ int cmd_repack(int argc, size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; @@ -1343,7 +1350,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(repo, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); -- cgit v1.3-5-g9baa From e35ef71e003cb0731d9f33605f598e1b99746441 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:58 -0400 Subject: repack: move 'delta_base_offset' to 'struct pack_objects_args' The static variable 'delta_base_offset' determines whether or not we pass the "--delta-base-offset" command-line argument when spawning pack-objects as a child process. Its introduction dates back to when repack was rewritten in C, all the way back in a1bbc6c017 (repack: rewrite the shell script in C, 2013-09-15). 'struct pack_objects_args' was introduced much later on in 4571324b99 (builtin/repack.c: allow configuring cruft pack generation, 2022-05-20), but did not move the 'delta_base_offset' variable. 
Since the 'delta_base_offset' is a property of an individual pack-objects command, re-introduce that variable as a member of 'struct pack_objects_args', which will enable further code movement in the subsequent commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 ++++++----- repack.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index af6de8d77a..f4af830353 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -34,7 +34,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; @@ -63,9 +62,10 @@ static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { @@ -315,7 +315,7 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--local"); if (args->quiet) strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) + if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; @@ -1271,8 +1271,8 @@ int cmd_repack(int argc, const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char 
*cruft_expiration = NULL; const char *expire_to = NULL; @@ -1567,6 +1567,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, diff --git a/repack.h b/repack.h index 421d439d5a..12632d7fec 100644 --- a/repack.h +++ b/repack.h @@ -15,9 +15,12 @@ struct pack_objects_args { int local; int name_hash_version; int path_walk; + int delta_base_offset; struct list_objects_filter_options filter_options; }; +#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } + void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 7005d2594b73d30beae7abebdd035becca05299d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:01 -0400 Subject: repack: remove 'prepare_pack_objects' from the builtin Now that the 'prepare_pack_objects' function no longer refers to external, static variables, move it out to repack.h as generic functionality. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 34 ---------------------------------- repack.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 40 insertions(+), 34 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index f4af830353..ff93654cfe 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -288,40 +288,6 @@ static void collect_pack_filenames(struct existing_packs *existing, strbuf_release(&buf); } -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (args->delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index a1f5b796fb..91b6e1cc09 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,40 @@ #include "git-compat-util.h" #include "repack.h" +#include "run-command.h" + +void 
prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} void pack_objects_args_release(struct pack_objects_args *args) { diff --git a/repack.h b/repack.h index 12632d7fec..3f7ec20735 100644 --- a/repack.h +++ b/repack.h @@ -21,6 +21,11 @@ struct pack_objects_args { #define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From a0dcecb14613e5bdfdc06616271bffac9e1366e8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:04 -0400 Subject: builtin/repack.c: rename many 'struct existing_packs' functions Rename many of the 'struct existing_packs'-related functions according to the convention introduced in and 
described by 541204aabe (Documentation: document naming schema for structs and their functions, 2024-07-30). Note that some functions which operate over an individual entry in the list of existing packs are prefixed with "existing_pack_" instead of the plural form. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 32 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index ff93654cfe..f82e6c3930 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -121,39 +121,39 @@ struct existing_packs { .cruft_packs = STRING_LIST_INIT_DUP, \ } -static int has_existing_non_kept_packs(const struct existing_packs *existing) +static int existing_packs_has_non_kept(const struct existing_packs *existing) { return existing->non_kept_packs.nr || existing->cruft_packs.nr; } -static void pack_mark_for_deletion(struct string_list_item *item) +static void existing_pack_mark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } -static void pack_unmark_for_deletion(struct string_list_item *item) +static void existing_pack_unmark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); } -static int pack_is_marked_for_deletion(struct string_list_item *item) +static int existing_pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } -static void pack_mark_retained(struct string_list_item *item) +static void existing_packs_mark_retained(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); } -static int pack_is_retained(struct string_list_item *item) +static int existing_pack_is_retained(struct string_list_item *item) { return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, - struct 
string_list *names, - struct string_list *list) +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) { struct string_list_item *item; const int hexsz = algop->hexsz; @@ -165,8 +165,8 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, continue; sha1 = item->string + len - hexsz; - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); } else if (!string_list_has_string(names, sha1)) { /* * Mark this pack for deletion, which ensures @@ -175,13 +175,13 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, * will actually delete this pack (if `-d` was * given). */ - pack_mark_for_deletion(item); + existing_pack_mark_for_deletion(item); } } } -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) +static void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) { struct strbuf buf = STRBUF_INIT; struct string_list_item *item; @@ -193,17 +193,19 @@ static void retain_cruft_pack(struct existing_packs *existing, if (!item) BUG("could not find cruft pack '%s'", pack_basename(cruft)); - pack_mark_retained(item); + existing_packs_mark_retained(item); strbuf_release(&buf); } -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) +static void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) { const struct git_hash_algo *algop = existing->repo->hash_algo; - mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); - mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); } static void remove_redundant_pack(struct 
repository *repo, @@ -225,13 +227,13 @@ static void remove_redundant_packs_1(struct repository *repo, { struct string_list_item *item; for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) + if (!existing_pack_is_marked_for_deletion(item)) continue; remove_redundant_pack(repo, packdir, item->string); } } -static void remove_redundant_existing_packs(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); remove_redundant_packs_1(existing->repo, &existing->cruft_packs); @@ -250,7 +252,7 @@ static void existing_packs_release(struct existing_packs *existing) * .keep file or not. Packs without a .keep file are not to be kept * if we are going to pack everything into one file. */ -static void collect_pack_filenames(struct existing_packs *existing, +static void existing_packs_collect(struct existing_packs *existing, const struct string_list *extra_keep) { struct packfile_store *packs = existing->repo->objects->packfiles; @@ -721,7 +723,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, item = string_list_lookup(&existing->non_kept_packs, pack_name); - if (item && !pack_is_marked_for_deletion(item)) + if (item && !existing_pack_is_marked_for_deletion(item)) continue; } @@ -851,7 +853,7 @@ static void midx_included_packs(struct string_list *include, } } else { for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -888,10 +890,10 @@ static void midx_included_packs(struct string_list *include, * --geometric case, but doing so is unnecessary * since no packs are marked as pending * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). 
*/ - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -1128,7 +1130,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, if (p->pack_size < combine_cruft_below_size) { fprintf(in, "-%s\n", pack_basename(p)); } else { - retain_cruft_pack(existing, p); + existing_packs_retain_cruft(existing, p); fprintf(in, "%s\n", pack_basename(p)); } } @@ -1382,7 +1384,7 @@ int cmd_repack(int argc, packtmp = mkpathdup("%s/%s", packdir, packtmp_name); existing.repo = repo; - collect_pack_filenames(&existing, &keep_pack_list); + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) @@ -1431,7 +1433,7 @@ int cmd_repack(int argc, if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(repo, &po_args, &names); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1647,7 +1649,7 @@ int cmd_repack(int argc, /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; @@ -1671,7 +1673,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, -- cgit v1.3-5-g9baa From f905f49c68f9cf3aff93f0dcd065dd95345c21d5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:07 -0400 Subject: repack: remove 'remove_redundant_pack' from the builtin Extract "remove_redundant_pack()" as generic repack-related functionality by moving its implementation to the repack.[ch] compilation unit. 
This is a prerequisite to moving the "existing_packs" API, which is one of the callers of this function. (The remaining caller in the pack geometry code will eventually move to its own compilation unit as well, and will likewise rely on this function.) While moving it over, prefix the function name with "repack_" to indicate that it belongs to the repack-subsystem. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++---------------- repack.c | 18 ++++++++++++++++++ repack.h | 3 +++ 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index f82e6c3930..31137cf711 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -208,20 +208,6 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } -static void remove_redundant_pack(struct repository *repo, - const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = repo->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(repo); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs) { @@ -229,7 +215,7 @@ static void remove_redundant_packs_1(struct repository *repo, for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string); } } @@ -652,7 +638,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(existing->repo, packdir, buf.buf); + 
repack_remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); diff --git a/repack.c b/repack.c index 91b6e1cc09..3aaa351b5b 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,9 @@ #include "git-compat-util.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" #include "repack.h" +#include "repository.h" #include "run-command.h" void prepare_pack_objects(struct child_process *cmd, @@ -44,3 +48,17 @@ void pack_objects_args_release(struct pack_objects_args *args) free(args->threads); list_objects_filter_release(&args->filter_options); } + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} diff --git a/repack.h b/repack.h index 3f7ec20735..a62bfa2ff9 100644 --- a/repack.h +++ b/repack.h @@ -28,4 +28,7 @@ void prepare_pack_objects(struct child_process *cmd, const char *out); void pack_objects_args_release(struct pack_objects_args *args); +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 9574e8f31d6d920973213ae5dbab6b77d2deeadf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:10 -0400 Subject: builtin/repack.c: pass "packdir" when removing packs builtin/repack.c defines a static "packdir" to instruct pack-objects on where to write any new packfiles. This is also the directory scanned when removing any packfiles which were made redundant by the latest repack. 
Prepare to move the "existing_packs_remove_redundant" function to its own compilation unit by passing in this information as a parameter to that function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 31137cf711..c5a88eda12 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,7 +209,8 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, } static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs) + struct string_list *packs, + const char *packdir) { struct string_list_item *item; for_each_string_list_item(item, packs) { @@ -219,10 +220,13 @@ static void remove_redundant_packs_1(struct repository *repo, } } -static void existing_packs_remove_redundant(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) { - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); } static void existing_packs_release(struct existing_packs *existing) @@ -1659,7 +1663,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, -- cgit v1.3-5-g9baa From dab24e4bcbd8499c9262da5e259212765b28b77c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:12 -0400 Subject: builtin/repack.c: avoid unnecessary numeric casts in existing_packs There are a couple of spots that cause warnings within the existing_packs API without 
DISABLE_SIGN_COMPARE_WARNINGS under DEVELOPER=1 mode. In both cases, we have int values that are being compared against size_t ones. Neither of these two cases is incorrect, and the cast is completely OK in practice. But both are unnecessary, since: - in existing_packs_mark_for_deletion_1(), 'hexsz' should be defined as a size_t anyway, since algop->hexsz is. - in existing_packs_collect(), 'i' should be defined as a size_t since it is counting up to the value of a string_list's 'nr' field. (This patch is a little bit of noise, but I would rather see us squelch these warnings ahead of moving the existing_packs API into a separate compilation unit to avoid having to define DISABLE_SIGN_COMPARE_WARNINGS in repack.c.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index c5a88eda12..e13943b637 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -156,7 +156,7 @@ static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop struct string_list *list) { struct string_list_item *item; - const int hexsz = algop->hexsz; + const size_t hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -250,7 +250,7 @@ static void existing_packs_collect(struct existing_packs *existing, struct strbuf buf = STRBUF_INIT; for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; + size_t i; const char *base; if (!p->pack_local) -- cgit v1.3-5-g9baa From 7d1f4425889ea7f663ca30dd1d63591e52a628f6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:15 -0400 Subject: repack: remove 'existing_packs' API from the builtin The repack builtin defines an API for keeping track of which packs were found in the repository at the beginning of the repack operation.
This is used to classify what state a pack was in (kept, non-kept, or cruft), and is also used to mark which packs to delete (or keep) at the end of a repack operation. Now that the prerequisite refactoring is complete, this API is isolated enough that it can be moved out to repack.[ch] and removed from the builtin entirely. As a result, some of its functions become static within repack.c, cleaning up the visible API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 173 ------------------------------------------------------- repack.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 35 +++++++++++ 3 files changed, 192 insertions(+), 173 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index e13943b637..a168c88791 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,7 +3,6 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" @@ -108,178 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct existing_packs { - struct repository *repo; - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int existing_packs_has_non_kept(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void existing_pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void existing_pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int existing_pack_is_marked_for_deletion(struct string_list_item *item) -{ - return 
(uintptr_t)item->util & DELETE_PACK; -} - -static void existing_packs_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int existing_pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const size_t hexsz = algop->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (existing_pack_is_retained(item)) { - existing_pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). 
- */ - existing_pack_mark_for_deletion(item); - } - } -} - -static void existing_packs_retain_cruft(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - existing_packs_mark_retained(item); - strbuf_release(&buf); -} - -static void existing_packs_mark_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - existing_packs_mark_for_deletion_1(algop, names, - &existing->non_kept_packs); - existing_packs_mark_for_deletion_1(algop, names, - &existing->cruft_packs); -} - -static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs, - const char *packdir) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!existing_pack_is_marked_for_deletion(item)) - continue; - repack_remove_redundant_pack(repo, packdir, item->string); - } -} - -static void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) -{ - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. 
- */ -static void existing_packs_collect(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - size_t i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index 3aaa351b5b..9182e1c50b 100644 --- a/repack.c +++ b/repack.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "dir.h" #include "midx.h" #include "odb.h" #include "packfile.h" @@ -62,3 +63,159 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + size_t i; + const char *base; + + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if 
(!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack 
won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); +} diff --git a/repack.h b/repack.h index a62bfa2ff9..19796e2243 100644 --- a/repack.h +++ b/repack.h @@ -2,6 +2,7 @@ #define REPACK_H #include "list-objects-filter-options.h" +#include "string-list.h" struct 
pack_objects_args { char *window; @@ -31,4 +32,38 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 2b72c1236725915b353b9740a27a32c107dfe3b0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:18 -0400 Subject: builtin/repack.c: rename "struct generated_pack_data" The name "generated_pack_data" is somewhat redundant, since the contents of the struct *is* the data associated with the generated pack. 
Rename the structure to just "generated_pack", resulting in less awkward function names, like "generated_pack_has_ext()" which is preferable to "generated_pack_data_has_ext()". Rename a few related functions to align with the convention that functions to do with a struct "S" should be prefixed with "S_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a168c88791..a4d80b6b04 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -146,15 +146,15 @@ static struct { {".idx"}, }; -struct generated_pack_data { +struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack_data *populate_pack_exts(const char *name) +static struct generated_pack *generated_pack_populate(const char *name) { struct stat statbuf; struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { @@ -164,21 +164,21 @@ static struct generated_pack_data *populate_pack_exts(const char *name) if (stat(path.buf, &statbuf)) continue; - data->tempfiles[i] = register_tempfile(path.buf); + pack->tempfiles[i] = register_tempfile(path.buf); } strbuf_release(&path); - return data; + return pack; } -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) +static int generated_pack_has_ext(const struct generated_pack *pack, + const char *ext) { int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { if (strcmp(exts[i].name, ext)) continue; - return !!data->tempfiles[i]; + return !!pack->tempfiles[i]; } BUG("unknown pack extension: '%s'", ext); } @@ -239,7 +239,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = 
populate_pack_exts(item->string); + item->util = generated_pack_populate(item->string); free(promisor_name); } @@ -780,8 +780,8 @@ static int write_midx_included_packs(struct string_list *include, * will suffice, so pick the first one.) */ for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) continue; strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", @@ -864,7 +864,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); + item->util = generated_pack_populate(line.buf); } } fclose(out); @@ -1435,7 +1435,7 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; + struct generated_pack *pack = item->util; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname; @@ -1443,8 +1443,8 @@ int cmd_repack(int argc, fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); struct stat statbuffer; if (!stat(fname_old, &statbuffer)) { @@ -1452,7 +1452,7 @@ int cmd_repack(int argc, chmod(fname_old, statbuffer.st_mode); } - if (rename_tempfile(&data->tempfiles[ext], fname)) + if (rename_tempfile(&pack->tempfiles[ext], fname)) die_errno(_("renaming pack to '%s' failed"), fname); } else if (!exts[ext].optional) die(_("pack-objects did not write a '%s' file for pack %s-%s"), -- cgit v1.3-5-g9baa From c0427692cb0fe03eb32fffc5bd06fad4ee434561 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:20 -0400 Subject: builtin/repack.c: factor out 
"generated_pack_install" Once all new packs are known to exist, 'repack' installs their contents from their temporary location into their permanent one. This is a semi-involved procedure for each pack, since for each extension (e.g., ".idx", ".pack", ".mtimes", and so on) we have to either: - adjust the filemode of the temporary file before renaming it into place, or - die() if we are missing a non-optional extension, or - unlink() any existing file for extensions that we did not generate (e.g., if a non-cruft pack we generated was identical to, say, a cruft pack which existed at the beginning of the process, we have to remove the ".mtimes" file). Extract this procedure into its own function, and call it "generated_pack_install"(). This will set us up for pulling this function out of the builtin entirely and making it part of the repack.h API, which will be done in a future commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 65 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 30 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a4d80b6b04..bf413a6ee2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -183,6 +183,38 @@ static int generated_pack_has_ext(const struct generated_pack *pack, BUG("unknown pack extension: '%s'", ext); } +static void generated_pack_install(struct generated_pack *pack, + const char *name) +{ + int ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if 
(!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} + static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) @@ -1045,7 +1077,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + int i, ret; int show_progress; char **midx_pack_names = NULL; size_t midx_pack_names_nr = 0; @@ -1434,35 +1466,8 @@ int cmd_repack(int argc, /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack *pack = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install(item->util, item->string); /* End of pack replacement. 
*/ if (delete_redundant && pack_everything & ALL_INTO_ONE) -- cgit v1.3-5-g9baa From 184f0abeb802f44c0e23abe3c8a3fc7448c78b99 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:23 -0400 Subject: builtin/repack.c: pass "packtmp" to `generated_pack_populate()` In a similar spirit as previous commits, this function needs to know the temporary pack prefix, which it currently accesses through the static "packtmp" variable within builtin/repack.c. Pass it explicitly as a function parameter to facilitate moving this function out of builtin/repack.c entirely. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index bf413a6ee2..bed902adde 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -150,7 +150,8 @@ struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack *generated_pack_populate(const char *name) +static struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) { struct stat statbuf; struct strbuf path = STRBUF_INIT; @@ -271,7 +272,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = generated_pack_populate(item->string); + item->util = generated_pack_populate(item->string, packtmp); free(promisor_name); } @@ -896,7 +897,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf); + item->util = generated_pack_populate(line.buf, packtmp); } } fclose(out); -- cgit v1.3-5-g9baa From 7036d131ae514f1bc854670a9d26b31064fcd88d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:26 -0400 Subject: builtin/repack.c: provide pack locations to `generated_pack_install()` Repeat what was done in the 
preceding commit for the `generated_pack_install()` function, which needs both "packdir" and "packtmp". (As an aside, it is somewhat unfortunate that the final three parameters to this function are all "const char *", making errors like passing "packdir" and "packtmp" in the wrong order easy. We could define a new structure here, but that may be too heavy-handed.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index bed902adde..966db27613 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -185,7 +185,8 @@ static int generated_pack_has_ext(const struct generated_pack *pack, } static void generated_pack_install(struct generated_pack *pack, - const char *name) + const char *name, + const char *packdir, const char *packtmp) { int ext; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { @@ -1468,7 +1469,8 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) - generated_pack_install(item->util, item->string); + generated_pack_install(item->util, item->string, packdir, + packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) -- cgit v1.3-5-g9baa From f053ab6c2be6a9869cbdfaabe5bd844a2471f8b7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:29 -0400 Subject: repack: remove 'generated_pack' API from the builtin Now that we have factored the "generated_pack" API, we can move it to repack.[ch], further slimming down builtin/repack.c. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 83 -------------------------------------------------------- repack.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 8 ++++++ 3 files changed, 91 insertions(+), 83 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 966db27613..0e11c3b2c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -134,89 +134,6 @@ static int write_oid(const struct object_id *oid, return 0; } -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack *generated_pack_populate(const char *name, - const char *packtmp) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack *pack = xcalloc(1, sizeof(*pack)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - pack->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return pack; -} - -static int generated_pack_has_ext(const struct generated_pack *pack, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!pack->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void generated_pack_install(struct generated_pack *pack, - const char *name, - const char *packdir, const char *packtmp) -{ - int ext; - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", packdir, name, - exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { 
- statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), - fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, name); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } -} - static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) diff --git a/repack.c b/repack.c index 9182e1c50b..d8afdd352d 100644 --- a/repack.c +++ b/repack.c @@ -3,9 +3,11 @@ #include "midx.h" #include "odb.h" #include "packfile.h" +#include "path.h" #include "repack.h" #include "repository.h" #include "run-command.h" +#include "tempfile.h" void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args, @@ -219,3 +221,84 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); } + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t 
i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h index 19796e2243..f37eb49524 100644 --- a/repack.h +++ b/repack.h @@ -66,4 +66,12 @@ void existing_packs_remove_redundant(struct existing_packs *existing, const char *packdir); void existing_packs_release(struct existing_packs *existing); +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From bebf941f7db5de3de88962199b4400de8207f9b1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:32 -0400 Subject: builtin/repack.c: pass "packtmp" to `repack_promisor_objects()` In a similar spirit as previous commit(s), pass the "packtmp" variable to 
"repack_promisor_objects()" as an explicit parameter of the function, preparing us to move this function in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 0e11c3b2c9..2c67111b33 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int write_oid(const struct object_id *oid, static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, - struct string_list *names) + struct string_list *names, + const char *packtmp) { struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; @@ -1199,7 +1200,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(repo, &po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); if (existing_packs_has_non_kept(&existing) && delete_redundant && -- cgit v1.3-5-g9baa From 29e935515d1b49fa08b2781371625e5c55d2bf13 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:35 -0400 Subject: builtin/repack.c: remove "repack_promisor_objects()" from the builtin Now that we have properly factored the portion of the builtin which is responsible for repacking promisor objects, we can move that function (and associated dependencies) out of the builtin entirely. Similar to previous extractions, this function is declared in repack.h, but implemented in a separate repack-promisor.c file. This is done to separate promisor-specific repacking functionality from generic repack utilities (like "existing_packs", and "generated_pack" APIs). 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 95 -------------------------------------------------- meson.build | 1 + repack-promisor.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 +++ 5 files changed, 108 insertions(+), 95 deletions(-) create mode 100644 repack-promisor.c (limited to 'builtin') diff --git a/Makefile b/Makefile index c0df6da237..2a01bd92dc 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 2c67111b33..24b5e5049b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,101 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct write_oid_context { - struct child_process *cmd; - const struct git_hash_algo *algop; -}; - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. 
- */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct write_oid_context *ctx = data; - struct child_process *cmd = ctx->cmd; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static void repack_promisor_objects(struct repository *repo, - const struct pack_objects_args *args, - struct string_list *names, - const char *packtmp) -{ - struct write_oid_context ctx; - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - ctx.cmd = &cmd; - ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != repo->hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. 
- * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = generated_pack_populate(item->string, packtmp); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - struct pack_geometry { struct packed_git **pack; uint32_t pack_nr, pack_alloc; diff --git a/meson.build b/meson.build index 993e8f368f..1fbb8c52a6 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 0000000000..ee6e0669f6 --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hex.h" +#include "pack.h" +#include "packfile.h" +#include "path.h" +#include "repository.h" +#include "run-command.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. 
+ */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. 
+ * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.h b/repack.h index f37eb49524..19dc4fd738 100644 --- a/repack.h +++ b/repack.h @@ -74,4 +74,8 @@ int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); void generated_pack_install(struct generated_pack *pack, const char *name, const char *packdir, const char *packtmp); +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From e05c2d55668dcaa6a912372d93fb8f82d418d390 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:38 -0400 Subject: builtin/repack.c: rename various pack_geometry functions Rename functions which work with 'struct pack_geometry' to begin with "pack_geometry_". While we're at it, change `free_pack_geometry()` to instead be named `pack_geometry_release()` to match our conventions, and make clear that that function frees the contents of the struct, not the memory allocated to hold the struct itself. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 24b5e5049b..42f05d2ebf 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -115,17 +115,17 @@ struct pack_geometry { int split_factor; }; -static uint32_t geometry_pack_weight(struct packed_git *p) +static uint32_t pack_geometry_weight(struct packed_git *p) { if (open_pack_index(p)) die(_("cannot open index for %s"), p->pack_name); return p->num_objects; } -static int geometry_cmp(const void *va, const void *vb) +static int pack_geometry_cmp(const void *va, const void *vb) { - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); if (aw < bw) return -1; @@ -134,7 +134,7 @@ static int geometry_cmp(const void *va, const void *vb) return 0; } -static void init_pack_geometry(struct pack_geometry *geometry, +static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { @@ -184,11 +184,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, geometry->pack_nr++; } - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); strbuf_release(&buf); } -static void split_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_split(struct pack_geometry *geometry) { uint32_t i; uint32_t split; @@ -208,13 +208,13 @@ static void split_pack_geometry(struct pack_geometry *geometry) struct packed_git *prev = geometry->pack[i - 1]; if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) + pack_geometry_weight(prev))) die(_("pack %s too large to 
consider in geometric " "progression"), prev->pack_name); - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) break; } @@ -242,9 +242,9 @@ static void split_pack_geometry(struct pack_geometry *geometry) for (i = 0; i < split; i++) { struct packed_git *p = geometry->pack[i]; - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); + total_size += pack_geometry_weight(p); } for (i = split; i < geometry->pack_nr; i++) { struct packed_git *ours = geometry->pack[i]; @@ -253,15 +253,15 @@ static void split_pack_geometry(struct pack_geometry *geometry) total_size)) die(_("pack %s too large to roll up"), ours->pack_name); - if (geometry_pack_weight(ours) < + if (pack_geometry_weight(ours) < geometry->split_factor * total_size) { if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) + pack_geometry_weight(ours))) die(_("pack %s too large to roll up"), ours->pack_name); split++; - total_size += geometry_pack_weight(ours); + total_size += pack_geometry_weight(ours); } else break; } @@ -269,7 +269,7 @@ static void split_pack_geometry(struct pack_geometry *geometry) geometry->split = split; } -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) +static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) { uint32_t i; @@ -304,9 +304,9 @@ static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) return NULL; } -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) +static void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing) { const 
struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -332,7 +332,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, strbuf_release(&buf); } -static void free_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_release(struct pack_geometry *geometry) { if (!geometry) return; @@ -599,7 +599,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(geometry); FILE *in; int ret; @@ -1063,8 +1063,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1324,8 +1324,8 @@ int cmd_repack(int argc, existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1352,7 +1352,7 @@ cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); + pack_geometry_release(&geometry); for (size_t i = 0; i < midx_pack_names_nr; i++) free(midx_pack_names[i]); free(midx_pack_names); -- cgit v1.3-5-g9baa From 2a15a739a231d3eac774e13b53003faa7377719c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:41 -0400 Subject: builtin/repack.c: pass 'pack_kept_objects' to `pack_geometry_init()` Prepare to move pack_geometry-related APIs to 
their own compilation unit by passing in the static "pack_kept_objects" variable directly as a parameter to the 'pack_geometry_init()' function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 42f05d2ebf..ac8c80d0a5 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int pack_geometry_cmp(const void *va, const void *vb) static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args) + const struct pack_objects_args *args, + int pack_kept_objects) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -1063,7 +1064,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_init(&geometry, &existing, &po_args, + pack_kept_objects); pack_geometry_split(&geometry); } -- cgit v1.3-5-g9baa From b2ebeed1d82c5da8f7bb604594701629dcaf472b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:44 -0400 Subject: builtin/repack.c: pass 'packdir' to `pack_geometry_remove_redundant()` For similar reasons as the preceding commit, pass the "packdir" variable directly to `pack_geometry_remove_redundant()` as a parameter to the function. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index ac8c80d0a5..f6d04b33a7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -307,7 +307,8 @@ static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geo static void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, - struct existing_packs *existing) + struct existing_packs *existing, + const char *packdir) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -1327,7 +1328,7 @@ int cmd_repack(int argc, if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing); + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); -- cgit v1.3-5-g9baa From 62d3fa09b3890631af7c572cb6132088a14d2653 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:47 -0400 Subject: repack: remove pack_geometry API from the builtin Now that the pack_geometry API is fully factored and isolated from the rest of the builtin, declare it within repack.h and move its implementation to "repack-geometry.c" as a separate component. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 235 ------------------------------------------------------ meson.build | 1 + repack-geometry.c | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 20 +++++ 5 files changed, 256 insertions(+), 235 deletions(-) create mode 100644 repack-geometry.c (limited to 'builtin') diff --git a/Makefile b/Makefile index 2a01bd92dc..3ee8d27dba 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-geometry.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index f6d04b33a7..e2313c80c3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,241 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t pack_geometry_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int pack_geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), - bw = pack_geometry_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void pack_geometry_init(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip 
- * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); - strbuf_release(&buf); -} - -static void pack_geometry_split(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - pack_geometry_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * pack_geometry_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). 
- */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += pack_geometry_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - pack_geometry_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += pack_geometry_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. 
- */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void pack_geometry_remove_redundant(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing, - const char *packdir) -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex_algop(p->hash, - algop))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void pack_geometry_release(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, diff --git a/meson.build b/meson.build index 1fbb8c52a6..47b05089ee 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-geometry.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 0000000000..f58f1fc7f0 --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,234 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for 
%s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. 
+ */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. 
+ */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. 
+ */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack.h b/repack.h index 19dc4fd738..cea7969ae4 100644 --- a/repack.h +++ b/repack.h @@ -78,4 +78,24 @@ void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names, const char *packtmp); +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From ccb7f822d520472026a12250e1390683706a8154 Mon Sep 17 00:00:00 2001 From: 
Taylor Blau Date: Wed, 15 Oct 2025 18:28:50 -0400 Subject: builtin/repack.c: remove ref snapshotting from builtin When writing a MIDX, 'git repack' takes a snapshot of the repository's references and writes the result out to a file, which it then passes to 'git multi-pack-index write' via the '--refs-snapshot' option. This is done in order to make bitmap selections with respect to what we are packing, thus avoiding a race where an incoming reference update causes us to try and write a bitmap for a commit not present in the MIDX. Extract this functionality out into a new repack-midx.c compilation unit, and expose the necessary functions via the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 68 ------------------------------------------------- meson.build | 1 + repack-midx.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 +++ 5 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 repack-midx.c (limited to 'builtin') diff --git a/Makefile b/Makefile index 3ee8d27dba..b214277163 100644 --- a/Makefile +++ b/Makefile @@ -1138,6 +1138,7 @@ LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index e2313c80c3..7713721826 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -178,74 +178,6 @@ static int midx_has_unknown_packs(char **midx_pack_names, return 0; } -struct midx_snapshot_ref_data { - struct repository *repo; - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = 
&peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(repo); - - data.repo = repo; - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(repo), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(repo), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - static void midx_included_packs(struct string_list *include, struct existing_packs *existing, char **midx_pack_names, diff --git a/meson.build b/meson.build index 47b05089ee..0423ed30c4 100644 --- a/meson.build +++ b/meson.build @@ -464,6 +464,7 @@ libgit_sources = [ 'remote.c', 'repack.c', 'repack-geometry.c', + 'repack-midx.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 0000000000..354df729a5 --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,77 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile 
*f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} diff --git a/repack.h b/repack.h index cea7969ae4..803e129224 100644 --- a/repack.h +++ b/repack.h @@ -98,4 +98,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, const char *packdir); void pack_geometry_release(struct pack_geometry *geometry); +struct tempfile; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From e6b09077216ecc1c767506f39be736ba3dcccecb Mon Sep 17 00:00:00 2001 
From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:53 -0400 Subject: builtin/repack.c: extract opts struct for 'write_midx_included_packs()' The function 'write_midx_included_packs()', which is responsible for writing a new MIDX with a given set of included packs, currently takes a list of six arguments. In order to extract this function out of the builtin, we have to pass in a few additional parameters, like 'midx_must_contain_cruft' and 'packdir', which are currently declared as static variables within the builtin/repack.c compilation unit. Instead of adding additional parameters to `write_midx_included_packs()` extract out an "opts" struct that names these parameters, and pass a pointer to that, making it less cumbersome to add additional parameters. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 7713721826..1a79d1d834 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,6 +107,17 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +struct repack_write_midx_opts { + struct string_list *include; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, @@ -290,19 +301,15 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct 
child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret; - if (!include->nr) + if (!opts->include->nr) return 0; cmd.in = -1; @@ -311,18 +318,18 @@ static int write_midx_included_packs(struct string_list *include, strvec_push(&cmd.args, "multi-pack-index"); strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - if (show_progress) + if (opts->show_progress) strvec_push(&cmd.args, "--progress"); else strvec_push(&cmd.args, "--no-progress"); - if (write_bitmaps) + if (opts->write_bitmaps) strvec_push(&cmd.args, "--bitmap"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", pack_basename(preferred)); - else if (names->nr) { + else if (opts->names->nr) { /* The largest pack was repacked, meaning that either * one or two packs exist depending on whether the * repository has a cruft pack or not. @@ -335,7 +342,7 @@ static int write_midx_included_packs(struct string_list *include, * `--max-pack-size` was given, but any one of them * will suffice, so pick the first one.) 
*/ - for_each_string_list_item(item, names) { + for_each_string_list_item(item, opts->names) { struct generated_pack *pack = item->util; if (generated_pack_has_ext(pack, ".mtimes")) continue; @@ -355,15 +362,16 @@ static int write_midx_included_packs(struct string_list *include, ; } - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); ret = start_command(&cmd); if (ret) return ret; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) + for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); @@ -1001,15 +1009,23 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; + struct repack_write_midx_opts opts = { + .include = &include, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; midx_included_packs(&include, &existing, midx_pack_names, midx_pack_names_nr, &names, &geometry); - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? 
get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); + ret = write_midx_included_packs(&opts); if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); + remove_redundant_bitmaps(&include, opts.packdir); string_list_clear(&include, 0); -- cgit v1.3-5-g9baa From c3690c97d7b08d9876fcaf0a572b4956bc9b4c33 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:56 -0400 Subject: builtin/repack.c: use a string_list for 'midx_pack_names' When writing a new MIDX, repack must determine whether or not there are any packs in the MIDX it is replacing (if one exists) that are not somehow represented in the new MIDX (e.g., either by preserving the pack verbatim, or rolling it up as part of a geometric repack, etc.). In order to do this, it keeps track of a list of pack names from the MIDX present in the repository at the start of the repack operation. Since we manipulate and close the object store, we cannot rely on the repository's in-core representation of the MIDX, since this is subject to change and/or go away. When this behavior was introduced in 5ee86c273b (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), we maintained an array of character pointers instead of using a convenience API, such as string-list.h. Store the list of MIDX pack names in a string_list, thereby reducing the number of parameters we have to pass to `midx_has_unknown_packs()`. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 1a79d1d834..dda533f171 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,18 +118,17 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, +static int midx_has_unknown_packs(struct string_list *midx_pack_names, struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { - size_t i; + struct string_list_item *item; string_list_sort(include); - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; + for_each_string_list_item(item, midx_pack_names) { + const char *pack_name = item->string; /* * Determine whether or not each MIDX'd pack from the existing @@ -191,8 +190,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, + struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -247,8 +245,8 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { + midx_has_unknown_packs(midx_pack_names, include, geometry, + existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -606,13 +604,12 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; + struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = 
EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ struct repack_config_ctx config_ctx; @@ -985,13 +982,12 @@ int cmd_repack(int argc, struct multi_pack_index *m = get_multi_pack_index(repo->objects->sources); - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); + for (; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + string_list_append(&midx_pack_names, + m->pack_names[i]); + } + } } close_object_store(repo->objects); @@ -1019,8 +1015,8 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); + midx_included_packs(&include, &existing, &midx_pack_names, + &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1067,11 +1063,9 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); + string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); -- cgit v1.3-5-g9baa From 2fee63a71ae8113fd91d8e5924ae4a5619ad0cd3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:59 -0400 Subject: repack: keep track of MIDX pack names using existing_packs Instead of storing the list of MIDX pack names separately, let's inline it into the existing_packs struct, further reducing the number of parameters we have to pass around. 
This amounts to adding a new string_list to the existing_packs struct, and populating it via `existing_packs_collect()`. This is fairly straightforward to do. Since we are already looping over all packs, all we need to do is: if (p->multi_pack_index) string_list_append(&existing->midx_packs, pack_basename(p)); Note, however, that this check *must* come before other conditions where we discard and do not keep track of a pack, including the condition "if (!p->pack_local)" immediately below. This is because the existing routine which collects MIDX pack names does so blindly, and does not discard, for example, non-local packs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 ++++---------------------- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index dda533f171..a57a14ef60 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,8 +118,7 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(struct string_list *midx_pack_names, - struct string_list *include, +static int midx_has_unknown_packs(struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { @@ -127,7 +126,7 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, string_list_sort(include); - for_each_string_list_item(item, midx_pack_names) { + for_each_string_list_item(item, &existing->midx_packs) { const char *pack_name = item->string; /* @@ -190,7 +189,6 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -245,8 +243,7 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - 
midx_has_unknown_packs(midx_pack_names, include, geometry, - existing)) { + midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -604,7 +601,6 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; - struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; @@ -978,18 +974,6 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(repo->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(repo->objects->sources); - - for (; m; m = m->base_midx) { - for (uint32_t i = 0; i < m->num_packs; i++) { - string_list_append(&midx_pack_names, - m->pack_names[i]); - } - } - } - close_object_store(repo->objects); /* @@ -1015,8 +999,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &midx_pack_names, - &names, &geometry); + midx_included_packs(&include, &existing, &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1063,7 +1046,6 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); - string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); pack_objects_args_release(&po_args); diff --git a/repack.c b/repack.c index d8afdd352d..1d485e0112 100644 --- a/repack.c +++ b/repack.c @@ -80,6 +80,9 @@ void existing_packs_collect(struct existing_packs *existing, size_t i; const char *base; + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); if (!p->pack_local) continue; @@ -104,6 +107,7 @@ void existing_packs_collect(struct existing_packs 
*existing, string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); strbuf_release(&buf); } @@ -220,6 +224,7 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->kept_packs, 0); string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); } static struct { diff --git a/repack.h b/repack.h index 803e129224..6aa5b4e0f0 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct existing_packs { struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; + struct string_list midx_packs; }; #define EXISTING_PACKS_INIT { \ -- cgit v1.3-5-g9baa From 42088e3d4ae5c5bc77a49fcbba79832d10d03499 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:02 -0400 Subject: builtin/repack.c: reorder `remove_redundant_bitmaps()` The next commit will inline the call to `remove_redundant_bitmaps()` into `write_midx_included_packs()`. Reorder these two functions to avoid a forward declaration to `remove_redundant_bitmaps()`. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 58 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a57a14ef60..865e0af039 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -296,6 +296,35 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -373,35 +402,6 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return finish_command(&cmd); } -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. 
- */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, -- cgit v1.3-5-g9baa From 337baea7212f0cf1aaa00a885d75098e260a22b0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:05 -0400 Subject: builtin/repack.c: inline `remove_redundant_bitmaps()` After writing a new MIDX, the repack command removes any bitmaps belonging to packs which were written into the MIDX. This is currently done in a separate function outside of `write_midx_included_packs()`, which forces the caller to keep track of the set of packs written into the MIDX. Prepare to no longer require the caller to keep track of such information by inlining the clean-up into `write_midx_included_packs()`. Future commits will make the caller oblivious to the set of packs included in the MIDX altogether. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 865e0af039..271c869268 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -331,10 +331,10 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; - int ret; + int ret = 0; if (!opts->include->nr) - return 0; + goto done; cmd.in = -1; cmd.git_cmd = 1; @@ -392,14 +392,18 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) ret = start_command(&cmd); if (ret) - return ret; + goto done; in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); - return finish_command(&cmd); + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(opts->include, opts->packdir); + return ret; } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, @@ -1003,9 +1007,6 @@ int cmd_repack(int argc, ret = write_midx_included_packs(&opts); - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, opts.packdir); - string_list_clear(&include, 0); if (ret) -- cgit v1.3-5-g9baa From f07263fd9fcb6b03f1e1db041269e2d5b85ccff8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:08 -0400 Subject: builtin/repack.c: pass `repack_write_midx_opts` to `midx_included_packs` Instead of passing individual parameters (in this case, "existing", "names", and "geometry") to `midx_included_packs()`, pass a pointer to a `repack_write_midx_opts` structure instead. 
Besides reducing the number of parameters necessary to call the `midx_included_packs` function, this refactoring sets us up nicely to inline the call to `midx_included_packs()` into `write_midx_included_packs()`, thus making the caller (in this case, `cmd_repack()`) oblivious to the set of packs being written into the MIDX. In order to do this, `repack_write_midx_opts` has to keep track of the set of existing packs, so add an additional field to point to that set. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 271c869268..a518a2b2f3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,6 +108,7 @@ static int repack_config(const char *var, const char *value, } struct repack_write_midx_opts { + struct existing_packs *existing; struct string_list *include; struct pack_geometry *geometry; struct string_list *names; @@ -188,10 +189,11 @@ static int midx_has_unknown_packs(struct string_list *include, } static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - struct string_list *names, - struct pack_geometry *geometry) + struct repack_write_midx_opts *opts) { + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; struct string_list_item *item; struct strbuf buf = STRBUF_INIT; @@ -242,7 +244,7 @@ static void midx_included_packs(struct string_list *include, } } - if (midx_must_contain_cruft || + if (opts->midx_must_contain_cruft || midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see @@ -994,6 +996,7 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { + .existing = &existing, .include = &include, .geometry = &geometry, 
.names = &names, @@ -1003,7 +1006,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); -- cgit v1.3-5-g9baa From f17757487b2d212f86edaaf02306972e1a555bbd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:10 -0400 Subject: builtin/repack.c: inline packs within `write_midx_included_packs()` To write a MIDX at the end of a repack operation, 'git repack' presently computes the set of packs to write into the MIDX, before invoking `write_midx_included_packs()` with a `string_list` containing those packs. The logic for computing which packs are supposed to appear in the resulting MIDX is within `midx_included_packs()`, where it is aware of details like which cruft pack(s) were written/combined, if/how we did a geometric repack, etc. Computing this list ourselves before providing it to the sole function that makes use of that list, `write_midx_included_packs()`, is somewhat awkward. In the future, repack will learn how to write incremental MIDXs, which will use a very different pack selection routine. Instead of doing something like: struct string_list included_packs = STRING_LIST_INIT_DUP; if (incremental) { midx_incremental_included_packs(&included_packs, ...); write_midx_incremental_included_packs(&included_packs, ...); } else { midx_included_packs(&included_packs, ...); write_midx_included_packs(&included_packs, ...); } in the future, let's have each function that writes a MIDX be responsible for computing the list of included packs itself. Inline the declaration and initialization of `included_packs` into the `write_midx_included_packs()` function itself, and repeat that pattern in the future when we introduce new ways to write MIDXs. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a518a2b2f3..fad10be42a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -109,7 +109,6 @@ static int repack_config(const char *var, const char *value, struct repack_write_midx_opts { struct existing_packs *existing; - struct string_list *include; struct pack_geometry *geometry; struct string_list *names; const char *refs_snapshot; @@ -330,12 +329,14 @@ static void remove_redundant_bitmaps(struct string_list *include, static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret = 0; - if (!opts->include->nr) + midx_included_packs(&include, opts); + if (!include.nr) goto done; cmd.in = -1; @@ -397,14 +398,17 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, opts->include) + for_each_string_list_item(item, &include) fprintf(in, "%s\n", item->string); fclose(in); ret = finish_command(&cmd); done: if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(opts->include, opts->packdir); + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + return ret; } @@ -994,10 +998,8 @@ int cmd_repack(int argc, existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { .existing = &existing, - .include = &include, .geometry = &geometry, .names = &names, .refs_snapshot = refs_snapshot ? 
get_tempfile_path(refs_snapshot) : NULL, @@ -1006,12 +1008,9 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); - string_list_clear(&include, 0); - if (ret) goto cleanup; } -- cgit v1.3-5-g9baa From 6d05eb135f67d2d45a0fbd110a32d28b1e28c95d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:13 -0400 Subject: repack: move 'write_midx_included_packs' API from the builtin Now that we have sufficiently cleaned up the write_midx_included_packs() function, we can move it (along with the struct repack_write_midx_opts) out of the builtin, and into the repack.h header. Since this function (and the static ones that it depends on) are MIDX-specific details of the repacking process, move them to the repack-midx.c compilation unit instead of the general repack.c one. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 305 ------------------------------------------------------- repack-midx.c | 295 +++++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 12 +++ 3 files changed, 307 insertions(+), 305 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index fad10be42a..2f49a18283 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,311 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct repack_write_midx_opts { - struct existing_packs *existing; - struct pack_geometry *geometry; - struct string_list *names; - const char *refs_snapshot; - const char *packdir; - int show_progress; - int write_bitmaps; - int midx_must_contain_cruft; -}; - -static int midx_has_unknown_packs(struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - struct string_list_item *item; - - string_list_sort(include); - - for_each_string_list_item(item, 
&existing->midx_packs) { - const char *pack_name = item->string; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !existing_pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. 
- */ - return 1; - } - - return 0; -} - -static void midx_included_packs(struct string_list *include, - struct repack_write_midx_opts *opts) -{ - struct existing_packs *existing = opts->existing; - struct pack_geometry *geometry = opts->geometry; - struct string_list *names = opts->names; - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (opts->midx_must_contain_cruft || - midx_has_unknown_packs(include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. 
- * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `existing_packs_mark_for_deletion()` when - * doing an all-into-one repack). - */ - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. 
- */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int write_midx_included_packs(struct repack_write_midx_opts *opts) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list include = STRING_LIST_INIT_DUP; - struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; - int ret = 0; - - midx_included_packs(&include, opts); - if (!include.nr) - goto done; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (opts->names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, opts->names) { - struct generated_pack *pack = item->util; - if (generated_pack_has_ext(pack, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. 
The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (opts->refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", - opts->refs_snapshot); - - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); -done: - if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(&include, opts->packdir); - - string_list_clear(&include, 0); - - return ret; -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, diff --git a/repack-midx.c b/repack-midx.c index 354df729a5..6f6202c5bc 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -6,6 +6,7 @@ #include "oidset.h" #include "pack-bitmap.h" #include "refs.h" +#include "run-command.h" #include "tempfile.h" struct midx_snapshot_ref_data { @@ -75,3 +76,297 @@ void midx_snapshot_refs(struct repository *repo, struct tempfile *f) oidset_clear(&data.seen); } + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. 
+ * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. 
+ * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). 
+ */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if 
(opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. 
+ */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack.h b/repack.h index 6aa5b4e0f0..25a31ac0a0 100644 --- a/repack.h +++ b/repack.h @@ -101,6 +101,18 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 7a9c81a38ddb3b382103ccd45345c4892053fdfc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:16 -0400 Subject: builtin/repack.c: introduce `struct write_pack_opts` There are various functions within the 'repack' builtin which are responsible for writing different kinds of packs. They include: - `static int write_filtered_pack(...)` - `static int write_cruft_pack(...)` as well as the function `finish_pack_objects_cmd()`, which is responsible for finalizing a new pack write, and recording the checksum of its contents in the 'names' list. Both of these `write_` functions have a few things in common. They both take a pointer to the 'pack_objects_args' struct, as well as a pair of character pointers for `destination` and `pack_prefix`. 
Instead of repeating those arguments for each function, let's extract an options struct called "write_pack_opts" which has these three parameters as member fields. While we're at it, add fields for "packdir" and "packtmp", both of which are static variables within the builtin, and need to be read from within these two functions. This will shorten the list of parameters that callers have to provide to `write_filtered_pack()`, avoid ambiguity when passing multiple variables of the same type, and provide a unified interface for the two functions mentioned earlier. (Note that "pack_prefix" can be derived on the fly as a function of "packdir" and "packtmp", making it unnecessary to store "pack_prefix" explicitly. This commit ignores that potential cleanup in the name of doing as few things as possible, but a later commit will make that change.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 30 ++++++++++++++++-------------- repack.h | 8 ++++++++ 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 2f49a18283..45ce469898 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -138,9 +138,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, return finish_command(cmd); } -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) { @@ -150,9 +148,9 @@ static int write_filtered_pack(const struct pack_objects_args *args, int ret; const char *caret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--stdin-packs"); @@ -175,7
+173,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -665,14 +663,18 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; - - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .pack_prefix = find_pack_prefix(packdir, packtmp), + .packdir = packdir, + .packtmp = packtmp, + }; + + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } diff --git a/repack.h b/repack.h index 25a31ac0a0..6ef503f623 100644 --- a/repack.h +++ b/repack.h @@ -32,6 +32,14 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *pack_prefix; + const char *packdir; + const char *packtmp; +}; + struct repository; struct packed_git; -- cgit v1.3-5-g9baa From 3d2ac2065e2ac230c92cb87cc46053a0f3db1616 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:19 -0400 Subject: builtin/repack.c: use `write_pack_opts` within `write_cruft_pack()` Similar to the changes made in the previous commit to `write_filtered_pack()`, teach `write_cruft_pack()` to take a `write_pack_opts` struct and use that where possible. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 45ce469898..7295135ec2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,9 +221,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, strbuf_release(&buf); } -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_cruft_pack(const struct write_pack_opts *opts, const char *cruft_expiration, unsigned long combine_cruft_below_size, struct string_list *names, @@ -234,9 +232,9 @@ static int write_cruft_pack(const struct pack_objects_args *args, FILE *in; int ret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--cruft"); if (cruft_expiration) @@ -267,7 +265,7 @@ static int write_cruft_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -599,6 +597,13 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .pack_prefix = pack_prefix, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -615,8 +620,7 @@ int cmd_repack(int argc, 
cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -651,11 +655,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; -- cgit v1.3-5-g9baa From 98fa0d50a75099df3f2d62f9181e4c1bbf70f063 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:21 -0400 Subject: repack: move `find_pack_prefix()` out of the builtin Both callers within the repack builtin which call functions that take a 'write_pack_opts' structure have the following pattern: struct write_pack_opts opts = { .packdir = packdir, .packtmp = packtmp, .pack_prefix = find_pack_prefix(packdir, packtmp), /* ... */ }; int ret = write_some_kind_of_pack(&opts, /* ... */); , but both "packdir" and "packtmp" are fields within the write_pack_opts struct itself! Instead of also computing the pack_prefix ahead of time, let's have the callees compute it themselves by moving `find_pack_prefix()` out of the repack builtin, and have it take a write_pack_opts pointer instead of the "packdir" and "packtmp" fields directly. 
This avoids the callers having to do some prep work that is common between the two of them, but also avoids the potential pitfall of accidentally writing: .pack_prefix = find_pack_prefix(packtmp, packdir), (which is well-typed) when the caller meant to instead write: .pack_prefix = find_pack_prefix(packdir, packtmp), Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 ++++---------------- repack.c | 11 +++++++++++ repack.h | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 7295135ec2..b21799c650 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -149,6 +149,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, const char *caret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -173,7 +174,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -233,6 +234,7 @@ static int write_cruft_pack(const struct write_pack_opts *opts, int ret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -265,7 +267,7 @@ static int write_cruft_pack(const struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", pack_prefix, 
item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -283,17 +285,6 @@ static int write_cruft_pack(const struct write_pack_opts *opts, local); } -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, @@ -596,11 +587,9 @@ int cmd_repack(int argc, } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); struct write_pack_opts opts = { .po_args = &cruft_po_args, .destination = packtmp, - .pack_prefix = pack_prefix, .packtmp = packtmp, .packdir = packdir, }; @@ -667,7 +656,6 @@ int cmd_repack(int argc, struct write_pack_opts opts = { .po_args = &po_args, .destination = filter_to, - .pack_prefix = find_pack_prefix(packdir, packtmp), .packdir = packdir, .packtmp = packtmp, }; diff --git a/repack.c b/repack.c index 1d485e0112..19fd1d6d5b 100644 --- a/repack.c +++ b/repack.c @@ -66,6 +66,17 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, strbuf_release(&buf); } +const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 6ef503f623..5852e2407f 100644 --- a/repack.h +++ b/repack.h @@ -35,11 +35,12 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, struct write_pack_opts { struct pack_objects_args *po_args; const char 
*destination; - const char *pack_prefix; const char *packdir; const char *packtmp; }; +const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts); + struct repository; struct packed_git; -- cgit v1.3-5-g9baa From 2f79c79bba0da415eed3a8e1b32823b7c388b7f4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:24 -0400 Subject: repack: extract `write_pack_opts_is_local()` Similar to the previous commit, the functions `write_cruft_pack()` and `write_filtered_pack()` both compute a "local" variable via the exact same mechanism: const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); Not only does this cause us to repeat the same pair of lines, it also introduces an unnecessary "scratch" variable that is common between both functions. Instead of repeating ourselves, let's extract that functionality into a new function in the repack.h API called "write_pack_opts_is_local()". That function takes a pointer to a "struct write_pack_opts" (which has as fields both "destination" and "packdir"), and can encapsulate the dangling "scratch" field. Extract that function and make it visible within the repack.h API, and use it within both `write_cruft_pack()` and `write_filtered_pack()`. While we're at it, match our modern conventions by returning a "bool" instead of "int", and use `starts_with()` instead of `skip_prefix()` to avoid storing the dummy "scratch" variable. The remaining duplication (that is, that both `write_cruft_pack()` and `write_filtered_pack()` still both call `write_pack_opts_is_local()`) will be addressed in the following commit. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++---- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index b21799c650..d1449cfe13 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -147,8 +147,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -232,8 +231,7 @@ static int write_cruft_pack(const struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); diff --git a/repack.c b/repack.c index 19fd1d6d5b..d2ee9f2460 100644 --- a/repack.c +++ b/repack.c @@ -77,6 +77,11 @@ const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts) return pack_prefix; } +bool write_pack_opts_is_local(const struct write_pack_opts *opts) +{ + return starts_with(opts->destination, opts->packdir); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 5852e2407f..26d1954ae2 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct write_pack_opts { }; const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts); +bool write_pack_opts_is_local(const struct write_pack_opts *opts); struct repository; struct packed_git; -- cgit v1.3-5-g9baa From 80db3cd18985609340f40b2b06f4ef9f86a2cbe0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:27 -0400 Subject: 
builtin/repack.c: pass `write_pack_opts` to `finish_pack_objects_cmd()` To prepare to move the `finish_pack_objects_cmd()` function out of the builtin and into the repack.h API, there are a couple of things we need to do first: - First, let's take advantage of the `write_pack_opts_is_local()` function introduced in the previous commit instead of passing "local" explicitly. - Let's also avoid referring to the static 'packtmp' field within builtin/repack.c by instead accessing it through the write_pack_opts argument. There are three callers which need to adjust themselves in order to account for this change. The callers which reside in write_cruft_pack() and write_filtered_pack() both already have an "opts" in scope, so they can pass it through transparently. The other call (at the bottom of `cmd_repack()`) needs to initialize its own write_pack_opts to pass the necessary fields over to the direct call to `finish_pack_objects_cmd()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index d1449cfe13..5f382aaf19 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,11 +108,12 @@ static int repack_config(const char *var, const char *value, } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + const struct write_pack_opts *opts, struct child_process *cmd, - struct string_list *names, - int local) + struct string_list *names) { FILE *out; + bool local = write_pack_opts_is_local(opts); struct strbuf line = STRBUF_INIT; out = xfdopen(cmd->out, "r"); @@ -128,7 +129,8 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, packtmp); + item->util = generated_pack_populate(line.buf, + opts->packtmp); } } fclose(out); @@ -147,7 +149,6 @@ static
int write_filtered_pack(const struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -183,8 +184,8 @@ static int write_filtered_pack(const struct write_pack_opts *opts, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -231,7 +232,6 @@ static int write_cruft_pack(const struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -279,8 +279,8 @@ static int write_cruft_pack(const struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } int cmd_repack(int argc, @@ -560,9 +560,17 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); - if (ret) - goto cleanup; + { + struct write_pack_opts opts = { + .packdir = packdir, + .destination = packdir, + .packtmp = packtmp, + }; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, + &names); + if (ret) + goto cleanup; + } if (!names.nr) { if (!po_args.quiet) -- cgit v1.3-5-g9baa From fa0787a6cc1d8e7ef1e2e8398bdc13b987c61d69 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:30 -0400 Subject: repack: move `finish_pack_objects_cmd()` out of the builtin In a similar spirit as the previous commit(s), now that the function 
`finish_pack_objects_cmd()` has no explicit dependencies within the repack builtin, let's extract it. This prepares us to extract the remaining two functions within the repack builtin that explicitly write packfiles, which are `write_cruft_pack()` and `write_filtered_pack()`, which will be done in future commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 33 --------------------------------- repack.c | 33 +++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 38 insertions(+), 33 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 5f382aaf19..71abcfa0b7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,39 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int finish_pack_objects_cmd(const struct git_hash_algo *algop, - const struct write_pack_opts *opts, - struct child_process *cmd, - struct string_list *names) -{ - FILE *out; - bool local = write_pack_opts_is_local(opts); - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != algop->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names.
- */ - if (local) { - item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, - opts->packtmp); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - static int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) diff --git a/repack.c b/repack.c index d2ee9f2460..2c478970f3 100644 --- a/repack.c +++ b/repack.c @@ -82,6 +82,39 @@ bool write_pack_opts_is_local(const struct write_pack_opts *opts) return starts_with(opts->destination, opts->packdir); } +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + const struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + bool local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. 
+ */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 26d1954ae2..3244f601e2 100644 --- a/repack.h +++ b/repack.h @@ -42,6 +42,11 @@ struct write_pack_opts { const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts); bool write_pack_opts_is_local(const struct write_pack_opts *opts); +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + const struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + struct repository; struct packed_git; -- cgit v1.3-5-g9baa From d278970aef66e2cfcbcbab650c1fc1b6613b40db Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:33 -0400 Subject: repack: move `pack_kept_objects` to `struct pack_objects_args` The "pack_kept_objects" variable is defined as static to the repack builtin, but is inherently related to the pack-objects arguments that the builtin uses when generating new packs. Move that field into the "struct pack_objects_args", and shuffle around where we append the corresponding command-line option when preparing a pack-objects process. Specifically: - `write_cruft_pack()` always wants to pass "--honor-pack-keep", so explicitly set the `pack_kept_objects` field to "0" when initializing the `write_pack_opts` struct before calling `write_cruft_pack()`. - `write_filtered_pack()` no longer needs to handle writing the command-line option "--honor-pack-keep" when preparing a pack-objects process, since its call to `prepare_pack_objects()` will have already taken care of that. `write_filtered_pack()` also reads the `pack_kept_objects` field to determine whether to write the existing kept packs with a leading "^" character, so update that to read through the `po_args` pointer instead. 
- `cmd_repack()` also no longer has to write the "--honor-pack-keep" flag explicitly, since this is also handled via its call to `prepare_pack_objects()`. Since there is a default value for "pack_kept_objects" that relies on whether or not we are writing a bitmap (and not writing a MIDX), extract a default initializer for `struct pack_objects_args` that keeps this conditional default behavior. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 +++++++------------- repack-geometry.c | 5 ++--- repack.c | 2 ++ repack.h | 9 ++++++--- 4 files changed, 17 insertions(+), 19 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 71abcfa0b7..3c6d7e91fd 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -68,7 +67,7 @@ static int repack_config(const char *var, const char *value, return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -122,8 +121,6 @@ static int write_filtered_pack(const struct write_pack_opts *opts, strvec_push(&cmd.args, "--stdin-packs"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for_each_string_list_item(item, &existing->kept_packs) strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); @@ -146,7 +143,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; + caret = opts->po_args->pack_kept_objects ? 
"" : "^"; for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); @@ -208,7 +205,6 @@ static int write_cruft_pack(const struct write_pack_opts *opts, strvec_pushf(&cmd.args, "--cruft-expiration=%s", cruft_expiration); - strvec_push(&cmd.args, "--honor-pack-keep"); strvec_push(&cmd.args, "--non-empty"); cmd.in = -1; @@ -332,7 +328,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -378,8 +374,8 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); @@ -420,8 +416,7 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args, - pack_kept_objects); + pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -430,8 +425,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -581,6 +574,7 @@ int cmd_repack(int argc, cruft_po_args.local = 
po_args.local; cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, diff --git a/repack-geometry.c b/repack-geometry.c index f58f1fc7f0..e2f9794d7d 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -27,8 +27,7 @@ static int pack_geometry_cmp(const void *va, const void *vb) void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) + const struct pack_objects_args *args) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -43,7 +42,7 @@ void pack_geometry_init(struct pack_geometry *geometry, */ continue; - if (!pack_kept_objects) { + if (!args->pack_kept_objects) { /* * Any pack that has its pack_keep bit set will * appear in existing->kept_packs below, but diff --git a/repack.c b/repack.c index 2c478970f3..2ab33c665a 100644 --- a/repack.c +++ b/repack.c @@ -38,6 +38,8 @@ void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--quiet"); if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; cmd->out = -1; diff --git a/repack.h b/repack.h index 3244f601e2..0432379815 100644 --- a/repack.h +++ b/repack.h @@ -17,10 +17,14 @@ struct pack_objects_args { int name_hash_version; int path_walk; int delta_base_offset; + int pack_kept_objects; struct list_objects_filter_options filter_options; }; -#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} struct child_process; @@ -104,8 +108,7 @@ struct pack_geometry { void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - 
const struct pack_objects_args *args, - int pack_kept_objects); + const struct pack_objects_args *args); void pack_geometry_split(struct pack_geometry *geometry); struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, -- cgit v1.3-5-g9baa From 7ac4231b4283f4f8dc8447439730a5a2b8ed7eb4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:35 -0400 Subject: repack: move `write_filtered_pack()` out of the builtin In a similar fashion as in previous commits, move the function `write_filtered_pack()` out of the builtin and into its own compilation unit. This function is now part of the repack.h API, but implemented in its own "repack-filtered.c" unit as it is a separate component from other kinds of repacking operations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 46 ---------------------------------------------- meson.build | 1 + repack-filtered.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 repack-filtered.c (limited to 'builtin') diff --git a/Makefile b/Makefile index b214277163..ba4f3bbfa2 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o diff --git a/builtin/repack.c b/builtin/repack.c index 3c6d7e91fd..f65880d8f6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,52 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int write_filtered_pack(const struct write_pack_opts *opts, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct 
string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = opts->po_args->pack_kept_objects ? 
"" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { diff --git a/meson.build b/meson.build index 0423ed30c4..7124b158ae 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', 'repack-promisor.c', diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 0000000000..edcf7667c5 --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(const struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. 
+ */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index 0432379815..c790c90ef8 100644 --- a/repack.h +++ b/repack.h @@ -133,4 +133,8 @@ struct repack_write_midx_opts { void midx_snapshot_refs(struct repository *repo, struct tempfile *f); int write_midx_included_packs(struct repack_write_midx_opts *opts); +int write_filtered_pack(const struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 09797bd9666bb9cc6232e414498578deb2697c2a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:38 -0400 Subject: repack: move `write_cruft_pack()` out of the builtin In an identical fashion as the previous commit, move the function `write_cruft_pack()` into its own compilation unit, and make the function visible through the repack.h API. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 94 ----------------------------------------------------- meson.build | 1 + repack-cruft.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 6 ++++ 5 files changed, 107 insertions(+), 94 deletions(-) create mode 100644 repack-cruft.c (limited to 'builtin') diff --git a/Makefile b/Makefile index ba4f3bbfa2..e3c4bf1b4a 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o diff --git a/builtin/repack.c b/builtin/repack.c index f65880d8f6..a68c22f605 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,100 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - existing_packs_retain_cruft(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(const struct write_pack_opts *opts, - const char *cruft_expiration, - unsigned long 
combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. 
- */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - int cmd_repack(int argc, const char **argv, const char *prefix, diff --git a/meson.build b/meson.build index 7124b158ae..39152b37ba 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-cruft.c', 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 0000000000..c51df36722 --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,99 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); 
+ fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(const struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. 
+ */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index c790c90ef8..3a688a12ee 100644 --- a/repack.h +++ b/repack.h @@ -137,4 +137,10 @@ int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names); +int write_cruft_pack(const struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + #endif /* REPACK_H */ -- cgit v1.3-5-g9baa From 935ab44a0a4fae54f9cd378ede16f19e563e53d9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:41 -0400 Subject: builtin/repack.c: clean up unused `#include`s Over the past several dozen commits, we have moved a large amount of functionality out of the repack builtin and into other files like repack.c, repack-cruft.c, repack-filtered.c, repack-midx.c, and repack-promisor.c. These files specify the minimal set of `#include`s that they need to compile successfully, but we did not change the set of `#include`s in the repack builtin itself. Now that the code movement is complete, let's clean up that set of `#include`s and trim down the builtin to include the minimal amount of external headers necessary to compile. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a68c22f605..cfdb4c0920 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -4,26 +4,17 @@ #include "builtin.h" #include "config.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" #include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 -- cgit v1.3-5-g9baa From 026ad6016070748a66ed9a977ad90efc08df2225 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:55 -0500 Subject: builtin/repo: rename repo_info() to cmd_repo_info() Subcommand functions are often prefixed with `cmd_` to denote that they are an entrypoint. Rename repo_info() to cmd_repo_info() accordingly. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/repo.c b/builtin/repo.c index bbb0966f2d..eeeab8fbd2 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -136,8 +136,8 @@ static int parse_format_cb(const struct option *opt, return 0; } -static int repo_info(int argc, const char **argv, const char *prefix, - struct repository *repo) +static int cmd_repo_info(int argc, const char **argv, const char *prefix, + struct repository *repo) { enum output_format format = FORMAT_KEYVALUE; struct option options[] = { @@ -161,7 +161,7 @@ int cmd_repo(int argc, const char **argv, const char *prefix, { parse_opt_subcommand_fn *fn = NULL; struct option options[] = { - OPT_SUBCOMMAND("info", &fn, repo_info), + OPT_SUBCOMMAND("info", &fn, cmd_repo_info), OPT_END() }; -- cgit v1.3-5-g9baa From bbb2b9334856ae0a2b18e65e5924a42c31a83c6b Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:58 -0500 Subject: builtin/repo: introduce structure subcommand The structure of a repository's history can have huge impacts on the performance and health of the repository itself. Currently, Git lacks a means to surface repository metrics regarding its structure/shape via a single command. Acquiring this information requires users to be familiar with the relevant data points and the various Git commands required to surface them. To fill this gap, supplemental tools such as git-sizer(1) have been developed. To allow users to more readily identify repository structure related information, introduce the "structure" subcommand in git-repo(1). The goal of this subcommand is to eventually provide similar functionality to git-sizer(1), but natively in Git. The initial version of this command only iterates through all references in the repository and tracks the count of branches, tags, remote refs, and other reference types. 
The corresponding information is displayed in a human-friendly table formatted in a very similar manner to git-sizer(1). The width of each table column is adjusted automatically to satisfy the requirements of the widest row contained. Subsequent commits will surface additional relevant data points to output and also provide other more machine-friendly output formats. Based-on-patch-by: Derrick Stolee Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 10 +++ builtin/repo.c | 200 ++++++++++++++++++++++++++++++++++++++++++++ t/meson.build | 1 + t/t1901-repo-structure.sh | 61 ++++++++++++++ 4 files changed, 272 insertions(+) create mode 100755 t/t1901-repo-structure.sh (limited to 'builtin') diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 209afd1b61..8193298dd5 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -9,6 +9,7 @@ SYNOPSIS -------- [synopsis] git repo info [--format=(keyvalue|nul)] [-z] [...] +git repo structure DESCRIPTION ----------- @@ -43,6 +44,15 @@ supported: + `-z` is an alias for `--format=nul`. +`structure`:: + Retrieve statistics about the current repository structure. The + following kinds of information are reported: ++ +* Reference counts categorized by type + ++ +The table output format may change and is not intended for machine parsing. 
+ INFO KEYS --------- In order to obtain a set of values from `git repo info`, you should provide diff --git a/builtin/repo.c b/builtin/repo.c index eeeab8fbd2..e77e8db563 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -4,12 +4,16 @@ #include "environment.h" #include "parse-options.h" #include "quote.h" +#include "ref-filter.h" #include "refs.h" #include "strbuf.h" +#include "string-list.h" #include "shallow.h" +#include "utf8.h" static const char *const repo_usage[] = { "git repo info [--format=(keyvalue|nul)] [-z] [...]", + "git repo structure", NULL }; @@ -156,12 +160,208 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, return print_fields(argc, argv, repo, format); } +struct ref_stats { + size_t branches; + size_t remotes; + size_t tags; + size_t others; +}; + +struct stats_table { + struct string_list rows; + + int name_col_width; + int value_col_width; +}; + +/* + * Holds column data that gets stored for each row. + */ +struct stats_table_entry { + char *value; +}; + +static void stats_table_vaddf(struct stats_table *table, + struct stats_table_entry *entry, + const char *format, va_list ap) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + char *formatted_name; + int name_width; + + strbuf_vaddf(&buf, format, ap); + formatted_name = strbuf_detach(&buf, NULL); + name_width = utf8_strwidth(formatted_name); + + item = string_list_append_nodup(&table->rows, formatted_name); + item->util = entry; + + if (name_width > table->name_col_width) + table->name_col_width = name_width; + if (entry) { + int value_width = utf8_strwidth(entry->value); + if (value_width > table->value_col_width) + table->value_col_width = value_width; + } +} + +static void stats_table_addf(struct stats_table *table, const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + stats_table_vaddf(table, NULL, format, ap); + va_end(ap); +} + +static void stats_table_count_addf(struct stats_table *table, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + entry->value = xstrfmt("%" PRIuMAX, (uintmax_t)value); + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + +static inline size_t get_total_reference_count(struct ref_stats *stats) +{ + return stats->branches + stats->remotes + stats->tags + stats->others; +} + +static void stats_table_setup_structure(struct stats_table *table, + struct ref_stats *refs) +{ + size_t ref_total; + + ref_total = get_total_reference_count(refs); + stats_table_addf(table, "* %s", _("References")); + stats_table_count_addf(table, ref_total, " * %s", _("Count")); + stats_table_count_addf(table, refs->branches, " * %s", _("Branches")); + stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); + stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); + stats_table_count_addf(table, refs->others, " * %s", _("Others")); +} + +static void stats_table_print_structure(const struct stats_table *table) +{ + const char *name_col_title = _("Repository structure"); + const char *value_col_title = _("Value"); + int name_col_width = utf8_strwidth(name_col_title); + int value_col_width = utf8_strwidth(value_col_title); + struct string_list_item *item; + + if (table->name_col_width > name_col_width) + name_col_width = table->name_col_width; + if (table->value_col_width > value_col_width) + value_col_width = table->value_col_width; + + printf("| %-*s | %-*s |\n", name_col_width, name_col_title, + value_col_width, value_col_title); + printf("| "); + for (int i = 0; i < name_col_width; i++) + putchar('-'); + printf(" | "); + for (int i = 0; i < value_col_width; i++) + putchar('-'); + printf(" |\n"); + + for_each_string_list_item(item, &table->rows) { + struct 
stats_table_entry *entry = item->util; + const char *value = ""; + + if (entry) { + struct stats_table_entry *entry = item->util; + value = entry->value; + } + + printf("| %-*s | %*s |\n", name_col_width, item->string, + value_col_width, value); + } +} + +static void stats_table_clear(struct stats_table *table) +{ + struct stats_table_entry *entry; + struct string_list_item *item; + + for_each_string_list_item(item, &table->rows) { + entry = item->util; + if (entry) + free(entry->value); + } + + string_list_clear(&table->rows, 1); +} + +static int count_references(const char *refname, + const char *referent UNUSED, + const struct object_id *oid UNUSED, + int flags UNUSED, void *cb_data) +{ + struct ref_stats *stats = cb_data; + + switch (ref_kind_from_refname(refname)) { + case FILTER_REFS_BRANCHES: + stats->branches++; + break; + case FILTER_REFS_REMOTES: + stats->remotes++; + break; + case FILTER_REFS_TAGS: + stats->tags++; + break; + case FILTER_REFS_OTHERS: + stats->others++; + break; + default: + BUG("unexpected reference type"); + } + + return 0; +} + +static void structure_count_references(struct ref_stats *stats, + struct repository *repo) +{ + refs_for_each_ref(get_main_ref_store(repo), count_references, &stats); +} + +static int cmd_repo_structure(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + struct stats_table table = { + .rows = STRING_LIST_INIT_DUP, + }; + struct ref_stats stats = { 0 }; + struct option options[] = { 0 }; + + argc = parse_options(argc, argv, prefix, options, repo_usage, 0); + if (argc) + usage(_("too many arguments")); + + structure_count_references(&stats, repo); + + stats_table_setup_structure(&table, &stats); + stats_table_print_structure(&table); + + stats_table_clear(&table); + + return 0; +} + int cmd_repo(int argc, const char **argv, const char *prefix, struct repository *repo) { parse_opt_subcommand_fn *fn = NULL; struct option options[] = { OPT_SUBCOMMAND("info", &fn, cmd_repo_info), + 
OPT_SUBCOMMAND("structure", &fn, cmd_repo_structure), OPT_END() }; diff --git a/t/meson.build b/t/meson.build index 7974795fe4..9e426f8edc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -236,6 +236,7 @@ integration_tests = [ 't1701-racy-split-index.sh', 't1800-hook.sh', 't1900-repo.sh', + 't1901-repo-structure.sh', 't2000-conflict-when-checking-files-out.sh', 't2002-checkout-cache-u.sh', 't2003-checkout-cache-mkdir.sh', diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh new file mode 100755 index 0000000000..e592eea0eb --- /dev/null +++ b/t/t1901-repo-structure.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='test git repo structure' + +. ./test-lib.sh + +test_expect_success 'empty repository' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + cat >expect <<-\EOF && + | Repository structure | Value | + | -------------------- | ----- | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + EOF + + git repo structure >out 2>err && + + test_cmp expect out && + test_line_count = 0 err + ) +' + +test_expect_success 'repository with references' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m init && + git tag -a foo -m bar && + + oid="$(git rev-parse HEAD)" && + git update-ref refs/remotes/origin/foo "$oid" && + + git notes add -m foo && + + cat >expect <<-\EOF && + | Repository structure | Value | + | -------------------- | ----- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + EOF + + git repo structure >out 2>err && + + test_cmp expect out && + test_line_count = 0 err + ) +' + +test_done -- cgit v1.3-5-g9baa From eb5cf58ffcd4bb117c870d448b0df0193df52c82 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:59 -0500 Subject: builtin/repo: add object counts in structure output The number of objects in a repository
can provide insight regarding its shape. To surface this information, use the path-walk API to count the number of reachable objects in the repository by object type. All regular references are used to determine the reachable set of objects. The object counts are appended to the same table containing the reference information. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 105 +++++++++++++++++++++++++++++++++++++++++--- t/t1901-repo-structure.sh | 19 +++++++- 3 files changed, 117 insertions(+), 8 deletions(-) (limited to 'builtin') diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 8193298dd5..ae62d2415f 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -49,6 +49,7 @@ supported: following kinds of information are reported: + * Reference counts categorized by type +* Reachable object counts categorized by type + The table output format may change and is not intended for machine parsing. 
diff --git a/builtin/repo.c b/builtin/repo.c index e77e8db563..f39f06ee8c 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -3,9 +3,11 @@ #include "builtin.h" #include "environment.h" #include "parse-options.h" +#include "path-walk.h" #include "quote.h" #include "ref-filter.h" #include "refs.h" +#include "revision.h" #include "strbuf.h" #include "string-list.h" #include "shallow.h" @@ -167,6 +169,18 @@ struct ref_stats { size_t others; }; +struct object_stats { + size_t tags; + size_t commits; + size_t trees; + size_t blobs; +}; + +struct repo_structure { + struct ref_stats refs; + struct object_stats objects; +}; + struct stats_table { struct string_list rows; @@ -234,9 +248,17 @@ static inline size_t get_total_reference_count(struct ref_stats *stats) return stats->branches + stats->remotes + stats->tags + stats->others; } +static inline size_t get_total_object_count(struct object_stats *stats) +{ + return stats->tags + stats->commits + stats->trees + stats->blobs; +} + static void stats_table_setup_structure(struct stats_table *table, - struct ref_stats *refs) + struct repo_structure *stats) { + struct object_stats *objects = &stats->objects; + struct ref_stats *refs = &stats->refs; + size_t object_total; size_t ref_total; ref_total = get_total_reference_count(refs); @@ -246,6 +268,15 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); stats_table_count_addf(table, refs->others, " * %s", _("Others")); + + object_total = get_total_object_count(objects); + stats_table_addf(table, ""); + stats_table_addf(table, "* %s", _("Reachable objects")); + stats_table_count_addf(table, object_total, " * %s", _("Count")); + stats_table_count_addf(table, objects->commits, " * %s", _("Commits")); + stats_table_count_addf(table, objects->trees, " * %s", _("Trees")); + stats_table_count_addf(table, objects->blobs, " * %s", 
_("Blobs")); + stats_table_count_addf(table, objects->tags, " * %s", _("Tags")); } static void stats_table_print_structure(const struct stats_table *table) @@ -299,12 +330,18 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +struct count_references_data { + struct ref_stats *stats; + struct rev_info *revs; +}; + static int count_references(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, + const struct object_id *oid, int flags UNUSED, void *cb_data) { - struct ref_stats *stats = cb_data; + struct count_references_data *data = cb_data; + struct ref_stats *stats = data->stats; switch (ref_kind_from_refname(refname)) { case FILTER_REFS_BRANCHES: @@ -323,13 +360,64 @@ static int count_references(const char *refname, BUG("unexpected reference type"); } + /* + * While iterating through references for counting, also add OIDs in + * preparation for the path walk. + */ + add_pending_oid(data->revs, NULL, oid, 0); + return 0; } static void structure_count_references(struct ref_stats *stats, + struct rev_info *revs, struct repository *repo) { - refs_for_each_ref(get_main_ref_store(repo), count_references, &stats); + struct count_references_data data = { + .stats = stats, + .revs = revs, + }; + + refs_for_each_ref(get_main_ref_store(repo), count_references, &data); +} + + +static int count_objects(const char *path UNUSED, struct oid_array *oids, + enum object_type type, void *cb_data) +{ + struct object_stats *stats = cb_data; + + switch (type) { + case OBJ_TAG: + stats->tags += oids->nr; + break; + case OBJ_COMMIT: + stats->commits += oids->nr; + break; + case OBJ_TREE: + stats->trees += oids->nr; + break; + case OBJ_BLOB: + stats->blobs += oids->nr; + break; + default: + BUG("invalid object type"); + } + + return 0; +} + +static void structure_count_objects(struct object_stats *stats, + struct rev_info *revs) +{ + struct path_walk_info info = PATH_WALK_INFO_INIT; + + info.revs = revs; + 
info.path_fn = count_objects; + info.path_fn_data = stats; + + walk_objects_by_path(&info); + path_walk_info_clear(&info); } static int cmd_repo_structure(int argc, const char **argv, const char *prefix, @@ -338,19 +426,24 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct stats_table table = { .rows = STRING_LIST_INIT_DUP, }; - struct ref_stats stats = { 0 }; + struct repo_structure stats = { 0 }; + struct rev_info revs; struct option options[] = { 0 }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); if (argc) usage(_("too many arguments")); - structure_count_references(&stats, repo); + repo_init_revisions(repo, &revs, prefix); + + structure_count_references(&stats.refs, &revs, repo); + structure_count_objects(&stats.objects, &revs); stats_table_setup_structure(&table, &stats); stats_table_print_structure(&table); stats_table_clear(&table); + release_revisions(&revs); return 0; } diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index e592eea0eb..c32cf4e239 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -18,6 +18,13 @@ test_expect_success 'empty repository' ' | * Tags | 0 | | * Remotes | 0 | | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | EOF git repo structure >out 2>err && @@ -27,17 +34,18 @@ test_expect_success 'empty repository' ' ) ' -test_expect_success 'repository with references' ' +test_expect_success 'repository with references and objects' ' test_when_finished "rm -rf repo" && git init repo && ( cd repo && - git commit --allow-empty -m init && + test_commit_bulk 42 && git tag -a foo -m bar && oid="$(git rev-parse HEAD)" && git update-ref refs/remotes/origin/foo "$oid" && + # Also creates a commit, tree, and blob. 
git notes add -m foo && cat >expect <<-\EOF && @@ -49,6 +57,13 @@ test_expect_success 'repository with references' ' | * Tags | 1 | | * Remotes | 1 | | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 130 | + | * Commits | 43 | + | * Trees | 43 | + | * Blobs | 43 | + | * Tags | 1 | EOF git repo structure >out 2>err && -- cgit v1.3-5-g9baa From 17215675b5a2c2eab54b295a7e92d953af2e8779 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:26:00 -0500 Subject: builtin/repo: add keyvalue and nul format for structure stats All repository structure stats are outputted in a human-friendly table form. This format is not suitable for machine parsing. Add a --format option that supports three output modes: `table`, `keyvalue`, and `nul`. The `table` mode is the default format and prints the same table output as before. With the `keyvalue` mode, each line of output contains a key-value pair of a repository stat. The '=' character is used to delimit between keys and values. The `nul` mode is similar to `keyvalue`, but key-values are delimited by a NUL character instead of a newline. Also, instead of a '=' character to delimit between keys and values, a newline character is used. This allows stat values to support special characters without having to cquote them. These two new modes provide output that is more machine-friendly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 25 ++++++++++++++++++--- builtin/repo.c | 55 +++++++++++++++++++++++++++++++++++++++++---- t/t1901-repo-structure.sh | 33 +++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index ae62d2415f..ce43cb19c8 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -9,7 +9,7 @@ SYNOPSIS -------- [synopsis] git repo info [--format=(keyvalue|nul)] [-z] [...] 
-git repo structure +git repo structure [--format=(table|keyvalue|nul)] DESCRIPTION ----------- @@ -44,7 +44,7 @@ supported: + `-z` is an alias for `--format=nul`. -`structure`:: +`structure [--format=(table|keyvalue|nul)]`:: Retrieve statistics about the current repository structure. The following kinds of information are reported: + @@ -52,7 +52,26 @@ supported: * Reachable object counts categorized by type + -The table output format may change and is not intended for machine parsing. +The output format can be chosen through the flag `--format`. Three formats are +supported: ++ +`table`::: + Outputs repository stats in a human-friendly table. This format may + change and is not intended for machine parsing. This is the default + format. + +`keyvalue`::: + Each line of output contains a key-value pair for a repository stat. + The '=' character is used to delimit between the key and the value. + Values containing "unusual" characters are quoted as explained for the + configuration variable `core.quotePath` (see linkgit:git-config[1]). + +`nul`::: + Similar to `keyvalue`, but uses a NUL character to delimit between + key-value pairs instead of a newline. Also uses a newline character as + the delimiter between the key and value instead of '='. Unlike the + `keyvalue` format, values containing "unusual" characters are never + quoted. 
INFO KEYS --------- diff --git a/builtin/repo.c b/builtin/repo.c index f39f06ee8c..1754cc7e5d 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -15,13 +15,14 @@ static const char *const repo_usage[] = { "git repo info [--format=(keyvalue|nul)] [-z] [...]", - "git repo structure", + "git repo structure [--format=(table|keyvalue|nul)]", NULL }; typedef int get_value_fn(struct repository *repo, struct strbuf *buf); enum output_format { + FORMAT_TABLE, FORMAT_KEYVALUE, FORMAT_NUL_TERMINATED, }; @@ -136,6 +137,8 @@ static int parse_format_cb(const struct option *opt, *format = FORMAT_NUL_TERMINATED; else if (!strcmp(arg, "keyvalue")) *format = FORMAT_KEYVALUE; + else if (!strcmp(arg, "table")) + *format = FORMAT_TABLE; else die(_("invalid format '%s'"), arg); @@ -158,6 +161,8 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); + if (format != FORMAT_KEYVALUE && format != FORMAT_NUL_TERMINATED) + die(_("unsupported output format")); return print_fields(argc, argv, repo, format); } @@ -330,6 +335,30 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +static void structure_keyvalue_print(struct repo_structure *stats, + char key_delim, char value_delim) +{ + printf("references.branches.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.branches, value_delim); + printf("references.tags.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.tags, value_delim); + printf("references.remotes.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.remotes, value_delim); + printf("references.others.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.others, value_delim); + + printf("objects.commits.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.commits, value_delim); + printf("objects.trees.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.trees, value_delim); + 
printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.blobs, value_delim); + printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.tags, value_delim); + + fflush(stdout); +} + struct count_references_data { struct ref_stats *stats; struct rev_info *revs; @@ -426,9 +455,15 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct stats_table table = { .rows = STRING_LIST_INIT_DUP, }; + enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; struct rev_info revs; - struct option options[] = { 0 }; + struct option options[] = { + OPT_CALLBACK_F(0, "format", &format, N_("format"), + N_("output format"), + PARSE_OPT_NONEG, parse_format_cb), + OPT_END() + }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); if (argc) @@ -439,8 +474,20 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, structure_count_references(&stats.refs, &revs, repo); structure_count_objects(&stats.objects, &revs); - stats_table_setup_structure(&table, &stats); - stats_table_print_structure(&table); + switch (format) { + case FORMAT_TABLE: + stats_table_setup_structure(&table, &stats); + stats_table_print_structure(&table); + break; + case FORMAT_KEYVALUE: + structure_keyvalue_print(&stats, '=', '\n'); + break; + case FORMAT_NUL_TERMINATED: + structure_keyvalue_print(&stats, '\n', '\0'); + break; + default: + BUG("invalid output format"); + } stats_table_clear(&table); release_revisions(&revs); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index c32cf4e239..14bd8aede5 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -73,4 +73,37 @@ test_expect_success 'repository with references and objects' ' ) ' +test_expect_success 'keyvalue and nul format' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit_bulk 42 && + git tag -a foo -m bar && + + cat >expect <<-\EOF && + 
references.branches.count=1 + references.tags.count=1 + references.remotes.count=0 + references.others.count=0 + objects.commits.count=42 + objects.trees.count=42 + objects.blobs.count=42 + objects.tags.count=1 + EOF + + git repo structure --format=keyvalue >out 2>err && + + test_cmp expect out && + test_line_count = 0 err && + + # Replace key and value delimiters for nul format. + tr "\n=" "\0\n" <expect >expect_nul && + git repo structure --format=nul >out 2>err && + + test_cmp expect_nul out && + test_line_count = 0 err + ) +' + test_done -- cgit v1.3-5-g9baa From 16a93c03c7824a40b034a6ee1cb1c68c8ef48682 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:26:01 -0500 Subject: builtin/repo: add progress meter for structure stats When using the structure subcommand for git-repo(1), evaluating a repository may take some time depending on its shape. Add a progress meter to provide feedback to the user about what is happening. The progress meter is enabled by default when the command is executed from a tty. It can also be explicitly enabled/disabled via the --[no-]progress option. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 46 ++++++++++++++++++++++++++++++++++++++++------ t/t1901-repo-structure.sh | 20 ++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) (limited to 'builtin') diff --git a/builtin/repo.c b/builtin/repo.c index 1754cc7e5d..9d4749f79b 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -4,6 +4,7 @@ #include "environment.h" #include "parse-options.h" #include "path-walk.h" +#include "progress.h" #include "quote.h" #include "ref-filter.h" #include "refs.h" @@ -362,6 +363,7 @@ static void structure_keyvalue_print(struct repo_structure *stats, struct count_references_data { struct ref_stats *stats; struct rev_info *revs; + struct progress *progress; }; static int count_references(const char *refname, @@ -371,6 +373,7 @@ static int count_references(const char *refname, { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; + size_t ref_count; switch (ref_kind_from_refname(refname)) { case FILTER_REFS_BRANCHES: @@ -395,26 +398,41 @@ static int count_references(const char *refname, */ add_pending_oid(data->revs, NULL, oid, 0); + ref_count = get_total_reference_count(stats); + display_progress(data->progress, ref_count); + return 0; } static void structure_count_references(struct ref_stats *stats, struct rev_info *revs, - struct repository *repo) + struct repository *repo, + int show_progress) { struct count_references_data data = { .stats = stats, .revs = revs, }; + if (show_progress) + data.progress = start_delayed_progress(repo, + _("Counting references"), 0); + refs_for_each_ref(get_main_ref_store(repo), count_references, &data); + stop_progress(&data.progress); } +struct count_objects_data { + struct object_stats *stats; + struct progress *progress; +}; static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { - struct object_stats *stats = cb_data; + struct count_objects_data *data = 
cb_data; + struct object_stats *stats = data->stats; + size_t object_count; switch (type) { case OBJ_TAG: @@ -433,20 +451,31 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, BUG("invalid object type"); } + object_count = get_total_object_count(stats); + display_progress(data->progress, object_count); + return 0; } static void structure_count_objects(struct object_stats *stats, - struct rev_info *revs) + struct rev_info *revs, + struct repository *repo, int show_progress) { struct path_walk_info info = PATH_WALK_INFO_INIT; + struct count_objects_data data = { + .stats = stats, + }; info.revs = revs; info.path_fn = count_objects; - info.path_fn_data = stats; + info.path_fn_data = &data; + + if (show_progress) + data.progress = start_delayed_progress(repo, _("Counting objects"), 0); walk_objects_by_path(&info); path_walk_info_clear(&info); + stop_progress(&data.progress); } static int cmd_repo_structure(int argc, const char **argv, const char *prefix, @@ -458,10 +487,12 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; struct rev_info revs; + int show_progress = -1; struct option options[] = { OPT_CALLBACK_F(0, "format", &format, N_("format"), N_("output format"), PARSE_OPT_NONEG, parse_format_cb), + OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_END() }; @@ -471,8 +502,11 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, repo_init_revisions(repo, &revs, prefix); - structure_count_references(&stats.refs, &revs, repo); - structure_count_objects(&stats.objects, &revs); + if (show_progress < 0) + show_progress = isatty(2); + + structure_count_references(&stats.refs, &revs, repo, show_progress); + structure_count_objects(&stats.objects, &revs, repo, show_progress); switch (format) { case FORMAT_TABLE: diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 
14bd8aede5..36a71a144e 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -106,4 +106,24 @@ test_expect_success 'keyvalue and nul format' ' ) ' +test_expect_success 'progress meter option' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + + GIT_PROGRESS_DELAY=0 git repo structure --progress >out 2>err && + + test_file_not_empty out && + test_grep "Counting references: 2, done." err && + test_grep "Counting objects: 3, done." err && + + GIT_PROGRESS_DELAY=0 git repo structure --no-progress >out 2>err && + + test_file_not_empty out && + test_line_count = 0 err + ) +' + test_done -- cgit v1.3-5-g9baa From bdbebe5714b25dc9d215b48efbb80f410925d7dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:10 +0200 Subject: refs: introduce wrapper struct for `each_ref_fn` The `each_ref_fn` callback function type is used across our code base for several different functions that iterate through references. There's a bunch of callbacks implementing this type, which makes any changes to the callback signature extremely noisy. An example of the required churn is e8207717f1 (refs: add referent to each_ref_fn, 2024-08-09): adding a single argument required us to change 48 files. It was already proposed back then [1] that we might want to introduce a wrapper structure to alleviate the pain going forward. While this of course requires the same kind of global refactoring as just introducing a new parameter, it at least allows us to more easily change the callback type afterwards by just extending the wrapper structure. One counterargument to this refactoring is that it makes the structure more opaque. While it is obvious which callsites need to be fixed up when we change the function type, it's not obvious anymore once we use a structure. 
That being said, we only have a handful of sites that actually need to populate this wrapper structure: our ref backends, "refs/iterator.c" as well as very few sites that invoke the iterator callback functions directly. Introduce this wrapper structure so that we can adapt the iterator interfaces more readily. [1]: Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- bisect.c | 24 +++++++--------- builtin/bisect.c | 17 ++++-------- builtin/checkout.c | 6 ++-- builtin/describe.c | 18 ++++++------ builtin/fetch.c | 13 +++------ builtin/fsck.c | 33 ++++++++++++---------- builtin/gc.c | 15 ++++------ builtin/name-rev.c | 17 ++++++------ builtin/pack-objects.c | 27 ++++++++---------- builtin/receive-pack.c | 13 ++++----- builtin/remote.c | 44 +++++++++++++---------------- builtin/replace.c | 21 ++++++-------- builtin/repo.c | 9 ++---- builtin/rev-parse.c | 12 ++++---- builtin/show-branch.c | 35 +++++++++++------------ builtin/show-ref.c | 20 ++++++-------- builtin/submodule--helper.c | 10 ++----- builtin/worktree.c | 6 +--- commit-graph.c | 14 ++++------ delta-islands.c | 9 +++--- fetch-pack.c | 16 +++-------- help.c | 10 +++---- http-backend.c | 20 ++++++-------- log-tree.c | 24 ++++++++-------- ls-refs.c | 36 ++++++++++++++---------- midx-write.c | 17 ++++++------ negotiator/default.c | 7 ++--- negotiator/skipping.c | 7 ++--- notes.c | 8 ++---- object-name.c | 10 +++---- pseudo-merge.c | 21 ++++++-------- reachable.c | 9 +++--- ref-filter.c | 24 ++++++++-------- reflog.c | 9 ++---- refs.c | 67 +++++++++++++++++++++++++-------------------- refs.h | 26 +++++++++++++++--- refs/files-backend.c | 7 ++--- refs/iterator.c | 9 +++++- remote.c | 27 ++++++++---------- repack-midx.c | 16 +++++------ replace-object.c | 16 ++++------- revision.c | 12 ++++---- server-info.c | 12 ++++---- shallow.c | 16 +++-------- submodule.c | 12 ++------ t/helper/test-ref-store.c | 5 ++-- upload-pack.c | 29 +++++++++----------- walker.c | 8 ++---- worktree.c | 11 ++++++-- 49 files 
changed, 392 insertions(+), 462 deletions(-) (limited to 'builtin') diff --git a/bisect.c b/bisect.c index a6dc76b15c..326b59c0dc 100644 --- a/bisect.c +++ b/bisect.c @@ -450,21 +450,20 @@ void find_bisection(struct commit_list **commit_list, int *reaches, clear_commit_weight(&commit_weight); } -static int register_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb_data UNUSED) +static int register_ref(const struct reference *ref, void *cb_data UNUSED) { struct strbuf good_prefix = STRBUF_INIT; strbuf_addstr(&good_prefix, term_good); strbuf_addstr(&good_prefix, "-"); - if (!strcmp(refname, term_bad)) { + if (!strcmp(ref->name, term_bad)) { free(current_bad_oid); current_bad_oid = xmalloc(sizeof(*current_bad_oid)); - oidcpy(current_bad_oid, oid); - } else if (starts_with(refname, good_prefix.buf)) { - oid_array_append(&good_revs, oid); - } else if (starts_with(refname, "skip-")) { - oid_array_append(&skipped_revs, oid); + oidcpy(current_bad_oid, ref->oid); + } else if (starts_with(ref->name, good_prefix.buf)) { + oid_array_append(&good_revs, ref->oid); + } else if (starts_with(ref->name, "skip-")) { + oid_array_append(&skipped_revs, ref->oid); } strbuf_release(&good_prefix); @@ -1178,14 +1177,11 @@ int estimate_bisect_steps(int all) return (e < 3 * x) ? 
n : n - 1; } -static int mark_for_removal(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb_data) +static int mark_for_removal(const struct reference *ref, void *cb_data) { struct string_list *refs = cb_data; - char *ref = xstrfmt("refs/bisect%s", refname); - string_list_append(refs, ref); + char *bisect_ref = xstrfmt("refs/bisect%s", ref->name); + string_list_append(refs, bisect_ref); return 0; } diff --git a/builtin/bisect.c b/builtin/bisect.c index 8b8d870cd1..5b2024be62 100644 --- a/builtin/bisect.c +++ b/builtin/bisect.c @@ -358,10 +358,7 @@ static int check_and_set_terms(struct bisect_terms *terms, const char *cmd) return 0; } -static int inc_nr(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb_data) +static int inc_nr(const struct reference *ref UNUSED, void *cb_data) { unsigned int *nr = (unsigned int *)cb_data; (*nr)++; @@ -549,12 +546,11 @@ finish: return res; } -static int add_bisect_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb) +static int add_bisect_ref(const struct reference *ref, void *cb) { struct add_bisect_ref_data *data = cb; - add_pending_oid(data->revs, refname, oid, data->object_flags); + add_pending_oid(data->revs, ref->name, ref->oid, data->object_flags); return 0; } @@ -1165,12 +1161,9 @@ static int bisect_visualize(struct bisect_terms *terms, int argc, return run_command(&cmd); } -static int get_first_good(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int get_first_good(const struct reference *ref, void *cb_data) { - oidcpy(cb_data, oid); + oidcpy(cb_data, ref->oid); return 1; } diff --git a/builtin/checkout.c b/builtin/checkout.c index f9453473fe..66b69df6e6 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1063,11 +1063,9 @@ static void 
update_refs_for_switch(const struct checkout_opts *opts, report_tracking(new_branch_info); } -static int add_pending_uninteresting_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_pending_uninteresting_ref(const struct reference *ref, void *cb_data) { - add_pending_oid(cb_data, refname, oid, UNINTERESTING); + add_pending_oid(cb_data, ref->name, ref->oid, UNINTERESTING); return 0; } diff --git a/builtin/describe.c b/builtin/describe.c index ffaf8d9f0a..7954535044 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -154,20 +154,19 @@ static void add_to_known_names(const char *path, } } -static int get_name(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int get_name(const struct reference *ref, void *cb_data UNUSED) { int is_tag = 0; struct object_id peeled; int is_annotated, prio; const char *path_to_match = NULL; - if (skip_prefix(path, "refs/tags/", &path_to_match)) { + if (skip_prefix(ref->name, "refs/tags/", &path_to_match)) { is_tag = 1; } else if (all) { if ((exclude_patterns.nr || patterns.nr) && - !skip_prefix(path, "refs/heads/", &path_to_match) && - !skip_prefix(path, "refs/remotes/", &path_to_match)) { + !skip_prefix(ref->name, "refs/heads/", &path_to_match) && + !skip_prefix(ref->name, "refs/remotes/", &path_to_match)) { /* Only accept reference of known type if there are match/exclude patterns */ return 0; } @@ -209,10 +208,10 @@ static int get_name(const char *path, const char *referent UNUSED, const struct } /* Is it annotated? 
*/ - if (!peel_iterated_oid(the_repository, oid, &peeled)) { - is_annotated = !oideq(oid, &peeled); + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + is_annotated = !oideq(ref->oid, &peeled); } else { - oidcpy(&peeled, oid); + oidcpy(&peeled, ref->oid); is_annotated = 0; } @@ -229,7 +228,8 @@ static int get_name(const char *path, const char *referent UNUSED, const struct else prio = 0; - add_to_known_names(all ? path + 5 : path + 10, &peeled, prio, oid); + add_to_known_names(all ? ref->name + 5 : ref->name + 10, + &peeled, prio, ref->oid); return 0; } diff --git a/builtin/fetch.c b/builtin/fetch.c index c7ff3480fb..7052e6ff21 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -289,13 +289,11 @@ static struct refname_hash_entry *refname_hash_add(struct hashmap *map, return ent; } -static int add_one_refname(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cbdata) +static int add_one_refname(const struct reference *ref, void *cbdata) { struct hashmap *refname_map = cbdata; - (void) refname_hash_add(refname_map, refname, oid); + (void) refname_hash_add(refname_map, ref->name, ref->oid); return 0; } @@ -1416,14 +1414,11 @@ static void set_option(struct transport *transport, const char *name, const char } -static int add_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_oid(const struct reference *ref, void *cb_data) { struct oid_array *oids = cb_data; - oid_array_append(oids, oid); + oid_array_append(oids, ref->oid); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index 8ee95e0d67..ed4eea1680 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -530,14 +530,13 @@ static int fsck_handle_reflog(const char *logname, void *cb_data) return 0; } -static int fsck_handle_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static 
int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) { struct object *obj; - obj = parse_object(the_repository, oid); + obj = parse_object(the_repository, ref->oid); if (!obj) { - if (is_promisor_object(the_repository, oid)) { + if (is_promisor_object(the_repository, ref->oid)) { /* * Increment default_refs anyway, because this is a * valid ref. @@ -546,19 +545,19 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con return 0; } error(_("%s: invalid sha1 pointer %s"), - refname, oid_to_hex(oid)); + ref->name, oid_to_hex(ref->oid)); errors_found |= ERROR_REACHABLE; /* We'll continue with the rest despite the error.. */ return 0; } - if (obj->type != OBJ_COMMIT && is_branch(refname)) { - error(_("%s: not a commit"), refname); + if (obj->type != OBJ_COMMIT && is_branch(ref->name)) { + error(_("%s: not a commit"), ref->name); errors_found |= ERROR_REFS; } default_refs++; obj->flags |= USED; fsck_put_object_name(&fsck_walk_options, - oid, "%s", refname); + ref->oid, "%s", ref->name); mark_object_reachable(obj); return 0; @@ -580,13 +579,19 @@ static void get_default_heads(void) worktrees = get_worktrees(); for (p = worktrees; *p; p++) { struct worktree *wt = *p; - struct strbuf ref = STRBUF_INIT; + struct strbuf refname = STRBUF_INIT; - strbuf_worktree_ref(wt, &ref, "HEAD"); - fsck_head_link(ref.buf, &head_points_at, &head_oid); - if (head_points_at && !is_null_oid(&head_oid)) - fsck_handle_ref(ref.buf, NULL, &head_oid, 0, NULL); - strbuf_release(&ref); + strbuf_worktree_ref(wt, &refname, "HEAD"); + fsck_head_link(refname.buf, &head_points_at, &head_oid); + if (head_points_at && !is_null_oid(&head_oid)) { + struct reference ref = { + .name = refname.buf, + .oid = &head_oid, + }; + + fsck_handle_ref(&ref, NULL); + } + strbuf_release(&refname); if (include_reflogs) refs_for_each_reflog(get_worktree_ref_store(wt), diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d978..9de5de175f 100644 --- a/builtin/gc.c +++ 
b/builtin/gc.c @@ -1100,24 +1100,21 @@ struct cg_auto_data { int limit; }; -static int dfs_on_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int dfs_on_ref(const struct reference *ref, void *cb_data) { struct cg_auto_data *data = (struct cg_auto_data *)cb_data; int result = 0; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct commit_list *stack = NULL; struct commit *commit; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; + if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; - commit = lookup_commit(the_repository, oid); + commit = lookup_commit(the_repository, maybe_peeled); if (!commit) return 0; if (repo_parse_commit(the_repository, commit) || diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 74512e54a3..615f7d1aae 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -339,10 +339,9 @@ static int cmp_by_tag_and_age(const void *a_, const void *b_) return a->taggerdate != b->taggerdate; } -static int name_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int name_ref(const struct reference *ref, void *cb_data) { - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(the_repository, ref->oid); struct name_ref_data *data = cb_data; int can_abbreviate_output = data->tags_only && data->name_only; int deref = 0; @@ -350,14 +349,14 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct struct commit *commit = NULL; timestamp_t taggerdate = TIME_MAX; - if (data->tags_only && !starts_with(path, "refs/tags/")) + if (data->tags_only && !starts_with(ref->name, "refs/tags/")) 
return 0; if (data->exclude_filters.nr) { struct string_list_item *item; for_each_string_list_item(item, &data->exclude_filters) { - if (subpath_matches(path, item->string) >= 0) + if (subpath_matches(ref->name, item->string) >= 0) return 0; } } @@ -378,7 +377,7 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct * shouldn't stop when seeing 'refs/tags/v1.4' matches * 'refs/tags/v*'. We should show it as 'v1.4'. */ - switch (subpath_matches(path, item->string)) { + switch (subpath_matches(ref->name, item->string)) { case -1: /* did not match */ break; case 0: /* matched fully */ @@ -406,13 +405,13 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct } if (o && o->type == OBJ_COMMIT) { commit = (struct commit *)o; - from_tag = starts_with(path, "refs/tags/"); + from_tag = starts_with(ref->name, "refs/tags/"); if (taggerdate == TIME_MAX) taggerdate = commit->date; } - add_to_tip_table(oid, path, can_abbreviate_output, commit, taggerdate, - from_tag, deref); + add_to_tip_table(ref->oid, ref->name, can_abbreviate_output, + commit, taggerdate, from_tag, deref); return 0; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5bdc44fb2d..39633a0158 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -831,15 +831,14 @@ static enum write_one_status write_one(struct hashfile *f, return WRITE_ONE_WRITTEN; } -static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int mark_tagged(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid); + struct object_entry *entry = packlist_find(&to_pack, ref->oid); if (entry) entry->tagged = 1; - if (!peel_iterated_oid(the_repository, oid, &peeled)) { + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { entry = packlist_find(&to_pack, &peeled); if (entry) 
entry->tagged = 1; @@ -3306,13 +3305,12 @@ static void add_tag_chain(const struct object_id *oid) } } -static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled)) - add_tag_chain(oid); + if (!peel_iterated_oid(the_repository, ref->oid, &peeled) && obj_is_packed(&peeled)) + add_tag_chain(ref->oid); return 0; } @@ -4533,19 +4531,16 @@ static void record_recent_commit(struct commit *commit, void *data UNUSED) oid_array_append(&recent_objects, &commit->object.oid); } -static int mark_bitmap_preferred_tip(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *data UNUSED) +static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNUSED) { + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct object *object; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; - object = parse_object_or_die(the_repository, oid, refname); + object = parse_object_or_die(the_repository, maybe_peeled, ref->name); if (object->type == OBJ_COMMIT) object->flags |= NEEDS_BITMAP; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c9288a9c7e..e8ee0e7321 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -305,13 +305,12 @@ static void show_ref(const char *path, const struct object_id *oid) } } -static int show_ref_cb(const char *path_full, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *data) +static int show_ref_cb(const struct reference *ref, void *data) { struct oidset *seen = data; - const char *path = strip_namespace(path_full); + const char *path = 
strip_namespace(ref->name); - if (ref_is_hidden(path, path_full, &hidden_refs)) + if (ref_is_hidden(path, ref->name, &hidden_refs)) return 0; /* @@ -320,13 +319,13 @@ static int show_ref_cb(const char *path_full, const char *referent UNUSED, const * transfer but will otherwise ignore them. */ if (!path) { - if (oidset_insert(seen, oid)) + if (oidset_insert(seen, ref->oid)) return 0; path = ".have"; } else { - oidset_insert(seen, oid); + oidset_insert(seen, ref->oid); } - show_ref(path, oid); + show_ref(path, ref->oid); return 0; } diff --git a/builtin/remote.c b/builtin/remote.c index 8a7ed4299a..7ffc14ba15 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -570,17 +570,14 @@ struct branches_for_remote { struct known_remotes *keep; }; -static int add_branch_for_removal(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data) +static int add_branch_for_removal(const struct reference *ref, void *cb_data) { struct branches_for_remote *branches = cb_data; struct refspec_item refspec; struct known_remote *kr; memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (remote_find_tracking(branches->remote, &refspec)) return 0; free(refspec.src); @@ -588,7 +585,7 @@ static int add_branch_for_removal(const char *refname, /* don't delete a branch if another remote also uses it */ for (kr = branches->keep->list; kr; kr = kr->next) { memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (!remote_find_tracking(kr->remote, &refspec)) { free(refspec.src); return 0; @@ -596,16 +593,16 @@ static int add_branch_for_removal(const char *refname, } /* don't delete non-remote-tracking refs */ - if (!starts_with(refname, "refs/remotes/")) { + if (!starts_with(ref->name, "refs/remotes/")) { /* advise user how to delete local branches */ - if (starts_with(refname, "refs/heads/")) + if (starts_with(ref->name, 
"refs/heads/")) string_list_append(branches->skipped, - abbrev_branch(refname)); + abbrev_branch(ref->name)); /* silently skip over other non-remote refs */ return 0; } - string_list_append(branches->branches, refname); + string_list_append(branches->branches, ref->name); return 0; } @@ -713,18 +710,18 @@ out: return error; } -static int rename_one_ref(const char *old_refname, const char *referent, - const struct object_id *oid, - int flags, void *cb_data) +static int rename_one_ref(const struct reference *ref, void *cb_data) { struct strbuf new_referent = STRBUF_INIT; struct strbuf new_refname = STRBUF_INIT; struct rename_info *rename = cb_data; + const struct object_id *oid = ref->oid; + const char *referent = ref->target; int error; - compute_renamed_ref(rename, old_refname, &new_refname); + compute_renamed_ref(rename, ref->name, &new_refname); - if (flags & REF_ISSYMREF) { + if (ref->flags & REF_ISSYMREF) { /* * Stupidly enough `referent` is not pointing to the immediate * target of a symref, but it's the recursively resolved value. @@ -732,25 +729,25 @@ static int rename_one_ref(const char *old_refname, const char *referent, * unborn symrefs don't have any value for the `referent` at all. */ referent = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - old_refname, RESOLVE_REF_NO_RECURSE, + ref->name, RESOLVE_REF_NO_RECURSE, NULL, NULL); compute_renamed_ref(rename, referent, &new_referent); oid = NULL; } - error = ref_transaction_delete(rename->transaction, old_refname, + error = ref_transaction_delete(rename->transaction, ref->name, oid, referent, REF_NO_DEREF, NULL, rename->err); if (error < 0) goto out; error = ref_transaction_update(rename->transaction, new_refname.buf, oid, null_oid(the_hash_algo), - (flags & REF_ISSYMREF) ? new_referent.buf : NULL, NULL, + (ref->flags & REF_ISSYMREF) ? 
new_referent.buf : NULL, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF | REF_SKIP_OID_VERIFICATION, NULL, rename->err); if (error < 0) goto out; - error = rename_one_reflog(old_refname, oid, rename); + error = rename_one_reflog(ref->name, oid, rename); if (error < 0) goto out; @@ -1125,19 +1122,16 @@ static void free_remote_ref_states(struct ref_states *states) string_list_clear_func(&states->push, clear_push_info); } -static int append_ref_to_tracked_list(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags, void *cb_data) +static int append_ref_to_tracked_list(const struct reference *ref, void *cb_data) { struct ref_states *states = cb_data; struct refspec_item refspec; - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (!remote_find_tracking(states->remote, &refspec)) { string_list_append(&states->tracked, abbrev_branch(refspec.src)); free(refspec.src); diff --git a/builtin/replace.c b/builtin/replace.c index 900b560a77..4c62c5ab58 100644 --- a/builtin/replace.c +++ b/builtin/replace.c @@ -47,30 +47,27 @@ struct show_data { enum replace_format format; }; -static int show_reference(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int show_reference(const struct reference *ref, void *cb_data) { struct show_data *data = cb_data; - if (!wildmatch(data->pattern, refname, 0)) { + if (!wildmatch(data->pattern, ref->name, 0)) { if (data->format == REPLACE_FORMAT_SHORT) - printf("%s\n", refname); + printf("%s\n", ref->name); else if (data->format == REPLACE_FORMAT_MEDIUM) - printf("%s -> %s\n", refname, oid_to_hex(oid)); + printf("%s -> %s\n", ref->name, oid_to_hex(ref->oid)); else { /* data->format == REPLACE_FORMAT_LONG */ struct object_id object; enum object_type obj_type, repl_type; - if (repo_get_oid(data->repo, 
refname, &object)) - return error(_("failed to resolve '%s' as a valid ref"), refname); + if (repo_get_oid(data->repo, ref->name, &object)) + return error(_("failed to resolve '%s' as a valid ref"), ref->name); obj_type = odb_read_object_info(data->repo->objects, &object, NULL); - repl_type = odb_read_object_info(data->repo->objects, oid, NULL); + repl_type = odb_read_object_info(data->repo->objects, ref->oid, NULL); - printf("%s (%s) -> %s (%s)\n", refname, type_name(obj_type), - oid_to_hex(oid), type_name(repl_type)); + printf("%s (%s) -> %s (%s)\n", ref->name, type_name(obj_type), + oid_to_hex(ref->oid), type_name(repl_type)); } } diff --git a/builtin/repo.c b/builtin/repo.c index 9d4749f79b..f26640bd6e 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -366,16 +366,13 @@ struct count_references_data { struct progress *progress; }; -static int count_references(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int count_references(const struct reference *ref, void *cb_data) { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; size_t ref_count; - switch (ref_kind_from_refname(refname)) { + switch (ref_kind_from_refname(ref->name)) { case FILTER_REFS_BRANCHES: stats->branches++; break; @@ -396,7 +393,7 @@ static int count_references(const char *refname, * While iterating through references for counting, also add OIDs in * preparation for the path walk. 
*/ - add_pending_oid(data->revs, NULL, oid, 0); + add_pending_oid(data->revs, NULL, ref->oid, 0); ref_count = get_total_reference_count(stats); display_progress(data->progress, ref_count); diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 9da92b990d..3578591b4f 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -217,19 +217,17 @@ static int show_default(void) return 0; } -static int show_reference(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int show_reference(const struct reference *ref, void *cb_data UNUSED) { - if (ref_excluded(&ref_excludes, refname)) + if (ref_excluded(&ref_excludes, ref->name)) return 0; - show_rev(NORMAL, oid, refname); + show_rev(NORMAL, ref->oid, ref->name); return 0; } -static int anti_reference(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int anti_reference(const struct reference *ref, void *cb_data UNUSED) { - show_rev(REVERSED, oid, refname); + show_rev(REVERSED, ref->oid, ref->name); return 0; } diff --git a/builtin/show-branch.c b/builtin/show-branch.c index 441babf2e3..10475a6b5e 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -413,34 +413,32 @@ static int append_ref(const char *refname, const struct object_id *oid, return 0; } -static int append_head_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int append_head_ref(const struct reference *ref, void *cb_data UNUSED) { struct object_id tmp; int ofs = 11; - if (!starts_with(refname, "refs/heads/")) + if (!starts_with(ref->name, "refs/heads/")) return 0; /* If both heads/foo and tags/foo exists, get_sha1 would * get confused. 
*/ - if (repo_get_oid(the_repository, refname + ofs, &tmp) || !oideq(&tmp, oid)) + if (repo_get_oid(the_repository, ref->name + ofs, &tmp) || !oideq(&tmp, ref->oid)) ofs = 5; - return append_ref(refname + ofs, oid, 0); + return append_ref(ref->name + ofs, ref->oid, 0); } -static int append_remote_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int append_remote_ref(const struct reference *ref, void *cb_data UNUSED) { struct object_id tmp; int ofs = 13; - if (!starts_with(refname, "refs/remotes/")) + if (!starts_with(ref->name, "refs/remotes/")) return 0; /* If both heads/foo and tags/foo exists, get_sha1 would * get confused. */ - if (repo_get_oid(the_repository, refname + ofs, &tmp) || !oideq(&tmp, oid)) + if (repo_get_oid(the_repository, ref->name + ofs, &tmp) || !oideq(&tmp, ref->oid)) ofs = 5; - return append_ref(refname + ofs, oid, 0); + return append_ref(ref->name + ofs, ref->oid, 0); } static int append_tag_ref(const char *refname, const struct object_id *oid, @@ -454,27 +452,26 @@ static int append_tag_ref(const char *refname, const struct object_id *oid, static const char *match_ref_pattern = NULL; static int match_ref_slash = 0; -static int append_matching_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int append_matching_ref(const struct reference *ref, void *cb_data) { /* we want to allow pattern hold/ to show all * branches under refs/heads/hold/, and v0.99.9? to show * refs/tags/v0.99.9a and friends. 
*/ const char *tail; - int slash = count_slashes(refname); - for (tail = refname; *tail && match_ref_slash < slash; ) + int slash = count_slashes(ref->name); + for (tail = ref->name; *tail && match_ref_slash < slash; ) if (*tail++ == '/') slash--; if (!*tail) return 0; if (wildmatch(match_ref_pattern, tail, 0)) return 0; - if (starts_with(refname, "refs/heads/")) - return append_head_ref(refname, NULL, oid, flag, cb_data); - if (starts_with(refname, "refs/tags/")) - return append_tag_ref(refname, oid, flag, cb_data); - return append_ref(refname, oid, 0); + if (starts_with(ref->name, "refs/heads/")) + return append_head_ref(ref, cb_data); + if (starts_with(ref->name, "refs/tags/")) + return append_tag_ref(ref->name, ref->oid, ref->flags, cb_data); + return append_ref(ref->name, ref->oid, 0); } static void snarf_refs(int head, int remotes) diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 0b6f9edf86..4803b5e598 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -66,26 +66,25 @@ struct show_ref_data { int show_head; }; -static int show_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cbdata) +static int show_ref(const struct reference *ref, void *cbdata) { struct show_ref_data *data = cbdata; - if (data->show_head && !strcmp(refname, "HEAD")) + if (data->show_head && !strcmp(ref->name, "HEAD")) goto match; if (data->patterns) { - int reflen = strlen(refname); + int reflen = strlen(ref->name); const char **p = data->patterns, *m; while ((m = *p++) != NULL) { int len = strlen(m); if (len > reflen) continue; - if (memcmp(m, refname + reflen - len, len)) + if (memcmp(m, ref->name + reflen - len, len)) continue; if (len == reflen) goto match; - if (refname[reflen - len - 1] == '/') + if (ref->name[reflen - len - 1] == '/') goto match; } return 0; @@ -94,18 +93,15 @@ static int show_ref(const char *refname, const char *referent UNUSED, const stru match: data->found_match++; - 
show_one(data->show_one_opts, refname, oid); + show_one(data->show_one_opts, ref->name, ref->oid); return 0; } -static int add_existing(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cbdata) +static int add_existing(const struct reference *ref, void *cbdata) { struct string_list *list = (struct string_list *)cbdata; - string_list_insert(list, refname); + string_list_insert(list, ref->name); return 0; } diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fcd73abe53..35f6cf735e 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -593,16 +593,12 @@ static void print_status(unsigned int flags, char state, const char *path, printf("\n"); } -static int handle_submodule_head_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int handle_submodule_head_ref(const struct reference *ref, void *cb_data) { struct object_id *output = cb_data; - if (oid) - oidcpy(output, oid); + if (ref->oid) + oidcpy(output, ref->oid); return 0; } diff --git a/builtin/worktree.c b/builtin/worktree.c index 812774a5ca..b7f323b5e4 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -635,11 +635,7 @@ static void print_preparing_worktree_line(int detach, * * Returns 0 on failure and non-zero on success. 
*/ -static int first_valid_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data UNUSED) +static int first_valid_ref(const struct reference *ref UNUSED, void *cb_data UNUSED) { return 1; } diff --git a/commit-graph.c b/commit-graph.c index 474454db73..f91af41625 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1851,18 +1851,16 @@ struct refs_cb_data { struct progress *progress; }; -static int add_ref_to_set(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_ref_to_set(const struct reference *ref, void *cb_data) { + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - if (odb_read_object_info(data->repo->objects, oid, NULL) == OBJ_COMMIT) - oidset_insert(data->commits, oid); + if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; + if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) == OBJ_COMMIT) + oidset_insert(data->commits, maybe_peeled); display_progress(data->progress, oidset_size(data->commits)); diff --git a/delta-islands.c b/delta-islands.c index 36c94799d6..7cfebc4162 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -390,8 +390,7 @@ static void add_ref_to_island(kh_str_t *remote_islands, const char *island_name, rl->hash += sha_core; } -static int find_island_for_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb) +static int find_island_for_ref(const struct reference *ref, void *cb) { struct island_load_data *ild = cb; @@ -406,7 +405,7 @@ static int find_island_for_ref(const char *refname, const char *referent UNUSED, /* walk backwards to get last-one-wins ordering */ for (i = ild->nr - 1; i >= 0; i--) { - if 
(!regexec(&ild->rx[i], refname, + if (!regexec(&ild->rx[i], ref->name, ARRAY_SIZE(matches), matches, 0)) break; } @@ -428,10 +427,10 @@ static int find_island_for_ref(const char *refname, const char *referent UNUSED, if (island_name.len) strbuf_addch(&island_name, '-'); - strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so); + strbuf_add(&island_name, ref->name + match->rm_so, match->rm_eo - match->rm_so); } - add_ref_to_island(ild->remote_islands, island_name.buf, oid); + add_ref_to_island(ild->remote_islands, island_name.buf, ref->oid); strbuf_release(&island_name); return 0; } diff --git a/fetch-pack.c b/fetch-pack.c index fe7a84bf2f..78c45d4a15 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -188,13 +188,9 @@ static int rev_list_insert_ref(struct fetch_negotiator *negotiator, return 0; } -static int rev_list_insert_ref_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int rev_list_insert_ref_oid(const struct reference *ref, void *cb_data) { - return rev_list_insert_ref(cb_data, oid); + return rev_list_insert_ref(cb_data, ref->oid); } enum ack_type { @@ -616,13 +612,9 @@ static int mark_complete(const struct object_id *oid) return 0; } -static int mark_complete_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int mark_complete_oid(const struct reference *ref, void *cb_data UNUSED) { - return mark_complete(oid); + return mark_complete(ref->oid); } static void mark_recent_complete_commits(struct fetch_pack_args *args, diff --git a/help.c b/help.c index 5854dd4a7e..20e114432d 100644 --- a/help.c +++ b/help.c @@ -851,18 +851,16 @@ struct similar_ref_cb { struct string_list *similar_refs; }; -static int append_similar_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data) +static int 
append_similar_ref(const struct reference *ref, void *cb_data) { struct similar_ref_cb *cb = (struct similar_ref_cb *)(cb_data); - char *branch = strrchr(refname, '/') + 1; + char *branch = strrchr(ref->name, '/') + 1; /* A remote branch of the same name is deemed similar */ - if (starts_with(refname, "refs/remotes/") && + if (starts_with(ref->name, "refs/remotes/") && !strcmp(branch, cb->base_ref)) string_list_append_nodup(cb->similar_refs, - refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), refname, 1)); + refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), ref->name, 1)); return 0; } diff --git a/http-backend.c b/http-backend.c index 9084058f1e..92e1733f14 100644 --- a/http-backend.c +++ b/http-backend.c @@ -513,18 +513,17 @@ static void run_service(const char **argv, int buffer_input) exit(1); } -static int show_text_ref(const char *name, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int show_text_ref(const struct reference *ref, void *cb_data) { - const char *name_nons = strip_namespace(name); + const char *name_nons = strip_namespace(ref->name); struct strbuf *buf = cb_data; - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(the_repository, ref->oid); if (!o) return 0; - strbuf_addf(buf, "%s\t%s\n", oid_to_hex(oid), name_nons); + strbuf_addf(buf, "%s\t%s\n", oid_to_hex(ref->oid), name_nons); if (o->type == OBJ_TAG) { - o = deref_tag(the_repository, o, name, 0); + o = deref_tag(the_repository, o, ref->name, 0); if (!o) return 0; strbuf_addf(buf, "%s\t%s^{}\n", oid_to_hex(&o->oid), @@ -569,21 +568,20 @@ static void get_info_refs(struct strbuf *hdr, char *arg UNUSED) strbuf_release(&buf); } -static int show_head_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int show_head_ref(const struct reference *ref, void *cb_data) { struct strbuf *buf = cb_data; - if (flag & 
REF_ISSYMREF) { + if (ref->flags & REF_ISSYMREF) { const char *target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, + ref->name, RESOLVE_REF_READING, NULL, NULL); if (target) strbuf_addf(buf, "ref: %s\n", strip_namespace(target)); } else { - strbuf_addf(buf, "%s\n", oid_to_hex(oid)); + strbuf_addf(buf, "%s\n", oid_to_hex(ref->oid)); } return 0; diff --git a/log-tree.c b/log-tree.c index 7d917f2a83..1729b0c201 100644 --- a/log-tree.c +++ b/log-tree.c @@ -147,9 +147,7 @@ static int ref_filter_match(const char *refname, return 1; } -static int add_ref_decoration(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int add_ref_decoration(const struct reference *ref, void *cb_data) { int i; struct object *obj; @@ -158,16 +156,16 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, struct decoration_filter *filter = (struct decoration_filter *)cb_data; const char *git_replace_ref_base = ref_namespace[NAMESPACE_REPLACE].ref; - if (filter && !ref_filter_match(refname, filter)) + if (filter && !ref_filter_match(ref->name, filter)) return 0; - if (starts_with(refname, git_replace_ref_base)) { + if (starts_with(ref->name, git_replace_ref_base)) { struct object_id original_oid; if (!replace_refs_enabled(the_repository)) return 0; - if (get_oid_hex(refname + strlen(git_replace_ref_base), + if (get_oid_hex(ref->name + strlen(git_replace_ref_base), &original_oid)) { - warning("invalid replace ref %s", refname); + warning("invalid replace ref %s", ref->name); return 0; } obj = parse_object(the_repository, &original_oid); @@ -176,10 +174,10 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, return 0; } - objtype = odb_read_object_info(the_repository->objects, oid, NULL); + objtype = odb_read_object_info(the_repository->objects, ref->oid, NULL); if (objtype < 0) return 0; - obj = lookup_object_by_type(the_repository, oid, 
objtype); + obj = lookup_object_by_type(the_repository, ref->oid, objtype); for (i = 0; i < ARRAY_SIZE(ref_namespace); i++) { struct ref_namespace_info *info = &ref_namespace[i]; @@ -187,24 +185,24 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, if (!info->decoration) continue; if (info->exact) { - if (!strcmp(refname, info->ref)) { + if (!strcmp(ref->name, info->ref)) { deco_type = info->decoration; break; } - } else if (starts_with(refname, info->ref)) { + } else if (starts_with(ref->name, info->ref)) { deco_type = info->decoration; break; } } - add_name_decoration(deco_type, refname, obj); + add_name_decoration(deco_type, ref->name, obj); while (obj->type == OBJ_TAG) { if (!obj->parsed) parse_object(the_repository, &obj->oid); obj = ((struct tag *)obj)->tagged; if (!obj) break; - add_name_decoration(DECORATION_REF_TAG, refname, obj); + add_name_decoration(DECORATION_REF_TAG, ref->name, obj); } return 0; } diff --git a/ls-refs.c b/ls-refs.c index c47acde07f..64d0272369 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -75,42 +75,42 @@ struct ls_refs_data { unsigned unborn : 1; }; -static int send_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int send_ref(const struct reference *ref, void *cb_data) { struct ls_refs_data *data = cb_data; - const char *refname_nons = strip_namespace(refname); + const char *refname_nons = strip_namespace(ref->name); strbuf_reset(&data->buf); - if (ref_is_hidden(refname_nons, refname, &data->hidden_refs)) + if (ref_is_hidden(refname_nons, ref->name, &data->hidden_refs)) return 0; if (!ref_match(&data->prefixes, refname_nons)) return 0; - if (oid) - strbuf_addf(&data->buf, "%s %s", oid_to_hex(oid), refname_nons); + if (ref->oid) + strbuf_addf(&data->buf, "%s %s", oid_to_hex(ref->oid), refname_nons); else strbuf_addf(&data->buf, "unborn %s", refname_nons); - if (data->symrefs && flag & REF_ISSYMREF) { + if (data->symrefs && ref->flags & 
REF_ISSYMREF) { + int unused_flag; struct object_id unused; const char *symref_target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, + ref->name, 0, &unused, - &flag); + &unused_flag); if (!symref_target) - die("'%s' is a symref but it is not?", refname); + die("'%s' is a symref but it is not?", ref->name); strbuf_addf(&data->buf, " symref-target:%s", strip_namespace(symref_target)); } - if (data->peel && oid) { + if (data->peel && ref->oid) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled)); } @@ -131,9 +131,17 @@ static void send_possibly_unborn_head(struct ls_refs_data *data) if (!refs_resolve_ref_unsafe(get_main_ref_store(the_repository), namespaced.buf, 0, &oid, &flag)) return; /* bad ref */ oid_is_null = is_null_oid(&oid); + if (!oid_is_null || - (data->unborn && data->symrefs && (flag & REF_ISSYMREF))) - send_ref(namespaced.buf, NULL, oid_is_null ? NULL : &oid, flag, data); + (data->unborn && data->symrefs && (flag & REF_ISSYMREF))) { + struct reference ref = { + .name = namespaced.buf, + .oid = oid_is_null ? 
NULL : &oid, + .flags = flag, + }; + + send_ref(&ref, data); + } strbuf_release(&namespaced); } diff --git a/midx-write.c b/midx-write.c index c73010df6d..f4dd875747 100644 --- a/midx-write.c +++ b/midx-write.c @@ -697,28 +697,27 @@ static void prepare_midx_packing_data(struct packing_data *pdata, trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); } -static int add_ref_to_pending(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flag, void *cb_data) +static int add_ref_to_pending(const struct reference *ref, void *cb_data) { struct rev_info *revs = (struct rev_info*)cb_data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct object *object; - if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { - warning("symbolic ref is dangling: %s", refname); + if ((ref->flags & REF_ISSYMREF) && (ref->flags & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", ref->name); return 0; } - if (!peel_iterated_oid(revs->repo, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(revs->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; - object = parse_object_or_die(revs->repo, oid, refname); + object = parse_object_or_die(revs->repo, maybe_peeled, ref->name); if (object->type != OBJ_COMMIT) return 0; add_pending_object(revs, object, ""); - if (bitmap_is_preferred_refname(revs->repo, refname)) + if (bitmap_is_preferred_refname(revs->repo, ref->name)) object->flags |= NEEDS_BITMAP; return 0; } diff --git a/negotiator/default.c b/negotiator/default.c index c479da9b09..116dedcf83 100644 --- a/negotiator/default.c +++ b/negotiator/default.c @@ -38,11 +38,10 @@ static void rev_list_push(struct negotiation_state *ns, } } -static int clear_marks(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int clear_marks(const struct reference *ref, void *cb_data UNUSED) { - struct object *o = deref_tag(the_repository, 
parse_object(the_repository, oid), refname, 0); + struct object *o = deref_tag(the_repository, parse_object(the_repository, ref->oid), + ref->name, 0); if (o && o->type == OBJ_COMMIT) clear_commit_marks((struct commit *)o, diff --git a/negotiator/skipping.c b/negotiator/skipping.c index 616df6bf3a..0a272130fb 100644 --- a/negotiator/skipping.c +++ b/negotiator/skipping.c @@ -75,11 +75,10 @@ static struct entry *rev_list_push(struct data *data, struct commit *commit, int return entry; } -static int clear_marks(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int clear_marks(const struct reference *ref, void *cb_data UNUSED) { - struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0); + struct object *o = deref_tag(the_repository, parse_object(the_repository, ref->oid), + ref->name, 0); if (o && o->type == OBJ_COMMIT) clear_commit_marks((struct commit *)o, diff --git a/notes.c b/notes.c index 9a2e9181fe..8e00fd8c47 100644 --- a/notes.c +++ b/notes.c @@ -938,13 +938,11 @@ out: return ret; } -static int string_list_add_one_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb) +static int string_list_add_one_ref(const struct reference *ref, void *cb) { struct string_list *refs = cb; - if (!unsorted_string_list_has_string(refs, refname)) - string_list_append(refs, refname); + if (!unsorted_string_list_has_string(refs, ref->name)) + string_list_append(refs, ref->name); return 0; } diff --git a/object-name.c b/object-name.c index f6902e140d..7e8109f25f 100644 --- a/object-name.c +++ b/object-name.c @@ -1444,18 +1444,16 @@ struct handle_one_ref_cb { struct commit_list **list; }; -static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int handle_one_ref(const struct reference *ref, void *cb_data) { struct 
handle_one_ref_cb *cb = cb_data; struct commit_list **list = cb->list; - struct object *object = parse_object(cb->repo, oid); + struct object *object = parse_object(cb->repo, ref->oid); if (!object) return 0; if (object->type == OBJ_TAG) { - object = deref_tag(cb->repo, object, path, - strlen(path)); + object = deref_tag(cb->repo, object, ref->name, + strlen(ref->name)); if (!object) return 0; } diff --git a/pseudo-merge.c b/pseudo-merge.c index 893b763fe4..0abd51b42c 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -221,28 +221,25 @@ void load_pseudo_merges_from_config(struct repository *r, } } -static int find_pseudo_merge_group_for_ref(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *_data) +static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_data) { struct bitmap_writer *writer = _data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct commit *c; uint32_t i; int has_bitmap; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; - c = lookup_commit(the_repository, oid); + c = lookup_commit(the_repository, maybe_peeled); if (!c) return 0; - if (!packlist_find(writer->to_pack, oid)) + if (!packlist_find(writer->to_pack, maybe_peeled)) return 0; - has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid); + has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, maybe_peeled); for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { struct pseudo_merge_group *group; @@ -252,7 +249,7 @@ static int find_pseudo_merge_group_for_ref(const char *refname, size_t j; group = writer->pseudo_merge_groups.items[i].util; - if (regexec(group->pattern, refname, ARRAY_SIZE(captures), + if (regexec(group->pattern, ref->name, ARRAY_SIZE(captures), captures, 0)) continue; @@ -269,7 +266,7 @@ static int find_pseudo_merge_group_for_ref(const char 
*refname, if (group_name.len) strbuf_addch(&group_name, '-'); - strbuf_add(&group_name, refname + match->rm_so, + strbuf_add(&group_name, ref->name + match->rm_so, match->rm_eo - match->rm_so); } diff --git a/reachable.c b/reachable.c index 22266db523..b753c39553 100644 --- a/reachable.c +++ b/reachable.c @@ -83,18 +83,17 @@ static void add_rebase_files(struct rev_info *revs) free_worktrees(worktrees); } -static int add_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int add_one_ref(const struct reference *ref, void *cb_data) { struct rev_info *revs = (struct rev_info *)cb_data; struct object *object; - if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { - warning("symbolic ref is dangling: %s", path); + if ((ref->flags & REF_ISSYMREF) && (ref->flags & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", ref->name); return 0; } - object = parse_object_or_die(the_repository, oid, path); + object = parse_object_or_die(the_repository, ref->oid, ref->name); add_pending_object(revs, object, ""); return 0; diff --git a/ref-filter.c b/ref-filter.c index 30cc488d8a..6837fa60a9 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2954,14 +2954,15 @@ struct ref_filter_cbdata { * A call-back given to for_each_ref(). Filter refs and keep them for * later object processing. 
*/ -static int filter_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_one(const struct reference *ref, void *cb_data) { struct ref_filter_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (ref) - ref_array_append(ref_cbdata->array, ref); + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (item) + ref_array_append(ref_cbdata->array, item); return 0; } @@ -2990,17 +2991,18 @@ struct ref_filter_and_format_cbdata { } internal; }; -static int filter_and_format_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_and_format_one(const struct reference *ref, void *cb_data) { struct ref_filter_and_format_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (!ref) + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (!item) return 0; - if (format_ref_array_item(ref, ref_cbdata->format, &output, &err)) + if (format_ref_array_item(item, ref_cbdata->format, &output, &err)) die("%s", err.buf); if (output.len || !ref_cbdata->format->array_opts.omit_empty) { @@ -3010,7 +3012,7 @@ static int filter_and_format_one(const char *refname, const char *referent, cons strbuf_release(&output); strbuf_release(&err); - free_array_item(ref); + free_array_item(item); /* * Increment the running count of refs that match the filter. 
If diff --git a/reflog.c b/reflog.c index 65ef259b4f..ac87e20c4f 100644 --- a/reflog.c +++ b/reflog.c @@ -423,16 +423,13 @@ int should_expire_reflog_ent_verbose(struct object_id *ooid, return expire; } -static int push_tip_to_list(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags, void *cb_data) +static int push_tip_to_list(const struct reference *ref, void *cb_data) { struct commit_list **list = cb_data; struct commit *tip_commit; - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; - tip_commit = lookup_commit_reference_gently(the_repository, oid, 1); + tip_commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!tip_commit) return 0; commit_list_insert(tip_commit, list); diff --git a/refs.c b/refs.c index 965381367e..25f0579d61 100644 --- a/refs.c +++ b/refs.c @@ -426,17 +426,19 @@ int refs_ref_exists(struct ref_store *refs, const char *refname) NULL, NULL); } -static int for_each_filter_refs(const char *refname, const char *referent, - const struct object_id *oid, - int flags, void *data) +static int for_each_filter_refs(const struct reference *ref, void *data) { struct for_each_ref_filter *filter = data; - if (wildmatch(filter->pattern, refname, 0)) + if (wildmatch(filter->pattern, ref->name, 0)) return 0; - if (filter->prefix) - skip_prefix(refname, filter->prefix, &refname); - return filter->fn(refname, referent, oid, flags, filter->cb_data); + if (filter->prefix) { + struct reference skipped = *ref; + skip_prefix(skipped.name, filter->prefix, &skipped.name); + return filter->fn(&skipped, filter->cb_data); + } else { + return filter->fn(ref, filter->cb_data); + } } struct warn_if_dangling_data { @@ -447,17 +449,15 @@ struct warn_if_dangling_data { int dry_run; }; -static int warn_if_dangling_symref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags, void *cb_data) +static int warn_if_dangling_symref(const struct reference 
*ref, void *cb_data) { struct warn_if_dangling_data *d = cb_data; const char *resolves_to, *msg; - if (!(flags & REF_ISSYMREF)) + if (!(ref->flags & REF_ISSYMREF)) return 0; - resolves_to = refs_resolve_ref_unsafe(d->refs, refname, 0, NULL, NULL); + resolves_to = refs_resolve_ref_unsafe(d->refs, ref->name, 0, NULL, NULL); if (!resolves_to || !string_list_has_string(d->refnames, resolves_to)) { return 0; @@ -466,7 +466,7 @@ static int warn_if_dangling_symref(const char *refname, const char *referent UNU msg = d->dry_run ? _("%s%s will become dangling after %s is deleted\n") : _("%s%s has become dangling after %s was deleted\n"); - fprintf(d->fp, msg, d->indent, refname, resolves_to); + fprintf(d->fp, msg, d->indent, ref->name, resolves_to); return 0; } @@ -507,8 +507,15 @@ int refs_head_ref_namespaced(struct ref_store *refs, each_ref_fn fn, void *cb_da int flag; strbuf_addf(&buf, "%sHEAD", get_git_namespace()); - if (!refs_read_ref_full(refs, buf.buf, RESOLVE_REF_READING, &oid, &flag)) - ret = fn(buf.buf, NULL, &oid, flag, cb_data); + if (!refs_read_ref_full(refs, buf.buf, RESOLVE_REF_READING, &oid, &flag)) { + struct reference ref = { + .name = buf.buf, + .oid = &oid, + .flags = flag, + }; + + ret = fn(&ref, cb_data); + } strbuf_release(&buf); return ret; @@ -1741,8 +1748,15 @@ int refs_head_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data) int flag; if (refs_resolve_ref_unsafe(refs, "HEAD", RESOLVE_REF_READING, - &oid, &flag)) - return fn("HEAD", NULL, &oid, flag, cb_data); + &oid, &flag)) { + struct reference ref = { + .name = "HEAD", + .oid = &oid, + .flags = flag, + }; + + return fn(&ref, cb_data); + } return 0; } @@ -2753,14 +2767,10 @@ struct do_for_each_reflog_help { void *cb_data; }; -static int do_for_each_reflog_helper(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) +static int do_for_each_reflog_helper(const struct reference *ref, void *cb_data) { struct 
do_for_each_reflog_help *hp = cb_data; - return hp->fn(refname, hp->cb_data); + return hp->fn(ref->name, hp->cb_data); } int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_data) @@ -2976,25 +2986,24 @@ struct migration_data { uint64_t index; }; -static int migrate_one_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data) +static int migrate_one_ref(const struct reference *ref, void *cb_data) { struct migration_data *data = cb_data; struct strbuf symref_target = STRBUF_INIT; int ret; - if (flags & REF_ISSYMREF) { - ret = refs_read_symbolic_ref(data->old_refs, refname, &symref_target); + if (ref->flags & REF_ISSYMREF) { + ret = refs_read_symbolic_ref(data->old_refs, ref->name, &symref_target); if (ret < 0) goto done; - ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(the_hash_algo), + ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(the_hash_algo), symref_target.buf, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); if (ret < 0) goto done; } else { - ret = ref_transaction_create(data->transaction, refname, oid, NULL, + ret = ref_transaction_create(data->transaction, ref->name, ref->oid, NULL, REF_SKIP_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION, NULL, data->errbuf); if (ret < 0) diff --git a/refs.h b/refs.h index 4e6bd63aa8..68d235438c 100644 --- a/refs.h +++ b/refs.h @@ -355,14 +355,32 @@ struct ref_transaction; */ #define REF_BAD_NAME 0x08 +/* A reference passed to `for_each_ref()`-style callbacks. */ +struct reference { + /* The fully-qualified name of the reference. */ + const char *name; + + /* The target of a symbolic ref. `NULL` for direct references. */ + const char *target; + + /* + * The object ID of a reference. Either the direct object ID or the + * resolved object ID in the case of a symbolic ref. May be the zero + * object ID in case the symbolic ref cannot be resolved. 
+ */ + const struct object_id *oid; + + /* A bitfield of `REF_` flags. */ + int flags; +}; + /* * The signature for the callback function for the for_each_*() - * functions below. The memory pointed to by the refname and oid - * arguments is only guaranteed to be valid for the duration of a + * functions below. The memory pointed to by the `struct reference` + * argument is only guaranteed to be valid for the duration of a * single callback invocation. */ -typedef int each_ref_fn(const char *refname, const char *referent, - const struct object_id *oid, int flags, void *cb_data); +typedef int each_ref_fn(const struct reference *ref, void *cb_data); /* * The following functions invoke the specified callback function for diff --git a/refs/files-backend.c b/refs/files-backend.c index 8d7007f4aa..eb3142f8f2 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3150,14 +3150,11 @@ static int parse_and_write_reflog(struct files_ref_store *refs, return 0; } -static int ref_present(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) +static int ref_present(const struct reference *ref, void *cb_data) { struct string_list *affected_refnames = cb_data; - return string_list_has_string(affected_refnames, refname); + return string_list_has_string(affected_refnames, ref->name); } static int files_transaction_finish_initial(struct files_ref_store *refs, diff --git a/refs/iterator.c b/refs/iterator.c index 17ef841d8a..7f2e718f1c 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -476,7 +476,14 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - retval = fn(iter->refname, iter->referent, iter->oid, iter->flags, cb_data); + struct reference ref = { + .name = iter->refname, + .target = iter->referent, + .oid = iter->oid, + .flags = iter->flags, + }; + + retval = fn(&ref, cb_data); if (retval) goto out; } diff 
--git a/remote.c b/remote.c index df9675cd33..59b3715120 100644 --- a/remote.c +++ b/remote.c @@ -2315,21 +2315,19 @@ int format_tracking_info(struct branch *branch, struct strbuf *sb, return 1; } -static int one_local_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int one_local_ref(const struct reference *ref, void *cb_data) { struct ref ***local_tail = cb_data; - struct ref *ref; + struct ref *local_ref; /* we already know it starts with refs/ to get here */ - if (check_refname_format(refname + 5, 0)) + if (check_refname_format(ref->name + 5, 0)) return 0; - ref = alloc_ref(refname); - oidcpy(&ref->new_oid, oid); - **local_tail = ref; - *local_tail = &ref->next; + local_ref = alloc_ref(ref->name); + oidcpy(&local_ref->new_oid, ref->oid); + **local_tail = local_ref; + *local_tail = &local_ref->next; return 0; } @@ -2402,15 +2400,14 @@ struct stale_heads_info { struct refspec *rs; }; -static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data) +static int get_stale_heads_cb(const struct reference *ref, void *cb_data) { struct stale_heads_info *info = cb_data; struct string_list matches = STRING_LIST_INIT_DUP; struct refspec_item query; int i, stale = 1; memset(&query, 0, sizeof(struct refspec_item)); - query.dst = (char *)refname; + query.dst = (char *)ref->name; refspec_find_all_matches(info->rs, &query, &matches); if (matches.nr == 0) @@ -2423,7 +2420,7 @@ static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, * overlapping refspecs, we need to go over all of the * matching refs. 
*/ - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) goto clean_exit; for (i = 0; stale && i < matches.nr; i++) @@ -2431,8 +2428,8 @@ static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, stale = 0; if (stale) { - struct ref *ref = make_linked_ref(refname, &info->stale_refs_tail); - oidcpy(&ref->new_oid, oid); + struct ref *linked_ref = make_linked_ref(ref->name, &info->stale_refs_tail); + oidcpy(&linked_ref->new_oid, ref->oid); } clean_exit: diff --git a/repack-midx.c b/repack-midx.c index 6f6202c5bc..349f7e20b5 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -16,25 +16,23 @@ struct midx_snapshot_ref_data { int preferred; }; -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) +static int midx_snapshot_ref_one(const struct reference *ref, void *_data) { struct midx_snapshot_ref_data *data = _data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; - if (oidset_insert(&data->seen, oid)) + if (oidset_insert(&data->seen, maybe_peeled)) return 0; /* already seen */ - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? 
"+" : "", - oid_to_hex(oid)); + oid_to_hex(maybe_peeled)); return 0; } diff --git a/replace-object.c b/replace-object.c index 3eae051074..03d0f1f083 100644 --- a/replace-object.c +++ b/replace-object.c @@ -8,31 +8,27 @@ #include "repository.h" #include "commit.h" -static int register_replace_ref(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int register_replace_ref(const struct reference *ref, void *cb_data) { struct repository *r = cb_data; /* Get sha1 from refname */ - const char *slash = strrchr(refname, '/'); - const char *hash = slash ? slash + 1 : refname; + const char *slash = strrchr(ref->name, '/'); + const char *hash = slash ? slash + 1 : ref->name; struct replace_object *repl_obj = xmalloc(sizeof(*repl_obj)); if (get_oid_hex_algop(hash, &repl_obj->original.oid, r->hash_algo)) { free(repl_obj); - warning(_("bad replace ref name: %s"), refname); + warning(_("bad replace ref name: %s"), ref->name); return 0; } /* Copy sha1 from the read ref */ - oidcpy(&repl_obj->replacement, oid); + oidcpy(&repl_obj->replacement, ref->oid); /* Register new object */ if (oidmap_put(&r->objects->replace_map, repl_obj)) - die(_("duplicate replace ref: %s"), refname); + die(_("duplicate replace ref: %s"), ref->name); return 0; } diff --git a/revision.c b/revision.c index cf5e6c1ec9..5f0850ae5c 100644 --- a/revision.c +++ b/revision.c @@ -1644,19 +1644,17 @@ struct all_refs_cb { struct worktree *wt; }; -static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int handle_one_ref(const struct reference *ref, void *cb_data) { struct all_refs_cb *cb = cb_data; struct object *object; - if (ref_excluded(&cb->all_revs->ref_excludes, path)) + if (ref_excluded(&cb->all_revs->ref_excludes, ref->name)) return 0; - object = get_reference(cb->all_revs, path, oid, cb->all_flags); - add_rev_cmdline(cb->all_revs, object, path, 
REV_CMD_REF, cb->all_flags); - add_pending_object(cb->all_revs, object, path); + object = get_reference(cb->all_revs, ref->name, ref->oid, cb->all_flags); + add_rev_cmdline(cb->all_revs, object, ref->name, REV_CMD_REF, cb->all_flags); + add_pending_object(cb->all_revs, object, ref->name); return 0; } diff --git a/server-info.c b/server-info.c index 1d33de821e..0a07c722e8 100644 --- a/server-info.c +++ b/server-info.c @@ -148,23 +148,21 @@ out: return ret; } -static int add_info_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int add_info_ref(const struct reference *ref, void *cb_data) { struct update_info_ctx *uic = cb_data; - struct object *o = parse_object(uic->repo, oid); + struct object *o = parse_object(uic->repo, ref->oid); if (!o) return -1; - if (uic_printf(uic, "%s %s\n", oid_to_hex(oid), path) < 0) + if (uic_printf(uic, "%s %s\n", oid_to_hex(ref->oid), ref->name) < 0) return -1; if (o->type == OBJ_TAG) { - o = deref_tag(uic->repo, o, path, 0); + o = deref_tag(uic->repo, o, ref->name, 0); if (o) if (uic_printf(uic, "%s %s^{}\n", - oid_to_hex(&o->oid), path) < 0) + oid_to_hex(&o->oid), ref->name) < 0) return -1; } return 0; diff --git a/shallow.c b/shallow.c index d9cd4e219c..55b9cd9d3f 100644 --- a/shallow.c +++ b/shallow.c @@ -626,14 +626,10 @@ static void paint_down(struct paint_info *info, const struct object_id *oid, free(tmp); } -static int mark_uninteresting(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data UNUSED) +static int mark_uninteresting(const struct reference *ref, void *cb_data UNUSED) { struct commit *commit = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (!commit) return 0; commit->object.flags |= UNINTERESTING; @@ -742,16 +738,12 @@ struct commit_array { size_t nr, alloc; }; -static int add_ref(const char *refname UNUSED, - const char *referent UNUSED, - 
const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int add_ref(const struct reference *ref, void *cb_data) { struct commit_array *ca = cb_data; ALLOC_GROW(ca->commits, ca->nr + 1, ca->alloc); ca->commits[ca->nr] = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (ca->commits[ca->nr]) ca->nr++; return 0; diff --git a/submodule.c b/submodule.c index 35c55155f7..40a5c6fb9d 100644 --- a/submodule.c +++ b/submodule.c @@ -934,10 +934,7 @@ static void free_submodules_data(struct string_list *submodules) string_list_clear(submodules, 1); } -static int has_remote(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data UNUSED) +static int has_remote(const struct reference *ref UNUSED, void *cb_data UNUSED) { return 1; } @@ -1255,13 +1252,10 @@ int push_unpushed_submodules(struct repository *r, return ret; } -static int append_oid_to_array(const char *ref UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *data) +static int append_oid_to_array(const struct reference *ref, void *data) { struct oid_array *array = data; - oid_array_append(array, oid); + oid_array_append(array, ref->oid); return 0; } diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 83b06d39a3..b1215947c5 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -154,10 +154,9 @@ static int cmd_rename_ref(struct ref_store *refs, const char **argv) return refs_rename_ref(refs, oldref, newref, logmsg); } -static int each_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data UNUSED) +static int each_ref(const struct reference *ref, void *cb_data UNUSED) { - printf("%s %s 0x%x\n", oid_to_hex(oid), refname, flags); + printf("%s %s 0x%x\n", oid_to_hex(ref->oid), ref->name, ref->flags); return 0; } diff --git a/upload-pack.c b/upload-pack.c index 
1e87ae9559..0d563ae74e 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -870,8 +870,8 @@ static void send_unshallow(struct upload_pack_data *data) } } -static int check_ref(const char *refname_full, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data); +static int check_ref(const struct reference *ref, void *cb_data); + static void deepen(struct upload_pack_data *data, int depth) { if (depth == INFINITE_DEPTH && !is_repository_shallow(the_repository)) { @@ -1224,13 +1224,12 @@ static int mark_our_ref(const char *refname, const char *refname_full, return 0; } -static int check_ref(const char *refname_full, const char *referent UNUSED,const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int check_ref(const struct reference *ref, void *cb_data) { - const char *refname = strip_namespace(refname_full); + const char *refname = strip_namespace(ref->name); struct upload_pack_data *data = cb_data; - mark_our_ref(refname, refname_full, oid, &data->hidden_refs); + mark_our_ref(refname, ref->name, ref->oid, &data->hidden_refs); return 0; } @@ -1292,27 +1291,25 @@ static void write_v0_ref(struct upload_pack_data *data, return; } -static int send_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int send_ref(const struct reference *ref, void *cb_data) { - write_v0_ref(cb_data, refname, strip_namespace(refname), oid); + write_v0_ref(cb_data, ref->name, strip_namespace(ref->name), ref->oid); return 0; } -static int find_symref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag, void *cb_data) +static int find_symref(const struct reference *ref, void *cb_data) { const char *symref_target; struct string_list_item *item; + int flag; - if ((flag & REF_ISSYMREF) == 0) + if ((ref->flags & REF_ISSYMREF) == 0) return 0; symref_target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, 0, NULL, 
&flag); + ref->name, 0, NULL, &flag); if (!symref_target || (flag & REF_ISSYMREF) == 0) - die("'%s' is a symref but it is not?", refname); - item = string_list_append(cb_data, strip_namespace(refname)); + die("'%s' is a symref but it is not?", ref->name); + item = string_list_append(cb_data, strip_namespace(ref->name)); item->util = xstrdup(strip_namespace(symref_target)); return 0; } diff --git a/walker.c b/walker.c index 8073754517..409b646578 100644 --- a/walker.c +++ b/walker.c @@ -226,14 +226,10 @@ static int interpret_target(struct walker *walker, char *target, struct object_i return -1; } -static int mark_complete(const char *path UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int mark_complete(const struct reference *ref, void *cb_data UNUSED) { struct commit *commit = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (commit) { commit->object.flags |= COMPLETE; diff --git a/worktree.c b/worktree.c index a2a5f51f29..9308389cb6 100644 --- a/worktree.c +++ b/worktree.c @@ -595,8 +595,15 @@ int other_head_refs(each_ref_fn fn, void *cb_data) if (refs_resolve_ref_unsafe(get_main_ref_store(the_repository), refname.buf, RESOLVE_REF_READING, - &oid, &flag)) - ret = fn(refname.buf, NULL, &oid, flag, cb_data); + &oid, &flag)) { + struct reference ref = { + .name = refname.buf, + .oid = &oid, + .flags = flag, + }; + + ret = fn(&ref, cb_data); + } if (ret) break; } -- cgit v1.3-5-g9baa From f89866163704528f1a6570e134853dbb99120e7c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:14 +0200 Subject: refs: expose peeled object ID via the iterator Both the "files" and "reftable" backend are able to store peeled values for tags in the respective formats. This allows for a more efficient lookup of the target object of such a tag without having to manually peel via the object database. 
The infrastructure to access these peeled object IDs is somewhat funky though. When iterating through references, we store a pointer to the current iterator in a global variable. The callbacks invoked by that iterator are then expected to call `peel_iterated_oid()`, which checks whether the globally-stored iterator's current reference refers to the one handed into that function. If so, we ask the iterator to peel the object; otherwise we manually peel the object via the object database. Depending on global state like this is somewhat weird and also quite fragile. Introduce a new `struct reference::peeled_oid` field that can be populated by the reference backends. This field can be accessed via a new function `reference_get_peeled_oid()` that either uses that value, if set, or alternatively peels via the ODB. With this change we don't have to rely on global state anymore, but make the peeled object ID available to the callback functions directly. Adjust trivial callers that already have a `struct reference` available. Remaining callers will be adjusted in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/describe.c | 2 +- builtin/gc.c | 2 +- builtin/pack-objects.c | 7 ++++--- commit-graph.c | 2 +- ls-refs.c | 2 +- midx-write.c | 2 +- pseudo-merge.c | 2 +- refs.c | 12 ++++++++++++ refs.h | 19 +++++++++++++++++++ refs/packed-backend.c | 1 + refs/reftable-backend.c | 5 +++++ repack-midx.c | 2 +- 12 files changed, 48 insertions(+), 10 deletions(-) (limited to 'builtin') diff --git a/builtin/describe.c b/builtin/describe.c index 7954535044..443546aaac 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -208,7 +208,7 @@ static int get_name(const struct reference *ref, void *cb_data UNUSED) } /* Is it annotated?
*/ - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { is_annotated = !oideq(ref->oid, &peeled); } else { oidcpy(&peeled, ref->oid); diff --git a/builtin/gc.c b/builtin/gc.c index 9de5de175f..f0cf20d423 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1109,7 +1109,7 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) struct commit_list *stack = NULL; struct commit *commit; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 39633a0158..1613fecb66 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -838,7 +838,7 @@ static int mark_tagged(const struct reference *ref, void *cb_data UNUSED) if (entry) entry->tagged = 1; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { entry = packlist_find(&to_pack, &peeled); if (entry) entry->tagged = 1; @@ -3309,7 +3309,8 @@ static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled) && obj_is_packed(&peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled) && + obj_is_packed(&peeled)) add_tag_chain(ref->oid); return 0; } @@ -4537,7 +4538,7 @@ static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNU struct object_id peeled; struct object *object; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; object = parse_object_or_die(the_repository, maybe_peeled, ref->name); diff --git a/commit-graph.c b/commit-graph.c index f91af41625..80be2ff2c3 100644 --- 
a/commit-graph.c +++ b/commit-graph.c @@ -1857,7 +1857,7 @@ static int add_ref_to_set(const struct reference *ref, void *cb_data) struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(data->repo, ref, &peeled)) maybe_peeled = &peeled; if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) == OBJ_COMMIT) oidset_insert(data->commits, maybe_peeled); diff --git a/ls-refs.c b/ls-refs.c index 64d0272369..8641281b86 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -110,7 +110,7 @@ static int send_ref(const struct reference *ref, void *cb_data) if (data->peel && ref->oid) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled)); } diff --git a/midx-write.c b/midx-write.c index f4dd875747..23e61cb000 100644 --- a/midx-write.c +++ b/midx-write.c @@ -709,7 +709,7 @@ static int add_ref_to_pending(const struct reference *ref, void *cb_data) return 0; } - if (!peel_iterated_oid(revs->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(revs->repo, ref, &peeled)) maybe_peeled = &peeled; object = parse_object_or_die(revs->repo, maybe_peeled, ref->name); diff --git a/pseudo-merge.c b/pseudo-merge.c index 0abd51b42c..a2d5bd85f9 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -230,7 +230,7 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d uint32_t i; int has_bitmap; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; c = lookup_commit(the_repository, maybe_peeled); diff --git a/refs.c b/refs.c index f96cf43b12..1b1551f981 100644 --- a/refs.c +++ b/refs.c @@ -2334,6 +2334,18 @@ int peel_iterated_oid(struct repository *r, const struct object_id *base, struct return 
peel_object(r, base, peeled) ? -1 : 0; } +int reference_get_peeled_oid(struct repository *repo, + const struct reference *ref, + struct object_id *peeled_oid) +{ + if (ref->peeled_oid) { + oidcpy(peeled_oid, ref->peeled_oid); + return 0; + } + + return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; +} + int refs_update_symref(struct ref_store *refs, const char *ref, const char *target, const char *logmsg) { diff --git a/refs.h b/refs.h index 4f0a685714..886ed2c0f4 100644 --- a/refs.h +++ b/refs.h @@ -371,10 +371,29 @@ struct reference { */ const struct object_id *oid; + /* + * An optional peeled object ID. This field _may_ be set for tags in + * case the peeled value is present in the backend. Please refer to + * `reference_get_peeled_oid()`. + */ + const struct object_id *peeled_oid; + /* A bitfield of `enum reference_status` flags. */ unsigned flags; }; +/* + * Peel the tag to a non-tag commit. If present, this uses the peeled object ID + * exposed by the reference backend. Otherwise, the object is peeled via the + * object database, which is less efficient. + * + * Return `0` if the reference could be peeled, a negative error code + * otherwise. + */ +int reference_get_peeled_oid(struct repository *repo, + const struct reference *ref, + struct object_id *peeled_oid); + /* * The signature for the callback function for the for_each_*() * functions below. 
The memory pointed to by the `struct reference` diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 711e07f832..1fefefd54e 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -963,6 +963,7 @@ static int next_record(struct packed_ref_iterator *iter) iter->base.ref.flags &= ~REF_KNOWS_PEELED; } else { iter->base.ref.flags |= REF_KNOWS_PEELED; + iter->base.ref.peeled_oid = &iter->peeled; } } else { oidclr(&iter->peeled, iter->repo->hash_algo); diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 728886eafd..e214e120d7 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -547,6 +547,7 @@ struct reftable_ref_iterator { struct reftable_iterator iter; struct reftable_ref_record ref; struct object_id oid; + struct object_id peeled_oid; char *prefix; size_t prefix_len; @@ -671,6 +672,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) case REFTABLE_REF_VAL2: oidread(&iter->oid, iter->ref.value.val2.value, refs->base.repo->hash_algo); + oidread(&iter->peeled_oid, iter->ref.value.val2.target_value, + refs->base.repo->hash_algo); break; case REFTABLE_REF_SYMREF: referent = refs_resolve_ref_unsafe(&iter->refs->base, @@ -708,6 +711,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) iter->base.ref.name = iter->ref.refname; iter->base.ref.target = referent; iter->base.ref.oid = &iter->oid; + if (iter->ref.value_type == REFTABLE_REF_VAL2) + iter->base.ref.peeled_oid = &iter->peeled_oid; iter->base.ref.flags = flags; break; diff --git a/repack-midx.c b/repack-midx.c index 349f7e20b5..74bdfa3a6e 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -22,7 +22,7 @@ static int midx_snapshot_ref_one(const struct reference *ref, void *_data) const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; - if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(data->repo, ref, &peeled)) maybe_peeled = &peeled; if 
(oidset_insert(&data->seen, maybe_peeled)) -- cgit v1.3-5-g9baa From 70b783c3a194746d8b747677615f33b94454146f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:16 +0200 Subject: ref-filter: propagate peeled object ID When queueing a reference in the "ref-filter" subsystem we end up creating a new ref array item that contains the reference's info. One bit of info that we always discard though is the peeled object ID, and because of that we are forced to use `peel_iterated_oid()`. Refactor the code to propagate the peeled object ID via the ref array, if available. This allows us to manually peel tags without having to go through the object database. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/ls-remote.c | 2 +- builtin/tag.c | 2 +- builtin/verify-tag.c | 2 +- ref-filter.c | 66 ++++++++++++++++++++++++++++++---------------------- ref-filter.h | 5 +++- 5 files changed, 45 insertions(+), 32 deletions(-) (limited to 'builtin') diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index df09000b30..fe77829557 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -156,7 +156,7 @@ int cmd_ls_remote(int argc, continue; if (!tail_match(&pattern, ref->name)) continue; - item = ref_array_push(&ref_array, ref->name, &ref->old_oid); + item = ref_array_push(&ref_array, ref->name, &ref->old_oid, NULL); item->symref = xstrdup_or_null(ref->symref); } diff --git a/builtin/tag.c b/builtin/tag.c index f0665af3ac..01eba90c5c 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -153,7 +153,7 @@ static int verify_tag(const char *name, const char *ref UNUSED, return -1; if (format->format) - pretty_print_ref(name, oid, format); + pretty_print_ref(name, oid, NULL, format); return 0; } diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c index cd6bc11095..558121eaa1 100644 --- a/builtin/verify-tag.c +++ b/builtin/verify-tag.c @@ -67,7 +67,7 @@ int cmd_verify_tag(int argc, } if (format.format) - pretty_print_ref(name, &oid, 
&format); + pretty_print_ref(name, &oid, NULL, &format); } return had_error; } diff --git a/ref-filter.c b/ref-filter.c index 6837fa60a9..7fd8babec8 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2578,8 +2578,15 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) * If it is a tag object, see if we use the peeled value. If we do, * grab the peeled OID. */ - if (need_tagged && peel_iterated_oid(the_repository, &obj->oid, &oi_deref.oid)) - die("bad tag"); + if (need_tagged) { + if (!is_null_oid(&ref->peeled_oid)) { + oidcpy(&oi_deref.oid, &ref->peeled_oid); + } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + /* We managed to peel the object ourselves. */ + } else { + die("bad tag"); + } + } return get_object(ref, 1, &obj, &oi_deref, err); } @@ -2807,12 +2814,15 @@ static int match_points_at(struct oid_array *points_at, * Callers can then fill in other struct members at their leisure. */ static struct ref_array_item *new_ref_array_item(const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { struct ref_array_item *ref; FLEX_ALLOC_STR(ref, refname, refname); oidcpy(&ref->objectname, oid); + if (peeled_oid) + oidcpy(&ref->peeled_oid, peeled_oid); ref->rest = NULL; return ref; @@ -2826,9 +2836,10 @@ static void ref_array_append(struct ref_array *array, struct ref_array_item *ref struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { - struct ref_array_item *ref = new_ref_array_item(refname, oid); + struct ref_array_item *ref = new_ref_array_item(refname, oid, peeled_oid); ref_array_append(array, ref); return ref; } @@ -2871,25 +2882,25 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname) return ref_kind_from_refname(refname); } -static struct ref_array_item *apply_ref_filter(const char *refname, const char 
*referent, const struct object_id *oid, - int flag, struct ref_filter *filter) +static struct ref_array_item *apply_ref_filter(const struct reference *ref, + struct ref_filter *filter) { - struct ref_array_item *ref; + struct ref_array_item *item; struct commit *commit = NULL; unsigned int kind; - if (flag & REF_BAD_NAME) { - warning(_("ignoring ref with broken name %s"), refname); + if (ref->flags & REF_BAD_NAME) { + warning(_("ignoring ref with broken name %s"), ref->name); return NULL; } - if (flag & REF_ISBROKEN) { - warning(_("ignoring broken ref %s"), refname); + if (ref->flags & REF_ISBROKEN) { + warning(_("ignoring broken ref %s"), ref->name); return NULL; } /* Obtain the current ref kind from filter_ref_kind() and ignore unwanted refs. */ - kind = filter_ref_kind(filter, refname); + kind = filter_ref_kind(filter, ref->name); /* * Generally HEAD refs are printed with special description denoting a rebase, @@ -2902,13 +2913,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * else if (!(kind & filter->kind)) return NULL; - if (!filter_pattern_match(filter, refname)) + if (!filter_pattern_match(filter, ref->name)) return NULL; - if (filter_exclude_match(filter, refname)) + if (filter_exclude_match(filter, ref->name)) return NULL; - if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname)) + if (filter->points_at.nr && !match_points_at(&filter->points_at, ref->oid, ref->name)) return NULL; /* @@ -2918,7 +2929,7 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * */ if (filter->reachable_from || filter->unreachable_from || filter->with_commit || filter->no_commit || filter->verbose) { - commit = lookup_commit_reference_gently(the_repository, oid, 1); + commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!commit) return NULL; /* We perform the filtering for the '--contains' option... 
*/ @@ -2936,13 +2947,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * * to do its job and the resulting list may yet to be pruned * by maxcount logic. */ - ref = new_ref_array_item(refname, oid); - ref->commit = commit; - ref->flag = flag; - ref->kind = kind; - ref->symref = xstrdup_or_null(referent); + item = new_ref_array_item(ref->name, ref->oid, ref->peeled_oid); + item->commit = commit; + item->flag = ref->flags; + item->kind = kind; + item->symref = xstrdup_or_null(ref->target); - return ref; + return item; } struct ref_filter_cbdata { @@ -2959,8 +2970,7 @@ static int filter_one(const struct reference *ref, void *cb_data) struct ref_filter_cbdata *ref_cbdata = cb_data; struct ref_array_item *item; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (item) ref_array_append(ref_cbdata->array, item); @@ -2997,8 +3007,7 @@ static int filter_and_format_one(const struct reference *ref, void *cb_data) struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (!item) return 0; @@ -3585,13 +3594,14 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma } void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format) { struct ref_array_item *ref_item; struct strbuf output = STRBUF_INIT; struct strbuf err = STRBUF_INIT; - ref_item = new_ref_array_item(name, oid); + ref_item = new_ref_array_item(name, oid, peeled_oid); ref_item->kind = ref_kind_from_refname(name); if (format_ref_array_item(ref_item, format, &output, &err)) die("%s", err.buf); diff --git a/ref-filter.h b/ref-filter.h index 235c60f79c..120221b47f 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -41,6 
+41,7 @@ enum ref_sorting_order { struct ref_array_item { struct object_id objectname; + struct object_id peeled_oid; const char *rest; int flag; unsigned int kind; @@ -187,6 +188,7 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma * name must be a fully qualified refname. */ void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format); /* @@ -195,7 +197,8 @@ void pretty_print_ref(const char *name, const struct object_id *oid, */ struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid); + const struct object_id *oid, + const struct object_id *peeled_oid); /* * If the provided format includes ahead-behind atoms, then compute the -- cgit v1.3-5-g9baa From feaaea4c123e6b94ebbdc2135278946ee9cc8eed Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:17 +0200 Subject: builtin/show-ref: convert to use `reference_get_peeled_oid()` The git-show-ref(1) command has multiple different modes: - It knows to show all references matching a pattern. - It knows to list all references that are an exact match to whatever the user has provided. - It knows to check for reference existence. The first two modes use mostly the same infrastructure to print the references via `show_one()`. But while the former mode uses a proper iterator and thus has a `struct reference` available in its context, the latter calls `refs_read_ref()` and thus doesn't. Consequently, we cannot easily use `reference_get_peeled_oid()` to print the peeled value. Adapt the code so that we manually construct a `struct reference` when verifying refs. We wouldn't ever have the peeled value available anyway as we're not using an iterator here, so we can simply plug in the values we _do_ have.
With this change we now have a `struct reference` available at both callsites of `show_one()` and can thus pass it, which allows us to use `reference_get_peeled_oid()` instead of `peel_iterated_oid()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/show-ref.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'builtin') diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 4803b5e598..4d4984e4e0 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -31,31 +31,31 @@ struct show_one_options { }; static void show_one(const struct show_one_options *opts, - const char *refname, const struct object_id *oid) + const struct reference *ref) { const char *hex; struct object_id peeled; - if (!odb_has_object(the_repository->objects, oid, + if (!odb_has_object(the_repository->objects, ref->oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - die("git show-ref: bad ref %s (%s)", refname, - oid_to_hex(oid)); + die("git show-ref: bad ref %s (%s)", ref->name, + oid_to_hex(ref->oid)); if (opts->quiet) return; - hex = repo_find_unique_abbrev(the_repository, oid, opts->abbrev); + hex = repo_find_unique_abbrev(the_repository, ref->oid, opts->abbrev); if (opts->hash_only) printf("%s\n", hex); else - printf("%s %s\n", hex, refname); + printf("%s %s\n", hex, ref->name); if (!opts->deref_tags) return; - if (!peel_iterated_oid(the_repository, oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { hex = repo_find_unique_abbrev(the_repository, &peeled, opts->abbrev); - printf("%s %s^{}\n", hex, refname); + printf("%s %s^{}\n", hex, ref->name); } } @@ -93,7 +93,7 @@ static int show_ref(const struct reference *ref, void *cbdata) match: data->found_match++; - show_one(data->show_one_opts, ref->name, ref->oid); + show_one(data->show_one_opts, ref); return 0; } @@ -175,12 +175,18 @@ static int cmd_show_ref__verify(const struct show_one_options *show_one_opts, if 
((starts_with(*refs, "refs/") || refname_is_safe(*refs)) && !refs_read_ref(get_main_ref_store(the_repository), *refs, &oid)) { - show_one(show_one_opts, *refs, &oid); - } - else if (!show_one_opts->quiet) + struct reference ref = { + .name = *refs, + .oid = &oid, + }; + + show_one(show_one_opts, &ref); + } else if (!show_one_opts->quiet) { die("'%s' - not a valid ref", *refs); - else + } else { return 1; + } + refs++; } -- cgit v1.3-5-g9baa