diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-02-12 10:08:51 -0800 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-02-12 10:08:51 -0800 |
| commit | aae91a86fb2a71ff89a71b63ccec3a947b26ca51 (patch) | |
| tree | 3bfc421ac1b1f445d22bae71a3b3524ec993fb4c /builtin | |
| parent | 388218fac77d0405a5083cd4b4ee20f6694609c3 (diff) | |
| parent | b4cf68476a983ff063846b43cd46ee9805f2c0bb (diff) | |
| download | git-aae91a86fb2a71ff89a71b63ccec3a947b26ca51.tar.xz | |
Merge branch 'ds/name-hash-tweaks'
"git pack-objects" and its wrapper "git repack" learned an option
to use an alternative path-hash function to improve delta-base
selection to produce a packfile with deeper history than window
size.
* ds/name-hash-tweaks:
pack-objects: prevent name hash version change
test-tool: add helper for name-hash values
p5313: add size comparison test
pack-objects: add GIT_TEST_NAME_HASH_VERSION
repack: add --name-hash-version option
pack-objects: add --name-hash-version option
pack-objects: create new name-hash function version
Diffstat (limited to 'builtin')
| -rw-r--r-- | builtin/pack-objects.c | 63 | ||||
| -rw-r--r-- | builtin/repack.c | 9 |
2 files changed, 66 insertions, 6 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7b2dacda51..58a9b16126 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -269,6 +269,43 @@ struct configured_exclusion { static struct oidmap configured_exclusions; static struct oidset excluded_by_config; +static int name_hash_version = -1; + +/** + * Check whether the name_hash_version chosen by user input is appropriate, + * and also validate whether it is compatible with other features. + */ +static void validate_name_hash_version(void) +{ + if (name_hash_version < 1 || name_hash_version > 2) + die(_("invalid --name-hash-version option: %d"), name_hash_version); + if (write_bitmap_index && name_hash_version != 1) { + warning(_("currently, --write-bitmap-index requires --name-hash-version=1")); + name_hash_version = 1; + } +} + +static inline uint32_t pack_name_hash_fn(const char *name) +{ + static int seen_version = -1; + + if (seen_version < 0) + seen_version = name_hash_version; + else if (seen_version != name_hash_version) + BUG("name hash version changed from %d to %d mid-process", + seen_version, name_hash_version); + + switch (name_hash_version) { + case 1: + return pack_name_hash(name); + + case 2: + return pack_name_hash_v2((const unsigned char *)name); + + default: + BUG("invalid name-hash version: %d", name_hash_version); + } +} /* * stats @@ -1689,7 +1726,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, return 0; } - create_object_entry(oid, type, pack_name_hash(name), + create_object_entry(oid, type, pack_name_hash_fn(name), exclude, name && no_try_delta(name), found_pack, found_offset); return 1; @@ -1903,7 +1940,7 @@ static void add_preferred_base_object(const char *name) { struct pbase_tree *it; size_t cmplen; - unsigned hash = pack_name_hash(name); + unsigned hash = pack_name_hash_fn(name); if (!num_preferred_base || check_pbase_path(hash)) return; @@ -3415,7 +3452,7 @@ static void show_object_pack_hint(struct object *object, const char *name, * here using a now in order to perhaps improve the delta selection * process. */ - oe->hash = pack_name_hash(name); + oe->hash = pack_name_hash_fn(name); oe->no_try_delta = name && no_try_delta(name); stdin_packs_hints_nr++; @@ -3565,7 +3602,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type entry = packlist_find(&to_pack, oid); if (entry) { if (name) { - entry->hash = pack_name_hash(name); + entry->hash = pack_name_hash_fn(name); entry->no_try_delta = no_try_delta(name); } } else { @@ -3588,7 +3625,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type return; } - entry = create_object_entry(oid, type, pack_name_hash(name), + entry = create_object_entry(oid, type, pack_name_hash_fn(name), 0, name && no_try_delta(name), pack, offset); } @@ -4068,6 +4105,15 @@ static int get_object_list_from_bitmap(struct rev_info *revs) if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; + /* + * For now, force the name-hash version to be 1 since that + * is the version implied by the bitmap format. Later, the + * format can include this version explicitly in its format, + * allowing readers to know the version that was used during + * the bitmap write. + */ + name_hash_version = 1; + if (pack_options_allow_reuse()) reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfiles, @@ -4443,6 +4489,8 @@ int cmd_pack_objects(int argc, OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, N_("protocol"), N_("exclude any configured uploadpack.blobpackfileuri with this protocol")), + OPT_INTEGER(0, "name-hash-version", &name_hash_version, + N_("use the specified name-hash function to group similar objects")), OPT_END(), }; @@ -4598,6 +4646,11 @@ int cmd_pack_objects(int argc, if (pack_to_stdout || !rev_list_all) write_bitmap_index = 0; + if (name_hash_version < 0) + name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1); + + validate_name_hash_version(); + if (use_delta_islands) strvec_push(&rp, "--topo-order"); diff --git a/builtin/repack.c b/builtin/repack.c index 81d13630ea..e472586ed5 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -41,7 +41,9 @@ static int run_update_server_info = 1; static char *packdir, *packtmp_name, *packtmp; static const char *const git_repack_usage[] = { - N_("git repack [<options>]"), + N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" + "[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n" + "[--write-midx] [--name-hash-version=<n>]"), NULL }; @@ -60,6 +62,7 @@ struct pack_objects_args { int no_reuse_object; int quiet; int local; + int name_hash_version; struct list_objects_filter_options filter_options; }; @@ -308,6 +311,8 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_pushf(&cmd->args, "--no-reuse-delta"); if (args->no_reuse_object) strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); if (args->local) strvec_push(&cmd->args, "--local"); if (args->quiet) @@ -1205,6 +1210,8 @@ int cmd_repack(int argc, N_("pass --no-reuse-delta to git-pack-objects")), OPT_BOOL('F', NULL, &po_args.no_reuse_object, N_("pass --no-reuse-object to git-pack-objects")), + OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version, + N_("specify the name hash version to use for grouping similar objects by path")), OPT_NEGBIT('n', NULL, &run_update_server_info, N_("do not run git-update-server-info"), 1), OPT__QUIET(&po_args.quiet, N_("be quiet")), |
