From f7c4d63e35b4358f93efc8d2f124056c246f912e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:03:47 -0400 Subject: builtin/multi-pack-index.c: inline 'flags' with options Subcommands of the 'git multi-pack-index' command (e.g., 'write', 'verify', etc.) will want to optionally change a set of shared flags that are eventually passed to the MIDX libraries. Right now, options and flags are handled separately. That's fine, since the options structure is never passed around. But a future patch will make it so that common options shared by all sub-commands are defined in a common location. That means that "flags" would have to become a global variable. Group it with the options structure so that we reduce the number of global variables we have overall. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5bf88cd2a8..4a0ddb06c4 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -14,13 +14,12 @@ static struct opts_multi_pack_index { const char *object_dir; unsigned long batch_size; int progress; + unsigned flags; } opts; int cmd_multi_pack_index(int argc, const char **argv, const char *prefix) { - unsigned flags = 0; - static struct option builtin_multi_pack_index_options[] = { OPT_FILENAME(0, "object-dir", &opts.object_dir, N_("object directory containing set of packfile and pack-index pairs")), @@ -40,7 +39,7 @@ int cmd_multi_pack_index(int argc, const char **argv, if (!opts.object_dir) opts.object_dir = get_object_directory(); if (opts.progress) - flags |= MIDX_PROGRESS; + opts.flags |= MIDX_PROGRESS; if (argc == 0) usage_with_options(builtin_multi_pack_index_usage, @@ -55,16 +54,16 @@ int cmd_multi_pack_index(int argc, const char **argv, if (!strcmp(argv[0], "repack")) return midx_repack(the_repository, opts.object_dir, - (size_t)opts.batch_size, flags); + (size_t)opts.batch_size, opts.flags); if (opts.batch_size) die(_("--batch-size option is only for 'repack' subcommand")); if (!strcmp(argv[0], "write")) - return write_midx_file(opts.object_dir, flags); + return write_midx_file(opts.object_dir, opts.flags); if (!strcmp(argv[0], "verify")) - return verify_midx_file(the_repository, opts.object_dir, flags); + return verify_midx_file(the_repository, opts.object_dir, opts.flags); if (!strcmp(argv[0], "expire")) - return expire_midx_packs(the_repository, opts.object_dir, flags); + return expire_midx_packs(the_repository, opts.object_dir, opts.flags); die(_("unrecognized subcommand: %s"), argv[0]); } -- cgit v1.3-5-g9baa From cf1f5389ec9eed5453c090cba347f68a3c3da3f7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:03:51 -0400 Subject: builtin/multi-pack-index.c: don't handle 'progress' separately Now that there is a shared 'flags' member in the options structure, there is no need to keep track of whether to force progress or not, since ultimately the decision of whether or not to show a progress meter is controlled by a bit in the flags member. Manipulate that bit directly, and drop the now-unnecessary 'progress' field while we're at it. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 4a0ddb06c4..c70f020d8f 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -13,7 +13,6 @@ static char const * const builtin_multi_pack_index_usage[] = { static struct opts_multi_pack_index { const char *object_dir; unsigned long batch_size; - int progress; unsigned flags; } opts; @@ -23,7 +22,7 @@ int cmd_multi_pack_index(int argc, const char **argv, static struct option builtin_multi_pack_index_options[] = { OPT_FILENAME(0, "object-dir", &opts.object_dir, N_("object directory containing set of packfile and pack-index pairs")), - OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")), + OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), MIDX_PROGRESS), OPT_MAGNITUDE(0, "batch-size", &opts.batch_size, N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), OPT_END(), @@ -31,15 +30,14 @@ int cmd_multi_pack_index(int argc, const char **argv, git_config(git_default_config, NULL); - opts.progress = isatty(2); + if (isatty(2)) + opts.flags |= MIDX_PROGRESS; argc = parse_options(argc, argv, prefix, builtin_multi_pack_index_options, builtin_multi_pack_index_usage, 0); if (!opts.object_dir) opts.object_dir = get_object_directory(); - if (opts.progress) - opts.flags |= MIDX_PROGRESS; if (argc == 0) usage_with_options(builtin_multi_pack_index_usage, -- cgit v1.3-5-g9baa From b25b727494f4782c7af1db31fdfde1cc9b30f7c4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:03:54 -0400 Subject: builtin/multi-pack-index.c: define common usage with a macro Factor out the usage message into pieces corresponding to each mode. This avoids options specific to one sub-command from being shared with another in the usage. A subsequent commit will use these #define macros to have usage variables for each sub-command without duplicating their contents. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index c70f020d8f..eea498e026 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -5,8 +5,23 @@ #include "midx.h" #include "trace2.h" +#define BUILTIN_MIDX_WRITE_USAGE \ + N_("git multi-pack-index [] write") + +#define BUILTIN_MIDX_VERIFY_USAGE \ + N_("git multi-pack-index [] verify") + +#define BUILTIN_MIDX_EXPIRE_USAGE \ + N_("git multi-pack-index [] expire") + +#define BUILTIN_MIDX_REPACK_USAGE \ + N_("git multi-pack-index [] repack [--batch-size=]") + static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [] (write|verify|expire|repack --batch-size=)"), + BUILTIN_MIDX_WRITE_USAGE, + BUILTIN_MIDX_VERIFY_USAGE, + BUILTIN_MIDX_EXPIRE_USAGE, + BUILTIN_MIDX_REPACK_USAGE, NULL }; -- cgit v1.3-5-g9baa From 60ca94769ce45161c4bcfb4aa92212585d3da0f6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:03:57 -0400 Subject: builtin/multi-pack-index.c: split sub-commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle sub-commands of the 'git multi-pack-index' builtin (e.g., "write", "repack", etc.) separately from one another. This allows sub-commands with unique options, without forcing cmd_multi_pack_index() to reject invalid combinations itself. This comes at the cost of some duplication and boilerplate. Luckily, the duplication is reduced to a minimum, since common options are shared among sub-commands due to a suggestion by Ævar. (Sub-commands do have to retain the common options, too, since this builtin accepts common options on either side of the sub-command). Roughly speaking, cmd_multi_pack_index() parses options (including common ones), and stops at the first non-option, which is the sub-command. It then dispatches to the appropriate sub-command, which parses the remaining options (also including common options). Unknown options are kept by the sub-commands in order to detect their presence (and complain that too many arguments were given). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 130 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 25 deletions(-) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index eea498e026..a78640c061 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -17,6 +17,22 @@ #define BUILTIN_MIDX_REPACK_USAGE \ N_("git multi-pack-index [] repack [--batch-size=]") +static char const * const builtin_multi_pack_index_write_usage[] = { + BUILTIN_MIDX_WRITE_USAGE, + NULL +}; +static char const * const builtin_multi_pack_index_verify_usage[] = { + BUILTIN_MIDX_VERIFY_USAGE, + NULL +}; +static char const * const builtin_multi_pack_index_expire_usage[] = { + BUILTIN_MIDX_EXPIRE_USAGE, + NULL +}; +static char const * const builtin_multi_pack_index_repack_usage[] = { + BUILTIN_MIDX_REPACK_USAGE, + NULL +}; static char const * const builtin_multi_pack_index_usage[] = { BUILTIN_MIDX_WRITE_USAGE, BUILTIN_MIDX_VERIFY_USAGE, @@ -31,25 +47,98 @@ static struct opts_multi_pack_index { unsigned flags; } opts; -int cmd_multi_pack_index(int argc, const char **argv, - const char *prefix) +static struct option common_opts[] = { + OPT_FILENAME(0, "object-dir", &opts.object_dir, + N_("object directory containing set of packfile and pack-index pairs")), + OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), MIDX_PROGRESS), + OPT_END(), +}; + +static struct option *add_common_options(struct option *prev) +{ + return parse_options_concat(common_opts, prev); +} + +static int cmd_multi_pack_index_write(int argc, const char **argv) +{ + struct option *options = common_opts; + + argc = parse_options(argc, argv, NULL, + options, builtin_multi_pack_index_write_usage, + PARSE_OPT_KEEP_UNKNOWN); + if (argc) + usage_with_options(builtin_multi_pack_index_write_usage, + options); + + return write_midx_file(opts.object_dir, opts.flags); +} + +static int cmd_multi_pack_index_verify(int argc, const char **argv) +{ + struct option *options = common_opts; + + argc = parse_options(argc, argv, NULL, + options, builtin_multi_pack_index_verify_usage, + PARSE_OPT_KEEP_UNKNOWN); + if (argc) + usage_with_options(builtin_multi_pack_index_verify_usage, + options); + + return verify_midx_file(the_repository, opts.object_dir, opts.flags); +} + +static int cmd_multi_pack_index_expire(int argc, const char **argv) +{ + struct option *options = common_opts; + + argc = parse_options(argc, argv, NULL, + options, builtin_multi_pack_index_expire_usage, + PARSE_OPT_KEEP_UNKNOWN); + if (argc) + usage_with_options(builtin_multi_pack_index_expire_usage, + options); + + return expire_midx_packs(the_repository, opts.object_dir, opts.flags); +} + +static int cmd_multi_pack_index_repack(int argc, const char **argv) { - static struct option builtin_multi_pack_index_options[] = { - OPT_FILENAME(0, "object-dir", &opts.object_dir, - N_("object directory containing set of packfile and pack-index pairs")), - OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), MIDX_PROGRESS), + struct option *options; + static struct option builtin_multi_pack_index_repack_options[] = { OPT_MAGNITUDE(0, "batch-size", &opts.batch_size, N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), OPT_END(), }; + options = add_common_options(builtin_multi_pack_index_repack_options); + + argc = parse_options(argc, argv, NULL, + options, + builtin_multi_pack_index_repack_usage, + PARSE_OPT_KEEP_UNKNOWN); + if (argc) + usage_with_options(builtin_multi_pack_index_repack_usage, + options); + + FREE_AND_NULL(options); + + return midx_repack(the_repository, opts.object_dir, + (size_t)opts.batch_size, opts.flags); +} + +int cmd_multi_pack_index(int argc, const char **argv, + const char *prefix) +{ + struct option *builtin_multi_pack_index_options = common_opts; + git_config(git_default_config, NULL); if (isatty(2)) opts.flags |= MIDX_PROGRESS; argc = parse_options(argc, argv, prefix, builtin_multi_pack_index_options, - builtin_multi_pack_index_usage, 0); + builtin_multi_pack_index_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!opts.object_dir) opts.object_dir = get_object_directory(); @@ -58,25 +147,16 @@ int cmd_multi_pack_index(int argc, const char **argv, usage_with_options(builtin_multi_pack_index_usage, builtin_multi_pack_index_options); - if (argc > 1) { - die(_("too many arguments")); - return 1; - } - trace2_cmd_mode(argv[0]); if (!strcmp(argv[0], "repack")) - return midx_repack(the_repository, opts.object_dir, - (size_t)opts.batch_size, opts.flags); - if (opts.batch_size) - die(_("--batch-size option is only for 'repack' subcommand")); - - if (!strcmp(argv[0], "write")) - return write_midx_file(opts.object_dir, opts.flags); - if (!strcmp(argv[0], "verify")) - return verify_midx_file(the_repository, opts.object_dir, opts.flags); - if (!strcmp(argv[0], "expire")) - return expire_midx_packs(the_repository, opts.object_dir, opts.flags); - - die(_("unrecognized subcommand: %s"), argv[0]); + return cmd_multi_pack_index_repack(argc, argv); + else if (!strcmp(argv[0], "write")) + return cmd_multi_pack_index_write(argc, argv); + else if (!strcmp(argv[0], "verify")) + return cmd_multi_pack_index_verify(argc, argv); + else if (!strcmp(argv[0], "expire")) + return cmd_multi_pack_index_expire(argc, argv); + else + die(_("unrecognized subcommand: %s"), argv[0]); } -- cgit v1.3-5-g9baa From 690eb057198275b314b776688240fcc7f9e789d0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:04:01 -0400 Subject: builtin/multi-pack-index.c: don't enter bogus cmd_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even before the recent refactoring, 'git multi-pack-index' calls 'trace2_cmd_mode()' before verifying that the sub-command is recognized. Push this call down into the individual sub-commands so that we don't enter a bogus command mode. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index a78640c061..b590c4fc88 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -63,6 +63,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv) { struct option *options = common_opts; + trace2_cmd_mode(argv[0]); + argc = parse_options(argc, argv, NULL, options, builtin_multi_pack_index_write_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -77,6 +79,8 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv) { struct option *options = common_opts; + trace2_cmd_mode(argv[0]); + argc = parse_options(argc, argv, NULL, options, builtin_multi_pack_index_verify_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -91,6 +95,8 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv) { struct option *options = common_opts; + trace2_cmd_mode(argv[0]); + argc = parse_options(argc, argv, NULL, options, builtin_multi_pack_index_expire_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -112,6 +118,8 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv) options = add_common_options(builtin_multi_pack_index_repack_options); + trace2_cmd_mode(argv[0]); + argc = parse_options(argc, argv, NULL, options, builtin_multi_pack_index_repack_usage, @@ -147,8 +155,6 @@ int cmd_multi_pack_index(int argc, const char **argv, usage_with_options(builtin_multi_pack_index_usage, builtin_multi_pack_index_options); - trace2_cmd_mode(argv[0]); - if (!strcmp(argv[0], "repack")) return cmd_multi_pack_index_repack(argc, argv); else if (!strcmp(argv[0], "write")) -- cgit v1.3-5-g9baa From cd57bc41bbcc8d260040243946d19075eff0bfaf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:04:04 -0400 Subject: builtin/multi-pack-index.c: display usage on unrecognized command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When given a sub-command that it doesn't understand, 'git multi-pack-index' dies with the following message: $ git multi-pack-index bogus fatal: unrecognized subcommand: bogus Instead of 'die()'-ing, we can display the usage text, which is much more helpful: $ git.compile multi-pack-index bogus error: unrecognized subcommand: bogus usage: git multi-pack-index [] write or: git multi-pack-index [] verify or: git multi-pack-index [] expire or: git multi-pack-index [] repack [--batch-size=] --object-dir object directory containing set of packfile and pack-index pairs --progress force progress reporting While we're at it, clean up some duplication between the "no sub-command" and "unrecognized sub-command" conditionals. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index b590c4fc88..8711174fae 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -152,8 +152,7 @@ int cmd_multi_pack_index(int argc, const char **argv, opts.object_dir = get_object_directory(); if (argc == 0) - usage_with_options(builtin_multi_pack_index_usage, - builtin_multi_pack_index_options); + goto usage; if (!strcmp(argv[0], "repack")) return cmd_multi_pack_index_repack(argc, argv); @@ -163,6 +162,10 @@ int cmd_multi_pack_index(int argc, const char **argv, return cmd_multi_pack_index_verify(argc, argv); else if (!strcmp(argv[0], "expire")) return cmd_multi_pack_index_expire(argc, argv); - else - die(_("unrecognized subcommand: %s"), argv[0]); + else { +usage: + error(_("unrecognized subcommand: %s"), argv[0]); + usage_with_options(builtin_multi_pack_index_usage, + builtin_multi_pack_index_options); + } } -- cgit v1.3-5-g9baa From 9218c6a40c37023a1f434222d501218cf8157857 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 30 Mar 2021 11:04:11 -0400 Subject: midx: allow marking a pack as preferred When multiple packs in the multi-pack index contain the same object, the MIDX machinery must make a choice about which pack it associates with that object. Prior to this patch, the lowest-ordered[1] pack was always selected. Pack selection for duplicate objects is relatively unimportant today, but it will become important for multi-pack bitmaps. This is because we can only invoke the pack-reuse mechanism when all of the bits for reused objects come from the reuse pack (in order to ensure that all reused deltas can find their base objects in the same pack). To encourage the pack selection process to prefer one pack over another (the pack to be preferred is the one a caller would like to later use as a reuse pack), introduce the concept of a "preferred pack". When provided, the MIDX code will always prefer an object found in a preferred pack over any other. No format changes are required to store the preferred pack, since it will be able to be inferred with a corresponding MIDX bitmap, by looking up the pack associated with the object in the first bit position (this ordering is described in detail in a subsequent commit). [1]: the ordering is specified by MIDX internals; for our purposes we can consider the "lowest ordered" pack to be "the one with the most-recent mtime. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.txt | 14 ++++- Documentation/technical/multi-pack-index.txt | 5 +- builtin/multi-pack-index.c | 19 ++++++- builtin/repack.c | 2 +- midx.c | 82 +++++++++++++++++++++++++--- midx.h | 2 +- t/t5319-multi-pack-index.sh | 42 ++++++++++++++ 7 files changed, 148 insertions(+), 18 deletions(-) (limited to 'builtin') diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index eb0caa0439..ffd601bc17 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -9,7 +9,8 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=] [--[no-]progress] +'git multi-pack-index' [--object-dir=] [--[no-]progress] + [--preferred-pack=] DESCRIPTION ----------- @@ -30,7 +31,16 @@ OPTIONS The following subcommands are available: write:: - Write a new MIDX file. + Write a new MIDX file. The following options are available for + the `write` sub-command: ++ +-- + --preferred-pack=:: + Optionally specify the tie-breaking pack used when + multiple packs contain the same object. If not given, + ties are broken in favor of the pack with the lowest + mtime. +-- verify:: Verify the contents of the MIDX file. diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt index e8e377a59f..fb688976c4 100644 --- a/Documentation/technical/multi-pack-index.txt +++ b/Documentation/technical/multi-pack-index.txt @@ -43,8 +43,9 @@ Design Details a change in format. - The MIDX keeps only one record per object ID. If an object appears - in multiple packfiles, then the MIDX selects the copy in the most- - recently modified packfile. + in multiple packfiles, then the MIDX selects the copy in the + preferred packfile, otherwise selecting from the most-recently + modified packfile. - If there exist packfiles in the pack directory not registered in the MIDX, then those packfiles are loaded into the `packed_git` diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 8711174fae..5d3ea445fd 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -4,9 +4,10 @@ #include "parse-options.h" #include "midx.h" #include "trace2.h" +#include "object-store.h" #define BUILTIN_MIDX_WRITE_USAGE \ - N_("git multi-pack-index [] write") + N_("git multi-pack-index [] write [--preferred-pack=]") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -43,6 +44,7 @@ static char const * const builtin_multi_pack_index_usage[] = { static struct opts_multi_pack_index { const char *object_dir; + const char *preferred_pack; unsigned long batch_size; unsigned flags; } opts; @@ -61,7 +63,15 @@ static struct option *add_common_options(struct option *prev) static int cmd_multi_pack_index_write(int argc, const char **argv) { - struct option *options = common_opts; + struct option *options; + static struct option builtin_multi_pack_index_write_options[] = { + OPT_STRING(0, "preferred-pack", &opts.preferred_pack, + N_("preferred-pack"), + N_("pack for reuse when computing a multi-pack bitmap")), + OPT_END(), + }; + + options = add_common_options(builtin_multi_pack_index_write_options); trace2_cmd_mode(argv[0]); @@ -72,7 +82,10 @@ static int cmd_multi_pack_index_write(int argc, const char **argv) usage_with_options(builtin_multi_pack_index_write_usage, options); - return write_midx_file(opts.object_dir, opts.flags); + FREE_AND_NULL(options); + + return write_midx_file(opts.object_dir, opts.preferred_pack, + opts.flags); } static int cmd_multi_pack_index_verify(int argc, const char **argv) diff --git a/builtin/repack.c b/builtin/repack.c index 01440de2d5..9f00806805 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -523,7 +523,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) remove_temporary_files(); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) - write_midx_file(get_object_directory(), 0); + write_midx_file(get_object_directory(), NULL, 0); string_list_clear(&names, 0); string_list_clear(&rollback, 0); diff --git a/midx.c b/midx.c index 971faa8cfc..b8f42bf31e 100644 --- a/midx.c +++ b/midx.c @@ -431,6 +431,14 @@ static int pack_info_compare(const void *_a, const void *_b) return strcmp(a->pack_name, b->pack_name); } +static int idx_or_pack_name_cmp(const void *_va, const void *_vb) +{ + const char *pack_name = _va; + const struct pack_info *compar = _vb; + + return cmp_idx_or_pack_name(pack_name, compar->pack_name); +} + struct write_midx_context { struct pack_info *info; uint32_t nr; @@ -445,6 +453,8 @@ struct write_midx_context { uint32_t *pack_perm; unsigned large_offsets_needed:1; uint32_t num_large_offsets; + + int preferred_pack_idx; }; static void add_pack_to_midx(const char *full_path, size_t full_path_len, @@ -489,6 +499,7 @@ struct pack_midx_entry { uint32_t pack_int_id; time_t pack_mtime; uint64_t offset; + unsigned preferred : 1; }; static int midx_oid_compare(const void *_a, const void *_b) @@ -500,6 +511,12 @@ static int midx_oid_compare(const void *_a, const void *_b) if (cmp) return cmp; + /* Sort objects in a preferred pack first when multiple copies exist. */ + if (a->preferred > b->preferred) + return -1; + if (a->preferred < b->preferred) + return 1; + if (a->pack_mtime > b->pack_mtime) return -1; else if (a->pack_mtime < b->pack_mtime) @@ -527,7 +544,8 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, static void fill_pack_entry(uint32_t pack_int_id, struct packed_git *p, uint32_t cur_object, - struct pack_midx_entry *entry) + struct pack_midx_entry *entry, + int preferred) { if (nth_packed_object_id(&entry->oid, p, cur_object) < 0) die(_("failed to locate object %d in packfile"), cur_object); @@ -536,6 +554,7 @@ static void fill_pack_entry(uint32_t pack_int_id, entry->pack_mtime = p->mtime; entry->offset = nth_packed_object_offset(p, cur_object); + entry->preferred = !!preferred; } /* @@ -552,7 +571,8 @@ static void fill_pack_entry(uint32_t pack_int_id, static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, struct pack_info *info, uint32_t nr_packs, - uint32_t *nr_objects) + uint32_t *nr_objects, + int preferred_pack) { uint32_t cur_fanout, cur_pack, cur_object; uint32_t alloc_fanout, alloc_objects, total_objects = 0; @@ -589,12 +609,17 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, nth_midxed_pack_midx_entry(m, &entries_by_fanout[nr_fanout], cur_object); + if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack) + entries_by_fanout[nr_fanout].preferred = 1; + else + entries_by_fanout[nr_fanout].preferred = 0; nr_fanout++; } } for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) { uint32_t start = 0, end; + int preferred = cur_pack == preferred_pack; if (cur_fanout) start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1); @@ -602,7 +627,11 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, for (cur_object = start; cur_object < end; cur_object++) { ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); - fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]); + fill_pack_entry(cur_pack, + info[cur_pack].p, + cur_object, + &entries_by_fanout[nr_fanout], + preferred); nr_fanout++; } } @@ -777,7 +806,9 @@ static int write_midx_large_offsets(struct hashfile *f, } static int write_midx_internal(const char *object_dir, struct multi_pack_index *m, - struct string_list *packs_to_drop, unsigned flags) + struct string_list *packs_to_drop, + const char *preferred_pack_name, + unsigned flags) { char *midx_name; uint32_t i; @@ -828,7 +859,19 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) goto cleanup; - ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr); + ctx.preferred_pack_idx = -1; + if (preferred_pack_name) { + for (i = 0; i < ctx.nr; i++) { + if (!cmp_idx_or_pack_name(preferred_pack_name, + ctx.info[i].pack_name)) { + ctx.preferred_pack_idx = i; + break; + } + } + } + + ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr, + ctx.preferred_pack_idx); ctx.large_offsets_needed = 0; for (i = 0; i < ctx.entries_nr; i++) { @@ -889,6 +932,24 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; } + /* Check that the preferred pack wasn't expired (if given). */ + if (preferred_pack_name) { + struct pack_info *preferred = bsearch(preferred_pack_name, + ctx.info, ctx.nr, + sizeof(*ctx.info), + idx_or_pack_name_cmp); + + if (!preferred) + warning(_("unknown preferred pack: '%s'"), + preferred_pack_name); + else { + uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; + if (perm == PACK_EXPIRED) + warning(_("preferred pack '%s' is expired"), + preferred_pack_name); + } + } + if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); @@ -947,9 +1008,12 @@ cleanup: return result; } -int write_midx_file(const char *object_dir, unsigned flags) +int write_midx_file(const char *object_dir, + const char *preferred_pack_name, + unsigned flags) { - return write_midx_internal(object_dir, NULL, NULL, flags); + return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name, + flags); } void clear_midx_file(struct repository *r) @@ -1184,7 +1248,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla free(count); if (packs_to_drop.nr) - result = write_midx_internal(object_dir, m, &packs_to_drop, flags); + result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags); string_list_clear(&packs_to_drop, 0); return result; @@ -1373,7 +1437,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, goto cleanup; } - result = write_midx_internal(object_dir, m, NULL, flags); + result = write_midx_internal(object_dir, m, NULL, NULL, flags); m = NULL; cleanup: diff --git a/midx.h b/midx.h index b18cf53bc4..e7fea61109 100644 --- a/midx.h +++ b/midx.h @@ -47,7 +47,7 @@ int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pa int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); -int write_midx_file(const char *object_dir, unsigned flags); +int write_midx_file(const char *object_dir, const char *preferred_pack_name, unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags); int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index b4afab1dfc..5641d158df 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -234,6 +234,48 @@ test_expect_success 'warn on improper hash version' ' ) ' +test_expect_success 'midx picks objects from preferred pack' ' + test_when_finished rm -rf preferred.git && + git init --bare preferred.git && + ( + cd preferred.git && + + a=$(echo "a" | git hash-object -w --stdin) && + b=$(echo "b" | git hash-object -w --stdin) && + c=$(echo "c" | git hash-object -w --stdin) && + + # Set up two packs, duplicating the object "B" at different + # offsets. + # + # Note that the "BC" pack (the one we choose as preferred) sorts + # lexically after the "AB" pack, meaning that omitting the + # --preferred-pack argument would cause this test to fail (since + # the MIDX code would select the copy of "b" in the "AB" pack). + git pack-objects objects/pack/test-AB <<-EOF && + $a + $b + EOF + bc=$(git pack-objects objects/pack/test-BC <<-EOF + $b + $c + EOF + ) && + + git multi-pack-index --object-dir=objects \ + write --preferred-pack=test-BC-$bc.idx 2>err && + test_must_be_empty err && + + test-tool read-midx --show-objects objects >out && + + ofs=$(git show-index expect && + grep ^$b out >actual && + + test_cmp expect actual + ) +' test_expect_success 'verify multi-pack-index success' ' git multi-pack-index verify --object-dir=$objdir -- cgit v1.3-5-g9baa