From 4f33a6345f2bd6e47253d1dbefd01874d895ab2e Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Wed, 9 Mar 2022 16:01:38 +0000 Subject: list-objects: handle NULL function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a caller to traverse_commit_list() specifies the options for the --objects flag but does not specify a show_object function pointer, the result is a segfault. This is currently visible by running 'git bundle create --objects HEAD'. We could fix this problem by supplying a no-op callback in builtin/bundle.c, but that only solves the problem for one builtin, leaving this segfault open for other callers. Replace all callers of the show_commit and show_object function pointers in list-objects.c to call helper functions show_commit() and show_object() which check that the given context has non-NULL functions before passing the necessary data. One extra benefit is that it reduces duplication due to passing ctx->show_data to every caller. Test that this segfault no longer occurs for 'git bundle'. Co-authored-by: Derrick Stolee Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index a0bb687b0f..7ba60a573d 100644 --- a/bundle.c +++ b/bundle.c @@ -544,6 +544,8 @@ int create_bundle(struct repository *r, const char *path, die("revision walk setup failed"); bpi.fd = bundle_fd; bpi.pending = &revs_copy.pending; + + revs.blob_objects = revs.tree_objects = 0; traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); object_array_remove_duplicates(&revs_copy.pending); -- cgit v1.3 From 105c6f14ad34b417c1e78bc9a8704dcda7b059f2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:39 +0000 Subject: bundle: parse filter capability The v3 bundle format has capabilities, allowing newer versions of Git to create bundles with newer features. Older versions that do not understand these new capabilities will fail with a helpful warning. Create a new capability allowing Git to understand that the contained pack-file is filtered according to some object filter. Typically, this filter will be "blob:none" for a blobless partial clone. This change teaches Git to parse this capability, place its value in the bundle header, and demonstrate this understanding by adding a message to 'git bundle verify'. Since we will use gently_parse_list_objects_filter() outside of list-objects-filter-options.c, make it an external method and move its API documentation to before its declaration. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-bundle.txt | 7 +++++-- Documentation/technical/bundle-format.txt | 11 ++++++++--- bundle.c | 15 ++++++++++++++- bundle.h | 2 ++ list-objects-filter-options.c | 17 +---------------- list-objects-filter-options.h | 20 ++++++++++++++++++++ 6 files changed, 50 insertions(+), 22 deletions(-) (limited to 'bundle.c') diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt index 72ab813905..ac4c4352aa 100644 --- a/Documentation/git-bundle.txt +++ b/Documentation/git-bundle.txt @@ -75,8 +75,11 @@ verify :: cleanly to the current repository. This includes checks on the bundle format itself as well as checking that the prerequisite commits exist and are fully linked in the current repository. - 'git bundle' prints a list of missing commits, if any, and exits - with a non-zero status. + Information about additional capabilities, such as "object filter", + is printed. See "Capabilities" in link:technical/bundle-format.html + for more information. Finally, 'git bundle' prints a list of + missing commits, if any. The exit code is zero for success, but + will be nonzero if the bundle file is invalid. list-heads :: Lists the references defined in the bundle. If followed by a diff --git a/Documentation/technical/bundle-format.txt b/Documentation/technical/bundle-format.txt index bac558d049..b9be8644cf 100644 --- a/Documentation/technical/bundle-format.txt +++ b/Documentation/technical/bundle-format.txt @@ -71,6 +71,11 @@ and the Git bundle v2 format cannot represent a shallow clone repository. == Capabilities Because there is no opportunity for negotiation, unknown capabilities cause 'git -bundle' to abort. The only known capability is `object-format`, which specifies -the hash algorithm in use, and can take the same values as the -`extensions.objectFormat` configuration value. +bundle' to abort. + +* `object-format` specifies the hash algorithm in use, and can take the same + values as the `extensions.objectFormat` configuration value. + +* `filter` specifies an object filter as in the `--filter` option in + linkgit:git-rev-list[1]. The resulting pack-file must be marked as a + `.promisor` pack-file after it is unbundled. diff --git a/bundle.c b/bundle.c index 7ba60a573d..41e75efab9 100644 --- a/bundle.c +++ b/bundle.c @@ -11,7 +11,7 @@ #include "run-command.h" #include "refs.h" #include "strvec.h" - +#include "list-objects-filter-options.h" static const char v2_bundle_signature[] = "# v2 git bundle\n"; static const char v3_bundle_signature[] = "# v3 git bundle\n"; @@ -33,6 +33,7 @@ void bundle_header_release(struct bundle_header *header) { string_list_clear(&header->prerequisites, 1); string_list_clear(&header->references, 1); + list_objects_filter_release(&header->filter); } static int parse_capability(struct bundle_header *header, const char *capability) @@ -45,6 +46,10 @@ static int parse_capability(struct bundle_header *header, const char *capability header->hash_algo = &hash_algos[algo]; return 0; } + if (skip_prefix(capability, "filter=", &arg)) { + parse_list_objects_filter(&header->filter, arg); + return 0; + } return error(_("unknown capability '%s'"), capability); } @@ -220,6 +225,8 @@ int verify_bundle(struct repository *r, req_nr = revs.pending.nr; setup_revisions(2, argv, &revs, NULL); + list_objects_filter_copy(&revs.filter, &header->filter); + if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); @@ -259,6 +266,12 @@ int verify_bundle(struct repository *r, r->nr), r->nr); list_refs(r, 0, NULL); + + if (header->filter.choice) { + printf_ln("The bundle uses this filter: %s", + list_objects_filter_spec(&header->filter)); + } + r = &header->prerequisites; if (!r->nr) { printf_ln(_("The bundle records a complete history.")); diff --git a/bundle.h b/bundle.h index 06009fe6b1..7fef2108f4 100644 --- a/bundle.h +++ b/bundle.h @@ -4,12 +4,14 @@ #include "strvec.h" #include "cache.h" #include "string-list.h" +#include "list-objects-filter-options.h" struct bundle_header { unsigned version; struct string_list prerequisites; struct string_list references; const struct git_hash_algo *hash_algo; + struct list_objects_filter_options filter; }; #define BUNDLE_HEADER_INIT \ diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 449d53af69..f02d8df142 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -40,22 +40,7 @@ const char *list_object_filter_config_name(enum list_objects_filter_choice c) BUG("list_object_filter_config_name: invalid argument '%d'", c); } -/* - * Parse value of the argument to the "filter" keyword. - * On the command line this looks like: - * --filter= - * and in the pack protocol as: - * "filter" SP - * - * The filter keyword will be used by many commands. - * See Documentation/rev-list-options.txt for allowed values for . - * - * Capture the given arg as the "filter_spec". This can be forwarded to - * subordinate commands when necessary (although it's better to pass it through - * expand_list_objects_filter_spec() first). We also "intern" the arg for the - * convenience of the current command. - */ -static int gently_parse_list_objects_filter( +int gently_parse_list_objects_filter( struct list_objects_filter_options *filter_options, const char *arg, struct strbuf *errbuf) diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 425c38cae9..2eb6c98394 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -72,6 +72,26 @@ struct list_objects_filter_options { /* Normalized command line arguments */ #define CL_ARG__FILTER "filter" +/* + * Parse value of the argument to the "filter" keyword. + * On the command line this looks like: + * --filter= + * and in the pack protocol as: + * "filter" SP + * + * The filter keyword will be used by many commands. + * See Documentation/rev-list-options.txt for allowed values for . + * + * Capture the given arg as the "filter_spec". This can be forwarded to + * subordinate commands when necessary (although it's better to pass it through + * expand_list_objects_filter_spec() first). We also "intern" the arg for the + * convenience of the current command. + */ +int gently_parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf); + void list_objects_filter_die_if_populated( struct list_objects_filter_options *filter_options); -- cgit v1.3 From f18b512bbba8ee508fec64698f581920bfa5b46d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:41 +0000 Subject: bundle: create filtered bundles A previous change allowed Git to parse bundles with the 'filter' capability. Now, teach Git to create bundles with this option. Some rearranging of code is required to get the option parsing in the correct spot. There are now two reasons why we might need capabilities (a new hash algorithm or an object filter) so that is pulled out into a place where we can check both at the same time. The --filter option is parsed as part of setup_revisions(), but it expected the --objects flag, too. That flag is somewhat implied by 'git bundle' because it creates a pack-file walking objects, but there is also a walk that walks the revision range expecting only commits. Make this parsing work by setting 'revs.tree_objects' and 'revs.blob_objects' before the call to setup_revisions(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 53 +++++++++++++++++++++++++++++++++++++++----------- t/t6020-bundle-misc.sh | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 11 deletions(-) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index 41e75efab9..9370a6e307 100644 --- a/bundle.c +++ b/bundle.c @@ -332,6 +332,9 @@ static int write_pack_data(int bundle_fd, struct rev_info *revs, struct strvec * "--stdout", "--thin", "--delta-base-offset", NULL); strvec_pushv(&pack_objects.args, pack_options->v); + if (revs->filter.choice) + strvec_pushf(&pack_objects.args, "--filter=%s", + list_objects_filter_spec(&revs->filter)); pack_objects.in = -1; pack_objects.out = bundle_fd; pack_objects.git_cmd = 1; @@ -499,10 +502,37 @@ int create_bundle(struct repository *r, const char *path, int bundle_to_stdout; int ref_count = 0; struct rev_info revs, revs_copy; - int min_version = the_hash_algo == &hash_algos[GIT_HASH_SHA1] ? 2 : 3; + int min_version = 2; struct bundle_prerequisites_info bpi; int i; + /* init revs to list objects for pack-objects later */ + save_commit_buffer = 0; + repo_init_revisions(r, &revs, NULL); + + /* + * Pre-initialize the '--objects' flag so we can parse a + * --filter option successfully. + */ + revs.tree_objects = revs.blob_objects = 1; + + argc = setup_revisions(argc, argv, &revs, NULL); + + /* + * Reasons to require version 3: + * + * 1. @object-format is required because our hash algorithm is not + * SHA1. + * 2. @filter is required because we parsed an object filter. + */ + if (the_hash_algo != &hash_algos[GIT_HASH_SHA1] || revs.filter.choice) + min_version = 3; + + if (argc > 1) { + error(_("unrecognized argument: %s"), argv[1]); + goto err; + } + bundle_to_stdout = !strcmp(path, "-"); if (bundle_to_stdout) bundle_fd = 1; @@ -525,17 +555,14 @@ int create_bundle(struct repository *r, const char *path, write_or_die(bundle_fd, capability, strlen(capability)); write_or_die(bundle_fd, the_hash_algo->name, strlen(the_hash_algo->name)); write_or_die(bundle_fd, "\n", 1); - } - - /* init revs to list objects for pack-objects later */ - save_commit_buffer = 0; - repo_init_revisions(r, &revs, NULL); - argc = setup_revisions(argc, argv, &revs, NULL); - - if (argc > 1) { - error(_("unrecognized argument: %s"), argv[1]); - goto err; + if (revs.filter.choice) { + const char *value = expand_list_objects_filter_spec(&revs.filter); + capability = "@filter="; + write_or_die(bundle_fd, capability, strlen(capability)); + write_or_die(bundle_fd, value, strlen(value)); + write_or_die(bundle_fd, "\n", 1); + } } /* save revs.pending in revs_copy for later use */ @@ -558,6 +585,10 @@ int create_bundle(struct repository *r, const char *path, bpi.fd = bundle_fd; bpi.pending = &revs_copy.pending; + /* + * Remove any object walking here. We only care about commits and + * tags here. The revs_copy has the right instances of these values. + */ revs.blob_objects = revs.tree_objects = 0; traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); object_array_remove_duplicates(&revs_copy.pending); diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index df5ff561fa..6e97c044ee 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -487,4 +487,52 @@ test_expect_success 'unfiltered bundle with --objects' ' test_cmp expect actual ' +for filter in "blob:none" "tree:0" "tree:1" "blob:limit=100" +do + test_expect_success "filtered bundle: $filter" ' + test_when_finished rm -rf .git/objects/pack cloned unbundled && + git bundle create partial.bdl \ + --all \ + --filter=$filter && + + git bundle verify partial.bdl >unfiltered && + make_user_friendly_and_stable_output actual && + + cat >expect <<-EOF && + The bundle contains these 10 refs: + refs/heads/main + refs/heads/release + refs/heads/topic/1 + refs/heads/topic/2 + refs/pull/1/head + refs/pull/2/head + refs/tags/v1 + refs/tags/v2 + refs/tags/v3 + HEAD + The bundle uses this filter: $filter + The bundle records a complete history. + EOF + test_cmp expect actual && + + test_config uploadpack.allowfilter 1 && + test_config uploadpack.allowanysha1inwant 1 && + git clone --no-local --filter=$filter --bare "file://$(pwd)" cloned && + + git init unbundled && + git -C unbundled bundle unbundle ../partial.bdl >ref-list.txt && + + # Count the same number of reachable objects. + reflist=$(git for-each-ref --format="%(objectname)") && + git rev-list --objects --filter=$filter --missing=allow-any \ + $reflist >expect && + for repo in cloned unbundled + do + git -C $repo rev-list --objects --missing=allow-any \ + $reflist >actual && + test_cmp expect actual || return 1 + done + ' +done + test_done -- cgit v1.3 From 4f39eb031af8690eb86eb781e43b10141dd47da9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:42 +0000 Subject: bundle: unbundle promisor packs In order to have a valid pack-file after unbundling a bundle that has the 'filter' capability, we need to generate a .promisor file. The bundle does not promise _where_ the objects can be found, but we can expect that these bundles will be unbundled in repositories with appropriate promisor remotes that can find those missing objects. Use the 'git index-pack --promisor=' option to create this .promisor file. Add "from-bundle" as the message to help anyone diagnose issues with these promisor packs. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 4 ++++ t/t6020-bundle-misc.sh | 2 ++ 2 files changed, 6 insertions(+) (limited to 'bundle.c') diff --git a/bundle.c b/bundle.c index 9370a6e307..56681c2113 100644 --- a/bundle.c +++ b/bundle.c @@ -620,6 +620,10 @@ int unbundle(struct repository *r, struct bundle_header *header, struct child_process ip = CHILD_PROCESS_INIT; strvec_pushl(&ip.args, "index-pack", "--fix-thin", "--stdin", NULL); + /* If there is a filter, then we need to create the promisor pack. */ + if (header->filter.choice) + strvec_push(&ip.args, "--promisor=from-bundle"); + if (extra_index_pack_args) { strvec_pushv(&ip.args, extra_index_pack_args->v); strvec_clear(extra_index_pack_args); diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index 6e97c044ee..7c6db67022 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -521,6 +521,8 @@ do git init unbundled && git -C unbundled bundle unbundle ../partial.bdl >ref-list.txt && + ls unbundled/.git/objects/pack/pack-*.promisor >promisor && + test_line_count = 1 promisor && # Count the same number of reachable objects. reflist=$(git for-each-ref --format="%(objectname)") && -- cgit v1.3