From b9f7daa6ef52ad8894b5f730eaee0599668d66d5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 10 Jul 2023 17:12:07 -0400 Subject: ref-filter.h: provide `REF_FILTER_INIT` Provide a sane initialization value for `struct ref_filter`, which in a subsequent patch will be used to initialize a new field. In the meantime, ensure that the `ref_filter` struct used in the test-helper's `cmd__reach()` is zero-initialized. The lack of initialization is OK, since `commit_contains()` only looks at the single `with_commit_tag_algo` field that *is* initialized directly above. So this does not fix a bug, but rather prevents one from biting us in the future. Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-reach.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 't') diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 5b6f217441..ef58f10c2d 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -139,7 +139,7 @@ int cmd__reach(int ac, const char **av) printf("%s(X,_,_,0,0):%d\n", av[1], can_all_from_reach_with_flag(&X_obj, 2, 4, 0, 0)); } else if (!strcmp(av[1], "commit_contains")) { - struct ref_filter filter; + struct ref_filter filter = REF_FILTER_INIT; struct contains_cache cache; init_contains_cache(&cache); -- cgit v1.3 From b571fb98008b485bfc6f7d6538b79a7e92d731f4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 10 Jul 2023 17:12:13 -0400 Subject: ref-filter: add `ref_filter_clear()` We did not bother to clean up at all in `git branch` or `git tag`, and `git for-each-ref` only cleans up a couple of members. Add and call `ref_filter_clear()` when cleaning up a `struct ref_filter`. Running this patch (without any test changes) indicates a couple of now leak-free tests. This was found by running: $ make SANITIZE=leak $ make -C t GIT_TEST_PASSING_SANITIZE_LEAK=check GIT_TEST_OPTS=--immediate (Note that the `reachable_from` and `unreachable_from` lists should be cleaned as they are used. So this is just covering any case where we might bail before running the reachability check.) Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/branch.c | 1 + builtin/for-each-ref.c | 3 +-- builtin/tag.c | 1 + ref-filter.c | 16 ++++++++++++++++ ref-filter.h | 3 +++ t/t0041-usage.sh | 1 + t/t3402-rebase-merge.sh | 1 + 7 files changed, 24 insertions(+), 2 deletions(-) (limited to 't') diff --git a/builtin/branch.c b/builtin/branch.c index 7891dec361..07ee874617 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -858,6 +858,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) print_columns(&output, colopts, NULL); string_list_clear(&output, 0); ref_sorting_release(sorting); + ref_filter_clear(&filter); return 0; } else if (edit_description) { const char *branch_name; diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index 99ccb73518..c01fa6fefe 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -120,8 +120,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix) strbuf_release(&err); strbuf_release(&output); ref_array_clear(&array); - free_commit_list(filter.with_commit); - free_commit_list(filter.no_commit); + ref_filter_clear(&filter); ref_sorting_release(sorting); strvec_clear(&vec); return 0; diff --git a/builtin/tag.c b/builtin/tag.c index 6b41bb7374..aab5e693fe 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -645,6 +645,7 @@ int cmd_tag(int argc, const char **argv, const char *prefix) cleanup: ref_sorting_release(sorting); + ref_filter_clear(&filter); strbuf_release(&buf); strbuf_release(&ref); strbuf_release(&reflog_msg); diff --git a/ref-filter.c b/ref-filter.c index 048d277cbf..d32f426898 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2866,3 +2866,19 @@ int parse_opt_merge_filter(const struct option *opt, const char *arg, int unset) return 0; } + +void ref_filter_init(struct ref_filter *filter) +{ + struct ref_filter blank = REF_FILTER_INIT; + memcpy(filter, &blank, sizeof(blank)); +} + +void ref_filter_clear(struct ref_filter *filter) +{ + oid_array_clear(&filter->points_at); + free_commit_list(filter->with_commit); + free_commit_list(filter->no_commit); + free_commit_list(filter->reachable_from); + free_commit_list(filter->unreachable_from); + ref_filter_init(filter); +} diff --git a/ref-filter.h b/ref-filter.h index a920f73b29..160b807224 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -170,4 +170,7 @@ void filter_ahead_behind(struct repository *r, struct ref_format *format, struct ref_array *array); +void ref_filter_init(struct ref_filter *filter); +void ref_filter_clear(struct ref_filter *filter); + #endif /* REF_FILTER_H */ diff --git a/t/t0041-usage.sh b/t/t0041-usage.sh index c4fc34eb18..9ea974b0c6 100755 --- a/t/t0041-usage.sh +++ b/t/t0041-usage.sh @@ -5,6 +5,7 @@ test_description='Test commands behavior when given invalid argument value' GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh test_expect_success 'setup ' ' diff --git a/t/t3402-rebase-merge.sh b/t/t3402-rebase-merge.sh index 79b0640c00..e9e03ca4b5 100755 --- a/t/t3402-rebase-merge.sh +++ b/t/t3402-rebase-merge.sh @@ -8,6 +8,7 @@ test_description='git rebase --merge test' GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh T="A quick brown fox -- cgit v1.3 From 8255dd8a3dce094129690d17595dc822171d1e61 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 10 Jul 2023 17:12:19 -0400 Subject: builtin/for-each-ref.c: add `--exclude` option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using `for-each-ref`, it is sometimes convenient for the caller to be able to exclude certain parts of the references. For example, if there are many `refs/__hidden__/*` references, the caller may want to emit all references *except* the hidden ones. Currently, the only way to do this is to post-process the output, like: $ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/' Which is do-able, but requires processing a potentially large quantity of references. Teach `git for-each-ref` a new `--exclude=` option, which excludes references from the results if they match one or more excluded patterns. This patch provides a naive implementation where the `ref_filter` still sees all references (including ones that it will discard) and is left to check whether each reference matches any excluded pattern(s) before emitting them. By culling out references we know the caller doesn't care about, we can avoid allocating memory for their storage, as well as spending time sorting the output (among other things). Even the naive implementation provides a significant speed-up on a modified copy of linux.git (that has a hidden ref pointing at each commit): $ hyperfine \ 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/" Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms] Range (min … max): 817.7 ms … 823.3 ms 10 runs Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/ Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms] Range (min … max): 104.7 ms … 109.1 ms 27 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran 7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' Subsequent patches will improve on this by avoiding visiting excluded sections of the `packed-refs` file in certain cases. Co-authored-by: Jeff King Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-for-each-ref.txt | 6 ++++++ builtin/for-each-ref.c | 1 + ref-filter.c | 14 ++++++++++++++ ref-filter.h | 6 ++++++ t/t6300-for-each-ref.sh | 35 +++++++++++++++++++++++++++++++++++ 5 files changed, 62 insertions(+) (limited to 't') diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt index 1e215d4e73..5743eb5def 100644 --- a/Documentation/git-for-each-ref.txt +++ b/Documentation/git-for-each-ref.txt @@ -14,6 +14,7 @@ SYNOPSIS [--points-at=] [--merged[=]] [--no-merged[=]] [--contains[=]] [--no-contains[=]] + [--exclude= ...] DESCRIPTION ----------- @@ -102,6 +103,11 @@ OPTIONS Do not print a newline after formatted refs where the format expands to the empty string. +--exclude=:: + If one or more patterns are given, only refs which do not match + any excluded pattern(s) are shown. Matching is done using the + same rules as `` above. + FIELD NAMES ----------- diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index c01fa6fefe..3384987428 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -47,6 +47,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix) OPT_INTEGER( 0 , "count", &maxcount, N_("show only matched refs")), OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")), OPT__COLOR(&format.use_color, N_("respect format colors")), + OPT_REF_FILTER_EXCLUDE(&filter), OPT_REF_SORT(&sorting_options), OPT_CALLBACK(0, "points-at", &filter.points_at, N_("object"), N_("print only refs which points at the given object"), diff --git a/ref-filter.c b/ref-filter.c index 91acf53ef9..ec9b79c918 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2171,6 +2171,16 @@ static int filter_pattern_match(struct ref_filter *filter, const char *refname) filter->ignore_case); } +static int filter_exclude_match(struct ref_filter *filter, const char *refname) +{ + if (!filter->exclude.nr) + return 0; + if (filter->match_as_path) + return match_name_as_path(filter->exclude.v, refname, + filter->ignore_case); + return match_pattern(filter->exclude.v, refname, filter->ignore_case); +} + /* * This is the same as for_each_fullref_in(), but it tries to iterate * only over the patterns we'll care about. Note that it _doesn't_ do a full @@ -2338,6 +2348,9 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid, if (!filter_pattern_match(filter, refname)) return 0; + if (filter_exclude_match(filter, refname)) + return 0; + if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname)) return 0; @@ -2877,6 +2890,7 @@ void ref_filter_init(struct ref_filter *filter) void ref_filter_clear(struct ref_filter *filter) { + strvec_clear(&filter->exclude); oid_array_clear(&filter->points_at); free_commit_list(filter->with_commit); free_commit_list(filter->no_commit); diff --git a/ref-filter.h b/ref-filter.h index 160b807224..1524bc463a 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -6,6 +6,7 @@ #include "refs.h" #include "commit.h" #include "string-list.h" +#include "strvec.h" /* Quoting styles */ #define QUOTE_NONE 0 @@ -59,6 +60,7 @@ struct ref_array { struct ref_filter { const char **name_patterns; + struct strvec exclude; struct oid_array points_at; struct commit_list *with_commit; struct commit_list *no_commit; @@ -94,6 +96,7 @@ struct ref_format { #define REF_FILTER_INIT { \ .points_at = OID_ARRAY_INIT, \ + .exclude = STRVEC_INIT, \ } #define REF_FORMAT_INIT { \ .use_color = -1, \ @@ -112,6 +115,9 @@ struct ref_format { #define OPT_REF_SORT(var) \ OPT_STRING_LIST(0, "sort", (var), \ N_("key"), N_("field name to sort on")) +#define OPT_REF_FILTER_EXCLUDE(var) \ + OPT_STRVEC(0, "exclude", &(var)->exclude, \ + N_("pattern"), N_("exclude refs which match pattern")) /* * API for filtering a set of refs. Based on the type of refs the user diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh index 5c00607608..7e8d578522 100755 --- a/t/t6300-for-each-ref.sh +++ b/t/t6300-for-each-ref.sh @@ -447,6 +447,41 @@ test_expect_success 'exercise glob patterns with prefixes' ' test_cmp expected actual ' +cat >expected <<\EOF +refs/tags/bar +refs/tags/baz +refs/tags/testtag +EOF + +test_expect_success 'exercise patterns with prefix exclusions' ' + for tag in foo/one foo/two foo/three bar baz + do + git tag "$tag" || return 1 + done && + test_when_finished "git tag -d foo/one foo/two foo/three bar baz" && + git for-each-ref --format="%(refname)" \ + refs/tags/ --exclude=refs/tags/foo >actual && + test_cmp expected actual +' + +cat >expected <<\EOF +refs/tags/bar +refs/tags/baz +refs/tags/foo/one +refs/tags/testtag +EOF + +test_expect_success 'exercise patterns with pattern exclusions' ' + for tag in foo/one foo/two foo/three bar baz + do + git tag "$tag" || return 1 + done && + test_when_finished "git tag -d foo/one foo/two foo/three bar baz" && + git for-each-ref --format="%(refname)" \ + refs/tags/ --exclude="refs/tags/foo/t*" >actual && + test_cmp expected actual +' + cat >expected <<\EOF 'refs/heads/main' 'refs/remotes/origin/main' -- cgit v1.3 From 59c35fac54054b3f781b0275eac7d7c54468f0d5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 10 Jul 2023 17:12:28 -0400 Subject: refs/packed-backend.c: implement jump lists to avoid excluded pattern(s) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When iterating through the `packed-refs` file in order to answer a query like: $ git for-each-ref --exclude=refs/__hidden__ it would be useful to avoid walking over all of the entries in `refs/__hidden__/*` when possible, since we know that the ref-filter code is going to throw them away anyways. In certain circumstances, doing so is possible. The algorithm for doing so is as follows: - For each excluded pattern, find the first record that matches it, and the first record that *doesn't* match it (i.e. the location you'd next want to consider when excluding that pattern). - Sort the set of excluded regions from the previous step in ascending order of the first location within the `packed-refs` file that matches. - Clean up the results from the previous step: discard empty regions, and combine adjacent regions. The set of regions which remains is referred to as the "jump list", and never contains any references which should be included in the result set. Then when iterating through the `packed-refs` file, if `iter->pos` is ever contained in one of the regions from the previous steps, advance `iter->pos` past the end of that region, and continue enumeration. Note that we only perform this optimization when none of the excluded pattern(s) have special meta-characters in them. For a pattern like "refs/foo[ac]", the excluded regions ("refs/fooa", "refs/fooc", and everything underneath them) are not connected. A future implementation that handles this case may split the character class (pretending as if two patterns were excluded: "refs/fooa", and "refs/fooc"). There are a few other gotchas worth considering. First, note that the jump list is sorted, so once we jump past a region, we can avoid considering it (or any regions preceding it) again. The member `jump_pos` is used to track the first next-possible region to jump through. Second, note that the jump list is best-effort, since we do not handle loose references, and because of the meta-character issue above. The jump list may not skip past all references which won't appear in the results, but will never skip over a reference which does appear in the result set. In repositories with a large number of hidden references, the speed-up can be significant. Tests here are done with a copy of linux.git with a reference "refs/pull/N" pointing at every commit, as in: $ git rev-list HEAD | awk '{ print "create refs/pull/" NR " " $0 }' | git update-ref --stdin $ git pack-refs --all , it is significantly faster to have `for-each-ref` jump over the excluded references, as opposed to filtering them out after the fact: $ hyperfine \ 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' \ 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' Benchmark 1: git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/" Time (mean ± σ): 798.1 ms ± 3.3 ms [User: 687.6 ms, System: 146.4 ms] Range (min … max): 794.5 ms … 805.5 ms 10 runs Benchmark 2: git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 98.9 ms ± 1.4 ms [User: 93.1 ms, System: 5.7 ms] Range (min … max): 97.0 ms … 104.0 ms 29 runs Benchmark 3: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 4.5 ms ± 0.2 ms [User: 0.7 ms, System: 3.8 ms] Range (min … max): 4.1 ms … 5.8 ms 524 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' ran 21.87 ± 1.05 times faster than 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' 176.52 ± 8.19 times faster than 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' (Comparing stock git and this patch isn't quite fair, since an earlier commit in this series adds a naive implementation of the `--exclude` option. `git.prev` is built from the previous commit and includes this naive implementation). Using the jump list is fairly straightforward (see the changes to `refs/packed-backend.c::next_record()`), but constructing the list is not. To ensure that the construction is correct, add a new suite of tests in t1419 covering various corner cases (overlapping regions, partially overlapping regions, adjacent regions, etc.). Co-authored-by: Jeff King Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- ref-filter.c | 6 +- refs.h | 4 ++ refs/packed-backend.c | 163 ++++++++++++++++++++++++++++++++++++++++++++-- t/helper/test-ref-store.c | 10 +++ t/t1419-exclude-refs.sh | 101 ++++++++++++++++++++++++++++ 5 files changed, 276 insertions(+), 8 deletions(-) create mode 100755 t/t1419-exclude-refs.sh (limited to 't') diff --git a/ref-filter.c b/ref-filter.c index c72d016bcc..28c1eb06be 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2210,12 +2210,14 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, if (!filter->name_patterns[0]) { /* no patterns; we have to look at everything */ - return for_each_fullref_in("", cb, cb_data); + return refs_for_each_fullref_in(get_main_ref_store(the_repository), + "", filter->exclude.v, cb, cb_data); } return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository), NULL, filter->name_patterns, - NULL, cb, cb_data); + filter->exclude.v, + cb, cb_data); } /* diff --git a/refs.h b/refs.h index d672d636cf..207e18a98f 100644 --- a/refs.h +++ b/refs.h @@ -337,6 +337,10 @@ int for_each_ref(each_ref_fn fn, void *cb_data); */ int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data); +/* + * references matching any pattern in "exclude_patterns" are omitted from the + * result set on a best-effort basis. + */ int refs_for_each_fullref_in(struct ref_store *refs, const char *prefix, const char **exclude_patterns, each_ref_fn fn, void *cb_data); diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 33639f73e1..092b50fa84 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -303,7 +303,8 @@ static int cmp_packed_ref_records(const void *v1, const void *v2) * Compare a snapshot record at `rec` to the specified NUL-terminated * refname. */ -static int cmp_record_to_refname(const char *rec, const char *refname) +static int cmp_record_to_refname(const char *rec, const char *refname, + int start) { const char *r1 = rec + the_hash_algo->hexsz + 1; const char *r2 = refname; @@ -312,7 +313,7 @@ static int cmp_record_to_refname(const char *rec, const char *refname) if (*r1 == '\n') return *r2 ? -1 : 0; if (!*r2) - return 1; + return start ? 1 : -1; if (*r1 != *r2) return (unsigned char)*r1 < (unsigned char)*r2 ? -1 : +1; r1++; @@ -528,7 +529,8 @@ static int load_contents(struct snapshot *snapshot) } static const char *find_reference_location_1(struct snapshot *snapshot, - const char *refname, int mustexist) + const char *refname, int mustexist, + int start) { /* * This is not *quite* a garden-variety binary search, because @@ -558,7 +560,7 @@ static const char *find_reference_location_1(struct snapshot *snapshot, mid = lo + (hi - lo) / 2; rec = find_start_of_record(lo, mid); - cmp = cmp_record_to_refname(rec, refname); + cmp = cmp_record_to_refname(rec, refname, start); if (cmp < 0) { lo = find_end_of_record(mid, hi); } else if (cmp > 0) { @@ -591,7 +593,22 @@ static const char *find_reference_location_1(struct snapshot *snapshot, static const char *find_reference_location(struct snapshot *snapshot, const char *refname, int mustexist) { - return find_reference_location_1(snapshot, refname, mustexist); + return find_reference_location_1(snapshot, refname, mustexist, 1); +} + +/* + * Find the place in `snapshot->buf` after the end of the record for + * `refname`. In other words, find the location of first thing *after* + * `refname`. + * + * Other semantics are identical to the ones in + * `find_reference_location()`. + */ +static const char *find_reference_location_end(struct snapshot *snapshot, + const char *refname, + int mustexist) +{ + return find_reference_location_1(snapshot, refname, mustexist, 0); } /* @@ -785,6 +802,13 @@ struct packed_ref_iterator { /* The end of the part of the buffer that will be iterated over: */ const char *eof; + struct jump_list_entry { + const char *start; + const char *end; + } *jump; + size_t jump_nr, jump_alloc; + size_t jump_cur; + /* Scratch space for current values: */ struct object_id oid, peeled; struct strbuf refname_buf; @@ -802,14 +826,35 @@ struct packed_ref_iterator { */ static int next_record(struct packed_ref_iterator *iter) { - const char *p = iter->pos, *eol; + const char *p, *eol; strbuf_reset(&iter->refname_buf); + /* + * If iter->pos is contained within a skipped region, jump past + * it. + * + * Note that each skipped region is considered at most once, + * since they are ordered based on their starting position. + */ + while (iter->jump_cur < iter->jump_nr) { + struct jump_list_entry *curr = &iter->jump[iter->jump_cur]; + if (iter->pos < curr->start) + break; /* not to the next jump yet */ + + iter->jump_cur++; + if (iter->pos < curr->end) { + iter->pos = curr->end; + /* jumps are coalesced, so only one jump is necessary */ + break; + } + } + if (iter->pos == iter->eof) return ITER_DONE; iter->base.flags = REF_ISPACKED; + p = iter->pos; if (iter->eof - p < the_hash_algo->hexsz + 2 || parse_oid_hex(p, &iter->oid, &p) || @@ -917,6 +962,7 @@ static int packed_ref_iterator_abort(struct ref_iterator *ref_iterator) int ok = ITER_DONE; strbuf_release(&iter->refname_buf); + free(iter->jump); release_snapshot(iter->snapshot); base_ref_iterator_free(ref_iterator); return ok; @@ -928,6 +974,108 @@ static struct ref_iterator_vtable packed_ref_iterator_vtable = { .abort = packed_ref_iterator_abort }; +static int jump_list_entry_cmp(const void *va, const void *vb) +{ + const struct jump_list_entry *a = va; + const struct jump_list_entry *b = vb; + + if (a->start < b->start) + return -1; + if (a->start > b->start) + return 1; + return 0; +} + +static int has_glob_special(const char *str) +{ + const char *p; + for (p = str; *p; p++) { + if (is_glob_special(*p)) + return 1; + } + return 0; +} + +static void populate_excluded_jump_list(struct packed_ref_iterator *iter, + struct snapshot *snapshot, + const char **excluded_patterns) +{ + size_t i, j; + const char **pattern; + struct jump_list_entry *last_disjoint; + + if (!excluded_patterns) + return; + + for (pattern = excluded_patterns; *pattern; pattern++) { + struct jump_list_entry *e; + const char *start, *end; + + /* + * We can't feed any excludes with globs in them to the + * refs machinery. It only understands prefix matching. + * We likewise can't even feed the string leading up to + * the first meta-character, as something like "foo[a]" + * should not exclude "foobar" (but the prefix "foo" + * would match that and mark it for exclusion). + */ + if (has_glob_special(*pattern)) + continue; + + start = find_reference_location(snapshot, *pattern, 0); + end = find_reference_location_end(snapshot, *pattern, 0); + + if (start == end) + continue; /* nothing to jump over */ + + ALLOC_GROW(iter->jump, iter->jump_nr + 1, iter->jump_alloc); + + e = &iter->jump[iter->jump_nr++]; + e->start = start; + e->end = end; + } + + if (!iter->jump_nr) { + /* + * Every entry in exclude_patterns has a meta-character, + * nothing to do here. + */ + return; + } + + QSORT(iter->jump, iter->jump_nr, jump_list_entry_cmp); + + /* + * As an optimization, merge adjacent entries in the jump list + * to jump forwards as far as possible when entering a skipped + * region. + * + * For example, if we have two skipped regions: + * + * [[A, B], [B, C]] + * + * we want to combine that into a single entry jumping from A to + * C. + */ + last_disjoint = iter->jump; + + for (i = 1, j = 1; i < iter->jump_nr; i++) { + struct jump_list_entry *ours = &iter->jump[i]; + if (ours->start <= last_disjoint->end) { + /* overlapping regions extend the previous one */ + last_disjoint->end = last_disjoint->end > ours->end + ? last_disjoint->end : ours->end; + } else { + /* otherwise, insert a new region */ + iter->jump[j++] = *ours; + last_disjoint = ours; + } + } + + iter->jump_nr = j; + iter->jump_cur = 0; +} + static struct ref_iterator *packed_ref_iterator_begin( struct ref_store *ref_store, const char *prefix, const char **exclude_patterns, @@ -963,6 +1111,9 @@ static struct ref_iterator *packed_ref_iterator_begin( ref_iterator = &iter->base; base_ref_iterator_init(ref_iterator, &packed_ref_iterator_vtable, 1); + if (exclude_patterns) + populate_excluded_jump_list(iter, snapshot, exclude_patterns); + iter->snapshot = snapshot; acquire_snapshot(snapshot); diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 6d8f844e9c..2bff003f7c 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -175,6 +175,15 @@ static int cmd_for_each_ref(struct ref_store *refs, const char **argv) return refs_for_each_ref_in(refs, prefix, each_ref, NULL); } +static int cmd_for_each_ref__exclude(struct ref_store *refs, const char **argv) +{ + const char *prefix = notnull(*argv++, "prefix"); + const char **exclude_patterns = argv; + + return refs_for_each_fullref_in(refs, prefix, exclude_patterns, each_ref, + NULL); +} + static int cmd_resolve_ref(struct ref_store *refs, const char **argv) { struct object_id oid = *null_oid(); @@ -307,6 +316,7 @@ static struct command commands[] = { { "delete-refs", cmd_delete_refs }, { "rename-ref", cmd_rename_ref }, { "for-each-ref", cmd_for_each_ref }, + { "for-each-ref--exclude", cmd_for_each_ref__exclude }, { "resolve-ref", cmd_resolve_ref }, { "verify-ref", cmd_verify_ref }, { "for-each-reflog", cmd_for_each_reflog }, diff --git a/t/t1419-exclude-refs.sh b/t/t1419-exclude-refs.sh new file mode 100755 index 0000000000..bc534c8ea1 --- /dev/null +++ b/t/t1419-exclude-refs.sh @@ -0,0 +1,101 @@ +#!/bin/sh + +test_description='test exclude_patterns functionality in main ref store' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +for_each_ref__exclude () { + test-tool ref-store main for-each-ref--exclude "$@" >actual.raw + cut -d ' ' -f 2 actual.raw +} + +for_each_ref () { + git for-each-ref --format='%(refname)' "$@" +} + +test_expect_success 'setup' ' + test_commit --no-tag base && + base="$(git rev-parse HEAD)" && + + for name in foo bar baz quux + do + for i in 1 2 3 + do + echo "create refs/heads/$name/$i $base" || return 1 + done || return 1 + done >in && + echo "delete refs/heads/main" >>in && + + git update-ref --stdin actual && + for_each_ref refs/heads/bar refs/heads/baz refs/heads/quux >expect && + + test_cmp expect actual +' + +test_expect_success 'excluded region at beginning' ' + for_each_ref__exclude refs/heads refs/heads/bar >actual && + for_each_ref refs/heads/baz refs/heads/foo refs/heads/quux >expect && + + test_cmp expect actual +' + +test_expect_success 'excluded region at end' ' + for_each_ref__exclude refs/heads refs/heads/quux >actual && + for_each_ref refs/heads/foo refs/heads/bar refs/heads/baz >expect && + + test_cmp expect actual +' + +test_expect_success 'disjoint excluded regions' ' + for_each_ref__exclude refs/heads refs/heads/bar refs/heads/quux >actual && + for_each_ref refs/heads/baz refs/heads/foo >expect && + + test_cmp expect actual +' + +test_expect_success 'adjacent, non-overlapping excluded regions' ' + for_each_ref__exclude refs/heads refs/heads/bar refs/heads/baz >actual && + for_each_ref refs/heads/foo refs/heads/quux >expect && + + test_cmp expect actual +' + +test_expect_success 'overlapping excluded regions' ' + for_each_ref__exclude refs/heads refs/heads/ba refs/heads/baz >actual && + for_each_ref refs/heads/foo refs/heads/quux >expect && + + test_cmp expect actual +' + +test_expect_success 'several overlapping excluded regions' ' + for_each_ref__exclude refs/heads \ + refs/heads/bar refs/heads/baz refs/heads/foo >actual && + for_each_ref refs/heads/quux >expect && + + test_cmp expect actual +' + +test_expect_success 'non-matching excluded section' ' + for_each_ref__exclude refs/heads refs/heads/does/not/exist >actual && + for_each_ref >expect && + + test_cmp expect actual +' + +test_expect_success 'meta-characters are discarded' ' + for_each_ref__exclude refs/heads "refs/heads/ba*" >actual && + for_each_ref >expect && + + test_cmp expect actual +' + +test_done -- cgit v1.3 From c489f47a649da8984a2240032e1d819e9a80ea2a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 10 Jul 2023 17:12:31 -0400 Subject: refs/packed-backend.c: add trace2 counters for jump list The previous commit added low-level tests to ensure that the packed-refs iterator did not enumerate excluded sections of the refspace. However, there was no guarantee that these sections weren't being visited, only that they were being suppressed from the output. To harden these tests, add a trace2 counter which tracks the number of regions skipped by the packed-refs iterator, and assert on its value. Suggested-by: Derrick Stolee Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 2 ++ t/t1419-exclude-refs.sh | 59 +++++++++++++++++++++++++++++++++---------------- trace2.h | 2 ++ trace2/tr2_ctr.c | 5 +++++ 4 files changed, 49 insertions(+), 19 deletions(-) (limited to 't') diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 092b50fa84..97cc2090fd 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -12,6 +12,7 @@ #include "../chdir-notify.h" #include "../wrapper.h" #include "../write-or-die.h" +#include "../trace2.h" enum mmap_strategy { /* @@ -845,6 +846,7 @@ static int next_record(struct packed_ref_iterator *iter) iter->jump_cur++; if (iter->pos < curr->end) { iter->pos = curr->end; + trace2_counter_add(TRACE2_COUNTER_ID_PACKED_REFS_JUMPS, 1); /* jumps are coalesced, so only one jump is necessary */ break; } diff --git a/t/t1419-exclude-refs.sh b/t/t1419-exclude-refs.sh index bc534c8ea1..5d8c86b657 100755 --- a/t/t1419-exclude-refs.sh +++ b/t/t1419-exclude-refs.sh @@ -9,7 +9,8 @@ TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh for_each_ref__exclude () { - test-tool ref-store main for-each-ref--exclude "$@" >actual.raw + GIT_TRACE2_PERF=1 test-tool ref-store main \ + for-each-ref--exclude "$@" >actual.raw cut -d ' ' -f 2 actual.raw } @@ -17,6 +18,17 @@ for_each_ref () { git for-each-ref --format='%(refname)' "$@" } +assert_jumps () { + local nr="$1" + local trace="$2" + + grep -q "name:jumps_made value:$nr$" $trace +} + +assert_no_jumps () { + ! assert_jumps ".*" "$1" +} + test_expect_success 'setup' ' test_commit --no-tag base && base="$(git rev-parse HEAD)" && @@ -35,67 +47,76 @@ test_expect_success 'setup' ' ' test_expect_success 'excluded region in middle' ' - for_each_ref__exclude refs/heads refs/heads/foo >actual && + for_each_ref__exclude refs/heads refs/heads/foo >actual 2>perf && for_each_ref refs/heads/bar refs/heads/baz refs/heads/quux >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 1 perf ' test_expect_success 'excluded region at beginning' ' - for_each_ref__exclude refs/heads refs/heads/bar >actual && + for_each_ref__exclude refs/heads refs/heads/bar >actual 2>perf && for_each_ref refs/heads/baz refs/heads/foo refs/heads/quux >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 1 perf ' test_expect_success 'excluded region at end' ' - for_each_ref__exclude refs/heads refs/heads/quux >actual && + for_each_ref__exclude refs/heads refs/heads/quux >actual 2>perf && for_each_ref refs/heads/foo refs/heads/bar refs/heads/baz >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 1 perf ' test_expect_success 'disjoint excluded regions' ' - for_each_ref__exclude refs/heads refs/heads/bar refs/heads/quux >actual && + for_each_ref__exclude refs/heads refs/heads/bar refs/heads/quux >actual 2>perf && for_each_ref refs/heads/baz refs/heads/foo >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 2 perf ' test_expect_success 'adjacent, non-overlapping excluded regions' ' - for_each_ref__exclude refs/heads refs/heads/bar refs/heads/baz >actual && + for_each_ref__exclude refs/heads refs/heads/bar refs/heads/baz >actual 2>perf && for_each_ref refs/heads/foo refs/heads/quux >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 1 perf ' test_expect_success 'overlapping excluded regions' ' - for_each_ref__exclude refs/heads refs/heads/ba refs/heads/baz >actual && + for_each_ref__exclude refs/heads refs/heads/ba refs/heads/baz >actual 2>perf && for_each_ref refs/heads/foo refs/heads/quux >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 1 perf ' test_expect_success 'several overlapping excluded regions' ' for_each_ref__exclude refs/heads \ - refs/heads/bar refs/heads/baz refs/heads/foo >actual && + refs/heads/bar refs/heads/baz refs/heads/foo >actual 2>perf && for_each_ref refs/heads/quux >expect && - test_cmp expect actual + test_cmp expect actual && + assert_jumps 1 perf ' test_expect_success 'non-matching excluded section' ' - for_each_ref__exclude refs/heads refs/heads/does/not/exist >actual && + for_each_ref__exclude refs/heads refs/heads/does/not/exist >actual 2>perf && for_each_ref >expect && - test_cmp expect actual + test_cmp expect actual && + assert_no_jumps perf ' test_expect_success 'meta-characters are discarded' ' - for_each_ref__exclude refs/heads "refs/heads/ba*" >actual && + for_each_ref__exclude refs/heads "refs/heads/ba*" >actual 2>perf && for_each_ref >expect && - test_cmp expect actual + test_cmp expect actual && + assert_no_jumps perf ' test_done diff --git a/trace2.h b/trace2.h index 4ced30c0db..9452e291f5 100644 --- a/trace2.h +++ b/trace2.h @@ -551,6 +551,8 @@ enum trace2_counter_id { TRACE2_COUNTER_ID_TEST1 = 0, /* emits summary event only */ TRACE2_COUNTER_ID_TEST2, /* emits summary and thread events */ + TRACE2_COUNTER_ID_PACKED_REFS_JUMPS, /* counts number of jumps */ + /* Add additional counter definitions before here. */ TRACE2_NUMBER_OF_COUNTERS }; diff --git a/trace2/tr2_ctr.c b/trace2/tr2_ctr.c index b342d3b1a3..50570d0165 100644 --- a/trace2/tr2_ctr.c +++ b/trace2/tr2_ctr.c @@ -27,6 +27,11 @@ static struct tr2_counter_metadata tr2_counter_metadata[TRACE2_NUMBER_OF_COUNTER .name = "test2", .want_per_thread_events = 1, }, + [TRACE2_COUNTER_ID_PACKED_REFS_JUMPS] = { + .category = "packed-refs", + .name = "jumps_made", + .want_per_thread_events = 0, + }, /* Add additional metadata before here. */ }; -- cgit v1.3