aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Derrick Stolee <stolee@gmail.com> 2026-03-26 15:14:53 +0000
committer Junio C Hamano <gitster@pobox.com> 2026-03-26 09:38:07 -0700
commit 3f20c21a1ceeb796e121147a53ba10d28041b1fe (patch)
tree 998e84a94c0d1480e6be6a4cbb0f336b8cdecf3d
parent 7be182045a6a113b118982fc81296d5b9746779e (diff)
download git-3f20c21a1ceeb796e121147a53ba10d28041b1fe.tar.xz
path-walk: support wildcard pathspecs for blob filtering
Previously, walk_objects_by_path() silently ignored pathspecs containing wildcards or magic by clearing them. This caused all blobs to be downloaded regardless of the given pathspec. Wildcard pathspecs like "d/file.*.txt" are useful for narrowing which blobs to process (e.g., during 'git backfill').

Support wildcard pathspecs by making two changes:

1. Add an 'exact_pathspecs' flag to path_walk_context. When the pathspec has no wildcards or magic, set this flag and use the existing fast-path prefix matching in add_tree_entries(). When wildcards are present, skip that block since prefix matching cannot handle glob patterns.

2. Add a match_pathspec() check in walk_path() to filter out blobs whose full path does not match the pathspec. This provides the actual blob-level filtering for wildcard pathspecs.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- path-walk.c 22
-rwxr-xr-x t/t5620-backfill.sh 7
2 files changed, 16 insertions, 13 deletions
diff --git a/path-walk.c b/path-walk.c
index 3750552978..2aa3e7d8a4 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -63,6 +63,8 @@ struct path_walk_context {
*/
struct prio_queue path_stack;
struct strset path_stack_pushed;
+
+ unsigned exact_pathspecs:1;
};
static int compare_by_type(const void *one, const void *two, void *cb_data)
@@ -207,7 +209,7 @@ static int add_tree_entries(struct path_walk_context *ctx,
match != MATCHED)
continue;
}
- if (ctx->revs->prune_data.nr) {
+ if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) {
struct pathspec *pd = &ctx->revs->prune_data;
bool found = false;
int did_strip_suffix = strbuf_strip_suffix(&path, "/");
@@ -302,6 +304,13 @@ static int walk_path(struct path_walk_context *ctx,
return 0;
}
+ if (list->type == OBJ_BLOB &&
+ ctx->revs->prune_data.nr &&
+ !match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
+ path, strlen(path), 0,
+ NULL, 0))
+ return 0;
+
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
@@ -510,14 +519,9 @@ int walk_objects_by_path(struct path_walk_info *info)
info->revs->tag_objects = 1;
if (ctx.revs->prune_data.nr) {
- /*
- * Only exact prefix pathspecs are currently supported.
- * Clear any wildcard or magic pathspecs to avoid
- * incorrect prefix matching.
- */
- if (ctx.revs->prune_data.has_wildcard ||
- ctx.revs->prune_data.magic)
- clear_pathspec(&ctx.revs->prune_data);
+ if (!ctx.revs->prune_data.has_wildcard &&
+ !ctx.revs->prune_data.magic)
+ ctx.exact_pathspecs = 1;
}
/* Insert a single list for the root tree into the paths. */
diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh
index 52f6484ca1..c6f54ee91c 100755
--- a/t/t5620-backfill.sh
+++ b/t/t5620-backfill.sh
@@ -307,12 +307,11 @@ test_expect_success 'backfill with wildcard pathspec' '
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
- # TODO: The wildcard pathspec should limit downloaded blobs,
- # but currently all blobs are downloaded.
- git -C backfill-path backfill HEAD -- "d/file.*.txt" &&
+ git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err &&
+ test_must_be_empty err &&
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
- test_line_count = 0 missing
+ test_line_count = 40 missing
'
test_expect_success 'backfill with --all' '