aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-backfill.adoc5
-rw-r--r--builtin/backfill.c22
-rw-r--r--path-walk.c43
-rw-r--r--path.c2
-rw-r--r--path.h6
-rw-r--r--revision.h1
-rwxr-xr-xt/t5620-backfill.sh211
7 files changed, 280 insertions, 10 deletions
diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc
index b8394dcf22..246ab417c2 100644
--- a/Documentation/git-backfill.adoc
+++ b/Documentation/git-backfill.adoc
@@ -63,9 +63,12 @@ OPTIONS
current sparse-checkout. If the sparse-checkout feature is enabled,
then `--sparse` is assumed and can be disabled with `--no-sparse`.
+You may also specify the commit limiting options from linkgit:git-rev-list[1].
+
SEE ALSO
--------
-linkgit:git-clone[1].
+linkgit:git-clone[1],
+linkgit:git-rev-list[1]
GIT
---
diff --git a/builtin/backfill.c b/builtin/backfill.c
index e9a33e81be..2c5ce56fb7 100644
--- a/builtin/backfill.c
+++ b/builtin/backfill.c
@@ -35,6 +35,7 @@ struct backfill_context {
struct oid_array current_batch;
size_t min_batch_size;
int sparse;
+ struct rev_info revs;
};
static void backfill_context_clear(struct backfill_context *ctx)
@@ -79,7 +80,6 @@ static int fill_missing_blobs(const char *path UNUSED,
static int do_backfill(struct backfill_context *ctx)
{
- struct rev_info revs;
struct path_walk_info info = PATH_WALK_INFO_INIT;
int ret;
@@ -91,13 +91,14 @@ static int do_backfill(struct backfill_context *ctx)
}
}
- repo_init_revisions(ctx->repo, &revs, "");
- handle_revision_arg("HEAD", &revs, 0, 0);
+ /* Walk from HEAD if otherwise unspecified. */
+ if (!ctx->revs.pending.nr)
+ add_head_to_pending(&ctx->revs);
info.blobs = 1;
info.tags = info.commits = info.trees = 0;
- info.revs = &revs;
+ info.revs = &ctx->revs;
info.path_fn = fill_missing_blobs;
info.path_fn_data = ctx;
@@ -108,7 +109,6 @@ static int do_backfill(struct backfill_context *ctx)
download_batch(ctx);
path_walk_info_clear(&info);
- release_revisions(&revs);
return ret;
}
@@ -120,6 +120,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
.current_batch = OID_ARRAY_INIT,
.min_batch_size = 50000,
.sparse = 0,
+ .revs = REV_INFO_INIT,
};
struct option options[] = {
OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
@@ -134,7 +135,15 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
builtin_backfill_usage, options);
argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
- 0);
+ PARSE_OPT_KEEP_UNKNOWN_OPT |
+ PARSE_OPT_KEEP_ARGV0 |
+ PARSE_OPT_KEEP_DASHDASH);
+
+ repo_init_revisions(repo, &ctx.revs, prefix);
+ argc = setup_revisions(argc, argv, &ctx.revs, NULL);
+
+ if (argc > 1)
+ die(_("unrecognized argument: %s"), argv[1]);
repo_config(repo, git_default_config, NULL);
@@ -143,5 +152,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
result = do_backfill(&ctx);
backfill_context_clear(&ctx);
+ release_revisions(&ctx.revs);
return result;
}
diff --git a/path-walk.c b/path-walk.c
index 364e4cfa19..2aa3e7d8a4 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -11,6 +11,7 @@
#include "list-objects.h"
#include "object.h"
#include "oid-array.h"
+#include "path.h"
#include "prio-queue.h"
#include "repository.h"
#include "revision.h"
@@ -62,6 +63,8 @@ struct path_walk_context {
*/
struct prio_queue path_stack;
struct strset path_stack_pushed;
+
+ unsigned exact_pathspecs:1;
};
static int compare_by_type(const void *one, const void *two, void *cb_data)
@@ -206,6 +209,33 @@ static int add_tree_entries(struct path_walk_context *ctx,
match != MATCHED)
continue;
}
+ if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) {
+ struct pathspec *pd = &ctx->revs->prune_data;
+ bool found = false;
+ int did_strip_suffix = strbuf_strip_suffix(&path, "/");
+
+
+ for (int i = 0; i < pd->nr; i++) {
+ struct pathspec_item *item = &pd->items[i];
+
+ /*
+ * Continue if either is a directory prefix
+ * of the other.
+ */
+ if (dir_prefix(path.buf, item->match) ||
+ dir_prefix(item->match, path.buf)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (did_strip_suffix)
+ strbuf_addch(&path, '/');
+
+ /* Skip paths that do not match the prefix. */
+ if (!found)
+ continue;
+ }
add_path_to_list(ctx, path.buf, type, &entry.oid,
!(o->flags & UNINTERESTING));
@@ -274,6 +304,13 @@ static int walk_path(struct path_walk_context *ctx,
return 0;
}
+ if (list->type == OBJ_BLOB &&
+ ctx->revs->prune_data.nr &&
+ !match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
+ path, strlen(path), 0,
+ NULL, 0))
+ return 0;
+
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
@@ -481,6 +518,12 @@ int walk_objects_by_path(struct path_walk_info *info)
if (info->tags)
info->revs->tag_objects = 1;
+ if (ctx.revs->prune_data.nr) {
+ if (!ctx.revs->prune_data.has_wildcard &&
+ !ctx.revs->prune_data.magic)
+ ctx.exact_pathspecs = 1;
+ }
+
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
diff --git a/path.c b/path.c
index c285357859..d7e17bf174 100644
--- a/path.c
+++ b/path.c
@@ -56,7 +56,7 @@ static void strbuf_cleanup_path(struct strbuf *sb)
strbuf_remove(sb, 0, path - sb->buf);
}
-static int dir_prefix(const char *buf, const char *dir)
+int dir_prefix(const char *buf, const char *dir)
{
size_t len = strlen(dir);
return !strncmp(buf, dir, len) &&
diff --git a/path.h b/path.h
index cbcad254a0..0434ba5e07 100644
--- a/path.h
+++ b/path.h
@@ -112,6 +112,12 @@ const char *repo_submodule_path_replace(struct repository *repo,
const char *fmt, ...)
__attribute__((format (printf, 4, 5)));
+/*
+ * Given a directory name 'dir' (not ending with a trailing '/'),
+ * determine if 'buf' is equal to 'dir' or has prefix 'dir'+'/'.
+ */
+int dir_prefix(const char *buf, const char *dir);
+
void report_linked_checkout_garbage(struct repository *r);
/*
diff --git a/revision.h b/revision.h
index 69242ecb18..584f1338b5 100644
--- a/revision.h
+++ b/revision.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "grep.h"
#include "notes.h"
+#include "object-name.h"
#include "oidset.h"
#include "pretty.h"
#include "diff.h"
diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh
index 58c81556e7..2c347a91fe 100755
--- a/t/t5620-backfill.sh
+++ b/t/t5620-backfill.sh
@@ -7,6 +7,14 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
+test_expect_success 'backfill rejects unexpected arguments' '
+ test_must_fail git backfill unexpected-arg 2>err &&
+ test_grep "ambiguous argument .*unexpected-arg" err &&
+
+ test_must_fail git backfill --all --unexpected-arg --first-parent 2>err &&
+ test_grep "unrecognized argument: --unexpected-arg" err
+'
+
# We create objects in the 'src' repo.
test_expect_success 'setup repo for object creation' '
echo "{print \$1}" >print_1.awk &&
@@ -15,7 +23,7 @@ test_expect_success 'setup repo for object creation' '
git init src &&
mkdir -p src/a/b/c &&
- mkdir -p src/d/e &&
+ mkdir -p src/d/f &&
for i in 1 2
do
@@ -26,8 +34,9 @@ test_expect_success 'setup repo for object creation' '
echo "Version $i of file a/b/$n" > src/a/b/file.$n.txt &&
echo "Version $i of file a/b/c/$n" > src/a/b/c/file.$n.txt &&
echo "Version $i of file d/$n" > src/d/file.$n.txt &&
- echo "Version $i of file d/e/$n" > src/d/e/file.$n.txt &&
+ echo "Version $i of file d/f/$n" > src/d/f/file.$n.txt &&
git -C src add . &&
+ test_tick &&
git -C src commit -m "Iteration $n" || return 1
done
done
@@ -41,6 +50,53 @@ test_expect_success 'setup bare clone for server' '
git -C srv.bare config --local uploadpack.allowanysha1inwant 1
'
+# Create a version of the repo with branches for testing revision
+# arguments like --all, --first-parent, and --since.
+#
+# main: 8 commits (linear) + merge of side branch
+# 48 original blobs + 4 side blobs = 52 blobs from main HEAD
+# side: 2 commits adding s/file.{1,2}.txt (v1, v2), merged into main
+# other: 1 commit adding o/file.{1,2}.txt (not merged)
+# 54 total blobs reachable from --all
+test_expect_success 'setup branched repo for revision tests' '
+ git clone src src-revs &&
+
+ # Side branch from tip of main with unique files
+ git -C src-revs checkout -b side HEAD &&
+ mkdir -p src-revs/s &&
+ echo "Side version 1 of file 1" >src-revs/s/file.1.txt &&
+ echo "Side version 1 of file 2" >src-revs/s/file.2.txt &&
+ test_tick &&
+ git -C src-revs add . &&
+ git -C src-revs commit -m "Side commit 1" &&
+
+ echo "Side version 2 of file 1" >src-revs/s/file.1.txt &&
+ echo "Side version 2 of file 2" >src-revs/s/file.2.txt &&
+ test_tick &&
+ git -C src-revs add . &&
+ git -C src-revs commit -m "Side commit 2" &&
+
+ # Merge side into main
+ git -C src-revs checkout main &&
+ test_tick &&
+ git -C src-revs merge side --no-ff -m "Merge side branch" &&
+
+ # Other branch (not merged) for --all testing
+ git -C src-revs checkout -b other main~1 &&
+ mkdir -p src-revs/o &&
+ echo "Other content 1" >src-revs/o/file.1.txt &&
+ echo "Other content 2" >src-revs/o/file.2.txt &&
+ test_tick &&
+ git -C src-revs add . &&
+ git -C src-revs commit -m "Other commit" &&
+
+ git -C src-revs checkout main &&
+
+ git clone --bare "file://$(pwd)/src-revs" srv-revs.bare &&
+ git -C srv-revs.bare config --local uploadpack.allowfilter 1 &&
+ git -C srv-revs.bare config --local uploadpack.allowanysha1inwant 1
+'
+
# do basic partial clone from "srv.bare"
test_expect_success 'do partial clone 1, backfill gets all objects' '
git clone --no-checkout --filter=blob:none \
@@ -176,6 +232,157 @@ test_expect_success 'backfill --sparse without cone mode (negative)' '
test_line_count = 12 missing
'
+test_expect_success 'backfill with revision range' '
+ test_when_finished rm -rf backfill-revs &&
+ git clone --no-checkout --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill-revs &&
+
+ # No blobs yet
+ git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 48 missing &&
+
+ git -C backfill-revs backfill HEAD~2..HEAD &&
+
+ # 30 objects downloaded.
+ git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 18 missing
+'
+
+test_expect_success 'backfill with revisions over stdin' '
+ test_when_finished rm -rf backfill-revs &&
+ git clone --no-checkout --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill-revs &&
+
+ # No blobs yet
+ git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 48 missing &&
+
+ cat >in <<-EOF &&
+ HEAD
+ ^HEAD~2
+ EOF
+
+ git -C backfill-revs backfill --stdin <in &&
+
+ # 30 objects downloaded.
+ git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 18 missing
+'
+
+test_expect_success 'backfill with prefix pathspec' '
+ test_when_finished rm -rf backfill-path &&
+ git clone --bare --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill-path &&
+
+ # No blobs yet
+ git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 48 missing &&
+
+ git -C backfill-path backfill HEAD -- d/f 2>err &&
+ test_must_be_empty err &&
+
+ git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 40 missing
+'
+
+test_expect_success 'backfill with multiple pathspecs' '
+ test_when_finished rm -rf backfill-path &&
+ git clone --bare --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill-path &&
+
+ # No blobs yet
+ git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 48 missing &&
+
+ git -C backfill-path backfill HEAD -- d/f a 2>err &&
+ test_must_be_empty err &&
+
+ git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 16 missing
+'
+
+test_expect_success 'backfill with wildcard pathspec' '
+ test_when_finished rm -rf backfill-path &&
+ git clone --bare --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill-path &&
+
+ # No blobs yet
+ git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 48 missing &&
+
+ git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err &&
+ test_must_be_empty err &&
+
+ git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 40 missing
+'
+
+test_expect_success 'backfill with --all' '
+ test_when_finished rm -rf backfill-all &&
+ git clone --no-checkout --filter=blob:none \
+ "file://$(pwd)/srv-revs.bare" backfill-all &&
+
+ # All blobs from all refs are missing
+ git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
+ test_line_count = 54 missing &&
+
+ # Backfill from HEAD gets main blobs only
+ git -C backfill-all backfill HEAD &&
+
+ # Other branch blobs still missing
+ git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
+ test_line_count = 2 missing &&
+
+ # Backfill with --all gets everything
+ git -C backfill-all backfill --all &&
+
+ git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
+ test_line_count = 0 missing
+'
+
+test_expect_success 'backfill with --first-parent' '
+ test_when_finished rm -rf backfill-fp &&
+ git clone --no-checkout --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv-revs.bare" backfill-fp &&
+
+ git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 52 missing &&
+
+ # --first-parent skips the side branch commits, so
+ # s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed.
+ git -C backfill-fp backfill --first-parent HEAD &&
+
+ git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 2 missing
+'
+
+test_expect_success 'backfill with --since' '
+ test_when_finished rm -rf backfill-since &&
+ git clone --no-checkout --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv-revs.bare" backfill-since &&
+
+ git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 52 missing &&
+
+ # Use a cutoff between commits 4 and 5 (between v1 and v2
+ # iterations). Commits 5-8 still carry v1 of files 2-4 in
+ # their trees, but v1 of file.1.txt is only in commits 1-4.
+ SINCE=$(git -C backfill-since log --first-parent --reverse \
+ --format=%ct HEAD~1 | sed -n 5p) &&
+ git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD &&
+
+ # 6 missing: v1 of file.1.txt in all 6 directories
+ git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 6 missing
+'
+
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd