From f447a499dbb8be3a9f76f8099938129c74fcbd32 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Mon, 13 Aug 2018 11:14:28 -0700 Subject: list-objects: store common func args in struct This will make utility functions easier to create, as done by the next patch. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 158 +++++++++++++++++++++++++++------------------------------ 1 file changed, 74 insertions(+), 84 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index c99c47ac18..584518a3fa 100644 --- a/list-objects.c +++ b/list-objects.c @@ -12,20 +12,25 @@ #include "packfile.h" #include "object-store.h" -static void process_blob(struct rev_info *revs, +struct traversal_context { + struct rev_info *revs; + show_object_fn show_object; + show_commit_fn show_commit; + void *show_data; + filter_object_fn filter_fn; + void *filter_data; +}; + +static void process_blob(struct traversal_context *ctx, struct blob *blob, - show_object_fn show, struct strbuf *path, - const char *name, - void *cb_data, - filter_object_fn filter_fn, - void *filter_data) + const char *name) { struct object *obj = &blob->object; size_t pathlen; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; - if (!revs->blob_objects) + if (!ctx->revs->blob_objects) return; if (!obj) die("bad blob object"); @@ -41,21 +46,21 @@ static void process_blob(struct rev_info *revs, * may cause the actual filter to report an incomplete list * of missing objects. 
*/ - if (revs->exclude_promisor_objects && + if (ctx->revs->exclude_promisor_objects && !has_object_file(&obj->oid) && is_promisor_object(&obj->oid)) return; pathlen = path->len; strbuf_addstr(path, name); - if (!(obj->flags & USER_GIVEN) && filter_fn) - r = filter_fn(LOFS_BLOB, obj, - path->buf, &path->buf[pathlen], - filter_data); + if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + r = ctx->filter_fn(LOFS_BLOB, obj, + path->buf, &path->buf[pathlen], + ctx->filter_data); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - show(obj, path->buf, cb_data); + ctx->show_object(obj, path->buf, ctx->show_data); strbuf_setlen(path, pathlen); } @@ -81,26 +86,21 @@ static void process_blob(struct rev_info *revs, * the link, and how to do it. Whether it necessarily makes * any sense what-so-ever to ever do that is another issue. */ -static void process_gitlink(struct rev_info *revs, +static void process_gitlink(struct traversal_context *ctx, const unsigned char *sha1, - show_object_fn show, struct strbuf *path, - const char *name, - void *cb_data) + const char *name) { /* Nothing to do */ } -static void process_tree(struct rev_info *revs, +static void process_tree(struct traversal_context *ctx, struct tree *tree, - show_object_fn show, struct strbuf *base, - const char *name, - void *cb_data, - filter_object_fn filter_fn, - void *filter_data) + const char *name) { struct object *obj = &tree->object; + struct rev_info *revs = ctx->revs; struct tree_desc desc; struct name_entry entry; enum interesting match = revs->diffopt.pathspec.nr == 0 ? 
@@ -133,14 +133,14 @@ static void process_tree(struct rev_info *revs, } strbuf_addstr(base, name); - if (!(obj->flags & USER_GIVEN) && filter_fn) - r = filter_fn(LOFS_BEGIN_TREE, obj, - base->buf, &base->buf[baselen], - filter_data); + if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + r = ctx->filter_fn(LOFS_BEGIN_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter_data); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - show(obj, base->buf, cb_data); + ctx->show_object(obj, base->buf, ctx->show_data); if (base->len) strbuf_addch(base, '/'); @@ -157,29 +157,25 @@ static void process_tree(struct rev_info *revs, } if (S_ISDIR(entry.mode)) - process_tree(revs, + process_tree(ctx, lookup_tree(the_repository, entry.oid), - show, base, entry.path, - cb_data, filter_fn, filter_data); + base, entry.path); else if (S_ISGITLINK(entry.mode)) - process_gitlink(revs, entry.oid->hash, - show, base, entry.path, - cb_data); + process_gitlink(ctx, entry.oid->hash, base, entry.path); else - process_blob(revs, + process_blob(ctx, lookup_blob(the_repository, entry.oid), - show, base, entry.path, - cb_data, filter_fn, filter_data); + base, entry.path); } - if (!(obj->flags & USER_GIVEN) && filter_fn) { - r = filter_fn(LOFS_END_TREE, obj, - base->buf, &base->buf[baselen], - filter_data); + if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { + r = ctx->filter_fn(LOFS_END_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter_data); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - show(obj, base->buf, cb_data); + ctx->show_object(obj, base->buf, ctx->show_data); } strbuf_setlen(base, baselen); @@ -242,19 +238,15 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) add_pending_object(revs, &tree->object, ""); } -static void traverse_trees_and_blobs(struct rev_info *revs, - struct strbuf *base, - show_object_fn show_object, - void *show_data, - filter_object_fn filter_fn, - void *filter_data) +static void 
traverse_trees_and_blobs(struct traversal_context *ctx, + struct strbuf *base) { int i; assert(base->len == 0); - for (i = 0; i < revs->pending.nr; i++) { - struct object_array_entry *pending = revs->pending.objects + i; + for (i = 0; i < ctx->revs->pending.nr; i++) { + struct object_array_entry *pending = ctx->revs->pending.objects + i; struct object *obj = pending->item; const char *name = pending->name; const char *path = pending->path; @@ -262,62 +254,49 @@ static void traverse_trees_and_blobs(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, name, show_data); + ctx->show_object(obj, name, ctx->show_data); continue; } if (!path) path = ""; if (obj->type == OBJ_TREE) { - process_tree(revs, (struct tree *)obj, show_object, - base, path, show_data, - filter_fn, filter_data); + process_tree(ctx, (struct tree *)obj, base, path); continue; } if (obj->type == OBJ_BLOB) { - process_blob(revs, (struct blob *)obj, show_object, - base, path, show_data, - filter_fn, filter_data); + process_blob(ctx, (struct blob *)obj, base, path); continue; } die("unknown pending object %s (%s)", oid_to_hex(&obj->oid), name); } - object_array_clear(&revs->pending); + object_array_clear(&ctx->revs->pending); } -static void do_traverse(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *show_data, - filter_object_fn filter_fn, - void *filter_data) +static void do_traverse(struct traversal_context *ctx) { struct commit *commit; struct strbuf csp; /* callee's scratch pad */ strbuf_init(&csp, PATH_MAX); - while ((commit = get_revision(revs)) != NULL) { + while ((commit = get_revision(ctx->revs)) != NULL) { /* * an uninteresting boundary commit may not have its tree * parsed yet, but we are not going to show them anyway */ if (get_commit_tree(commit)) - add_pending_tree(revs, get_commit_tree(commit)); - show_commit(commit, show_data); + add_pending_tree(ctx->revs, get_commit_tree(commit)); + 
ctx->show_commit(commit, ctx->show_data); - if (revs->tree_blobs_in_commit_order) + if (ctx->revs->tree_blobs_in_commit_order) /* * NEEDSWORK: Adding the tree and then flushing it here * needs a reallocation for each commit. Can we pass the * tree directory without allocation churn? */ - traverse_trees_and_blobs(revs, &csp, - show_object, show_data, - filter_fn, filter_data); + traverse_trees_and_blobs(ctx, &csp); } - traverse_trees_and_blobs(revs, &csp, - show_object, show_data, - filter_fn, filter_data); + traverse_trees_and_blobs(ctx, &csp); strbuf_release(&csp); } @@ -326,7 +305,14 @@ void traverse_commit_list(struct rev_info *revs, show_object_fn show_object, void *show_data) { - do_traverse(revs, show_commit, show_object, show_data, NULL, NULL); + struct traversal_context ctx; + ctx.revs = revs; + ctx.show_commit = show_commit; + ctx.show_object = show_object; + ctx.show_data = show_data; + ctx.filter_fn = NULL; + ctx.filter_data = NULL; + do_traverse(&ctx); } void traverse_commit_list_filtered( @@ -337,14 +323,18 @@ void traverse_commit_list_filtered( void *show_data, struct oidset *omitted) { - filter_object_fn filter_fn = NULL; + struct traversal_context ctx; filter_free_fn filter_free_fn = NULL; - void *filter_data = NULL; - - filter_data = list_objects_filter__init(omitted, filter_options, - &filter_fn, &filter_free_fn); - do_traverse(revs, show_commit, show_object, show_data, - filter_fn, filter_data); - if (filter_data && filter_free_fn) - filter_free_fn(filter_data); + + ctx.revs = revs; + ctx.show_object = show_object; + ctx.show_commit = show_commit; + ctx.show_data = show_data; + ctx.filter_fn = NULL; + + ctx.filter_data = list_objects_filter__init(omitted, filter_options, + &ctx.filter_fn, &filter_free_fn); + do_traverse(&ctx); + if (ctx.filter_data && filter_free_fn) + filter_free_fn(ctx.filter_data); } -- cgit v1.3 From 9202489174a110f82867edbac601f12480a4e284 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Mon, 13 Aug 2018 11:14:29 -0700 
Subject: list-objects: refactor to process_tree_contents This will be used in a follow-up patch to reduce indentation needed when invoking the logic conditionally. i.e. rather than: if (foo) { while (...) { /* this is very indented */ } } we will have: if (foo) process_tree_contents(...); Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 68 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 27 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index 584518a3fa..ccc529e5e3 100644 --- a/list-objects.c +++ b/list-objects.c @@ -94,6 +94,46 @@ static void process_gitlink(struct traversal_context *ctx, /* Nothing to do */ } +static void process_tree(struct traversal_context *ctx, + struct tree *tree, + struct strbuf *base, + const char *name); + +static void process_tree_contents(struct traversal_context *ctx, + struct tree *tree, + struct strbuf *base) +{ + struct tree_desc desc; + struct name_entry entry; + enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ? 
+ all_entries_interesting : entry_not_interesting; + + init_tree_desc(&desc, tree->buffer, tree->size); + + while (tree_entry(&desc, &entry)) { + if (match != all_entries_interesting) { + match = tree_entry_interesting(&entry, base, 0, + &ctx->revs->diffopt.pathspec); + if (match == all_entries_not_interesting) + break; + if (match == entry_not_interesting) + continue; + } + + if (S_ISDIR(entry.mode)) + process_tree(ctx, + lookup_tree(the_repository, entry.oid), + base, entry.path); + else if (S_ISGITLINK(entry.mode)) + process_gitlink(ctx, entry.oid->hash, + base, entry.path); + else + process_blob(ctx, + lookup_blob(the_repository, entry.oid), + base, entry.path); + } +} + static void process_tree(struct traversal_context *ctx, struct tree *tree, struct strbuf *base, @@ -101,10 +141,6 @@ static void process_tree(struct traversal_context *ctx, { struct object *obj = &tree->object; struct rev_info *revs = ctx->revs; - struct tree_desc desc; - struct name_entry entry; - enum interesting match = revs->diffopt.pathspec.nr == 0 ? 
- all_entries_interesting: entry_not_interesting; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; int gently = revs->ignore_missing_links || @@ -144,29 +180,7 @@ static void process_tree(struct traversal_context *ctx, if (base->len) strbuf_addch(base, '/'); - init_tree_desc(&desc, tree->buffer, tree->size); - - while (tree_entry(&desc, &entry)) { - if (match != all_entries_interesting) { - match = tree_entry_interesting(&entry, base, 0, - &revs->diffopt.pathspec); - if (match == all_entries_not_interesting) - break; - if (match == entry_not_interesting) - continue; - } - - if (S_ISDIR(entry.mode)) - process_tree(ctx, - lookup_tree(the_repository, entry.oid), - base, entry.path); - else if (S_ISGITLINK(entry.mode)) - process_gitlink(ctx, entry.oid->hash, base, entry.path); - else - process_blob(ctx, - lookup_blob(the_repository, entry.oid), - base, entry.path); - } + process_tree_contents(ctx, tree, base); if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { r = ctx->filter_fn(LOFS_END_TREE, obj, -- cgit v1.3 From f1d02daacfe657fd175634174b4928a645d537f4 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Tue, 14 Aug 2018 17:22:52 -0700 Subject: list-objects: always parse trees gently If parsing fails when revs->ignore_missing_links and revs->exclude_promisor_objects are both false, we print the OID anyway in the die("bad tree object...") call, so any message printed by parse_tree_gently() is superfluous. 
Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index ccc529e5e3..f9b51db7a7 100644 --- a/list-objects.c +++ b/list-objects.c @@ -143,8 +143,6 @@ static void process_tree(struct traversal_context *ctx, struct rev_info *revs = ctx->revs; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; - int gently = revs->ignore_missing_links || - revs->exclude_promisor_objects; if (!revs->tree_objects) return; @@ -152,7 +150,7 @@ static void process_tree(struct traversal_context *ctx, die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - if (parse_tree_gently(tree, gently) < 0) { + if (parse_tree_gently(tree, 1) < 0) { if (revs->ignore_missing_links) return; -- cgit v1.3 From 7c0fe330d5f3d2fc7aac57a19c7580ea2543c799 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:23 -0700 Subject: rev-list: handle missing tree objects properly Previously, we assumed only blob objects could be missing. This patch makes rev-list handle missing trees like missing blobs. The --missing=* and --exclude-promisor-objects flags now work for trees as they already do for blobs. This is demonstrated in t6112. 
Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 11 ++++++--- list-objects.c | 11 ++++++--- revision.h | 15 ++++++++++++ t/t0410-partial-clone.sh | 45 ++++++++++++++++++++++++++++++++++ t/t5317-pack-objects-filter-objects.sh | 13 ++++++++++ t/t6112-rev-list-filters-objects.sh | 22 +++++++++++++++++ 6 files changed, 110 insertions(+), 7 deletions(-) (limited to 'list-objects.c') diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 5b07f3f4a2..49d6deed70 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -6,6 +6,7 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "object.h" #include "object-store.h" #include "pack.h" #include "pack-bitmap.h" @@ -209,7 +210,8 @@ static inline void finish_object__ma(struct object *obj) */ switch (arg_missing_action) { case MA_ERROR: - die("missing blob object '%s'", oid_to_hex(&obj->oid)); + die("missing %s object '%s'", + type_name(obj->type), oid_to_hex(&obj->oid)); return; case MA_ALLOW_ANY: @@ -222,8 +224,8 @@ static inline void finish_object__ma(struct object *obj) case MA_ALLOW_PROMISOR: if (is_promisor_object(&obj->oid)) return; - die("unexpected missing blob object '%s'", - oid_to_hex(&obj->oid)); + die("unexpected missing %s object '%s'", + type_name(obj->type), oid_to_hex(&obj->oid)); return; default: @@ -235,7 +237,7 @@ static inline void finish_object__ma(struct object *obj) static int finish_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; - if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) { + if (!has_object_file(&obj->oid)) { finish_object__ma(obj); return 1; } @@ -373,6 +375,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) init_revisions(&revs, prefix); revs.abbrev = DEFAULT_ABBREV; revs.commit_format = CMIT_FMT_UNSPECIFIED; + revs.do_not_die_on_missing_tree = 1; /* * Scan the argument list before invoking setup_revisions(), so 
that we diff --git a/list-objects.c b/list-objects.c index f9b51db7a7..243192af53 100644 --- a/list-objects.c +++ b/list-objects.c @@ -143,6 +143,7 @@ static void process_tree(struct traversal_context *ctx, struct rev_info *revs = ctx->revs; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + int failed_parse; if (!revs->tree_objects) return; @@ -150,7 +151,9 @@ static void process_tree(struct traversal_context *ctx, die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - if (parse_tree_gently(tree, 1) < 0) { + + failed_parse = parse_tree_gently(tree, 1); + if (failed_parse) { if (revs->ignore_missing_links) return; @@ -163,7 +166,8 @@ static void process_tree(struct traversal_context *ctx, is_promisor_object(&obj->oid)) return; - die("bad tree object %s", oid_to_hex(&obj->oid)); + if (!revs->do_not_die_on_missing_tree) + die("bad tree object %s", oid_to_hex(&obj->oid)); } strbuf_addstr(base, name); @@ -178,7 +182,8 @@ static void process_tree(struct traversal_context *ctx, if (base->len) strbuf_addch(base, '/'); - process_tree_contents(ctx, tree, base); + if (!failed_parse) + process_tree_contents(ctx, tree, base); if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { r = ctx->filter_fn(LOFS_END_TREE, obj, diff --git a/revision.h b/revision.h index c599c34da9..5118aaaa92 100644 --- a/revision.h +++ b/revision.h @@ -125,6 +125,21 @@ struct rev_info { line_level_traverse:1, tree_blobs_in_commit_order:1, + /* + * Blobs are shown without regard for their existence. + * But not so for trees: unless exclude_promisor_objects + * is set and the tree in question is a promisor object; + * OR ignore_missing_links is set, the revision walker + * dies with a "bad tree object HASH" message when + * encountering a missing tree. For callers that can + * handle missing trees and want them to be filterable + * and showable, set this to true. 
The revision walker + * will filter and show such a missing tree as usual, + * but will not attempt to recurse into this tree + * object. + */ + do_not_die_on_missing_tree:1, + /* for internal use only */ exclude_promisor_objects:1; diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 4984ca583d..2f4ea487a4 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -186,6 +186,51 @@ test_expect_success 'rev-list stops traversal at missing and promised commit' ' ! grep $FOO out ' +test_expect_success 'missing tree objects with --missing=allow-promisor and --exclude-promisor-objects' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo foo && + test_commit -C repo bar && + test_commit -C repo baz && + + promise_and_delete $(git -C repo rev-parse bar^{tree}) && + promise_and_delete $(git -C repo rev-parse foo^{tree}) && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + + git -C repo rev-list --missing=allow-promisor --objects HEAD >objs 2>rev_list_err && + test_must_be_empty rev_list_err && + # 3 commits, 3 blobs, and 1 tree + test_line_count = 7 objs && + + # Do the same for --exclude-promisor-objects, but with all trees gone. 
+ promise_and_delete $(git -C repo rev-parse baz^{tree}) && + git -C repo rev-list --exclude-promisor-objects --objects HEAD >objs 2>rev_list_err && + test_must_be_empty rev_list_err && + # 3 commits, no blobs or trees + test_line_count = 3 objs +' + +test_expect_success 'missing non-root tree object and rev-list' ' + rm -rf repo && + test_create_repo repo && + mkdir repo/dir && + echo foo >repo/dir/foo && + git -C repo add dir/foo && + git -C repo commit -m "commit dir/foo" && + + promise_and_delete $(git -C repo rev-parse HEAD:dir) && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + + git -C repo rev-list --missing=allow-any --objects HEAD >objs 2>rev_list_err && + test_must_be_empty rev_list_err && + # 1 commit and 1 tree + test_line_count = 2 objs +' + test_expect_success 'rev-list stops traversal at missing and promised tree' ' rm -rf repo && test_create_repo repo && diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 6710c8bc8c..9839b48c1c 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -59,6 +59,19 @@ test_expect_success 'verify normal and blob:none packfiles have same commits/tre test_cmp observed expected ' +test_expect_success 'get an error for missing tree object' ' + git init r5 && + echo foo >r5/foo && + git -C r5 add foo && + git -C r5 commit -m "foo" && + del=$(git -C r5 rev-parse HEAD^{tree} | sed "s|..|&/|") && + rm r5/.git/objects/$del && + test_must_fail git -C r5 pack-objects --rev --stdout 2>bad_tree <<-EOF && + HEAD + EOF + grep -q "bad tree object" bad_tree +' + # Test blob:limit=[kmg] filter. # We boundary test around the size parameter. 
The filter is strictly less than # the value, so size 500 and 1000 should have the same results, but 1001 should diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 0a37dd5f97..6d69e6a0aa 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -196,6 +196,28 @@ test_expect_success 'verify sparse:oid=oid-ish omits top-level files' ' test_cmp observed expected ' +test_expect_success 'rev-list W/ --missing=print and --missing=allow-any for trees' ' + TREE=$(git -C r3 rev-parse HEAD:dir1) && + + # Create a spare repo because we will be deleting objects from this one. + git clone r3 r3.b && + + rm r3.b/.git/objects/$(echo $TREE | sed "s|^..|&/|") && + + git -C r3.b rev-list --quiet --missing=print --objects HEAD \ + >missing_objs 2>rev_list_err && + echo "?$TREE" >expected && + test_cmp expected missing_objs && + + # do not complain when a missing tree cannot be parsed + test_must_be_empty rev_list_err && + + git -C r3.b rev-list --missing=allow-any --objects HEAD \ + >objs 2>rev_list_err && + ! grep $TREE objs && + test_must_be_empty rev_list_err +' + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive. -- cgit v1.3 From 99c9aa9579ae970c0d273ced8fb8efe9eed70a75 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:24 -0700 Subject: revision: mark non-user-given objects instead Currently, list-objects.c incorrectly treats all root trees of commits as USER_GIVEN. Also, it would be easier to mark objects that are non-user-given instead of user-given, since the places in the code where we access an object through a reference are more obvious than the places where we access an object that was given by the user. Resolve these two problems by introducing a flag NOT_USER_GIVEN that marks blobs and trees that are non-user-given, replacing USER_GIVEN. 
(Only blobs and trees are marked because this mark is only used when filtering objects, and filtering of other types of objects is not supported yet.) This fixes a bug in that git rev-list behaved differently from git pack-objects. pack-objects would *not* filter objects given explicitly on the command line and rev-list would filter. This was because the two commands used a different function to add objects to the rev_info struct. This seems to have been an oversight, and pack-objects has the correct behavior, so I added a test to make sure that rev-list now behaves properly. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 31 ++++++++++++++++++------------- revision.c | 1 - revision.h | 11 +++++++++-- t/t6112-rev-list-filters-objects.sh | 12 ++++++++++++ 4 files changed, 39 insertions(+), 16 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index 243192af53..7a1a0929db 100644 --- a/list-objects.c +++ b/list-objects.c @@ -53,7 +53,7 @@ static void process_blob(struct traversal_context *ctx, pathlen = path->len; strbuf_addstr(path, name); - if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) r = ctx->filter_fn(LOFS_BLOB, obj, path->buf, &path->buf[pathlen], ctx->filter_data); @@ -120,17 +120,19 @@ static void process_tree_contents(struct traversal_context *ctx, continue; } - if (S_ISDIR(entry.mode)) - process_tree(ctx, - lookup_tree(the_repository, entry.oid), - base, entry.path); + if (S_ISDIR(entry.mode)) { + struct tree *t = lookup_tree(the_repository, entry.oid); + t->object.flags |= NOT_USER_GIVEN; + process_tree(ctx, t, base, entry.path); + } else if (S_ISGITLINK(entry.mode)) process_gitlink(ctx, entry.oid->hash, base, entry.path); - else - process_blob(ctx, - lookup_blob(the_repository, entry.oid), - base, entry.path); + else { + struct blob *b = lookup_blob(the_repository, entry.oid); + b->object.flags |= NOT_USER_GIVEN; + 
process_blob(ctx, b, base, entry.path); + } } } @@ -171,7 +173,7 @@ static void process_tree(struct traversal_context *ctx, } strbuf_addstr(base, name); - if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) r = ctx->filter_fn(LOFS_BEGIN_TREE, obj, base->buf, &base->buf[baselen], ctx->filter_data); @@ -185,7 +187,7 @@ static void process_tree(struct traversal_context *ctx, if (!failed_parse) process_tree_contents(ctx, tree, base); - if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { + if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) { r = ctx->filter_fn(LOFS_END_TREE, obj, base->buf, &base->buf[baselen], ctx->filter_data); @@ -301,8 +303,11 @@ static void do_traverse(struct traversal_context *ctx) * an uninteresting boundary commit may not have its tree * parsed yet, but we are not going to show them anyway */ - if (get_commit_tree(commit)) - add_pending_tree(ctx->revs, get_commit_tree(commit)); + if (get_commit_tree(commit)) { + struct tree *tree = get_commit_tree(commit); + tree->object.flags |= NOT_USER_GIVEN; + add_pending_tree(ctx->revs, tree); + } ctx->show_commit(commit, ctx->show_data); if (ctx->revs->tree_blobs_in_commit_order) diff --git a/revision.c b/revision.c index 0627494378..6d355b43c3 100644 --- a/revision.c +++ b/revision.c @@ -175,7 +175,6 @@ static void add_pending_object_with_path(struct rev_info *revs, strbuf_release(&buf); return; /* do not add the commit itself */ } - obj->flags |= USER_GIVEN; add_object_array_with_path(obj, name, &revs->pending, mode, path); } diff --git a/revision.h b/revision.h index 5118aaaa92..4dc45bb9ad 100644 --- a/revision.h +++ b/revision.h @@ -20,9 +20,16 @@ #define SYMMETRIC_LEFT (1u<<8) #define PATCHSAME (1u<<9) #define BOTTOM (1u<<10) -#define USER_GIVEN (1u<<25) /* given directly by the user */ +/* + * Indicates object was reached by traversal. i.e. not given by user on + * command-line or stdin. 
+ * NEEDSWORK: NOT_USER_GIVEN doesn't apply to commits because we only support + * filtering trees and blobs, but it may be useful to support filtering commits + * in the future. + */ +#define NOT_USER_GIVEN (1u<<25) #define TRACK_LINEAR (1u<<26) -#define ALL_REV_FLAGS (((1u<<11)-1) | USER_GIVEN | TRACK_LINEAR) +#define ALL_REV_FLAGS (((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR) #define DECORATE_SHORT_REFS 1 #define DECORATE_FULL_REFS 2 diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 6d69e6a0aa..0d4d43894b 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -30,6 +30,18 @@ test_expect_success 'verify blob:none omits all 5 blobs' ' test_cmp observed expected ' +test_expect_success 'specify blob explicitly prevents filtering' ' + file_3=$(git -C r1 ls-files -s file.3 | + awk -f print_2.awk) && + + file_4=$(git -C r1 ls-files -s file.4 | + awk -f print_2.awk) && + + git -C r1 rev-list --objects --filter=blob:none HEAD $file_3 >observed && + grep -q "$file_3" observed && + test_must_fail grep -q "$file_4" observed +' + test_expect_success 'verify emitted+omitted == all' ' git -C r1 rev-list HEAD --objects \ | awk -f print_1.awk \ -- cgit v1.3 From 8b10a206f090e01ce1ac4d9a10ec769e2409e2b0 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Wed, 17 Oct 2018 17:39:15 -0700 Subject: list-objects: support for skipping tree traversal The tree:0 filter does not need to traverse the trees that it has filtered out, so optimize list-objects and list-objects-filter to skip traversing the trees entirely. Before this patch, we iterated over all children of the tree, and did nothing for all of them, which was wasteful. 
Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter.c | 11 +++++++++-- list-objects-filter.h | 6 ++++++ list-objects.c | 5 ++++- t/t6112-rev-list-filters-objects.sh | 13 +++++++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects-filter.c b/list-objects-filter.c index 09b2b05d54..765f3df3b0 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -102,9 +102,16 @@ static enum list_objects_filter_result filter_trees_none( case LOFS_BEGIN_TREE: case LOFS_BLOB: - if (filter_data->omits) + if (filter_data->omits) { oidset_insert(filter_data->omits, &obj->oid); - return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + /* _MARK_SEEN but not _DO_SHOW (hard omit) */ + return LOFR_MARK_SEEN; + } else { + /* + * Not collecting omits so no need to traverse tree. + */ + return LOFR_SKIP_TREE | LOFR_MARK_SEEN; + } case LOFS_END_TREE: assert(obj->type == OBJ_TREE); diff --git a/list-objects-filter.h b/list-objects-filter.h index a963d0274c..9c19875a41 100644 --- a/list-objects-filter.h +++ b/list-objects-filter.h @@ -20,6 +20,11 @@ * In general, objects should only be shown once, but * this result DOES NOT imply that we mark it SEEN. * + * _SKIP_TREE : Used in LOFS_BEGIN_TREE situation - indicates that + * the tree's children should not be iterated over. This + * is used as an optimization when all children will + * definitely be ignored. + * * Most of the time, you want the combination (_MARK_SEEN | _DO_SHOW) * but they can be used independently, such as when sparse-checkout * pattern matching is being applied.
@@ -41,6 +46,7 @@ enum list_objects_filter_result { LOFR_ZERO = 0, LOFR_MARK_SEEN = 1<<0, LOFR_DO_SHOW = 1<<1, + LOFR_SKIP_TREE = 1<<2, }; enum list_objects_filter_situation { diff --git a/list-objects.c b/list-objects.c index 7a1a0929db..d1e3d217c5 100644 --- a/list-objects.c +++ b/list-objects.c @@ -11,6 +11,7 @@ #include "list-objects-filter-options.h" #include "packfile.h" #include "object-store.h" +#include "trace.h" struct traversal_context { struct rev_info *revs; @@ -184,7 +185,9 @@ static void process_tree(struct traversal_context *ctx, if (base->len) strbuf_addch(base, '/'); - if (!failed_parse) + if (r & LOFR_SKIP_TREE) + trace_printf("Skipping contents of tree %s...\n", base->buf); + else if (!failed_parse) process_tree_contents(ctx, tree, base); if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) { diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index d24f9d5b5a..c6aae93b57 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -245,6 +245,19 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' ' test_cmp expected filtered_types ' +# Make sure tree:0 does not iterate through any trees. + +test_expect_success 'filter a GIANT tree through tree:0' ' + GIT_TRACE=1 git -C r3 rev-list \ + --objects --filter=tree:0 HEAD 2>filter_trace && + grep "Skipping contents of tree [.][.][.]" filter_trace >actual && + # One line for each commit traversed. + test_line_count = 2 actual && + + # Make sure no other trees were considered besides the root. + ! grep "Skipping contents of tree [^.]" filter_trace +' + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive. -- cgit v1.3