From bdbebe5714b25dc9d215b48efbb80f410925d7dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:10 +0200 Subject: refs: introduce wrapper struct for `each_ref_fn` The `each_ref_fn` callback function type is used across our code base for several different functions that iterate through reference. There's a bunch of callbacks implementing this type, which makes any changes to the callback signature extremely noisy. An example of the required churn is e8207717f1 (refs: add referent to each_ref_fn, 2024-08-09): adding a single argument required us to change 48 files. It was already proposed back then [1] that we might want to introduce a wrapper structure to alleviate the pain going forward. While this of course requires the same kind of global refactoring as just introducing a new parameter, it at least allows us to more change the callback type afterwards by just extending the wrapper structure. One counterargument to this refactoring is that it makes the structure more opaque. While it is obvious which callsites need to be fixed up when we change the function type, it's not obvious anymore once we use a structure. That being said, we only have a handful of sites that actually need to populate this wrapper structure: our ref backends, "refs/iterator.c" as well as very few sites that invoke the iterator callback functions directly. Introduce this wrapper structure so that we can adapt the iterator interfaces more readily. [1]: Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 30cc488d8a..6837fa60a9 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2954,14 +2954,15 @@ struct ref_filter_cbdata { * A call-back given to for_each_ref(). Filter refs and keep them for * later object processing. */ -static int filter_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_one(const struct reference *ref, void *cb_data) { struct ref_filter_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (ref) - ref_array_append(ref_cbdata->array, ref); + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (item) + ref_array_append(ref_cbdata->array, item); return 0; } @@ -2990,17 +2991,18 @@ struct ref_filter_and_format_cbdata { } internal; }; -static int filter_and_format_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_and_format_one(const struct reference *ref, void *cb_data) { struct ref_filter_and_format_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (!ref) + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (!item) return 0; - if (format_ref_array_item(ref, ref_cbdata->format, &output, &err)) + if (format_ref_array_item(item, ref_cbdata->format, &output, &err)) die("%s", err.buf); if (output.len || !ref_cbdata->format->array_opts.omit_empty) { @@ -3010,7 +3012,7 @@ static int filter_and_format_one(const char *refname, const char *referent, cons strbuf_release(&output); strbuf_release(&err); - free_array_item(ref); + free_array_item(item); /* * Increment the running count of refs that match the filter. If -- cgit v1.3 From 70b783c3a194746d8b747677615f33b94454146f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:16 +0200 Subject: ref-filter: propagate peeled object ID When queueing a reference in the "ref-filter" subsystem we end up creating a new ref array item that contains the reference's info. One bit of info that we always discard though is the peeled object ID, and because of that we are forced to use `peel_iterated_oid()`. Refactor the code to propagate the peeled object ID via the ref array, if available. This allows us to manually peel tags without having to go through the object database. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/ls-remote.c | 2 +- builtin/tag.c | 2 +- builtin/verify-tag.c | 2 +- ref-filter.c | 66 ++++++++++++++++++++++++++++++---------------------- ref-filter.h | 5 +++- 5 files changed, 45 insertions(+), 32 deletions(-) (limited to 'ref-filter.c') diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index df09000b30..fe77829557 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -156,7 +156,7 @@ int cmd_ls_remote(int argc, continue; if (!tail_match(&pattern, ref->name)) continue; - item = ref_array_push(&ref_array, ref->name, &ref->old_oid); + item = ref_array_push(&ref_array, ref->name, &ref->old_oid, NULL); item->symref = xstrdup_or_null(ref->symref); } diff --git a/builtin/tag.c b/builtin/tag.c index f0665af3ac..01eba90c5c 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -153,7 +153,7 @@ static int verify_tag(const char *name, const char *ref UNUSED, return -1; if (format->format) - pretty_print_ref(name, oid, format); + pretty_print_ref(name, oid, NULL, format); return 0; } diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c index cd6bc11095..558121eaa1 100644 --- a/builtin/verify-tag.c +++ b/builtin/verify-tag.c @@ -67,7 +67,7 @@ int cmd_verify_tag(int argc, } if (format.format) - pretty_print_ref(name, &oid, &format); + pretty_print_ref(name, &oid, NULL, &format); } return had_error; } diff --git a/ref-filter.c b/ref-filter.c index 6837fa60a9..7fd8babec8 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2578,8 +2578,15 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) * If it is a tag object, see if we use the peeled value. If we do, * grab the peeled OID. */ - if (need_tagged && peel_iterated_oid(the_repository, &obj->oid, &oi_deref.oid)) - die("bad tag"); + if (need_tagged) { + if (!is_null_oid(&ref->peeled_oid)) { + oidcpy(&oi_deref.oid, &ref->peeled_oid); + } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + /* We managed to peel the object ourselves. */ + } else { + die("bad tag"); + } + } return get_object(ref, 1, &obj, &oi_deref, err); } @@ -2807,12 +2814,15 @@ static int match_points_at(struct oid_array *points_at, * Callers can then fill in other struct members at their leisure. */ static struct ref_array_item *new_ref_array_item(const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { struct ref_array_item *ref; FLEX_ALLOC_STR(ref, refname, refname); oidcpy(&ref->objectname, oid); + if (peeled_oid) + oidcpy(&ref->peeled_oid, peeled_oid); ref->rest = NULL; return ref; @@ -2826,9 +2836,10 @@ static void ref_array_append(struct ref_array *array, struct ref_array_item *ref struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { - struct ref_array_item *ref = new_ref_array_item(refname, oid); + struct ref_array_item *ref = new_ref_array_item(refname, oid, peeled_oid); ref_array_append(array, ref); return ref; } @@ -2871,25 +2882,25 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname) return ref_kind_from_refname(refname); } -static struct ref_array_item *apply_ref_filter(const char *refname, const char *referent, const struct object_id *oid, - int flag, struct ref_filter *filter) +static struct ref_array_item *apply_ref_filter(const struct reference *ref, + struct ref_filter *filter) { - struct ref_array_item *ref; + struct ref_array_item *item; struct commit *commit = NULL; unsigned int kind; - if (flag & REF_BAD_NAME) { - warning(_("ignoring ref with broken name %s"), refname); + if (ref->flags & REF_BAD_NAME) { + warning(_("ignoring ref with broken name %s"), ref->name); return NULL; } - if (flag & REF_ISBROKEN) { - warning(_("ignoring broken ref %s"), refname); + if (ref->flags & REF_ISBROKEN) { + warning(_("ignoring broken ref %s"), ref->name); return NULL; } /* Obtain the current ref kind from filter_ref_kind() and ignore unwanted refs. */ - kind = filter_ref_kind(filter, refname); + kind = filter_ref_kind(filter, ref->name); /* * Generally HEAD refs are printed with special description denoting a rebase, @@ -2902,13 +2913,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * else if (!(kind & filter->kind)) return NULL; - if (!filter_pattern_match(filter, refname)) + if (!filter_pattern_match(filter, ref->name)) return NULL; - if (filter_exclude_match(filter, refname)) + if (filter_exclude_match(filter, ref->name)) return NULL; - if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname)) + if (filter->points_at.nr && !match_points_at(&filter->points_at, ref->oid, ref->name)) return NULL; /* @@ -2918,7 +2929,7 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * */ if (filter->reachable_from || filter->unreachable_from || filter->with_commit || filter->no_commit || filter->verbose) { - commit = lookup_commit_reference_gently(the_repository, oid, 1); + commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!commit) return NULL; /* We perform the filtering for the '--contains' option... */ @@ -2936,13 +2947,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * * to do its job and the resulting list may yet to be pruned * by maxcount logic. */ - ref = new_ref_array_item(refname, oid); - ref->commit = commit; - ref->flag = flag; - ref->kind = kind; - ref->symref = xstrdup_or_null(referent); + item = new_ref_array_item(ref->name, ref->oid, ref->peeled_oid); + item->commit = commit; + item->flag = ref->flags; + item->kind = kind; + item->symref = xstrdup_or_null(ref->target); - return ref; + return item; } struct ref_filter_cbdata { @@ -2959,8 +2970,7 @@ static int filter_one(const struct reference *ref, void *cb_data) struct ref_filter_cbdata *ref_cbdata = cb_data; struct ref_array_item *item; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (item) ref_array_append(ref_cbdata->array, item); @@ -2997,8 +3007,7 @@ static int filter_and_format_one(const struct reference *ref, void *cb_data) struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (!item) return 0; @@ -3585,13 +3594,14 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma } void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format) { struct ref_array_item *ref_item; struct strbuf output = STRBUF_INIT; struct strbuf err = STRBUF_INIT; - ref_item = new_ref_array_item(name, oid); + ref_item = new_ref_array_item(name, oid, peeled_oid); ref_item->kind = ref_kind_from_refname(name); if (format_ref_array_item(ref_item, format, &output, &err)) die("%s", err.buf); diff --git a/ref-filter.h b/ref-filter.h index 235c60f79c..120221b47f 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -41,6 +41,7 @@ enum ref_sorting_order { struct ref_array_item { struct object_id objectname; + struct object_id peeled_oid; const char *rest; int flag; unsigned int kind; @@ -187,6 +188,7 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma * name must be a fully qualified refname. */ void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format); /* @@ -195,7 +197,8 @@ void pretty_print_ref(const char *name, const struct object_id *oid, */ struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid); + const struct object_id *oid, + const struct object_id *peeled_oid); /* * If the provided format includes ahead-behind atoms, then compute the -- cgit v1.3 From 7ec85185b197ce1cd28721a6f4415fb9db5cd42f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:20 +0200 Subject: object: add flag to `peel_object()` to verify object type When peeling a tag to a non-tag object we repeatedly call `parse_object()` on the tagged object until we find the first object that isn't a tag. While this feels sensible at first, there is a big catch here: `parse_object()` doesn't actually verify the type of the tagged object. The relevant code path here eventually ends up in `parse_tag_buffer()`. Here, we parse the various fields of the tag, including the "type". Once we've figured out the type and the tagged object ID, we call one of the `lookup_${type}()` functions for whatever type we have found. There is two possible outcomes in the successful case: 1. The object is already part of our cached objects. In that case we double-check whether the type we're trying to look up matches the type that was cached. 2. The object is _not_ part of our cached objects. In that case, we simply create a new object with the expected type, but we don't parse that object. In the first case we might notice type mismatches, but only in the case where our cache has the object with the correct type. In the second case, we'll blindly assume that the type is correct and then go with it. We'll only notice that the type might be wrong when we try to parse the object at a later point. Now arguably, we could change `parse_tag_buffer()` to verify the tagged object's type for us. But that would have the effect that such a tag cannot be parsed at all anymore, and we have a small bunch of tests for exactly this case that assert we still can open such tags. So this change does not feel like something we can retroactively tighten, even though one shouldn't ever hit such corrupted tags. Instead, add a new `flags` field to `peel_object()` that allows the caller to opt in to strict object verification. This will be wired up at a subset of callsites over the next few commits. Note that this change also inlines `deref_tag_noverify()`. There's only been two callsites of that function, the one we're changing and one in our test helpers. The latter callsite can trivially use `deref_tag()` instead, so by inlining the function we avoid having to pass down the flag. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object.c | 20 +++++++++++++++++--- object.h | 15 ++++++++++++++- ref-filter.c | 2 +- refs.c | 2 +- refs/packed-backend.c | 5 ++--- refs/reftable-backend.c | 4 ++-- t/helper/test-reach.c | 2 +- tag.c | 12 ------------ tag.h | 1 - 9 files changed, 38 insertions(+), 25 deletions(-) (limited to 'ref-filter.c') diff --git a/object.c b/object.c index 986114a6db..e72b0ed436 100644 --- a/object.c +++ b/object.c @@ -209,11 +209,12 @@ struct object *lookup_object_by_type(struct repository *r, enum peel_status peel_object(struct repository *r, const struct object_id *name, - struct object_id *oid) + struct object_id *oid, + unsigned flags) { struct object *o = lookup_unknown_object(r, name); - if (o->type == OBJ_NONE) { + if (o->type == OBJ_NONE || flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { int type = odb_read_object_info(r->objects, name, NULL); if (type < 0 || !object_as_type(o, type, 0)) return PEEL_INVALID; @@ -222,7 +223,20 @@ enum peel_status peel_object(struct repository *r, if (o->type != OBJ_TAG) return PEEL_NON_TAG; - o = deref_tag_noverify(r, o); + while (o && o->type == OBJ_TAG) { + o = parse_object(r, &o->oid); + if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) { + o = ((struct tag *)o)->tagged; + + if (flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { + int type = odb_read_object_info(r->objects, &o->oid, NULL); + if (type < 0 || !object_as_type(o, type, 0)) + return PEEL_INVALID; + } + } else { + o = NULL; + } + } if (!o) return PEEL_INVALID; diff --git a/object.h b/object.h index 8c3c1c46e1..1499f63d50 100644 --- a/object.h +++ b/object.h @@ -287,6 +287,17 @@ enum peel_status { PEEL_BROKEN = -4 }; +enum peel_object_flags { + /* + * Always verify the object type, even in the case where the looked-up + * object already has an object type. This can be useful when the + * stored object type may be invalid. One such case is when looking up + * objects via tags, where we blindly trust the object type declared by + * the tag. + */ + PEEL_OBJECT_VERIFY_OBJECT_TYPE = (1 << 0), +}; + /* * Peel the named object; i.e., if the object is a tag, resolve the * tag recursively until a non-tag is found. If successful, store the @@ -295,7 +306,9 @@ enum peel_status { * and leave oid unchanged. */ enum peel_status peel_object(struct repository *r, - const struct object_id *name, struct object_id *oid); + const struct object_id *name, + struct object_id *oid, + unsigned flags); struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/ref-filter.c b/ref-filter.c index 7fd8babec8..9a8ed8c8fc 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/refs.c b/refs.c index 9d8f0a9ca4..a41a94ae55 100644 --- a/refs.c +++ b/refs.c @@ -2333,7 +2333,7 @@ int reference_get_peeled_oid(struct repository *repo, return 0; } - return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; + return peel_object(repo, ref->oid, peeled_oid, 0) ? -1 : 0; } int refs_update_symref(struct ref_store *refs, const char *ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 6fa229edd0..4752d3f398 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1527,9 +1527,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re i++; } else { struct object_id peeled; - int peel_error = peel_object(refs->base.repo, - &update->new_oid, - &peeled); + int peel_error = peel_object(refs->base.repo, &update->new_oid, + &peeled, 0); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e329d4a423..9febb2322c 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1632,7 +1632,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.refname = (char *)u->refname; ref.update_index = ts; - peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled); + peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, 0); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); @@ -2497,7 +2497,7 @@ static int write_reflog_expiry_table(struct reftable_writer *writer, void *cb_da ref.refname = (char *)arg->refname; ref.update_index = ts; - if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled)) { + if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled, 0)) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); memcpy(ref.value.val2.value, arg->update_oid.hash, GIT_MAX_RAWSZ); diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 028ec00306..c58c93800f 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -63,7 +63,7 @@ int cmd__reach(int ac, const char **av) die("failed to resolve %s", buf.buf + 2); orig = parse_object(r, &oid); - peeled = deref_tag_noverify(the_repository, orig); + peeled = deref_tag(the_repository, orig, NULL, 0); if (!peeled) die("failed to load commit for input %s resulting in oid %s", diff --git a/tag.c b/tag.c index 1d52686ee1..f5c232d2f1 100644 --- a/tag.c +++ b/tag.c @@ -94,18 +94,6 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war return o; } -struct object *deref_tag_noverify(struct repository *r, struct object *o) -{ - while (o && o->type == OBJ_TAG) { - o = parse_object(r, &o->oid); - if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) - o = ((struct tag *)o)->tagged; - else - o = NULL; - } - return o; -} - struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); diff --git a/tag.h b/tag.h index c49d7c19ad..ef12a61037 100644 --- a/tag.h +++ b/tag.h @@ -16,7 +16,6 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u int parse_tag(struct tag *item); void release_tag_memory(struct tag *t); struct object *deref_tag(struct repository *r, struct object *, const char *, int); -struct object *deref_tag_noverify(struct repository *r, struct object *); int gpg_verify_tag(const struct object_id *oid, const char *name_to_report, unsigned flags); struct object_id *get_tagged_oid(struct tag *tag); -- cgit v1.3 From e66077ae45813a5ca269a7d676310a243bdcc1c2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:22 +0200 Subject: ref-filter: detect broken tags when dereferencing them Users can ask git-for-each-ref(1) to peel tags and return information of the tagged object by adding an asterisk to the format, like for example "%(*$objectname)". If so, git-for-each-ref(1) peels that object to the first non-tag object and then returns its values. As mentioned in preceding commits, it can happen that the tagged object type and the claimed object type differ, effectively resulting in a corrupt tag. git-for-each-ref(1) would notice this mismatch, print an error and then bail out when trying to peel the tag. But we only notice this corruption in some very specific edge cases! While we have a test in "t/for-each-ref-tests.sh" that verifies the above scenario, this test is specifically crafted to detect the issue at hand. Namely, we create two tags: - One tag points to a specific object with the correct type. - The other tag points to the *same* object with a different type. The fact that both tags point to the same object is important here: `peel_object()` wouldn't notice the corruption if the tagged objects were different. The root cause is that `peel_object()` calls `lookup_${type}()` eventually, where the type is the same type declared in the tag object. Consequently, when we have two tags pointing to the same object but with different declared types we'll call two different lookup functions. The first lookup will store the object with an unverified type A, whereas the second lookup will try to look up the object with a different unverified type B. And it is only now that we notice the discrepancy in object types, even though type A could've already been the wrong type. Fix the issue by verifying the object type in `populate_value()`. With this change we'll also notice type mismatches when only dereferencing a tag once. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 3 ++- t/for-each-ref-tests.sh | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 9a8ed8c8fc..c54025d6b4 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, + PEEL_OBJECT_VERIFY_OBJECT_TYPE)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/t/for-each-ref-tests.sh b/t/for-each-ref-tests.sh index e3ad19298a..4593be5fd5 100644 --- a/t/for-each-ref-tests.sh +++ b/t/for-each-ref-tests.sh @@ -1809,7 +1809,9 @@ test_expect_success "${git_for_each_ref} reports broken tags" ' bad=$(git hash-object -w -t tag bad) && git update-ref refs/tags/broken-tag-bad $bad && test_must_fail ${git_for_each_ref} --format="%(*objectname)" \ - refs/tags/broken-tag-* + refs/tags/broken-tag-* && + test_must_fail ${git_for_each_ref} --format="%(*objectname)" \ + refs/tags/broken-tag-bad ' test_expect_success 'set up tag with signature and no blank lines' ' -- cgit v1.3 From a29e2e8fe7e3935e23d2a03dc429cc9c2e68bfbe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:23 +0200 Subject: ref-filter: parse objects on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When formatting an arbitrary object we parse that object regardless of whether or not we actually need any parsed data. In fact, many of the atoms we have don't require any. Refactor the code so that we parse the data on demand when we see an atom that wants to access the objects. This leads to a small speedup, for example in the Chromium repository with around 40000 refs: Benchmark 1: for-each-ref --format='%(raw)' (HEAD~) Time (mean ± σ): 388.7 ms ± 1.1 ms [User: 322.2 ms, System: 65.0 ms] Range (min … max): 387.3 ms … 390.8 ms 10 runs Benchmark 2: for-each-ref --format='%(raw)' (HEAD) Time (mean ± σ): 344.7 ms ± 0.7 ms [User: 287.8 ms, System: 55.1 ms] Range (min … max): 343.9 ms … 345.7 ms 10 runs Summary for-each-ref --format='%(raw)' (HEAD) ran 1.13 ± 0.00 times faster than for-each-ref --format='%(raw)' (HEAD~) With this change, we now spend ~90% of the time decompressing objects, which is almost as good as it gets regarding git-for-each-ref(1)'s own infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 142 ++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 106 insertions(+), 36 deletions(-) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index c54025d6b4..7cfcd5c355 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -91,6 +91,7 @@ static struct expand_data { struct object_id delta_base_oid; void *content; + struct object *maybe_object; struct object_info info; } oi, oi_deref; @@ -1475,11 +1476,29 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_ } } +static struct object *get_or_parse_object(struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) +{ + if (!data->maybe_object) { + data->maybe_object = parse_object_buffer(the_repository, &data->oid, data->type, + data->size, data->content, eaten); + if (!data->maybe_object) { + strbuf_addf(err, _("parse_object_buffer failed on %s for %s"), + oid_to_hex(&data->oid), refname); + return NULL; + } + } + + return data->maybe_object; +} + /* See grab_values */ -static void grab_tag_values(struct atom_value *val, int deref, struct object *obj) +static int grab_tag_values(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { + struct tag *tag = NULL; int i; - struct tag *tag = (struct tag *) obj; for (i = 0; i < used_atom_cnt; i++) { const char *name = used_atom[i].name; @@ -1487,6 +1506,14 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob struct atom_value *v = &val[i]; if (!!deref != (*name == '*')) continue; + + if (!tag) { + tag = (struct tag *) get_or_parse_object(data, refname, + err, eaten); + if (!tag) + return -1; + } + if (deref) name++; if (atom_type == ATOM_TAG) @@ -1496,22 +1523,35 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob else if (atom_type == ATOM_OBJECT && tag->tagged) v->s = xstrdup(oid_to_hex(&tag->tagged->oid)); } + + return 0; } /* See grab_values */ -static void grab_commit_values(struct atom_value *val, int deref, struct object *obj) +static int grab_commit_values(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { int i; - struct commit *commit = (struct commit *) obj; + struct commit *commit = NULL; for (i = 0; i < used_atom_cnt; i++) { const char *name = used_atom[i].name; enum atom_type atom_type = used_atom[i].atom_type; struct atom_value *v = &val[i]; + if (!!deref != (*name == '*')) continue; if (deref) name++; + + if (!commit) { + commit = (struct commit *) get_or_parse_object(data, refname, + err, eaten); + if (!commit) + return -1; + } + if (atom_type == ATOM_TREE && grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i])) continue; @@ -1531,6 +1571,8 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object v->s = strbuf_detach(&s, NULL); } } + + return 0; } static const char *find_wholine(const char *who, int wholen, const char *buf) @@ -1759,10 +1801,12 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void } } -static void grab_signature(struct atom_value *val, int deref, struct object *obj) +static int grab_signature(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { int i; - struct commit *commit = (struct commit *) obj; + struct commit *commit = NULL; struct signature_check sigc = { 0 }; int signature_checked = 0; @@ -1790,6 +1834,13 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj continue; if (!signature_checked) { + if (!commit) { + commit = (struct commit *) get_or_parse_object(data, refname, + err, eaten); + if (!commit) + return -1; + } + check_commit_signature(commit, &sigc); signature_checked = 1; } @@ -1843,6 +1894,8 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj if (signature_checked) signature_check_clear(&sigc); + + return 0; } static void find_subpos(const char *buf, @@ -1920,9 +1973,8 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size } static void grab_describe_values(struct atom_value *val, int deref, - struct object *obj) + struct expand_data *data) { - struct commit *commit = (struct commit *)obj; int i; for (i = 0; i < used_atom_cnt; i++) { @@ -1944,7 +1996,7 @@ static void grab_describe_values(struct atom_value *val, int deref, cmd.git_cmd = 1; strvec_push(&cmd.args, "describe"); strvec_pushv(&cmd.args, atom->u.describe_args.v); - strvec_push(&cmd.args, oid_to_hex(&commit->object.oid)); + strvec_push(&cmd.args, oid_to_hex(&data->oid)); if (pipe_command(&cmd, NULL, 0, &out, 0, &err, 0) < 0) { error(_("failed to run 'describe'")); v->s = xstrdup(""); @@ -2066,24 +2118,36 @@ static void fill_missing_values(struct atom_value *val) * pointed at by the ref itself; otherwise it is the object the * ref (which is a tag) refers to. */ -static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data) +static int grab_values(struct atom_value *val, int deref, struct expand_data *data, + const char *refname, struct strbuf *err, int *eaten) { void *buf = data->content; + int ret; - switch (obj->type) { + switch (data->type) { case OBJ_TAG: - grab_tag_values(val, deref, obj); + ret = grab_tag_values(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + grab_sub_body_contents(val, deref, data); grab_person("tagger", val, deref, buf); - grab_describe_values(val, deref, obj); + grab_describe_values(val, deref, data); break; case OBJ_COMMIT: - grab_commit_values(val, deref, obj); + ret = grab_commit_values(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + grab_sub_body_contents(val, deref, data); grab_person("author", val, deref, buf); grab_person("committer", val, deref, buf); - grab_signature(val, deref, obj); - grab_describe_values(val, deref, obj); + + ret = grab_signature(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + + grab_describe_values(val, deref, data); break; case OBJ_TREE: /* grab_tree_values(val, deref, obj, buf, sz); */ @@ -2094,8 +2158,12 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s grab_sub_body_contents(val, deref, data); break; default: - die("Eh? Object of type %d?", obj->type); + die("Eh? Object of type %d?", data->type); } + + ret = 0; +out: + return ret; } static inline char *copy_advance(char *dst, const char *src) @@ -2292,38 +2360,41 @@ static const char *get_refname(struct used_atom *atom, struct ref_array_item *re return show_ref(&atom->u.refname, ref->refname); } -static int get_object(struct ref_array_item *ref, int deref, struct object **obj, +static int get_object(struct ref_array_item *ref, int deref, struct expand_data *oi, struct strbuf *err) { - /* parse_object_buffer() will set eaten to 0 if free() will be needed */ - int eaten = 1; + /* parse_object_buffer() will set eaten to 1 if free() will be needed */ + int eaten = 0; + int ret; + if (oi->info.contentp) { /* We need to know that to use parse_object_buffer properly */ oi->info.sizep = &oi->size; oi->info.typep = &oi->type; } + if (odb_read_object_info_extended(the_repository->objects, &oi->oid, &oi->info, - OBJECT_INFO_LOOKUP_REPLACE)) - return strbuf_addf_ret(err, -1, _("missing object %s for %s"), - oid_to_hex(&oi->oid), ref->refname); + OBJECT_INFO_LOOKUP_REPLACE)) { + ret = strbuf_addf_ret(err, -1, _("missing object %s for %s"), + oid_to_hex(&oi->oid), ref->refname); + goto out; + } if (oi->info.disk_sizep && oi->disk_size < 0) BUG("Object size is less than zero."); if (oi->info.contentp) { - *obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten); - if (!*obj) { - if (!eaten) - free(oi->content); - return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"), - oid_to_hex(&oi->oid), ref->refname); - } - grab_values(ref->value, deref, *obj, oi); + ret = grab_values(ref->value, deref, oi, ref->refname, err, &eaten); + if (ret < 0) + goto out; } grab_common_values(ref->value, deref, oi); + ret = 0; + +out: if (!eaten) free(oi->content); - return 0; + return ret; } static void populate_worktree_map(struct hashmap *map, struct worktree **worktrees) @@ -2376,7 +2447,6 @@ static char *get_worktree_path(const struct ref_array_item *ref) */ static int populate_value(struct ref_array_item *ref, struct strbuf *err) { - struct object *obj; int i; struct object_info empty = OBJECT_INFO_INIT; int ahead_behind_atoms = 0; @@ -2564,14 +2634,14 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) oi.oid = ref->objectname; - if (get_object(ref, 0, &obj, &oi, err)) + if (get_object(ref, 0, &oi, err)) return -1; /* * If there is no atom that wants to know about tagged * object, we are done. */ - if (!need_tagged || (obj->type != OBJ_TAG)) + if (!need_tagged || (oi.type != OBJ_TAG)) return 0; /* @@ -2589,7 +2659,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) } } - return get_object(ref, 1, &obj, &oi_deref, err); + return get_object(ref, 1, &oi_deref, err); } /* -- cgit v1.3 From bea37f1d647c6b17896eb3f0c210ac8dfc27b6d7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 15:36:13 +0100 Subject: ref-filter: fix stale parsed objects In 054f5f457e (ref-filter: parse objects on demand, 2025-10-23) we have started to skip parsing some objects in case we don't need to access their values in the first place. This was done by introducing a new member `struct expand_data::maybe_object` that gets populated on demand via `get_or_parse_object()`. This has led to a regression though where the object now gets reused because we don't reset it properly. The `oi` structure is declared in global scope, and there is no single place where we reset it before invoking `get_object()`. The consequence is that the `maybe_object` member doesn't get reset across calls, so subsequent calls will end up reusing the same object. This is only an issue for a subset of retrieved values, as not all of the infrastructure ends up calling `get_or_parse_object()`. So the effect is limited, which is probably why the issue wasn't detected earlier. Fix the issue by resetting `maybe_object` in `get_object()`. Reported-by: Junio C Hamano Based-on-patch-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 2 ++ t/t7004-tag.sh | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'ref-filter.c') diff --git a/ref-filter.c b/ref-filter.c index 7cfcd5c355..d8667c569a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2367,6 +2367,8 @@ static int get_object(struct ref_array_item *ref, int deref, int eaten = 0; int ret; + oi->maybe_object = NULL; + if (oi->info.contentp) { /* We need to know that to use parse_object_buffer properly */ oi->info.sizep = &oi->size; diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index 10835631ca..d1388cfdf4 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -2332,4 +2332,24 @@ test_expect_success 'If tag cannot be created then tag message file is not unlin test_path_exists .git/TAG_EDITMSG ' +test_expect_success 'annotated tag version sort' ' + git tag -a -m "sample 1.0" vsample-1.0 && + git tag -a -m "sample 2.0" vsample-2.0 && + git tag -a -m "sample 10.0" vsample-10.0 && + cat >expect <<-EOF && + vsample-1.0 + vsample-2.0 + vsample-10.0 + EOF + + git tag --list --sort=version:tag vsample-\* >actual && + test_cmp expect actual && + + # Ensure that we also handle this case alright in the case we have the + # peeled values cached e.g. via the packed-refs file. + git pack-refs --all && + git tag --list --sort=version:tag vsample-\* && + test_cmp expect actual +' + test_done -- cgit v1.3