From b9fd73a234db1a272f6cbfb528bae0ead9e07bde Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 31 Jul 2025 16:56:49 +0200 Subject: refs: pass refname when invoking reflog entry callback With `refs_for_each_reflog_ent()` callers can iterate through all the reflog entries for a given reference. The callback that is being invoked for each such entry does not receive the name of the reference that we are currently iterating through. This isn't really a limiting factor, as callers can simply pass the name via the callback data. But this layout sometimes does make for a bit of an awkward calling pattern. One example: when iterating through all reflogs, and for each reflog we iterate through all refnames, we have to do some extra book keeping to track which reference name we are currently yielding reflog entries for. Change the signature of the callback function so that the reference name of the reflog gets passed through to it. Adapt callers accordingly and start using the new parameter in trivial cases. The next commit will refactor the reference migration logic to make use of this parameter so that we can simplify its logic a bit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'refs.c') diff --git a/refs.c b/refs.c index 4bd8028705..6ed0cd6ddc 100644 --- a/refs.c +++ b/refs.c @@ -1022,7 +1022,6 @@ int is_branch(const char *refname) } struct read_ref_at_cb { - const char *refname; timestamp_t at_time; int cnt; int reccnt; @@ -1052,7 +1051,8 @@ static void set_read_ref_cutoffs(struct read_ref_at_cb *cb, *cb->cutoff_cnt = cb->reccnt; } -static int read_ref_at_ent(struct object_id *ooid, struct object_id *noid, +static int read_ref_at_ent(const char *refname, + struct object_id *ooid, struct object_id *noid, const char *email UNUSED, timestamp_t timestamp, int tz, const char *message, void *cb_data) @@ -1072,14 +1072,13 @@ static int read_ref_at_ent(struct object_id *ooid, struct object_id *noid, oidcpy(cb->oid, noid); if (!oideq(&cb->ooid, noid)) warning(_("log for ref %s has gap after %s"), - cb->refname, show_date(cb->date, cb->tz, DATE_MODE(RFC2822))); + refname, show_date(cb->date, cb->tz, DATE_MODE(RFC2822))); } else if (cb->date == cb->at_time) oidcpy(cb->oid, noid); else if (!oideq(noid, cb->oid)) warning(_("log for ref %s unexpectedly ended on %s"), - cb->refname, show_date(cb->date, cb->tz, - DATE_MODE(RFC2822))); + refname, show_date(cb->date, cb->tz, DATE_MODE(RFC2822))); cb->reccnt++; oidcpy(&cb->ooid, ooid); oidcpy(&cb->noid, noid); @@ -1094,7 +1093,8 @@ static int read_ref_at_ent(struct object_id *ooid, struct object_id *noid, return 0; } -static int read_ref_at_ent_oldest(struct object_id *ooid, struct object_id *noid, +static int read_ref_at_ent_oldest(const char *refname UNUSED, + struct object_id *ooid, struct object_id *noid, const char *email UNUSED, timestamp_t timestamp, int tz, const char *message, void *cb_data) @@ -1117,7 +1117,6 @@ int read_ref_at(struct ref_store *refs, const char *refname, struct read_ref_at_cb cb; memset(&cb, 0, sizeof(cb)); - cb.refname = refname; cb.at_time = at_time; cb.cnt = cnt; cb.msg = msg; @@ -2976,14 +2975,14 @@ done: struct reflog_migration_data { uint64_t index; - const char *refname; struct ref_store *old_refs; struct ref_transaction *transaction; struct strbuf *errbuf; struct strbuf *sb, *name, *mail; }; -static int migrate_one_reflog_entry(struct object_id *old_oid, +static int migrate_one_reflog_entry(const char *refname, + struct object_id *old_oid, struct object_id *new_oid, const char *committer, timestamp_t timestamp, int tz, @@ -3006,7 +3005,7 @@ static int migrate_one_reflog_entry(struct object_id *old_oid, strbuf_reset(data->sb); strbuf_addstr(data->sb, fmt_ident(data->name->buf, data->mail->buf, WANT_BLANK_IDENT, date, 0)); - ret = ref_transaction_update_reflog(data->transaction, data->refname, + ret = ref_transaction_update_reflog(data->transaction, refname, new_oid, old_oid, data->sb->buf, msg, data->index++, data->errbuf); return ret; @@ -3016,7 +3015,6 @@ static int migrate_one_reflog(const char *refname, void *cb_data) { struct migration_data *migration_data = cb_data; struct reflog_migration_data data = { - .refname = refname, .old_refs = migration_data->old_refs, .transaction = migration_data->transaction, .errbuf = migration_data->errbuf, -- cgit v1.3-6-g1900 From 2f530e5d0ac9349ad5884a7d74a60762e4ee05f8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 31 Jul 2025 16:56:50 +0200 Subject: refs: simplify logic when migrating reflog entries When migrating reflog entries between two storage formats we have to do so via two callback-driven functions: - `migrate_one_reflog()` gets invoked via `refs_for_each_reflog()` to first list all available reflogs. - `migrate_one_reflog_entry()` gets invoked via `refs_for_each_reflog_ent()` in `migrate_one_reflog()`. Before the preceding commit we didn't have the refname available in `migrate_one_reflog_entry()`, which made it necessary to have a separate structure that we pass to the second callback so that we can propagate the refname. Now that `refs_for_each_reflog_ent()` knows to pass the refname to the callback though that indirection isn't necessary anymore. There's one catch though: we do have an update index that is also stored in the entry-specific callback data. This update index is required so that we can tell the ref backend in which order it should persist the reflog entries to disk. But that purpose can be trivially achieved by just converting it into a global counter that is used for all reflog entries, regardless of which reference they are for. The ordering will remain the same as both the update index and the refname is considered when sorting the entries. Move the index into `struct migration_data` and drop the now-unused `struct reflog_migration_data` to simplify the code a bit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) (limited to 'refs.c') diff --git a/refs.c b/refs.c index 6ed0cd6ddc..04c9ace793 100644 --- a/refs.c +++ b/refs.c @@ -2941,6 +2941,7 @@ struct migration_data { struct ref_transaction *transaction; struct strbuf *errbuf; struct strbuf sb, name, mail; + uint64_t index; }; static int migrate_one_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, @@ -2973,14 +2974,6 @@ done: return ret; } -struct reflog_migration_data { - uint64_t index; - struct ref_store *old_refs; - struct ref_transaction *transaction; - struct strbuf *errbuf; - struct strbuf *sb, *name, *mail; -}; - static int migrate_one_reflog_entry(const char *refname, struct object_id *old_oid, struct object_id *new_oid, @@ -2988,7 +2981,7 @@ static int migrate_one_reflog_entry(const char *refname, timestamp_t timestamp, int tz, const char *msg, void *cb_data) { - struct reflog_migration_data *data = cb_data; + struct migration_data *data = cb_data; struct ident_split ident; const char *date; int ret; @@ -2996,17 +2989,17 @@ static int migrate_one_reflog_entry(const char *refname, if (split_ident_line(&ident, committer, strlen(committer)) < 0) return -1; - strbuf_reset(data->name); - strbuf_add(data->name, ident.name_begin, ident.name_end - ident.name_begin); - strbuf_reset(data->mail); - strbuf_add(data->mail, ident.mail_begin, ident.mail_end - ident.mail_begin); + strbuf_reset(&data->name); + strbuf_add(&data->name, ident.name_begin, ident.name_end - ident.name_begin); + strbuf_reset(&data->mail); + strbuf_add(&data->mail, ident.mail_begin, ident.mail_end - ident.mail_begin); date = show_date(timestamp, tz, DATE_MODE(NORMAL)); - strbuf_reset(data->sb); - strbuf_addstr(data->sb, fmt_ident(data->name->buf, data->mail->buf, WANT_BLANK_IDENT, date, 0)); + strbuf_reset(&data->sb); + strbuf_addstr(&data->sb, fmt_ident(data->name.buf, data->mail.buf, WANT_BLANK_IDENT, date, 0)); ret = ref_transaction_update_reflog(data->transaction, refname, - new_oid, old_oid, data->sb->buf, + new_oid, old_oid, data->sb.buf, msg, data->index++, data->errbuf); return ret; } @@ -3014,17 +3007,8 @@ static int migrate_one_reflog_entry(const char *refname, static int migrate_one_reflog(const char *refname, void *cb_data) { struct migration_data *migration_data = cb_data; - struct reflog_migration_data data = { - .old_refs = migration_data->old_refs, - .transaction = migration_data->transaction, - .errbuf = migration_data->errbuf, - .sb = &migration_data->sb, - .name = &migration_data->name, - .mail = &migration_data->mail, - }; - return refs_for_each_reflog_ent(migration_data->old_refs, refname, - migrate_one_reflog_entry, &data); + migrate_one_reflog_entry, migration_data); } static int move_files(const char *from_path, const char *to_path, struct strbuf *errbuf) -- cgit v1.3-6-g1900 From 16c4fa26b99e6f6c24dc93575ffa884c13b1fe5f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 31 Jul 2025 16:56:54 +0200 Subject: builtin/remote: only iterate through refs that are to be renamed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When renaming a remote we also need to rename all references accordingly. But while we only need to rename references that are contained in the "refs/remotes/$OLDNAME/" namespace, we end up using `refs_for_each_rawref()` that iterates through _all_ references. We know to exit early in the callback in case we see an irrelevant reference, but ultimately this is still a waste of compute as we knowingly iterate through references that we won't ever care about. Improve this by using `refs_for_each_rawref_in()`, which knows to only iterate through (potentially broken) references in a given prefix. The following benchmark renames a remote with a single reference in a repository that has 100k unrelated references. This shows a sizeable improvement with the "files" backend: Benchmark 1: rename remote (refformat = files, revision = HEAD~) Time (mean ± σ): 42.6 ms ± 0.9 ms [User: 29.1 ms, System: 8.4 ms] Range (min … max): 40.1 ms … 43.3 ms 10 runs Benchmark 2: rename remote (refformat = files, revision = HEAD) Time (mean ± σ): 31.7 ms ± 4.0 ms [User: 19.6 ms, System: 6.9 ms] Range (min … max): 27.1 ms … 36.0 ms 10 runs Summary rename remote (refformat = files, revision = HEAD) ran 1.35 ± 0.17 times faster than rename remote (refformat = files, revision = HEAD~) The "reftable" backend shows roughly the same absolute improvement, but given that it's already significantly faster than the "files" backend this translates to a much larger relative improvement: Benchmark 1: rename remote (refformat = reftable, revision = HEAD~) Time (mean ± σ): 18.2 ms ± 0.5 ms [User: 12.7 ms, System: 3.0 ms] Range (min … max): 17.3 ms … 21.4 ms 110 runs Benchmark 2: rename remote (refformat = reftable, revision = HEAD) Time (mean ± σ): 8.8 ms ± 0.5 ms [User: 3.8 ms, System: 2.9 ms] Range (min … max): 7.5 ms … 9.9 ms 167 runs Summary rename remote (refformat = reftable, revision = HEAD) ran 2.07 ± 0.12 times faster than rename remote (refformat = reftable, revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/remote.c | 13 ++++--------- refs.c | 8 +++++++- refs.h | 2 ++ 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'refs.c') diff --git a/builtin/remote.c b/builtin/remote.c index db481f39bc..60e67f1b74 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -720,16 +720,8 @@ static int rename_one_ref(const char *old_refname, const char *referent, struct strbuf new_referent = STRBUF_INIT; struct strbuf new_refname = STRBUF_INIT; struct rename_info *rename = cb_data; - const char *ptr = old_refname; int error; - if (!skip_prefix(ptr, "refs/remotes/", &ptr) || - !skip_prefix(ptr, rename->old_name, &ptr) || - !skip_prefix(ptr, "/", &ptr)) { - error = 0; - goto out; - } - compute_renamed_ref(rename, old_refname, &new_refname); if (flags & REF_ISSYMREF) { @@ -932,7 +924,10 @@ static int mv(int argc, const char **argv, const char *prefix, rename.progress = start_delayed_progress(the_repository, _("Renaming remote references"), 0); - result = refs_for_each_rawref(get_main_ref_store(the_repository), + strbuf_reset(&buf); + strbuf_addf(&buf, "refs/remotes/%s/", rename.old_name); + + result = refs_for_each_rawref_in(get_main_ref_store(the_repository), buf.buf, rename_one_ref, &rename); if (result < 0) die(_("queueing remote ref renames failed: %s"), rename.err->buf); diff --git a/refs.c b/refs.c index 04c9ace793..7e2f02dddf 100644 --- a/refs.c +++ b/refs.c @@ -1839,7 +1839,13 @@ int refs_for_each_namespaced_ref(struct ref_store *refs, int refs_for_each_rawref(struct ref_store *refs, each_ref_fn fn, void *cb_data) { - return do_for_each_ref(refs, "", NULL, fn, 0, + return refs_for_each_rawref_in(refs, "", fn, cb_data); +} + +int refs_for_each_rawref_in(struct ref_store *refs, const char *prefix, + each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(refs, prefix, NULL, fn, 0, DO_FOR_EACH_INCLUDE_BROKEN, cb_data); } diff --git a/refs.h b/refs.h index 0bf50ce25c..19fb1d924a 100644 --- a/refs.h +++ b/refs.h @@ -428,6 +428,8 @@ int refs_for_each_namespaced_ref(struct ref_store *refs, /* can be used to learn about broken ref and symref */ int refs_for_each_rawref(struct ref_store *refs, each_ref_fn fn, void *cb_data); +int refs_for_each_rawref_in(struct ref_store *refs, const char *prefix, + each_ref_fn fn, void *cb_data); /* * Iterates over all refs including root refs, i.e. pseudorefs and HEAD. -- cgit v1.3-6-g1900