From d6ec08788e667d4f556e9c2d97bbd7adb7e582be Mon Sep 17 00:00:00 2001 From: René Scharfe Date: Fri, 18 Jul 2025 11:39:06 +0200 Subject: commit: convert pop_most_recent_commit() to prio_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pop_most_recent_commit() calls commit_list_insert_by_date() for parent commits, which is itself called in a loop. This can lead to quadratic complexity if there are many merges. Replace the commit_list with a prio_queue to ensure logarithmic worst case complexity and convert all three users. Add a performance test that exercises one of them using a pathological history that consists of 50% merges and 50% root commits to demonstrate the speedup: Test v2.50.1 HEAD ---------------------------------------------------------------------- 1501.2: rev-parse ':/65535' 2.48(2.47+0.00) 0.20(0.19+0.00) -91.9% Alas, sane histories don't benefit from the conversion much, and traversing Git's own history takes a 1% performance hit on my machine: $ hyperfine -w3 -L git ./git_2.50.1,./git '{git} rev-parse :/^Initial.revision' Benchmark 1: ./git_2.50.1 rev-parse :/^Initial.revision Time (mean ± σ): 1.071 s ± 0.004 s [User: 1.052 s, System: 0.017 s] Range (min … max): 1.067 s … 1.078 s 10 runs Benchmark 2: ./git rev-parse :/^Initial.revision Time (mean ± σ): 1.079 s ± 0.003 s [User: 1.060 s, System: 0.017 s] Range (min … max): 1.074 s … 1.083 s 10 runs Summary ./git_2.50.1 rev-parse :/^Initial.revision ran 1.01 ± 0.00 times faster than ./git rev-parse :/^Initial.revision Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- walker.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'walker.c') diff --git a/walker.c b/walker.c index b470d43e54..c6d0e20c72 100644 --- a/walker.c +++ b/walker.c @@ -14,6 +14,7 @@ #include "blob.h" #include "refs.h" #include "progress.h" +#include "prio-queue.h" static struct object_id current_commit_oid; @@ -78,7 +79,7 @@ static int process_tree(struct walker *walker, struct tree *tree) #define SEEN (1U << 1) #define TO_SCAN (1U << 2) -static struct commit_list *complete = NULL; +static struct prio_queue complete = { compare_commits_by_commit_date }; static int process_commit(struct walker *walker, struct commit *commit) { @@ -87,7 +88,10 @@ static int process_commit(struct walker *walker, struct commit *commit) if (repo_parse_commit(the_repository, commit)) return -1; - while (complete && complete->item->date >= commit->date) { + while (complete.nr) { + struct commit *item = prio_queue_peek(&complete); + if (item->date < commit->date) + break; pop_most_recent_commit(&complete, COMPLETE); } @@ -233,7 +237,7 @@ static int mark_complete(const char *path UNUSED, if (commit) { commit->object.flags |= COMPLETE; - commit_list_insert(commit, &complete); + prio_queue_put(&complete, commit); } return 0; } @@ -302,7 +306,6 @@ int walker_fetch(struct walker *walker, int targets, char **target, if (!walker->get_recover) { refs_for_each_ref(get_main_ref_store(the_repository), mark_complete, NULL); - commit_list_sort_by_date(&complete); } for (i = 0; i < targets; i++) { -- cgit v1.3