aboutsummaryrefslogtreecommitdiff
path: root/contrib/subtree/git-subtree.sh
diff options
context:
space:
mode:
authorColin Stagner <ask+git@howdoi.land>2026-01-09 19:18:11 -0600
committerJunio C Hamano <gitster@pobox.com>2026-01-09 20:21:43 -0800
commit28a7e27cff717e5ef91f7445e6a418068608082d (patch)
tree808a22448cf5bae9b88e07d62a5c134aa8e51c61 /contrib/subtree/git-subtree.sh
parentd529f3a197364881746f558e5652f0236131eb86 (diff)
downloadgit-28a7e27cff717e5ef91f7445e6a418068608082d.tar.xz
contrib/subtree: detect rewritten subtree commits
git subtree split --prefix P detects splits that are outside of path prefix `P` and prunes them from history graph processing. This improves the performance of repeated `split --rejoin` with many different prefixes. Both before and after 83f9dad7d6 (contrib/subtree: fix split with squashed subtrees, 2025-09-09), the pruning logic does not detect **rebased** or **cherry-picked** git-subtree commits. If `split` encounters any of these commits, the split output may have incomplete history. All commits authored by git subtree merge [--squash] --prefix Q have a first or second parent that has *only* subtree commits as ancestors. When splitting a completely different path `P/`, it is safe to ignore: 1. the merged tree 2. the subtree parent 3. *all* of that parent's ancestry, which applies only to path `Q/` and not `P/`. But this relationship no longer holds if the git-subtree commit is rebased or otherwise reauthored. After a rebase, the former git-subtree commit will have other unrelated commits as ancestors. Ignoring these commits may exclude the history of `P/`, leading to incomplete `subtree split` output. The pruning logic relies solely on the `git-subtree-*:` trailers to detect git-subtree commits, which it blindly accepts without further validation. The split logic also takes its time about being wrong: `cmd_split()` execs a `git show` for *every* commit in the split range… twice. This is inefficient in a shell script. Add a "reality check" to ignore rebased or rewritten commits: * Rewrites of non-merge commits cannot be detected, so the new detector no longer looks for them. * Merges carry a `git-subtree-mainline:` trailer with the hash of the **first parent**. If this hash differs, or if the "merge" commit no longer has multiple parents, a rewrite has occurred. To increase speed, package this logic in a new method, `find_other_splits()`. Perform the check up-front by iterating over a single `git log`. Add ignored subtrees to: 1. the `notree` cache, which excludes them from the `split` history 2. a `prune` negative refs list. The negative refs prevent recursing into other subtrees. Since there are potentially a *lot* of these, cache them on disk and use rev-list's `--stdin` mode. Reported-by: George <george@mail.dietrich.pub> Signed-off-by: Colin Stagner <ask+git@howdoi.land> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'contrib/subtree/git-subtree.sh')
-rwxr-xr-xcontrib/subtree/git-subtree.sh141
1 files changed, 92 insertions, 49 deletions
diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh
index 17106d1a72..3ebe88cbea 100755
--- a/contrib/subtree/git-subtree.sh
+++ b/contrib/subtree/git-subtree.sh
@@ -325,6 +325,12 @@ check_parents () {
done
}
+# Usage: get_notree REV
+get_notree () {
+ assert test $# = 1
+ test -r "$cachedir/notree/$1"
+}
+
# Usage: set_notree REV
set_notree () {
assert test $# = 1
@@ -511,6 +517,71 @@ find_existing_splits () {
done || exit $?
}
+# Usage: find_other_splits DIR REV UNREVS...
+#
+# Scan history in REV UNREVS for other `git subtree split --rejoin`
+# merge commits belonging to prefixes outside of DIR. These
+# "other splits" don't contribute to DIR and can be ignored.
+#
+# If any such rejoins are found,
+#
+# * emit their second-parent as an UNREV, avoiding a
+# potentially costly history traversal
+#
+# * mark the merge commit as "notree" to ignore it
+find_other_splits () {
+ assert test $# -ge 2
+ dir="${1%/}"
+ rev="$2"
+ shift 2
+ debug "Looking for other splits with dir != $dir..."
+
+ git log \
+ --grep '^git-subtree-mainline:' \
+ --no-patch \
+ --no-show-signature \
+ --format='hash: %H%nparents: %P%n%(trailers:key=git-subtree-dir,key=git-subtree-mainline,key=git-subtree-split)%nEND' \
+ "$rev" ${@:+"$@"} |
+ while read -r key val
+ do
+ case "$key" in
+ hash:)
+ commit_hash="${val}"
+ commit_parents=
+ subtree_dir=
+ subtree_mainline=
+ subtree_split=
+ ;;
+ parents:)
+ commit_parents="${val}" ;;
+ git-subtree-dir:)
+ subtree_dir="${val%/}/" ;;
+ git-subtree-mainline:)
+ subtree_mainline="${val}" ;;
+ git-subtree-split:)
+ subtree_split="${val}" ;;
+ END)
+ # verify:
+ # * all git-subtree-* trailers are present
+ # * this subtree is outside of $dir
+ # * the first parent is the git-subtree-mainline:
+ # * the commit has at least two parents
+ if test -n "${subtree_dir}" &&
+ test -n "${subtree_split}" &&
+ test -n "${subtree_mainline}" &&
+ test "${subtree_dir}" = "${subtree_dir#"${dir}/"}" &&
+ test "${commit_parents}" != "${commit_parents#"$subtree_mainline "}" &&
+ rev_exists "${commit_hash}^2"
+ then
+ debug "find_other_splits excluding dir=$subtree_dir merged in ${commit_hash}"
+ echo "^${commit_hash}^2"
+ set_notree "${commit_hash}"
+ fi
+ ;;
+ esac
+ done
+}
+
# Usage: copy_commit REV TREE FLAGS_STR
copy_commit () {
assert test $# = 3
@@ -785,42 +856,6 @@ ensure_valid_ref_format () {
die "fatal: '$1' does not look like a ref"
}
-# Usage: should_ignore_subtree_split_commit REV
-#
-# Check if REV is a commit from another subtree and should be
-# ignored from processing for splits
-should_ignore_subtree_split_commit () {
- assert test $# = 1
-
- git show \
- --no-patch \
- --no-show-signature \
- --format='%(trailers:key=git-subtree-dir,key=git-subtree-mainline)' \
- "$1" |
- (
- have_mainline=
- subtree_dir=
-
- while read -r trailer val
- do
- case "$trailer" in
- git-subtree-dir:)
- subtree_dir="${val%/}" ;;
- git-subtree-mainline:)
- have_mainline=y ;;
- esac
- done
-
- if test -n "${subtree_dir}" &&
- test -z "${have_mainline}" &&
- test "${subtree_dir}" != "$arg_prefix"
- then
- return 0
- fi
- return 1
- )
-}
-
# Usage: process_split_commit REV PARENTS
process_split_commit () {
assert test $# = 2
@@ -994,31 +1029,39 @@ cmd_split () {
fi
unrevs="$(find_existing_splits "$dir" "$rev" "$repository")" || exit $?
+ (find_other_splits >"$cachedir/prune" "$dir" "$rev" $unrevs) || exit $?
# We can't restrict rev-list to only $dir here, because some of our
# parents have the $dir contents the root, and those won't match.
# (and rev-list --follow doesn't seem to solve this)
- grl='git rev-list --topo-order --reverse --parents $rev $unrevs'
- revmax=$(eval "$grl" | wc -l)
+ revmax="$(git rev-list \
+ <"$cachedir/prune" \
+ --topo-order \
+ --reverse \
+ --parents \
+ --stdin \
+ --count \
+ "$rev" \
+ $unrevs
+ )"
revcount=0
createcount=0
extracount=0
- eval "$grl" |
+ git rev-list \
+ <"$cachedir/prune" \
+ --topo-order \
+ --reverse \
+ --parents \
+ --stdin \
+ "$rev" \
+ $unrevs |
while read rev parents
do
- if should_ignore_subtree_split_commit "$rev"
+ if get_notree "$rev"
then
continue
fi
- parsedparents=''
- for parent in $parents
- do
- if ! should_ignore_subtree_split_commit "$parent"
- then
- parsedparents="$parsedparents$parent "
- fi
- done
- process_split_commit "$rev" "$parsedparents"
+ process_split_commit "$rev" "$parents"
done || exit $?
latest_new=$(cache_get latest_new) || exit $?