Skip to content

Commit 46decc4

Browse files
committed
Merge branch 'cs/subtree-split-recursion' into seen
When processing large history graphs on Debian or Ubuntu, "git subtree" can die with a "recursion depth reached" error. Comments? * cs/subtree-split-recursion: contrib/subtree: reduce recursion during split contrib/subtree: functionalize split traversal contrib/subtree: reduce function side-effects
2 parents b4dfeff + c30871b commit 46decc4

File tree

1 file changed

+88
-7
lines changed

1 file changed

+88
-7
lines changed

contrib/subtree/git-subtree.sh

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -315,15 +315,67 @@ cache_miss () {
315315
}
316316

317317
# Usage: check_parents [REVS...]
318+
#
319+
# During a split, check that every commit in REVS has already been
320+
# processed via `process_split_commit`. If not, deepen the history
321+
# until it is.
322+
#
323+
# Commits authored by `subtree split` have to be created in the
324+
# same order as every other git commit: ancestor-first, with new
325+
# commits building on old commits. The traversal order normally
326+
# ensures this is the case, but it also excludes --rejoin commits
327+
# by default.
328+
#
329+
# The --rejoin tells us, "this mainline commit is equivalent to
330+
# this split commit." The relationship is only known for that
331+
# exact commit---and not before or after it. Frequently, commits
332+
# prior to a rejoin are not needed... but, just as often, they
333+
# are! Consider this history graph:
334+
#
335+
# --D---
336+
# / \
337+
# A--B--C--R--X--Y main
338+
# / /
339+
# a--b--c / split
340+
# \ /
341+
# --e--/
342+
#
343+
# The main branch has commits A, B, and C. main is split into
344+
# commits a, b, and c. The split history is rejoined at R.
345+
#
346+
# There are at least two cases where we might need the A-B-C
347+
# history that is prior to R:
348+
#
349+
# 1. Commit D is based on history prior to R, but
350+
# it isn't merged into mainline until after R.
351+
#
352+
# 2. Commit e is based on old split history. It is merged
353+
# back into mainline with a subtree merge. Again, this
354+
# happens after R.
355+
#
356+
# check_parents detects these cases and deepens the history
357+
# to the next available rejoin.
318358
# Usage: check_parents [REVS...]
#
# For every commit in REVS that is neither in the cache nor marked
# "notree", walk the history leading up to it (bounded by the existing
# splits found for that commit) and run process_split_commit on each
# commit of that walk. Dies if the missing commit is still unprocessed
# afterwards.
check_parents () {
	# Keep the working variables function-local so they do not overwrite
	# same-named variables of whoever called us.
	local missed miss unrevs
	missed=$(cache_miss "$@") || exit $?
	local indent=$(($indent + 1))
	for miss in $missed
	do
		if ! test -r "$cachedir/notree/$miss"
		then
			debug "found commit excluded by --rejoin: $miss. skipping to the next --rejoin..."
			unrevs="$(find_existing_splits "$dir" "$miss" "$repository")" || exit 1

			# The loop runs in a pipeline subshell, so an `exit` inside
			# it only ends the subshell; propagate its status explicitly
			# instead of carrying on with a half-processed history.
			find_commits_to_split "$miss" "$unrevs" |
			while read -r rev parents
			do
				process_split_commit "$rev" "$parents"
			done || exit $?

			# The walk must have produced a cache entry (or a notree
			# marker) for the commit we set out to recover.
			if ! test -r "$cachedir/$miss" &&
				! test -r "$cachedir/notree/$miss"
			then
				die "failed to deepen history at $miss"
			fi
		fi
	done
}
@@ -373,6 +425,10 @@ try_remove_previous () {
373425
}
374426

375427
# Usage: process_subtree_split_trailer SPLIT_HASH MAIN_HASH [REPOSITORY]
428+
#
429+
# Parse SPLIT_HASH as a commit. If the commit is not found, fetches
430+
# REPOSITORY and tries again. If found, prints full commit hash.
431+
# Otherwise, dies.
376432
process_subtree_split_trailer () {
377433
assert test $# -ge 2
378434
assert test $# -le 3
@@ -400,6 +456,7 @@ process_subtree_split_trailer () {
400456
die "$fail_msg"
401457
fi
402458
fi
459+
echo "${sub}"
403460
}
404461

405462
# Usage: find_latest_squash DIR [REPOSITORY]
@@ -432,7 +489,7 @@ find_latest_squash () {
432489
main="$b"
433490
;;
434491
git-subtree-split:)
435-
process_subtree_split_trailer "$b" "$sq" "$repository"
492+
sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1
436493
;;
437494
END)
438495
if test -n "$sub"
@@ -489,7 +546,7 @@ find_existing_splits () {
489546
main="$b"
490547
;;
491548
git-subtree-split:)
492-
process_subtree_split_trailer "$b" "$sq" "$repository"
549+
sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1
493550
;;
494551
END)
495552
debug "Main is: '$main'"
@@ -514,6 +571,31 @@ find_existing_splits () {
514571
done || exit $?
515572
}
516573

574+
# Usage: find_commits_to_split REV UNREVS [ARGS...]
575+
#
576+
# List each commit to split, with its parents.
577+
#
578+
# Specify the starting REV for the split, which is usually
579+
# a branch tip. Populate UNREVS with the last --rejoin for
580+
# this prefix, if any. Typically, `subtree split` ignores
581+
# history prior to the last --rejoin... unless and until it
582+
# becomes necessary to consider it. `find_existing_splits` is
583+
# a convenient source of UNREVS.
584+
#
585+
# Remaining arguments are passed to rev-list.
586+
#
587+
# Outputs commits in ancestor-first order, one per line, with
588+
# parent information. Outputs all parents before any child.
589+
# Usage: find_commits_to_split REV UNREVS [ARGS...]
#
# Run `git rev-list --topo-order --reverse --parents` starting at REV.
# UNREVS is written to rev-list's standard input (read via --stdin);
# any remaining ARGS are appended to the rev-list command line.
# Output is whatever rev-list prints.
find_commits_to_split() {
	assert test $# -ge 2
	# Function-local: callers hold their own variables with these names.
	local rev="$1"
	local unrevs="$2"
	shift 2

	# printf rather than echo, so the data is emitted verbatim.
	printf '%s\n' "$unrevs" |
	git rev-list --topo-order --reverse --parents --stdin "$rev" "$@"
}
598+
517599
# Usage: copy_commit REV TREE FLAGS_STR
518600
copy_commit () {
519601
assert test $# = 3
@@ -971,12 +1053,11 @@ cmd_split () {
9711053
# We can't restrict rev-list to only $dir here, because some of our
9721054
# parents have the $dir contents at the root, and those won't match.
9731055
# (and rev-list --follow doesn't seem to solve this)
974-
grl='git rev-list --topo-order --reverse --parents $rev $unrevs'
975-
revmax=$(eval "$grl" | wc -l)
1056+
revmax="$(find_commits_to_split "$rev" "$unrevs" --count)"
9761057
revcount=0
9771058
createcount=0
9781059
extracount=0
979-
eval "$grl" |
1060+
find_commits_to_split "$rev" "$unrevs" |
9801061
while read rev parents
9811062
do
9821063
process_split_commit "$rev" "$parents"

0 commit comments

Comments
 (0)