@@ -315,15 +315,67 @@ cache_miss () {
315315}
316316
317317# Usage: check_parents [REVS...]
318+ #
319+ # During a split, check that every commit in REVS has already been
320+ # processed via `process_split_commit`. If not, deepen the history
321+ # until it is.
322+ #
323+ # Commits authored by `subtree split` have to be created in the
324+ # same order as every other git commit: ancestor-first, with new
325+ # commits building on old commits. The traversal order normally
326+ # ensures this is the case, but it also excludes commits prior to
327+ # the latest --rejoin by default.
328+ #
329+ # The --rejoin tells us, "this mainline commit is equivalent to
330+ # this split commit." The relationship is only known for that
331+ # exact commit---and not before or after it. Frequently, commits
332+ # prior to a rejoin are not needed... but, just as often, they
333+ # are! Consider this history graph:
334+ #
335+ # --D---
336+ # / \
337+ # A--B--C--R--X--Y main
338+ # / /
339+ # a--b--c / split
340+ # \ /
341+ # --e--/
342+ #
343+ # The main branch has commits A, B, and C. main is split into
344+ # commits a, b, and c. The split history is rejoined at R.
345+ #
346+ # There are at least two cases where we might need the A-B-C
347+ # history that is prior to R:
348+ #
349+ # 1. Commit D is based on history prior to R, but
350+ # it isn't merged into mainline until after R.
351+ #
352+ # 2. Commit e is based on old split history. It is merged
353+ # back into mainline with a subtree merge. Again, this
354+ # happens after R.
355+ #
356+ # check_parents detects these cases and deepens the history
357+ # to the next available rejoin.
check_parents () {
	# cache_miss reports which of the given revisions are still
	# missing; propagate its exit status if it fails.
	missed=$(cache_miss "$@") || exit $?
	local indent=$(($indent + 1))
	for miss in $missed
	do
		# Commits with a readable "notree" marker are left alone.
		if ! test -r "$cachedir/notree/$miss"
		then
			debug "found commit excluded by --rejoin: $miss. skipping to the next --rejoin..."
			unrevs="$(find_existing_splits "$dir" "$miss" "$repository")" || exit 1

			# Process the history leading up to $miss, ancestors first.
			# The loop is the tail of a pipeline and may therefore run
			# in a subshell: a failure inside it only terminates that
			# subshell, so its exit status is propagated explicitly.
			find_commits_to_split "$miss" "$unrevs" |
			while read -r rev parents
			do
				process_split_commit "$rev" "$parents"
			done || exit $?

			# After deepening, $miss must have either a cache entry
			# or a "notree" marker; anything else means the
			# traversal did not reach it.
			if ! test -r "$cachedir/$miss" &&
				! test -r "$cachedir/notree/$miss"
			then
				die "failed to deepen history at $miss"
			fi
		fi
	done
}
@@ -373,6 +425,10 @@ try_remove_previous () {
373425}
374426
375427# Usage: process_subtree_split_trailer SPLIT_HASH MAIN_HASH [REPOSITORY]
428+ #
429+ # Parse SPLIT_HASH as a commit. If the commit is not found, fetch
430+ # REPOSITORY and try again. If found, print the full commit hash.
431+ # Otherwise, die.
376432process_subtree_split_trailer () {
377433 assert test $# -ge 2
378434 assert test $# -le 3
@@ -400,6 +456,7 @@ process_subtree_split_trailer () {
400456 die " $fail_msg "
401457 fi
402458 fi
459+ echo " ${sub} "
403460}
404461
405462# Usage: find_latest_squash DIR [REPOSITORY]
@@ -432,7 +489,7 @@ find_latest_squash () {
432489 main=" $b "
433490 ;;
434491 git-subtree-split:)
435- process_subtree_split_trailer " $b " " $sq " " $repository "
492+ sub= " $( process_subtree_split_trailer " $b " " $sq " " $repository " ) " || exit 1
436493 ;;
437494 END)
438495 if test -n " $sub "
@@ -489,7 +546,7 @@ find_existing_splits () {
489546 main=" $b "
490547 ;;
491548 git-subtree-split:)
492- process_subtree_split_trailer " $b " " $sq " " $repository "
549+ sub= " $( process_subtree_split_trailer " $b " " $sq " " $repository " ) " || exit 1
493550 ;;
494551 END)
495552 debug " Main is: '$main '"
@@ -514,6 +571,31 @@ find_existing_splits () {
514571 done || exit $?
515572}
516573
574+ # Usage: find_commits_to_split REV UNREVS [ARGS...]
575+ #
576+ # List each commit to split, with its parents.
577+ #
578+ # Specify the starting REV for the split, which is usually
579+ # a branch tip. Populate UNREVS with the last --rejoin for
580+ # this prefix, if any. Typically, `subtree split` ignores
581+ # history prior to the last --rejoin... unless it becomes
582+ # necessary to consider it. `find_existing_splits` is
583+ # a convenient source of UNREVS.
584+ #
585+ # Remaining arguments are passed to rev-list.
586+ #
587+ # Outputs commits in ancestor-first order, one per line, with
588+ # parent information. Outputs all parents before any child.
find_commits_to_split () {
	assert test $# -ge 2
	# Keep these function-local so that a caller's own $rev / $unrevs
	# are not overwritten when this function is not run in a subshell.
	local rev="$1"
	local unrevs="$2"
	shift 2

	# UNREVS is handed to rev-list on stdin; any remaining arguments
	# are appended to the rev-list command line unchanged.
	# printf is used instead of echo so the data is emitted verbatim.
	printf '%s\n' "$unrevs" |
	git rev-list --topo-order --reverse --parents --stdin "$rev" "$@"
}
598+
517599# Usage: copy_commit REV TREE FLAGS_STR
518600copy_commit () {
519601 assert test $# = 3
@@ -971,12 +1053,11 @@ cmd_split () {
9711053 # We can't restrict rev-list to only $dir here, because some of our
9721054 # parents have the $dir contents the root, and those won't match.
9731055 # (and rev-list --follow doesn't seem to solve this)
974- grl=' git rev-list --topo-order --reverse --parents $rev $unrevs'
975- revmax=$( eval " $grl " | wc -l)
1056+ revmax=" $( find_commits_to_split " $rev " " $unrevs " --count) "
9761057 revcount=0
9771058 createcount=0
9781059 extracount=0
979- eval " $grl " |
1060+ find_commits_to_split " $rev " " $unrevs " |
9801061 while read rev parents
9811062 do
9821063 process_split_commit " $rev " " $parents "
0 commit comments