@@ -403,14 +403,22 @@ runs:
403403 ref : ${{ steps.guard.outputs.checkout_sha }}
404404 persist-credentials : false
405405
406- - name : Ensure PR base commit is fetched
406+ - name : Ensure PR comparison commits are fetched
407407 if : steps.guard.outputs.skip != 'true' && steps.guard.outputs.mode == 'review'
408408 shell : bash
409409 working-directory : target-repo
410410 env :
411411 BASE_SHA : ${{ steps.guard.outputs.base_sha }}
412412 BASE_REPO : ${{ steps.guard.outputs.base_repo }}
413+ BASE_REF : ${{ steps.guard.outputs.base_ref }}
413414 run : |
415+ git remote add base "https://github.com/${BASE_REPO}.git" 2>/dev/null || git remote set-url base "https://github.com/${BASE_REPO}.git"
416+ # The baseline search walks the PR head ancestry, which checkout fetched
417+ # with fetch-depth: 0. We still fetch the PR base commit for GitHub-style
418+ # changed-file diffs and for the no-baseline fallback full base analysis.
419+ if [ -n "$BASE_REF" ]; then
420+ git fetch --no-tags base "+refs/heads/${BASE_REF}:refs/remotes/codeboarding-base/${BASE_REF}" || true
421+ fi
414422 git fetch origin "$BASE_SHA" --depth=2 || true
415423 if ! git cat-file -e "$BASE_SHA" 2>/dev/null; then
416424 git remote add base "https://github.com/${BASE_REPO}.git" 2>/dev/null || git remote set-url base "https://github.com/${BASE_REPO}.git"
@@ -657,31 +665,50 @@ runs:
657665 working-directory : target-repo
658666 env :
659667 BASE_SHA : ${{ steps.guard.outputs.base_sha }}
668+ HEAD_SHA : ${{ steps.guard.outputs.head_sha }}
660669 ACTION_PATH : ${{ github.action_path }}
661670 run : |
# Resolve the comparison baseline for this review run.
# Creates the base/head scratch directories under RUNNER_TEMP and publishes
# these step outputs: base_dir, head_dir, committed (true|false), baseline_sha.
# baseline_sha is the commit whose committed .codeboarding/analysis.json was
# accepted by validate-base; otherwise it falls back to BASE_SHA.
662671 BASE_DIR="${RUNNER_TEMP}/cb-base"
663672 HEAD_DIR="${RUNNER_TEMP}/cb-head"
664673 mkdir -p "$BASE_DIR" "$HEAD_DIR"
665674 echo "base_dir=$BASE_DIR" >> "$GITHUB_OUTPUT"
666675 echo "head_dir=$HEAD_DIR" >> "$GITHUB_OUTPUT"
667- if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then
676+ BASELINE_SHA=""
677+ # Use the newest analysis.json reachable by walking backwards through
678+ # the PR head branch ancestry. If the PR branch is a->b->c->d and master
679+ # is a->b->m1->m2, this deliberately searches d,c,b,a — not m2,m1,b,a.
# NOTE(review): rev-list with a pathspec also lists commits that deleted the
# file; the cat-file check below skips those and keeps walking, so an
# analysis.json that was later removed on the branch can still be selected.
# Confirm that is intended.
680+ while IFS= read -r candidate; do
681+ if git cat-file -e "${candidate}:.codeboarding/analysis.json" 2>/dev/null; then
682+ BASELINE_SHA="$candidate"
683+ break
684+ fi
685+ done < <(git rev-list "$HEAD_SHA" -- .codeboarding/analysis.json 2>/dev/null || true)
686+
# Only the newest candidate is validated. If validate-base rejects it, older
# committed baselines are not tried; the run falls back to BASE_SHA instead.
687+ if [ -n "$BASELINE_SHA" ] && git show "${BASELINE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then
668688 if python3 "$ACTION_PATH/scripts/engine_adapter.py" validate-base \
669689 --analysis "${BASE_DIR}/analysis.json" \
670- --expected-sha "$BASE_SHA "; then
690+ --expected-sha "$BASELINE_SHA"; then
671691 echo "committed=true" >> "$GITHUB_OUTPUT"
672- echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}."
692+ echo "baseline_sha=$BASELINE_SHA" >> "$GITHUB_OUTPUT"
693+ if [ "$BASELINE_SHA" = "$HEAD_SHA" ]; then
694+ echo "Using committed .codeboarding/analysis.json at PR head ${HEAD_SHA}."
695+ else
696+ echo "Using nearest committed .codeboarding/analysis.json at ${BASELINE_SHA} from PR head history ${HEAD_SHA}."
697+ fi
673698 else
# Drop the rejected file so later steps never see an unvalidated baseline.
674699 rm -f "${BASE_DIR}/analysis.json"
675700 echo "committed=false" >> "$GITHUB_OUTPUT"
676- echo "Committed baseline at ${BASE_SHA} is stale; will generate a fresh base analysis."
701+ echo "baseline_sha=$BASE_SHA" >> "$GITHUB_OUTPUT"
702+ echo "Committed baseline at ${BASELINE_SHA} is unusable; will generate a fresh base analysis at ${BASE_SHA}."
677703 fi
678704 else
# No candidate found (or git show failed): remove the possibly empty file
# created by the redirect above and fall back to the PR base commit.
679705 rm -f "${BASE_DIR}/analysis.json"
680706 echo "committed=false" >> "$GITHUB_OUTPUT"
681- echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit."
707+ echo "baseline_sha=$BASE_SHA" >> "$GITHUB_OUTPUT"
708+ echo "No committed baseline found in PR head history; will generate one via a full analysis on the base commit ${BASE_SHA}."
682709 fi
683710
684- - name : Restore base artifacts (keyed by base SHA)
711+ - name : Restore base artifacts (keyed by baseline SHA)
685712 if : steps.guard.outputs.skip != 'true' && steps.guard.outputs.mode == 'review'
686713 id : basecache
687714 uses : actions/cache/restore@v4
@@ -693,7 +720,7 @@ runs:
693720 # inputs. So a free-tier run (oidc, forced Gemini) and a BYO OpenRouter-key
694721 # run with no model pinned would share a key yet produce different base
695722 # analyses; the mode discriminator keeps them from reusing each other's cache.
696- key : cb-base-v2-${{ runner.os }}-${{ steps.guard .outputs.base_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
723+ key : cb-base-v2-${{ runner.os }}-${{ steps.base .outputs.baseline_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
697724
698725 # A committed analysis.json gives the head analysis stable component ids,
699726 # but the engine's incremental path ALSO needs the base static_analysis.pkl
@@ -715,22 +742,22 @@ runs:
715742 ACTION_PATH : ${{ github.action_path }}
716743 TARGET : ${{ github.workspace }}/target-repo
717744 BASE_DIR : ${{ steps.base.outputs.base_dir }}
718- BASE_SHA : ${{ steps.guard .outputs.base_sha }}
745+ BASELINE_SHA : ${{ steps.base .outputs.baseline_sha }}
719746 run : |
720747 # Clean up any stale registration before re-adding (rm -rf alone leaves a
721748 # dangling worktree entry that makes a retry's `worktree add` fail).
722749 BASE_SRC="${RUNNER_TEMP}/base-src"
723750 git -C "$TARGET" worktree remove --force "$BASE_SRC" 2>/dev/null || true
724751 git -C "$TARGET" worktree prune
725752 rm -rf "$BASE_SRC"
726- git -C "$TARGET" worktree add --detach "$BASE_SRC" "$BASE_SHA "
753+ git -C "$TARGET" worktree add --detach "$BASE_SRC" "$BASELINE_SHA "
727754 if uv run python "$ACTION_PATH/scripts/engine_adapter.py" seed \
728755 --repo "$BASE_SRC" \
729756 --out "$BASE_DIR" \
730- --source-sha "$BASE_SHA " \
757+ --source-sha "$BASELINE_SHA " \
731758 && [ -f "$BASE_DIR/static_analysis.pkl" ] && [ -f "$BASE_DIR/static_analysis.sha" ]; then
732759 echo "seed_ok=true" >> "$GITHUB_OUTPUT"
733- echo "::notice::Seeded base static-analysis cache for ${BASE_SHA }; head analysis can run incrementally."
760+ echo "::notice::Seeded base static-analysis cache for ${BASELINE_SHA }; head analysis can run incrementally."
734761 else
735762 # Never leave a partial pkl/sha pair behind: the save step below would
736763 # cache it under this baseline SHA's key and suppress every retry.
@@ -812,7 +839,7 @@ runs:
812839 # inputs. So a free-tier run (oidc, forced Gemini) and a BYO OpenRouter-key
813840 # run with no model pinned would share a key yet produce different base
814841 # analyses; the mode discriminator keeps them from reusing each other's cache.
815- key : cb-base-v2-${{ runner.os }}-${{ steps.guard .outputs.base_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
842+ key : cb-base-v2-${{ runner.os }}-${{ steps.base .outputs.baseline_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
816843
817844 - name : Analyze PR head (incremental from base)
818845 if : steps.guard.outputs.skip != 'true' && steps.guard.outputs.mode == 'review'
@@ -832,7 +859,7 @@ runs:
832859 REPO_NAME : ${{ github.event.repository.name }}
833860 RUN_ID_HEAD : ${{ github.run_id }}-${{ github.run_attempt }}-head
834861 DEPTH : ${{ steps.resolve_depth.outputs.depth }}
835- BASE_SHA : ${{ steps.guard .outputs.base_sha }}
862+ BASELINE_SHA : ${{ steps.base .outputs.baseline_sha }}
836863 HEAD_SHA : ${{ steps.guard.outputs.head_sha }}
837864 run : |
838865 # Export the key under the selected provider's env var (only this one),
@@ -865,7 +892,7 @@ runs:
865892 --name "$REPO_NAME" \
866893 --run-id "$RUN_ID_HEAD" \
867894 --depth "$DEPTH" \
868- --base-ref "$BASE_SHA " \
895+ --base-ref "$BASELINE_SHA " \
869896 --target-ref "$HEAD_SHA" \
870897 --source-sha "$HEAD_SHA"
871898 if [ ! -f "$HEAD_DIR/analysis.json" ]; then
0 commit comments