Skip to content

Commit e09698e

Browse files
fix: resolve review baseline from PR head history
Amp-Thread-ID: https://ampcode.com/threads/T-019ecb81-dc76-76cb-8bf3-22a366c9be41
Co-authored-by: Amp <amp@ampcode.com>
1 parent 215d57e commit e09698e

2 files changed

Lines changed: 62 additions & 22 deletions

File tree

action.yml

Lines changed: 42 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -403,14 +403,22 @@ runs:
403403
ref: ${{ steps.guard.outputs.checkout_sha }}
404404
persist-credentials: false
405405

406-
- name: Ensure PR base commit is fetched
406+
- name: Ensure PR comparison commits are fetched
407407
if: steps.guard.outputs.skip != 'true' && steps.guard.outputs.mode == 'review'
408408
shell: bash
409409
working-directory: target-repo
410410
env:
411411
BASE_SHA: ${{ steps.guard.outputs.base_sha }}
412412
BASE_REPO: ${{ steps.guard.outputs.base_repo }}
413+
BASE_REF: ${{ steps.guard.outputs.base_ref }}
413414
run: |
415+
git remote add base "https://github.com/${BASE_REPO}.git" 2>/dev/null || git remote set-url base "https://github.com/${BASE_REPO}.git"
416+
# The baseline search walks the PR head ancestry, which checkout fetched
417+
# with fetch-depth: 0. We still fetch the PR base commit for GitHub-style
418+
# changed-file diffs and for the no-baseline fallback full base analysis.
419+
if [ -n "$BASE_REF" ]; then
420+
git fetch --no-tags base "+refs/heads/${BASE_REF}:refs/remotes/codeboarding-base/${BASE_REF}" || true
421+
fi
414422
git fetch origin "$BASE_SHA" --depth=2 || true
415423
if ! git cat-file -e "$BASE_SHA" 2>/dev/null; then
416424
git remote add base "https://github.com/${BASE_REPO}.git" 2>/dev/null || git remote set-url base "https://github.com/${BASE_REPO}.git"
@@ -657,31 +665,50 @@ runs:
657665
working-directory: target-repo
658666
env:
659667
BASE_SHA: ${{ steps.guard.outputs.base_sha }}
668+
HEAD_SHA: ${{ steps.guard.outputs.head_sha }}
660669
ACTION_PATH: ${{ github.action_path }}
661670
run: |
662671
BASE_DIR="${RUNNER_TEMP}/cb-base"
663672
HEAD_DIR="${RUNNER_TEMP}/cb-head"
664673
mkdir -p "$BASE_DIR" "$HEAD_DIR"
665674
echo "base_dir=$BASE_DIR" >> $GITHUB_OUTPUT
666675
echo "head_dir=$HEAD_DIR" >> $GITHUB_OUTPUT
667-
if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then
676+
BASELINE_SHA=""
677+
# Use the newest analysis.json reachable by walking backwards through
678+
# the PR head branch ancestry. If the PR branch is a->b->c->d and master
679+
# is a->b->m1->m2, this deliberately searches d,c,b,a — not m2,m1,b,a.
680+
while IFS= read -r candidate; do
681+
if git cat-file -e "${candidate}:.codeboarding/analysis.json" 2>/dev/null; then
682+
BASELINE_SHA="$candidate"
683+
break
684+
fi
685+
done < <(git rev-list "$HEAD_SHA" -- .codeboarding/analysis.json 2>/dev/null || true)
686+
687+
if [ -n "$BASELINE_SHA" ] && git show "${BASELINE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then
668688
if python3 "$ACTION_PATH/scripts/engine_adapter.py" validate-base \
669689
--analysis "${BASE_DIR}/analysis.json" \
670-
--expected-sha "$BASE_SHA"; then
690+
--expected-sha "$BASELINE_SHA"; then
671691
echo "committed=true" >> $GITHUB_OUTPUT
672-
echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}."
692+
echo "baseline_sha=$BASELINE_SHA" >> $GITHUB_OUTPUT
693+
if [ "$BASELINE_SHA" = "$HEAD_SHA" ]; then
694+
echo "Using committed .codeboarding/analysis.json at PR head ${HEAD_SHA}."
695+
else
696+
echo "Using nearest committed .codeboarding/analysis.json at ${BASELINE_SHA} from PR head history ${HEAD_SHA}."
697+
fi
673698
else
674699
rm -f "${BASE_DIR}/analysis.json"
675700
echo "committed=false" >> $GITHUB_OUTPUT
676-
echo "Committed baseline at ${BASE_SHA} is stale; will generate a fresh base analysis."
701+
echo "baseline_sha=$BASE_SHA" >> $GITHUB_OUTPUT
702+
echo "Committed baseline at ${BASELINE_SHA} is unusable; will generate a fresh base analysis at ${BASE_SHA}."
677703
fi
678704
else
679705
rm -f "${BASE_DIR}/analysis.json"
680706
echo "committed=false" >> $GITHUB_OUTPUT
681-
echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit."
707+
echo "baseline_sha=$BASE_SHA" >> $GITHUB_OUTPUT
708+
echo "No committed baseline found in PR head history; will generate one via a full analysis on the base commit ${BASE_SHA}."
682709
fi
683710
684-
- name: Restore base artifacts (keyed by base SHA)
711+
- name: Restore base artifacts (keyed by baseline SHA)
685712
if: steps.guard.outputs.skip != 'true' && steps.guard.outputs.mode == 'review'
686713
id: basecache
687714
uses: actions/cache/restore@v4
@@ -693,7 +720,7 @@ runs:
693720
# inputs. So a free-tier run (oidc, forced Gemini) and a BYO OpenRouter-key
694721
# run with no model pinned would share a key yet produce different base
695722
# analyses; the mode discriminator keeps them from reusing each other's cache.
696-
key: cb-base-v2-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
723+
key: cb-base-v2-${{ runner.os }}-${{ steps.base.outputs.baseline_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
697724

698725
# A committed analysis.json gives the head analysis stable component ids,
699726
# but the engine's incremental path ALSO needs the base static_analysis.pkl
@@ -715,22 +742,22 @@ runs:
715742
ACTION_PATH: ${{ github.action_path }}
716743
TARGET: ${{ github.workspace }}/target-repo
717744
BASE_DIR: ${{ steps.base.outputs.base_dir }}
718-
BASE_SHA: ${{ steps.guard.outputs.base_sha }}
745+
BASELINE_SHA: ${{ steps.base.outputs.baseline_sha }}
719746
run: |
720747
# Clean up any stale registration before re-adding (rm -rf alone leaves a
721748
# dangling worktree entry that makes a retry's `worktree add` fail).
722749
BASE_SRC="${RUNNER_TEMP}/base-src"
723750
git -C "$TARGET" worktree remove --force "$BASE_SRC" 2>/dev/null || true
724751
git -C "$TARGET" worktree prune
725752
rm -rf "$BASE_SRC"
726-
git -C "$TARGET" worktree add --detach "$BASE_SRC" "$BASE_SHA"
753+
git -C "$TARGET" worktree add --detach "$BASE_SRC" "$BASELINE_SHA"
727754
if uv run python "$ACTION_PATH/scripts/engine_adapter.py" seed \
728755
--repo "$BASE_SRC" \
729756
--out "$BASE_DIR" \
730-
--source-sha "$BASE_SHA" \
757+
--source-sha "$BASELINE_SHA" \
731758
&& [ -f "$BASE_DIR/static_analysis.pkl" ] && [ -f "$BASE_DIR/static_analysis.sha" ]; then
732759
echo "seed_ok=true" >> "$GITHUB_OUTPUT"
733-
echo "::notice::Seeded base static-analysis cache for ${BASE_SHA}; head analysis can run incrementally."
760+
echo "::notice::Seeded base static-analysis cache for ${BASELINE_SHA}; head analysis can run incrementally."
734761
else
735762
# Never leave a partial pkl/sha pair behind: the save step below would
736763
# cache it under this base SHA's key and suppress every retry.
@@ -812,7 +839,7 @@ runs:
812839
# inputs. So a free-tier run (oidc, forced Gemini) and a BYO OpenRouter-key
813840
# run with no model pinned would share a key yet produce different base
814841
# analyses; the mode discriminator keeps them from reusing each other's cache.
815-
key: cb-base-v2-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
842+
key: cb-base-v2-${{ runner.os }}-${{ steps.base.outputs.baseline_sha }}-d${{ steps.resolve_depth.outputs.depth }}-${{ inputs.engine_ref }}-${{ steps.llm.outputs.mode }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }}
816843

817844
- name: Analyze PR head (incremental from base)
818845
if: steps.guard.outputs.skip != 'true' && steps.guard.outputs.mode == 'review'
@@ -832,7 +859,7 @@ runs:
832859
REPO_NAME: ${{ github.event.repository.name }}
833860
RUN_ID_HEAD: ${{ github.run_id }}-${{ github.run_attempt }}-head
834861
DEPTH: ${{ steps.resolve_depth.outputs.depth }}
835-
BASE_SHA: ${{ steps.guard.outputs.base_sha }}
862+
BASELINE_SHA: ${{ steps.base.outputs.baseline_sha }}
836863
HEAD_SHA: ${{ steps.guard.outputs.head_sha }}
837864
run: |
838865
# Export the key under the selected provider's env var (only this one),
@@ -865,7 +892,7 @@ runs:
865892
--name "$REPO_NAME" \
866893
--run-id "$RUN_ID_HEAD" \
867894
--depth "$DEPTH" \
868-
--base-ref "$BASE_SHA" \
895+
--base-ref "$BASELINE_SHA" \
869896
--target-ref "$HEAD_SHA" \
870897
--source-sha "$HEAD_SHA"
871898
if [ ! -f "$HEAD_DIR/analysis.json" ]; then

scripts/run_local.sh

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -89,19 +89,32 @@ else
8989
BASE_DIR="$OUT/base"; HEAD_DIR="$OUT/head"
9090
rm -rf "$BASE_DIR" "$HEAD_DIR"; mkdir -p "$BASE_DIR" "$HEAD_DIR"
9191

92-
echo "== Resolving base analysis at $BASE_SHA =="
93-
if git -C "$REPO" show "$BASE_SHA:.codeboarding/analysis.json" > "$BASE_DIR/analysis.json" 2>/dev/null \
94-
&& run_engine validate-base --analysis "$BASE_DIR/analysis.json" --expected-sha "$BASE_SHA"; then
95-
echo " using committed baseline"
92+
echo "== Resolving base analysis from head history at or before $HEAD_SHA =="
93+
BASELINE_SHA=""
94+
while IFS= read -r candidate; do
95+
if git -C "$REPO" cat-file -e "${candidate}:.codeboarding/analysis.json" 2>/dev/null; then
96+
BASELINE_SHA="$candidate"
97+
break
98+
fi
99+
done < <(git -C "$REPO" rev-list "$HEAD_SHA" -- .codeboarding/analysis.json 2>/dev/null || true)
100+
101+
if [ -n "$BASELINE_SHA" ] \
102+
&& git -C "$REPO" show "$BASELINE_SHA:.codeboarding/analysis.json" > "$BASE_DIR/analysis.json" 2>/dev/null \
103+
&& run_engine validate-base --analysis "$BASE_DIR/analysis.json" --expected-sha "$BASELINE_SHA"; then
104+
if [ "$BASELINE_SHA" = "$HEAD_SHA" ]; then
105+
echo " using committed baseline at head"
106+
else
107+
echo " using nearest committed baseline at $BASELINE_SHA from head history"
108+
fi
96109
# Mirror action.yml: a committed analysis.json alone can't drive incremental —
97110
# the engine needs the base static_analysis.pkl with its cluster baseline.
98111
# Seed it deterministically (LSP + clustering, no LLM); fail-open on error.
99112
BASE_SRC="$OUT/base-src"
100113
git -C "$REPO" worktree remove --force "$BASE_SRC" 2>/dev/null || true
101114
git -C "$REPO" worktree prune
102115
rm -rf "$BASE_SRC"
103-
git -C "$REPO" worktree add --detach "$BASE_SRC" "$BASE_SHA" >/dev/null
104-
if run_engine seed --repo "$BASE_SRC" --out "$BASE_DIR" --source-sha "$BASE_SHA"; then
116+
git -C "$REPO" worktree add --detach "$BASE_SRC" "$BASELINE_SHA" >/dev/null
117+
if run_engine seed --repo "$BASE_SRC" --out "$BASE_DIR" --source-sha "$BASELINE_SHA"; then
105118
echo " seeded static-analysis baseline (no LLM)"
106119
else
107120
rm -f "$BASE_DIR/static_analysis.pkl" "$BASE_DIR/static_analysis.sha"
@@ -135,7 +148,7 @@ else
135148
--name "$(basename "$REPO")" \
136149
--run-id local-head \
137150
--depth "$DEPTH" \
138-
--base-ref "$BASE_SHA" \
151+
--base-ref "${BASELINE_SHA:-$BASE_SHA}" \
139152
--target-ref "$HEAD_SHA" \
140153
--source-sha "$HEAD_SHA"
141154
[ -f "$HEAD_DIR/analysis.json" ] || { echo "Head analysis ran but analysis.json is missing." >&2; exit 1; }

0 commit comments

Comments
 (0)