Skip to content

Commit 3e95e08

Browse files
committed
feat(ai-dev): add model fallback chain + REST API fallback for plan mode
1 parent 38dbbab commit 3e95e08

2 files changed

Lines changed: 65 additions & 24 deletions

File tree

.github/workflows/ai-dev.yml

Lines changed: 64 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,11 @@ jobs:
9292
env:
9393
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
9494
TASK_TYPE: ${{ inputs.task_type }}
95-
GEMINI_MODEL: ${{ vars.GEMINI_MODEL || 'gemini-2.5-flash' }}
96-
# Required for headless CI: skip the interactive "trusted folder" prompt.
95+
# Comma-separated fallback chain. The CLI is tried with each model in
96+
# order; if a model hits HTTP 429 quota, the next one is tried. For
97+
# `plan` task_type we also fall back to a direct Gemini REST API call
98+
# which bypasses the CLI's internal model routing entirely.
99+
GEMINI_MODELS: ${{ vars.GEMINI_MODELS || 'gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-lite' }}
97100
GEMINI_CLI_TRUST_WORKSPACE: 'true'
98101
run: |
99102
set -e
@@ -102,36 +105,74 @@ jobs:
102105
gemini --version || true
103106
echo "::endgroup::"
104107
105-
set +e
106-
# --yolo is required in headless CI for both modes (without it any
107-
# tool-call confirmation hangs and stdout stays empty). For plan mode
108-
# we run the agent for context-gathering and revert any file changes
109-
# below; non-plan modes keep the file changes for the PR.
110-
if [ "$TASK_TYPE" = "plan" ]; then
111-
gemini --model "$GEMINI_MODEL" --yolo --prompt "$(cat .ai/prompt.txt)" \
112-
> .ai/plan.md 2> .ai/gemini.err
113-
else
114-
gemini --model "$GEMINI_MODEL" --yolo --prompt "$(cat .ai/prompt.txt)" \
115-
> .ai/run.log 2> .ai/gemini.err
108+
OUT_FILE=".ai/run.log"
109+
[ "$TASK_TYPE" = "plan" ] && OUT_FILE=".ai/plan.md"
110+
111+
rc=1
112+
IFS=',' read -ra MODELS <<< "$GEMINI_MODELS"
113+
for model in "${MODELS[@]}"; do
114+
model="$(echo "$model" | xargs)" # trim whitespace
115+
echo "::group::gemini CLI attempt: $model"
116+
set +e
117+
gemini --model "$model" --yolo --prompt "$(cat .ai/prompt.txt)" \
118+
> "$OUT_FILE" 2> .ai/gemini.err
119+
rc=$?
120+
set -e
121+
echo "exit: $rc"
122+
tail -n 20 .ai/gemini.err 2>/dev/null || true
123+
echo "::endgroup::"
124+
125+
if [ $rc -eq 0 ]; then
126+
echo "succeeded with $model"
127+
break
128+
fi
129+
if grep -qE 'TerminalQuotaError|Quota exceeded|"code": ?429|status: ?429' .ai/gemini.err 2>/dev/null; then
130+
echo "::notice::$model hit quota; trying next model"
131+
continue
132+
fi
133+
echo "::warning::$model failed with non-quota error; will not retry other CLI models"
134+
break
135+
done
136+
137+
# REST API fallback (plan-only). Bypasses gemini-cli entirely so the
138+
# CLI's internal calls to gemini-2.5-flash (used for tool routing /
139+
# summarization) cannot cause us to hit that model's 20-RPD ceiling.
140+
if [ $rc -ne 0 ] && [ "$TASK_TYPE" = "plan" ]; then
141+
echo "::notice::falling back to direct Gemini REST API for plan mode"
142+
for model in "${MODELS[@]}"; do
143+
model="$(echo "$model" | xargs)"
144+
echo "::group::REST API attempt: $model"
145+
body=$(jq -n --rawfile p .ai/prompt.txt '{contents:[{parts:[{text:$p}]}]}')
146+
http_code=$(curl -sS -o .ai/plan-raw.json -w '%{http_code}' \
147+
"https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent" \
148+
-H "x-goog-api-key: $GEMINI_API_KEY" \
149+
-H "Content-Type: application/json" \
150+
-d "$body" || echo "000")
151+
echo "HTTP $http_code"
152+
if [ "$http_code" = "200" ]; then
153+
jq -r '.candidates[0].content.parts[0].text // empty' .ai/plan-raw.json > "$OUT_FILE"
154+
if [ -s "$OUT_FILE" ]; then
155+
rc=0
156+
echo "REST API succeeded with $model"
157+
echo "::endgroup::"
158+
break
159+
fi
160+
fi
161+
jq -r '.error.message // .' .ai/plan-raw.json 2>/dev/null | head -3 || true
162+
echo "::endgroup::"
163+
done
116164
fi
117-
rc=$?
118-
set -e
119165
120-
# Plan mode is read-only. If the model edited anything despite the
121-
# prompt, revert it so the PR contains only the plan text.
166+
# Plan mode is read-only: revert any incidental file edits.
122167
if [ "$TASK_TYPE" = "plan" ]; then
123168
git checkout -- . 2>/dev/null || true
124169
git clean -fd -e .ai 2>/dev/null || true
125170
fi
126171
127-
echo "::group::gemini stderr"
128-
cat .ai/gemini.err 2>/dev/null || echo "(no stderr file)"
129-
echo "::endgroup::"
130-
131172
if [ $rc -ne 0 ]; then
132-
echo "::error::gemini exited with code $rc"
173+
echo "::error::all Gemini attempts (CLI + REST fallback) failed"
133174
echo "::group::stdout tail"
134-
tail -n 60 .ai/plan.md 2>/dev/null || tail -n 60 .ai/run.log 2>/dev/null || echo "(no stdout file)"
175+
tail -n 60 "$OUT_FILE" 2>/dev/null || echo "(no stdout)"
135176
echo "::endgroup::"
136177
exit $rc
137178
fi

docs/ai-pipeline.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ To enable the pipeline, a maintainer must:
170170
1. Create the labels listed in section 1.
171171
2. Add the following GitHub Actions secrets:
172172
- `GEMINI_API_KEY` — used by Gemini CLI inside `ai-dev.yml`. Get one from Google AI Studio (https://aistudio.google.com/apikey); free tier covers `gemini-2.5-flash`.
173-
- (optional repo variable) `GEMINI_MODEL` — pin a non-default model, e.g. `gemini-2.5-pro`. Defaults to `gemini-2.5-flash`.
173+
- (optional repo variable) `GEMINI_MODELS` — comma-separated fallback chain, e.g. `gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-lite`. The workflow tries each in order on HTTP 429 (free-tier daily quota), then falls back to a direct Gemini REST API call for `plan` task types. Defaults to the chain above.
174174
3. Configure n8n with:
175175
- a GitHub App or PAT with `contents:write`, `pull_requests:write`, `issues:write`, `actions:write` (for `workflow_dispatch`),
176176
- webhook endpoints for `issues`, `issue_comment`, and `workflow_run`.

0 commit comments

Comments
 (0)