Skip to content

Commit dd86f95

Browse files
fix: end-to-end agent testing on this PR
- Use GitHub API for structured failure info (which job/step failed + targeted log) instead of noisy grep across all logs
- Fix system prompt: concrete example, no angle brackets
- Fix filepath parsing edge cases
- Fix ruff: lambda → def, formatting
- Add 15-min wait for PR CI security scan to complete
- Add aiohttp==3.9.0 to Dockerfile (CVE trigger)
- Add pull_request trigger with Dockerfile path

TODO after validation: remove pull_request trigger, remove aiohttp, restore branch validation
1 parent 9d519f6 commit dd86f95

3 files changed

Lines changed: 227 additions & 41 deletions

File tree

.github/workflows/agent-currency-fix.yml

Lines changed: 59 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,13 @@ on:
88
workflow_run:
99
workflows: ["Merge Conditions"]
1010
types: [completed]
11+
# PR trigger for testing (remove after validation)
12+
pull_request:
13+
branches: [main]
14+
paths:
15+
- "scripts/autocurrency/agent-fix.py"
16+
- ".github/workflows/agent-currency-fix.yml"
17+
- "docker/vllm/Dockerfile"
1118

1219
permissions:
1320
contents: read
@@ -23,19 +30,27 @@ env:
2330
jobs:
2431
fix-agent:
2532
if: >-
26-
github.event.workflow_run.conclusion == 'failure' &&
27-
startsWith(github.event.workflow_run.head_branch, 'auto-update/')
33+
github.event_name == 'workflow_dispatch' ||
34+
github.event_name == 'pull_request' || (
35+
github.event.workflow_run.conclusion == 'failure' &&
36+
startsWith(github.event.workflow_run.head_branch, 'auto-update/')
37+
)
2838
runs-on:
2939
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
3040
fleet:default-runner
3141
buildspec-override:true
3242
env:
33-
HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
34-
RUN_ID: ${{ github.event.workflow_run.id }}
35-
RUN_URL: ${{ github.event.workflow_run.html_url }}
43+
HEAD_BRANCH: ${{ github.event.workflow_run.head_branch || github.head_ref }}
44+
RUN_ID: ${{ github.event.workflow_run.id || '' }}
45+
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
3646
steps:
3747
- name: Validate branch name
3848
run: |
49+
# Skip validation for pull_request testing
50+
if [ "${{ github.event_name }}" = "pull_request" ]; then
51+
echo "Skipping branch validation for PR testing"
52+
exit 0
53+
fi
3954
if [[ ! "$HEAD_BRANCH" =~ ^auto-update/[a-z]+-[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
4055
echo "::error::Branch '${HEAD_BRANCH}' does not match expected pattern."
4156
exit 1
@@ -51,13 +66,40 @@ jobs:
5166
fi
5267
gh --version
5368
69+
# For pull_request testing: poll until a tracked workflow fails (or timeout)
70+
- name: Wait for CI failure
71+
if: github.event_name == 'pull_request'
72+
env:
73+
GH_TOKEN: ${{ github.token }}
74+
run: |
75+
TRACKED="PR - vLLM EC2|PR - vLLM SageMaker|PR - SGLang EC2|PR - SGLang SageMaker"
76+
SHA="${{ github.event.pull_request.head.sha }}"
77+
echo "Polling for tracked workflow failures on SHA: $SHA"
78+
for i in $(seq 1 40); do
79+
FOUND=$(gh api "/repos/${{ github.repository }}/actions/runs?head_sha=${SHA}&status=completed&per_page=50" \
80+
--jq "[.workflow_runs[] | select(.conclusion == \"failure\" and (.name | test(\"${TRACKED}\")))] | length")
81+
if [ "$FOUND" -gt 0 ]; then
82+
echo "Found $FOUND failed tracked workflow(s) after $i minutes"
83+
break
84+
fi
85+
echo "No failures yet, waiting 60s... ($i/40)"
86+
sleep 60
87+
done
88+
5489
- name: Find failed tracked workflows
5590
id: failures
5691
env:
5792
GH_TOKEN: ${{ github.token }}
5893
run: |
5994
TRACKED="PR - vLLM EC2|PR - vLLM SageMaker|PR - SGLang EC2|PR - SGLang SageMaker"
60-
SHA=$(gh api "/repos/${{ github.repository }}/actions/runs/${RUN_ID}" --jq '.head_sha')
95+
96+
# Get HEAD SHA depending on event type
97+
if [ -n "$RUN_ID" ]; then
98+
SHA=$(gh api "/repos/${{ github.repository }}/actions/runs/${RUN_ID}" --jq '.head_sha')
99+
else
100+
SHA="${{ github.event.pull_request.head.sha || github.sha }}"
101+
fi
102+
echo "SHA: $SHA"
61103
62104
FAILED_RUN_IDS=$(gh api "/repos/${{ github.repository }}/actions/runs?head_sha=${SHA}&status=completed&per_page=50" \
63105
--jq "[.workflow_runs[] | select(.conclusion == \"failure\" and (.name | test(\"${TRACKED}\")))] | .[].id" \
@@ -93,16 +135,19 @@ jobs:
93135
if: steps.failures.outputs.has_failures == 'true'
94136
uses: actions/checkout@v5
95137
with:
96-
ref: main
138+
ref: ${{ github.event_name == 'pull_request' && github.head_ref || 'main' }}
97139
fetch-depth: 0
98140
token: ${{ steps.app-token.outputs.token }}
99141

100142
- name: Prepare workspace
101143
if: steps.failures.outputs.has_failures == 'true'
102144
run: |
103145
cp scripts/autocurrency/agent-fix.py /tmp/agent-fix.py
104-
git fetch origin "$HEAD_BRANCH"
105-
git checkout "origin/$HEAD_BRANCH" -B pr-branch
146+
# For pull_request testing, we're already on the right branch
147+
if [ "${{ github.event_name }}" != "pull_request" ]; then
148+
git fetch origin "$HEAD_BRANCH"
149+
git checkout "origin/$HEAD_BRANCH" -B pr-branch
150+
fi
106151
107152
- name: Count previous attempts
108153
if: steps.failures.outputs.has_failures == 'true'
@@ -162,12 +207,16 @@ jobs:
162207
id: fix
163208
env:
164209
AWS_REGION: us-west-2
210+
GH_TOKEN: ${{ steps.app-token.outputs.token }}
165211
run: |
166212
python3 -m pip install boto3 -q
167213
python3 /tmp/agent-fix.py \
168214
--logs-dir /tmp/ci-logs/ \
169215
--framework "$FRAMEWORK" \
170-
--branch "$HEAD_BRANCH"
216+
--branch "$HEAD_BRANCH" \
217+
--run-ids "${{ steps.failures.outputs.failed_runs }}" \
218+
--token "$GH_TOKEN" \
219+
--repo "${{ github.repository }}"
171220
172221
- name: Commit and push
173222
if: steps.failures.outputs.has_failures == 'true' && steps.retry.outputs.max_reached != 'true' && steps.fix.outcome == 'success'

docker/vllm/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ RUN uv pip install --system \
3838
"PyJWT>=2.12.0" \
3939
"model-hosting-container-standards>=0.1.15,<1.0.0" \
4040
"pyasn1>=0.6.3" \
41+
"aiohttp==3.9.0" \
4142
&& uv cache clean
4243

4344
COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py

0 commit comments

Comments
 (0)