fix: end-to-end agent testing on this PR

Eren-Jeager123 · Eren-Jeager123 · commit 3d1d142f0f66 · 2026-05-08T07:28:27.000Z
- Fix system prompt: use concrete example instead of <filepath> placeholder
  (LLM was interpreting angle brackets literally)
- Fix filepath parsing: strip leading <tag> prefixes and stray angle brackets from parsed paths
- Remove hardcoded test values, use PR's own CI failures
- Add aiohttp==3.9.0 to Dockerfile to trigger CVE scan failure
- Skip branch validation for pull_request events

TODO after validation: remove pull_request and workflow_dispatch triggers, remove aiohttp, restore branch validation
diff --git a/.github/workflows/agent-currency-fix.yml b/.github/workflows/agent-currency-fix.yml
@@ -8,6 +8,21 @@ on:
   workflow_run:
     workflows: ["Merge Conditions"]
     types: [completed]
+  # Manual trigger for testing (remove after validation)
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: "PR branch (e.g., auto-update/vllm-99.99.99)"
+        required: true
+      run_id:
+        description: "Failed workflow run ID"
+        required: true
+  # PR trigger for testing (remove after validation)
+  pull_request:
+    branches: [main]
+    paths:
+      - "scripts/autocurrency/agent-fix.py"
+      - ".github/workflows/agent-currency-fix.yml"
 
 permissions:
   contents: read
@@ -23,19 +38,27 @@ env:
 jobs:
   fix-agent:
     if: >-
-      github.event.workflow_run.conclusion == 'failure' &&
-      startsWith(github.event.workflow_run.head_branch, 'auto-update/')
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'pull_request' || (
+        github.event.workflow_run.conclusion == 'failure' &&
+        startsWith(github.event.workflow_run.head_branch, 'auto-update/')
+      )
     runs-on:
       - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
         fleet:default-runner
         buildspec-override:true
     env:
-      HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
-      RUN_ID: ${{ github.event.workflow_run.id }}
-      RUN_URL: ${{ github.event.workflow_run.html_url }}
+      HEAD_BRANCH: ${{ github.event.workflow_run.head_branch || inputs.branch || github.head_ref }}
+      RUN_ID: ${{ github.event.workflow_run.id || inputs.run_id || '' }}
+      RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
     steps:
       - name: Validate branch name
         run: |
+          # Skip validation for pull_request testing
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            echo "Skipping branch validation for PR testing"
+            exit 0
+          fi
           if [[ ! "$HEAD_BRANCH" =~ ^auto-update/[a-z]+-[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
             echo "::error::Branch '${HEAD_BRANCH}' does not match expected pattern."
             exit 1
@@ -57,7 +80,14 @@ jobs:
           GH_TOKEN: ${{ github.token }}
         run: |
           TRACKED="PR - vLLM EC2|PR - vLLM SageMaker|PR - SGLang EC2|PR - SGLang SageMaker"
-          SHA=$(gh api "/repos/${{ github.repository }}/actions/runs/${RUN_ID}" --jq '.head_sha')
+
+          # Get HEAD SHA depending on event type
+          if [ -n "$RUN_ID" ]; then
+            SHA=$(gh api "/repos/${{ github.repository }}/actions/runs/${RUN_ID}" --jq '.head_sha')
+          else
+            SHA="${{ github.event.pull_request.head.sha || github.sha }}"
+          fi
+          echo "SHA: $SHA"
 
           FAILED_RUN_IDS=$(gh api "/repos/${{ github.repository }}/actions/runs?head_sha=${SHA}&status=completed&per_page=50" \
             --jq "[.workflow_runs[] | select(.conclusion == \"failure\" and (.name | test(\"${TRACKED}\")))] | .[].id" \
@@ -93,16 +123,19 @@ jobs:
         if: steps.failures.outputs.has_failures == 'true'
         uses: actions/checkout@v5
         with:
-          ref: main
+          ref: ${{ github.event_name == 'pull_request' && github.head_ref || 'main' }}
           fetch-depth: 0
           token: ${{ steps.app-token.outputs.token }}
 
       - name: Prepare workspace
         if: steps.failures.outputs.has_failures == 'true'
         run: |
           cp scripts/autocurrency/agent-fix.py /tmp/agent-fix.py
-          git fetch origin "$HEAD_BRANCH"
-          git checkout "origin/$HEAD_BRANCH" -B pr-branch
+          # For pull_request testing, we're already on the right branch
+          if [ "${{ github.event_name }}" != "pull_request" ]; then
+            git fetch origin "$HEAD_BRANCH"
+            git checkout "origin/$HEAD_BRANCH" -B pr-branch
+          fi
 
       - name: Count previous attempts
         if: steps.failures.outputs.has_failures == 'true'
diff --git a/docker/vllm/Dockerfile b/docker/vllm/Dockerfile
@@ -38,6 +38,7 @@ RUN uv pip install --system \
   "PyJWT>=2.12.0" \
   "model-hosting-container-standards>=0.1.15,<1.0.0" \
   "pyasn1>=0.6.3" \
+  "aiohttp==3.9.0" \
   && uv cache clean
 
 COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py
diff --git a/scripts/autocurrency/agent-fix.py b/scripts/autocurrency/agent-fix.py
@@ -43,19 +43,21 @@
 If the failure is TRANSIENT (capacity, timeout, runner crash), respond with exactly:
 TRANSIENT: <brief reason>
 
-Otherwise, respond with search/replace blocks:
+Otherwise, respond with search/replace blocks. Use this EXACT format:
 
-<filepath>
+path/to/file.ext
 <<<<<<< SEARCH
-<exact text to find in the file>
+exact text to find in the file
 =======
-<replacement text>
+replacement text
 >>>>>>> REPLACE
 
+IMPORTANT: Write the file path as plain text (e.g., docker/vllm/Dockerfile). Do NOT wrap it in angle brackets, backticks, or any other formatting.
+
 Include 1-2 surrounding lines in SEARCH for unique anchoring.
 For JSON arrays (allowlists), SEARCH the last few lines and REPLACE with those lines plus the new entry.
 
-End with: DESCRIPTION: <one-line commit message>"""
+End with: DESCRIPTION: one-line commit message"""
 
 
 def parse_args():
@@ -193,8 +195,9 @@ def parse_blocks(response: str) -> list:
     blocks = []
     for m in SEARCH_REPLACE_PATTERN.finditer(response):
         filepath = m.group(1).strip().strip("`").strip()
-        # Strip common LLM artifacts: <filepath>, **filepath**, `filepath`
-        filepath = re.sub(r"^<\w+>|<\/\w+>$", "", filepath).strip()
+        # Strip all common LLM artifacts: <filepath>path, <path>, **path**, `path`
+        filepath = re.sub(r"^<[^>]*>", "", filepath).strip()  # strips <filepath>, <file>, etc.
+        filepath = re.sub(r"^<|>$", "", filepath).strip()     # strips bare < >
         filepath = filepath.strip("*").strip("`").strip()
         blocks.append({"path": filepath, "search": m.group(2), "replace": m.group(3)})
     return blocks
@@ -336,7 +339,8 @@ def main():
 
         blocks = parse_blocks(response)
         if blocks:
-            print(f"Parsed {len(blocks)} block(s): {[b["path"] for b in blocks]}")
+            paths = [b["path"] for b in blocks]
+            print(f"Parsed {len(blocks)} block(s): {paths}")
         if not blocks:
             retry_context = (
                 f"Could not parse search/replace blocks from response.\n"