fix: write structured outputs to /tmp/alcove-outputs.json in v2 agents (#1141)

decko · web-flow · commit c2e35880f9fd · 2026-04-27T14:32:00.000-03:00
Skiff-init reads outputs from /tmp/alcove-outputs.json, but agents were
only printing JSON to stdout. Use Python json.dump via heredoc to safely
handle quotes, newlines, and special characters in output values.

Verifier and reviewer agents also exit non-zero on failure/rejection
to trigger downstream patch/revision steps.

Assisted-by: Claude Opus 4.6 (1M context)
diff --git a/.alcove/agents/patcher.yml b/.alcove/agents/patcher.yml
@@ -36,11 +36,21 @@ prompt: |
      `pytest pulp_service/pulp_service/tests/functional/`
   8. **Commit** each fix with a descriptive message.
 
-  Output: {
-    "fixes_applied": ["list of fixes applied"],
-    "too_complex": false,
-    "too_complex_reason": ""
-  }
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your results:
+
+    python3 - <<'PYEOF'
+    import json
+    output = {
+        "fixes_applied": ["description of fix 1"],
+        "too_complex": False,
+        "too_complex_reason": ""
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    PYEOF
+
+  Replace the example values with your actual results.
 
 timeout: 480
 enforcement_mode: monitor
diff --git a/.alcove/agents/planner-v2.yml b/.alcove/agents/planner-v2.yml
@@ -60,11 +60,22 @@ prompt: |
 
   Be concrete. Name files, functions, classes. Do not be vague.
 
-  Output: {
-    "plan": "detailed implementation plan text",
-    "verification_commands": ["command1", "command2"],
-    "reviewers_needed": ["django", "security"]
-  }
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your plan (handles quotes and newlines):
+
+    python3 - <<'PYEOF'
+    import json
+    output = {
+        "plan": """Your detailed implementation plan here.
+    Can span multiple lines safely.""",
+        "verification_commands": ["black --check --line-length 100 pulp_service/", "pytest pulp_service/pulp_service/tests/functional/"],
+        "reviewers_needed": ["django", "security"]
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    PYEOF
+
+  Replace the example values with your actual plan, verification commands, and reviewers needed.
 
 timeout: 600
 enforcement_mode: monitor
diff --git a/.alcove/agents/pulp-developer.yml b/.alcove/agents/pulp-developer.yml
@@ -59,7 +59,19 @@ prompt: |
   If fixing CI failures or addressing review feedback, focus only on the
   specific issues listed. Do not refactor or add features beyond what's needed.
 
-  Output: {"summary": "what you implemented and key decisions made"}
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your summary:
+
+    python3 - <<'PYEOF'
+    import json
+    output = {
+        "summary": "What you implemented and key decisions made."
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    PYEOF
+
+  Replace the example value with your actual summary.
 
 timeout: 1800
 enforcement_mode: monitor
diff --git a/.alcove/agents/reviewer-django.yml b/.alcove/agents/reviewer-django.yml
@@ -77,11 +77,24 @@ prompt: |
   trigger a revision cycle. Only approve if there are no critical or
   major issues remaining.
 
-  Output: {
-    "approved": true,
-    "comments": "summary of review",
-    "issues": [{"severity": "critical|major|minor", "file": "path", "line": 42, "issue": "description", "suggestion": "how to fix"}]
-  }
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your review (handles quotes in comments):
+
+    python3 - <<'PYEOF'
+    import json, sys
+    approved = True  # set to False if critical/major issues found
+    output = {
+        "approved": approved,
+        "comments": "Summary of review findings.",
+        "issues": []
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    if not approved:
+        sys.exit(1)
+    PYEOF
+
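+  Replace with your actual review. Each entry in "issues" must be an object like {"severity": "critical|major|minor", "file": "path", "line": 42, "issue": "description", "suggestion": "how to fix"}. Exit non-zero on rejection to trigger revision.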
 
 timeout: 600
 enforcement_mode: monitor
diff --git a/.alcove/agents/reviewer-security.yml b/.alcove/agents/reviewer-security.yml
@@ -83,11 +83,24 @@ prompt: |
   trigger a revision cycle. Only approve if there are no critical or
   major security issues remaining.
 
-  Output: {
-    "approved": true,
-    "comments": "summary of review",
-    "issues": [{"severity": "critical|major|minor", "file": "path", "line": 42, "issue": "description", "suggestion": "how to fix"}]
-  }
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your review (handles quotes in comments):
+
+    python3 - <<'PYEOF'
+    import json, sys
+    approved = True  # set to False if critical/major security issues found
+    output = {
+        "approved": approved,
+        "comments": "Summary of security review findings.",
+        "issues": []
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    if not approved:
+        sys.exit(1)
+    PYEOF
+
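+  Replace with your actual review. Each entry in "issues" must be an object like {"severity": "critical|major|minor", "file": "path", "line": 42, "issue": "description", "suggestion": "how to fix"}. Exit non-zero on rejection to trigger revision.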
 
 timeout: 600
 enforcement_mode: monitor
diff --git a/.alcove/agents/triage.yml b/.alcove/agents/triage.yml
@@ -67,15 +67,25 @@ prompt: |
   Only set `automatable: true` and produce the full output if you are confident
   an agent can implement this end-to-end.
 
-  Output: {
-    "complexity": "small|medium|large",
-    "target_area": "description of affected area",
-    "candidate_files": ["file1.py", "file2.py"],
-    "approach": "how to implement",
-    "risks": "what could go wrong",
-    "automatable": true,
-    "reviewers_needed": ["django", "security"]
-  }
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your findings:
+
+    python3 - <<'PYEOF'
+    import json
+    output = {
+        "complexity": "small",  # or "medium" or "large"
+        "target_area": "description of affected area",
+        "candidate_files": ["file1.py", "file2.py"],
+        "approach": "how to implement",
+        "risks": "what could go wrong",
+        "automatable": True,
+        "reviewers_needed": ["django", "security"]
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    PYEOF
+
+  Replace values with your actual findings.
 
 timeout: 480
 enforcement_mode: monitor
diff --git a/.alcove/agents/verifier.yml b/.alcove/agents/verifier.yml
@@ -62,11 +62,24 @@ prompt: |
   trigger the patch phase for corrections.
   Do not be lenient. A FAIL now is cheaper than a FAIL in CI.
 
-  Output: {
-    "verdict": "pass",
-    "fixes_required": ["list of things to fix"],
-    "code_issues": [{"severity": "critical|major|minor", "file": "path", "line": 42, "issue": "description", "suggested_fix": "what to change"}]
-  }
+  CRITICAL — YOUR VERY LAST ACTION must be writing outputs to the pipeline file.
+  Use Python to safely serialize your verdict:
+
+    python3 - <<'PYEOF'
+    import json, sys
+    verdict = "pass"  # or "fail"
+    output = {
+        "verdict": verdict,
+        "fixes_required": [],
+        "code_issues": []
+    }
+    with open("/tmp/alcove-outputs.json", "w") as f:
+        json.dump(output, f)
+    if verdict == "fail":
+        sys.exit(1)
+    PYEOF
+
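+  Replace with your actual findings. "fixes_required" is a list of things to fix; each entry in "code_issues" must be an object like {"severity": "critical|major|minor", "file": "path", "line": 42, "issue": "description", "suggested_fix": "what to change"}. Exit non-zero on fail to trigger patching.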
 
 timeout: 600
 enforcement_mode: monitor