Skip to content

Commit 7a7d51b

Browse files
committed
fix(flows): 1-retry wrapper for agent buy prompt in flow-13/14
The agent step at flow-13/14 step 46 sends a long single-shot prompt to the obol-agent (qwen36-fast, ~4B params) telling it to invoke buy.py via its terminal tool. qwen36-fast occasionally narrates a fabricated failure (HTTP 404 path-doubling, eRPC DNS error, etc.) instead of actually running the bash command. When that happens, no PurchaseRequest is created and step 47 fails with "PurchaseRequest CR not ready" — even though buy.py was never invoked. This commit factors the prompt into agent_buy_with_retry() in lib-dual-stack.sh and replaces both flow-13 and flow-14 step 46 with a single call. The wrapper: 1. Sends the prompt as before. 2. Polls bob's hermes-obol-agent namespace for the alice-obol PR for up to 60s. 3. If the PR doesn't appear, prints a LOUD warning box flagging this as documented agent unreliability and re-sends the prompt once. 4. If still absent, step 47 fails as before. Net effect: probabilistic single-attempt FAILs become reliable PASSes on real flake while still failing loudly on a real regression. The WARN box on retry is the audit trail — if it fires regularly, the smoke needs a more reliable LLM (qwen36-deep / qwen36-35b-heretic) or a non-agent fallback. Refers: plans/inference-v1337-followup-20260514.md (the v1337 buy attempt-5 SIGKILL false-positive was the same flake class) Saves ~50 lines of duplication between the two flow scripts.
1 parent 7850332 commit 7a7d51b

3 files changed

Lines changed: 81 additions & 48 deletions

File tree

flows/flow-13-dual-stack-obol.sh

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -899,31 +899,7 @@ pass "Agent discovery prompt issued (success will be confirmed by buy + Purchase
899899
# ═════════════════════════════════════════════════════════════════
900900

901901
step "Bob's agent: buy 5 OBOL Permit2 auths from Alice"
902-
buy_response=$(curl -sf --max-time 300 \
903-
-X POST "http://localhost:${BOB_AGENT_PORT}/v1/chat/completions" \
904-
-H "Authorization: Bearer $BOB_TOKEN" \
905-
-H "Content-Type: application/json" \
906-
-d "{
907-
\"model\": \"$BOB_AGENT_RUNTIME-agent\",
908-
\"messages\": [{
909-
\"role\": \"user\",
910-
\"content\": \"Use the buy-x402 skill and your terminal tool. Run exactly once: ERPC_URL=http://erpc.erpc.svc.cluster.local/rpc ERPC_NETWORK=base-sepolia python3 $BOB_OBOL_SKILLS_DIR/buy-x402/scripts/buy.py buy alice-obol --endpoint $TUNNEL_URL/services/alice-obol-inference/v1/chat/completions --model $OBOL_LLM_MODEL --count 5\"
911-
}],
912-
\"max_tokens\": 4000,
913-
\"stream\": false
914-
}" 2>&1 || true)
915-
buy_content=$(extract_assistant_content "$buy_response" 2>/dev/null || true)
916-
echo "${buy_content:0:500}"
917-
# Don't grep buy_content for natural-language confirmation; structural success
918-
# is the PurchaseRequest CR Ready=True poll below.
919-
if [ -z "$(printf '%s' "$buy_content" | tr -d '[:space:]')" ]; then
920-
echo " ! Agent returned no final assistant text; confirming purchase via PurchaseRequest CR"
921-
fi
922-
if printf '%s' "$buy_content" | agent_response_refused; then
923-
fail "Agent refused to run buy.py: ${buy_content:0:500}"
924-
emit_metrics; exit 1
925-
fi
926-
pass "Agent buy prompt issued (success will be confirmed by PurchaseRequest CR)"
902+
agent_buy_with_retry
927903

928904
# ═════════════════════════════════════════════════════════════════
929905
# 36-39. PR Ready / LiteLLM rollout / sidecar auths / paid call

flows/flow-14-live-obol-base-sepolia.sh

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -953,29 +953,7 @@ pass "Agent discovery prompt issued (success will be confirmed by buy + Purchase
953953
# ═════════════════════════════════════════════════════════════════
954954

955955
step "Bob's agent: buy 5 OBOL Permit2 auths from Alice"
956-
buy_response=$(curl -sf --max-time 300 \
957-
-X POST "http://localhost:${BOB_AGENT_PORT}/v1/chat/completions" \
958-
-H "Authorization: Bearer $BOB_TOKEN" \
959-
-H "Content-Type: application/json" \
960-
-d "{
961-
\"model\": \"$BOB_AGENT_RUNTIME-agent\",
962-
\"messages\": [{
963-
\"role\": \"user\",
964-
\"content\": \"Use the buy-x402 skill and your terminal tool. Run exactly once: ERPC_URL=http://erpc.erpc.svc.cluster.local/rpc ERPC_NETWORK=base-sepolia python3 $BOB_OBOL_SKILLS_DIR/buy-x402/scripts/buy.py buy alice-obol --endpoint $TUNNEL_URL/services/alice-obol-inference/v1/chat/completions --model $OBOL_LLM_MODEL --count 5\"
965-
}],
966-
\"max_tokens\": 4000,
967-
\"stream\": false
968-
}" 2>&1 || true)
969-
buy_content=$(extract_assistant_content "$buy_response" 2>/dev/null || true)
970-
echo "${buy_content:0:500}"
971-
if [ -z "$(printf '%s' "$buy_content" | tr -d '[:space:]')" ]; then
972-
echo " ! Agent returned no final assistant text; confirming purchase via PurchaseRequest CR"
973-
fi
974-
if printf '%s' "$buy_content" | agent_response_refused; then
975-
fail "Agent refused to run buy.py: ${buy_content:0:500}"
976-
emit_metrics; exit 1
977-
fi
978-
pass "Agent buy prompt issued (success will be confirmed by PurchaseRequest CR)"
956+
agent_buy_with_retry
979957

980958
# ═════════════════════════════════════════════════════════════════
981959
# 31-34. PR Ready / LiteLLM rollout / sidecar auths / paid call

flows/lib-dual-stack.sh

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,85 @@ except Exception as e:
347347
" 2>&1 || true
348348
}
349349

350+
# Send the long single-shot buy prompt to Bob's agent. The prompt expands
351+
# against the caller's environment (BOB_AGENT_PORT, BOB_TOKEN,
352+
# BOB_AGENT_RUNTIME, BOB_OBOL_SKILLS_DIR, TUNNEL_URL, OBOL_LLM_MODEL).
353+
_agent_buy_send_prompt() {
354+
curl -sf --max-time 300 \
355+
-X POST "http://localhost:${BOB_AGENT_PORT}/v1/chat/completions" \
356+
-H "Authorization: Bearer $BOB_TOKEN" \
357+
-H "Content-Type: application/json" \
358+
-d "{
359+
\"model\": \"$BOB_AGENT_RUNTIME-agent\",
360+
\"messages\": [{
361+
\"role\": \"user\",
362+
\"content\": \"Use the buy-x402 skill and your terminal tool. Run exactly once: ERPC_URL=http://erpc.erpc.svc.cluster.local/rpc ERPC_NETWORK=base-sepolia python3 $BOB_OBOL_SKILLS_DIR/buy-x402/scripts/buy.py buy alice-obol --endpoint $TUNNEL_URL/services/alice-obol-inference/v1/chat/completions --model $OBOL_LLM_MODEL --count 5\"
363+
}],
364+
\"max_tokens\": 4000,
365+
\"stream\": false
366+
}" 2>&1 || true
367+
}
368+
369+
_agent_buy_pr_exists() {
370+
bob kubectl get purchaserequests.obol.org -n "$BOB_AGENT_NS" alice-obol \
371+
-o name 2>/dev/null | grep -q .
372+
}
373+
374+
# 1-retry wrapper for the agent buy prompt at flow-13/14 step 46. qwen36-fast
375+
# (4B-class) occasionally narrates a fabricated failure on the long single-shot
376+
# buy prompt instead of actually invoking the bash tool. When that happens, no
377+
# PurchaseRequest is created and step 47 fails with "PurchaseRequest CR not
378+
# ready" — even though buy.py was never invoked. See
379+
# plans/inference-v1337-followup-20260514.md.
380+
#
381+
# Strategy: poll for the PR for up to 60s after the first prompt; if absent,
382+
# print a LOUD warning flagging this as agent unreliability and re-send the
383+
# prompt once. If still absent after the retry, step 47 fails as before.
384+
agent_buy_with_retry() {
385+
local response content retried=0 i
386+
387+
response=$(_agent_buy_send_prompt)
388+
content=$(extract_assistant_content "$response" 2>/dev/null || true)
389+
echo "${content:0:500}"
390+
if [ -z "$(printf '%s' "$content" | tr -d '[:space:]')" ]; then
391+
echo " ! Agent returned no final assistant text; confirming purchase via PurchaseRequest CR"
392+
fi
393+
if printf '%s' "$content" | agent_response_refused; then
394+
fail "Agent refused to run buy.py: ${content:0:500}"
395+
emit_metrics; exit 1
396+
fi
397+
398+
# Wait up to 60s for the controller to reconcile the PR. Healthy runs see
399+
# it within ~5s; the long ceiling absorbs cluster-cold-start jitter.
400+
for i in $(seq 1 12); do
401+
_agent_buy_pr_exists && break
402+
sleep 5
403+
done
404+
405+
if ! _agent_buy_pr_exists; then
406+
echo ""
407+
echo " ╔════════════════════════════════════════════════════════════════════════╗"
408+
echo " ║ WARN: agent did NOT create a PurchaseRequest after 60s. ║"
409+
echo " ║ Documented qwen36-fast (4B) flake — agent narrates a fabricated ║"
410+
echo " ║ failure instead of invoking buy.py. Re-prompting ONCE. ║"
411+
echo " ║ If this fires regularly, switch to a more reliable LLM (qwen36-deep ║"
412+
echo " ║ / qwen36-35b-heretic) or add a non-agent fallback path. ║"
413+
echo " ║ Ref: plans/inference-v1337-followup-20260514.md ║"
414+
echo " ╚════════════════════════════════════════════════════════════════════════╝"
415+
echo ""
416+
retried=1
417+
response=$(_agent_buy_send_prompt)
418+
content=$(extract_assistant_content "$response" 2>/dev/null || true)
419+
echo " RETRY response: ${content:0:500}"
420+
if printf '%s' "$content" | agent_response_refused; then
421+
fail "Agent refused to run buy.py on retry: ${content:0:500}"
422+
emit_metrics; exit 1
423+
fi
424+
fi
425+
426+
pass "Agent buy prompt issued (retry=$retried; success will be confirmed by PurchaseRequest CR)"
427+
}
428+
350429
extract_assistant_content() {
351430
DUAL_STACK_RESPONSE="$1" python3 - <<'PY'
352431
import json

0 commit comments

Comments
 (0)