feat(a2a): unify on Pro 2.5 / us-central1 + appspot runtime SA (W9.2)

simonraj79 · simonraj79 · commit fd3c68c39c89 · 2026-05-01T13:57:39.000+08:00
Fixes the 2026-04-30 Telegram failure where the A2A orchestrator returned a brief whose every consult had failed with 403 PERMISSION_DENIED. Root cause: every A2A engine was deployed with the default Reasoning Engine Service Agent, which lacks aiplatform.user — so engine→engine calls 403'd. Bot-as-caller worked (appspot SA on Cloud Run), engine-as-caller didn't (different runtime identity). Bundles three coupled changes: deploy_a2a.py - New --service_account flag; defaults to APPSPOT_SA constant (gcp-cits-ccat-poc-d4d2@appspot.gserviceaccount.com — the only enabled SA on this project with roles/editor). - --region default flipped from asia-southeast1 to us-central1. - Lifted the env-var auto-forward block to a module-level AUTO_FORWARD_ENV_VARS constant. Added LEVEL_2_*, LEVEL_2B_*, LEVEL_3_*, LEVEL_4_* + LEVEL_REGION (orchestrator routes all 5). - truststore.inject_into_ssl() at module top so deploys work on NTU's TLS-inspecting network where certifi's bundle isn't trusted. Model bumps Flash 2.5 -> Pro 2.5 across every A2A sub-agent - level_1_agent: root. - level_2_agent: classify, quick_answerer, task_planner, researcher, schedule_writer. - level_2b_agent: classify, greet_user, bug_handler, billing_handler, feature_handler. - level_3_agent: search_agent, analyst_agent, writer_agent, root. - level_4_agent: data_fetcher, analyst (code-executor — gotcha #21 override, verified locally not to hang), report_writer, agent_creator (BuiltInPlanner block removed, Pro has native thinking on by default), root. - a2a_orchestrator: chart_agent (code-executor — gotcha #21 override), writer_agent. Root was already Pro 2.5. remote_tools.py defaults - a2a_orchestrator/remote_tools.py: _LEVEL_REGION default asia-southeast1 -> us-central1. - level_4_agent/remote_tools.py: _LEVEL_1_REGION default asia-southeast1 -> us-central1. scripts/local_smoke.py - New file. InMemoryRunner-based local probe for any A2A agent. 
- 90s hang threshold catches the Pro+BuiltInCodeExecutor signature from CLAUDE.md gotcha #21. All 6 W9.2 probes ran < 60s; gotcha did NOT reproduce on this ADK 2.0 path. 3.x Gemini models (gemini-3.x-pro-preview, gemini-3.x-flash-preview, gemini-3.1-flash-lite-preview) verified gated on this project as of 2026-05-01 — return 404 NOT_FOUND for generateContent in both us-central1 and asia-southeast1. Pinned to 2.5 family until per-project preview access is granted. Resource IDs are unchanged at this commit. The 6 engines themselves are redeployed in a follow-up step; the W9.2 plan documents the delete-then-redeploy sequence at new features/17-a2a-orchestrator-403-fix.md in the swarm repo.
diff --git a/a2a_orchestrator/agent.py b/a2a_orchestrator/agent.py
@@ -149,7 +149,10 @@ class WriterInput(BaseModel):
     # Flash + BuiltInPlanner per Level 4 line ~252 ("the proven combination"
     # for Flash on tool-heavy code-execution tasks). Pro on a
     # BuiltInCodeExecutor leaf can hang 6+ min under AFC — Level 4 gotcha #21.
-    model="gemini-2.5-flash",
+    # W9.2 (Simon 2026-05-01): override the gotcha — try Pro 2.5 here. Local
+    # smoke test in plan §5.4 catches the hang signature before deploy; if
+    # it reproduces, revert this line to "gemini-2.5-flash".
+    model="gemini-2.5-pro",
     planner=BuiltInPlanner(
         thinking_config=types.ThinkingConfig(include_thoughts=True),
     ),
@@ -231,7 +234,8 @@ class WriterInput(BaseModel):
 
 writer_agent = Agent(
     name="writer_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — all A2A sub-agents on Pro per Simon 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Synthesises consulted findings (and optionally a chart "
         "description) into a Markdown-formatted report. Final node — "
diff --git a/a2a_orchestrator/remote_tools.py b/a2a_orchestrator/remote_tools.py
@@ -25,8 +25,8 @@
 
 logger = logging.getLogger(__name__)
 
-# All five Level engines live in asia-southeast1.
-_LEVEL_REGION = os.environ.get("LEVEL_REGION", "asia-southeast1")
+# All five Level engines live in us-central1 (W9.2 — Pro 2.5 unification).
+_LEVEL_REGION = os.environ.get("LEVEL_REGION", "us-central1")
 _PROJECT_NUMBER = os.environ.get("LEVEL_PROJECT_NUMBER", "888142536377")
 
 # Defaults are the post-Phase-A engine IDs (verified 2026-04-28). Override
diff --git a/deploy_a2a.py b/deploy_a2a.py
@@ -27,6 +27,19 @@
 """
 from __future__ import annotations
 
+# truststore must be injected BEFORE any HTTPS-using import (vertexai,
+# google-auth, requests, etc.) so all SSL goes through the Windows trust
+# store. Required on NTU's network where TLS inspection injects a
+# corporate root CA that's not in certifi's bundle.
+try:
+    import truststore  # type: ignore
+    truststore.inject_into_ssl()
+except ImportError:
+    # truststore is optional — required only when running against a
+    # network that does TLS inspection (e.g., NTU). On other networks
+    # certifi handles validation fine.
+    pass
+
 import argparse
 import importlib
 import os
@@ -43,6 +56,27 @@
 from vertexai.preview.reasoning_engines.templates.a2a import create_agent_card
 
 PROJECT = "gcp-cits-ccat-poc-d4d2"
+APPSPOT_SA = f"{PROJECT}@appspot.gserviceaccount.com"
+
+# Env vars auto-forwarded into the deployed engine's runtime container if
+# present in the deploy shell. Lifted from main()'s body to a module
+# constant so tests can import + assert against it (W9.2 §6.3.5).
+AUTO_FORWARD_ENV_VARS = (
+    # gahmen-mcp toolset (level_4_agent's data_fetcher_agent reads at import).
+    "SMITHERY_API_KEY",
+    "SMITHERY_GAHMEN_URL",
+    # Per-Level A2A peer routing (orchestrator + level_4 consume these).
+    # All Levels live in us-central1 post-W9.2 (was asia-southeast1).
+    "LEVEL_1_A2A_ENGINE_ID", "LEVEL_1_A2A_REGION",
+    "LEVEL_2_A2A_ENGINE_ID", "LEVEL_2_A2A_REGION",
+    "LEVEL_2B_A2A_ENGINE_ID", "LEVEL_2B_A2A_REGION",
+    "LEVEL_3_A2A_ENGINE_ID", "LEVEL_3_A2A_REGION",
+    "LEVEL_4_A2A_ENGINE_ID", "LEVEL_4_A2A_REGION",
+    # Generic project / region overrides (orchestrator's remote_tools reads
+    # LEVEL_REGION as the cross-Level default).
+    "LEVEL_PROJECT_NUMBER",
+    "LEVEL_REGION",
+)
 
 
 def _executor_builder(root_agent):
@@ -71,7 +105,19 @@ def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("module", help="Agent package, e.g. level_1_agent")
     parser.add_argument("--display", required=True, help='Display name, e.g. "Level 1 (A2A)"')
-    parser.add_argument("--region", default="asia-southeast1")
+    # W9.2 default: us-central1 (Pro 2.5 lives here; was asia-southeast1).
+    parser.add_argument("--region", default="us-central1")
+    parser.add_argument(
+        "--service_account",
+        default=APPSPOT_SA,
+        help=(
+            "Runtime SA for the deployed engine. Default: appspot SA "
+            "(the only enabled SA on this project with roles/editor). "
+            "Without this, Vertex assigns the default Reasoning Engine "
+            "Service Agent — which lacks aiplatform.user, so any "
+            "engine→engine agent_engines.get() call 403s. See W9.2 plan §3."
+        ),
+    )
     parser.add_argument(
         "--description",
         default="ADK agent exposed via A2A on Vertex Agent Engine.",
@@ -160,40 +206,27 @@ def main() -> None:
         "google-adk[a2a]>=2.0.0b1,<3.0.0",
     ]
 
-    # Auto-forward env vars that the agent might need at runtime.
-    #   SMITHERY_API_KEY     — gahmen-mcp toolset gate (level_4_agent's
-    #                          data_fetcher_agent reads it at import time).
-    #   SMITHERY_GAHMEN_URL  — override for the Smithery server URL.
-    #   LEVEL_1_A2A_*        — Level 1 peer-A2A target for level_4_agent's
-    #                          consult_level_1 tool. The defaults baked
-    #                          into remote_tools.py work for the canonical
-    #                          asia-southeast1 Phase 7 deploy; override
-    #                          via these env vars if Level 1 has been
-    #                          redeployed to a new ID/region.
-    # If the deploy shell has any of these set, bake them into the
-    # deployed engine's container. Anything not set falls back to the
-    # in-code defaults (or the agent runs without that capability).
+    # Auto-forward env vars that the agent might need at runtime. Source of
+    # truth is AUTO_FORWARD_ENV_VARS at module top — single place to add new
+    # ones. Anything not set in the deploy shell falls back to the in-code
+    # defaults (or the agent runs without that capability).
     env_vars: dict[str, str] = {}
-    for name in (
-        "SMITHERY_API_KEY",
-        "SMITHERY_GAHMEN_URL",
-        "LEVEL_1_A2A_ENGINE_ID",
-        "LEVEL_1_A2A_REGION",
-        "LEVEL_1_A2A_PROJECT_NUMBER",
-    ):
+    for name in AUTO_FORWARD_ENV_VARS:
         value = os.environ.get(name)
         if value:
             env_vars[name] = value
     if env_vars:
         print(f"Forwarding {len(env_vars)} env var(s) to engine: {sorted(env_vars)}")
 
     print(f"Deploying {args.module} to {args.region} as {args.display!r} ...")
+    print(f"Runtime SA: {args.service_account}")
     remote = agent_engines.create(
         agent_engine=a2a_app,
         requirements=requirements,
         extra_packages=[args.module],   # uploads e.g. ./level_1_agent
         display_name=args.display,
         env_vars=env_vars or None,
+        service_account=args.service_account,
     )
     print(f"\n✅ Deployed: {remote.resource_name}")
     print(
diff --git a/level_1_agent/agent.py b/level_1_agent/agent.py
@@ -49,14 +49,9 @@
 
 root_agent = Agent(
     name="level_1_agent",
-    # Was `gemini-3.1-flash-lite-preview` (preview alias resolves only via
-    # GOOGLE_CLOUD_LOCATION=global). Switched to `gemini-2.5-flash` because
-    # Vertex Agent Engine deploys force-overwrite the location to the
-    # engine's region (templates/a2a.py:241-245) and the preview alias 404s
-    # in regional endpoints like asia-southeast1. `gemini-2.5-flash` works
-    # in both `global` and `asia-southeast1`, so the local `adk run` path
-    # is unaffected. See DEPLOYMENT_NOTES.md "Phase 7" for context.
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated on this project per
+    # audit 2026-05-01). Was Flash 2.5 in asia-southeast1 (W9 Phase A).
+    model="gemini-2.5-pro",
     description=(
         "A connected problem-solver that uses Google Search to answer"
         " questions requiring real-time information."
diff --git a/level_2_agent/agent.py b/level_2_agent/agent.py
@@ -183,7 +183,8 @@ def anchor_today(ctx: Context):
 
 classify = Agent(
     name="classify",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     instruction=(
         "Classify the user's input as 'quick' or 'plan'."
         "\n\n  QUICK: greetings ('hi', 'hello', 'what can you do?',"
@@ -211,7 +212,8 @@ def anchor_today(ctx: Context):
 #     only when current information is needed.
 quick_answerer = Agent(
     name="quick_answerer",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Greets the user and answers single-step factual questions"
         " without going through the full planning pipeline."
@@ -251,7 +253,8 @@ def anchor_today(ctx: Context):
 
 task_planner = Agent(
     name="task_planner",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     instruction=(
         "Today is {today_human?} ({today?}).\n\n"
         'The user request: "{request}"\n\n'
@@ -272,7 +275,8 @@ def anchor_today(ctx: Context):
 # `fan_out_research` below.
 researcher = Agent(
     name="researcher",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     instruction=(
         "Use google_search to gather a 2-3 sentence study brief on the"
         " topic in the user message. Focus on: key concepts to review,"
@@ -285,7 +289,8 @@ def anchor_today(ctx: Context):
 
 schedule_writer = Agent(
     name="schedule_writer",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     instruction=(
         "Today is {today_human?} ({today?}). Produce a markdown"
         " timetable for the user.\n\n"
diff --git a/level_2b_agent/agent.py b/level_2b_agent/agent.py
@@ -163,7 +163,8 @@ def route_input(node_input: dict):
 
 classify = Agent(
     name="classify",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Classifies an inbound support message into one of four"
         " categories. Pure routing logic — does not respond to the"
@@ -196,7 +197,8 @@ def route_input(node_input: dict):
 
 greet_user = Agent(
     name="greet_user",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Handles greeting / capability-question routes by introducing"
         " the agent and suggesting example queries."
@@ -224,7 +226,8 @@ def route_input(node_input: dict):
 
 bug_handler = Agent(
     name="bug_handler",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Handles bug reports — captures repro steps, severity, and"
         " recent changes."
@@ -250,7 +253,8 @@ def route_input(node_input: dict):
 
 billing_handler = Agent(
     name="billing_handler",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Handles billing and pricing questions for the (mock) product"
         " plans."
@@ -277,7 +281,8 @@ def route_input(node_input: dict):
 
 feature_handler = Agent(
     name="feature_handler",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Handles feature requests — captures use case, logs to the"
         " product backlog, sets expectations."
diff --git a/level_3_agent/agent.py b/level_3_agent/agent.py
@@ -213,7 +213,8 @@ class Brief(BaseModel):
 
 search_agent = Agent(
     name="search_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Searches the web for one focused sub-question and returns a"
         " plain-text finding with source domains cited inline."
@@ -248,7 +249,8 @@ class Brief(BaseModel):
 
 analyst_agent = Agent(
     name="analyst_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Reviews accumulated search findings for patterns, contradictions,"
         " and gaps. Pure LLM reasoning — no tools."
@@ -274,7 +276,8 @@ class Brief(BaseModel):
 
 writer_agent = Agent(
     name="writer_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Synthesises findings + analysis into the final structured Brief"
         " for the user. Pure LLM reasoning — no tools."
@@ -305,7 +308,8 @@ class Brief(BaseModel):
 
 root_agent = Agent(
     name="level_3_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — 3.x preview gated per audit 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Research coordinator that delegates to search, analyst, and"
         " writer specialists and returns a structured brief. Routing"
diff --git a/level_4_agent/agent.py b/level_4_agent/agent.py
@@ -353,7 +353,8 @@ class Brief(BaseModel):
 
 data_fetcher_agent = Agent(
     name="data_fetcher_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — orchestration-shaped: routes between MCP/A2A).
+    model="gemini-2.5-pro",
     description=(
         "Fetches public business data via two inter-system protocols:"
         " A2A peer consultation (Level 1 over on_message_send) for"
@@ -379,13 +380,17 @@ class Brief(BaseModel):
 
 analyst_agent = Agent(
     name="analyst_agent",
+    # W9.2 (Simon 2026-05-01): override gotcha #21 — Pro 2.5 on
+    # BuiltInCodeExecutor. Local smoke test in plan §5.4 catches the
+    # hang signature before any cloud deploy; if reproducible, revert
+    # this to "gemini-2.5-flash". Original gotcha:
     # Flash + BuiltInPlanner per AGENTS.md gotcha #21: Pro on a
     # BuiltInCodeExecutor leaf can hang 6+ min under AFC. The planner
     # turns Gemini's native thinking on for Flash so the model plans
     # cell layout before writing code — directly addressing gotcha #20
     # ("a later code cell closes/re-saves the figure → blank Version 1
     # overwrite hides the real chart at Version 0").
-    model="gemini-2.5-flash",
+    model="gemini-2.5-pro",
     planner=BuiltInPlanner(
         thinking_config=types.ThinkingConfig(include_thoughts=True)
     ),
@@ -449,7 +454,8 @@ class Brief(BaseModel):
 
 report_writer_agent = Agent(
     name="report_writer_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — all A2A sub-agents on Pro per Simon 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Formats accumulated findings into a structured BI brief."
         " Output is the final answer — do not re-paraphrase."
@@ -510,19 +516,12 @@ class Brief(BaseModel):
     # output, which directly addresses the conflated-decision empty
     # STOP. Trade-off: ~30-50% more latency on creator turns vs. Pro,
     # but creator runs rarely so absolute cost stays low.
-    model="gemini-2.5-flash",
-    # Native thinking on Flash. Same shape as analyst_agent — confirmed
-    # working pattern. Don't replace with PlanReActPlanner: that's a
-    # prompt-level text scaffold (forces the LLM to TYPE planning
-    # sections), whereas BuiltInPlanner activates Gemini's native
-    # thinking compute (separate token budget, runs BEFORE tool-choice
-    # is committed). For multi-turn HITL with chained tool calls,
-    # native thinking is the right primitive. Only one `planner` field
-    # is supported per LlmAgent; the two planners are mutually
-    # exclusive.
-    planner=BuiltInPlanner(
-        thinking_config=types.ThinkingConfig(include_thoughts=True)
-    ),
+    # W9.2 (Simon 2026-05-01): flipped Flash → Pro 2.5. The downgrade
+    # to Flash + BuiltInPlanner was specifically because asia-southeast1
+    # didn't serve Pro; us-central1 does, so the original Pro choice is
+    # restored. Native thinking is on by default on Pro for compositional
+    # function calls — explicit BuiltInPlanner block removed.
+    model="gemini-2.5-pro",
     description=(
         "Synthesises a new specialist agent when the BI team lacks a"
         " capability. Use when the user's request cannot be served by"
@@ -623,7 +622,8 @@ def _rehydrate_runtime_tools(callback_context: CallbackContext):
 
 root_agent = Agent(
     name="level_4_agent",
-    model="gemini-2.5-flash",
+    # us-central1 + Pro 2.5 (W9.2 — orchestrator role, all A2A on Pro per Simon 2026-05-01).
+    model="gemini-2.5-pro",
     description=(
         "Self-evolving Business Intelligence coordinator. Routes"
         " analytical business questions to a fixed team (data_fetcher,"
diff --git a/level_4_agent/remote_tools.py b/level_4_agent/remote_tools.py
@@ -41,7 +41,8 @@
 # Where Level 1's A2A engine lives. Defaults to the resource ID minted
 # by Phase 7 (the level_1_agent A2A redeploy with `gemini-2.5-flash`).
 # Override via env var for re-deploys without rebuilding Level 4.
-_LEVEL_1_REGION = os.environ.get("LEVEL_1_A2A_REGION", "asia-southeast1")
+# W9.2 — Level 1 lives in us-central1 (was asia-southeast1).
+_LEVEL_1_REGION = os.environ.get("LEVEL_1_A2A_REGION", "us-central1")
 _LEVEL_1_RESOURCE_ID = os.environ.get(
     "LEVEL_1_A2A_ENGINE_ID",
     "2134899737420103680",
diff --git a/scripts/local_smoke.py b/scripts/local_smoke.py