feat: add logfire to agent sandbox, expand example questions

nikhilwoodruff · nikhilwoodruff · commit f458eb7d3b53 · 2025-12-28T17:17:26.000Z
diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx
@@ -272,9 +272,20 @@ export function PolicyChat() {
   };
 
   const exampleQuestions = [
+    // UK tax and benefit questions
     "How much would it cost to set the UK basic income tax rate to 19p?",
     "What would happen if we doubled child benefit?",
     "How would a £15,000 personal allowance affect different income groups?",
+    "What is the budgetary impact of abolishing the higher rate of income tax?",
+    "How much does universal credit cost the government?",
+    // US tax and benefit questions
+    "What would a $2,000 child tax credit cost in the US?",
+    "How would doubling SNAP benefits affect poverty rates?",
+    "What is the revenue impact of a 25% top marginal tax rate?",
+    // Household calculations
+    "Calculate tax for a UK household earning £50,000",
+    "What benefits would a single parent with two children receive in California?",
+    "How much income tax does someone earning $100,000 in New York pay?",
   ];
 
   const formatToolName = (name: string) => {
diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py
@@ -10,6 +10,7 @@
 sandbox_image = (
     modal.Image.debian_slim(python_version="3.12")
     .apt_install("curl", "git", "unzip")
+    .pip_install("logfire")
     .run_commands(
         # Install Bun
         "curl -fsSL https://bun.sh/install | bash",
@@ -28,6 +29,7 @@
 
 # Secrets
 anthropic_secret = modal.Secret.from_name("anthropic-api-key")
+logfire_secret = modal.Secret.from_name("logfire-token")
 
 
 def run_claude_code_in_sandbox(
@@ -50,7 +52,7 @@ def run_claude_code_in_sandbox(
 
     sb = modal.Sandbox.create(
         image=sandbox_image,
-        secrets=[anthropic_secret],
+        secrets=[anthropic_secret, logfire_secret],
         timeout=600,
         workdir="/tmp",
     )
@@ -74,7 +76,7 @@ def run_claude_code_in_sandbox(
     return sb, process
 
 
-@app.function(image=sandbox_image, secrets=[anthropic_secret], timeout=600)
+@app.function(image=sandbox_image, secrets=[anthropic_secret, logfire_secret], timeout=600)
 def run_policy_analysis(
     question: str, api_base_url: str = "https://v2.api.policyengine.org"
 ) -> dict:
@@ -86,33 +88,50 @@ def run_policy_analysis(
     import os
     import subprocess
 
-    # Write MCP config
-    os.makedirs("/root/.claude", exist_ok=True)
-    mcp_config = {
-        "mcpServers": {"policyengine": {"type": "sse", "url": f"{api_base_url}/mcp"}}
-    }
-    with open("/root/.claude/mcp_servers.json", "w") as f:
-        json.dump(mcp_config, f)
-
-    # Run Claude Code
-    result = subprocess.run(
-        [
-            "claude",
-            "-p",
-            question,
-            "--allowedTools",
-            "mcp__policyengine__*,Bash,Read,Grep,Glob,Write,Edit",
-        ],
-        capture_output=True,
-        text=True,
-        timeout=540,
-    )
+    import logfire
+
+    logfire.configure(service_name="policyengine-agent-sandbox")
 
-    return {
-        "status": "completed" if result.returncode == 0 else "failed",
-        "report": result.stdout,
-        "error": result.stderr if result.returncode != 0 else None,
-    }
+    with logfire.span("run_policy_analysis", question=question[:100], api_base_url=api_base_url):
+        # Write MCP config
+        os.makedirs("/root/.claude", exist_ok=True)
+        mcp_config = {
+            "mcpServers": {"policyengine": {"type": "sse", "url": f"{api_base_url}/mcp"}}
+        }
+        with open("/root/.claude/mcp_servers.json", "w") as f:
+            json.dump(mcp_config, f)
+
+        logfire.info("Starting Claude Code", question=question[:100])
+
+        # Run Claude Code
+        result = subprocess.run(
+            [
+                "claude",
+                "-p",
+                question,
+                "--allowedTools",
+                "mcp__policyengine__*,Bash,Read,Grep,Glob,Write,Edit",
+            ],
+            capture_output=True,
+            text=True,
+            timeout=540,
+        )
+
+        logfire.info(
+            "Claude Code finished",
+            returncode=result.returncode,
+            stdout_len=len(result.stdout),
+            stderr_len=len(result.stderr),
+        )
+
+        if result.returncode != 0:
+            logfire.error("Claude Code failed", stderr=result.stderr[:500])
+
+        return {
+            "status": "completed" if result.returncode == 0 else "failed",
+            "report": result.stdout,
+            "error": result.stderr if result.returncode != 0 else None,
+        }
 
 
 # For local testing