
Commit 0120577

bchapuis and claude committed
Add schema input to agent nodes for structured JSON output
Extends the agent loop with an optional callFinalLLM callback that applies schema constraints to the final output-producing LLM call. Uses prompt-based JSON schema enforcement for the Anthropic and Workers AI providers, and native response_format for OpenAI and Gemini. Also fixes the Workers AI LLM nodes to use the prompt-based approach instead of the unsupported response_format.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1266c03 commit 0120577

4 files changed

Lines changed: 164 additions & 23 deletions
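
For orientation, here is a minimal sketch of how the new schema input is meant to flow end to end. The field names follow the AgentRunRequest change in the diff below; the provider, model, and schema values are purely illustrative, not taken from the commit.

```typescript
// Hypothetical request to the agent runner — the `schema` field is the new addition.
// Field names mirror AgentRunRequest from the diff; the values are illustrative only.
const request /* : AgentRunRequest */ = {
  provider: "anthropic",
  model: "claude-sonnet-4-5",
  instructions: "Extract the company details from the user's message.",
  userMessage: "Acme Corp was founded in 1999 in Berlin.",
  schema: {
    fields: [
      { name: "company", type: "string" },
      { name: "founded", type: "number" },
      { name: "city", type: "string" },
    ],
  },
};
// The runner converts `schema` to JSON Schema, runs the normal tool-calling loop,
// then issues one extra schema-constrained call so the final assistant message is
// JSON such as {"company":"Acme Corp","founded":1999,"city":"Berlin"}.
```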

File tree

apps/api/src/durable-objects/agent-runner.ts
packages/runtime/src/nodes/agent/base-agent-node.ts
packages/runtime/src/nodes/text/execute-workers-ai-text-model.ts
packages/runtime/src/utils/agent-loop.ts

apps/api/src/durable-objects/agent-runner.ts

Lines changed: 104 additions & 12 deletions
@@ -25,8 +25,10 @@ import {
   getOpenAIConfig,
 } from "@dafthunk/runtime/utils/ai-gateway";
 import { createCodeModeToolDefinition } from "@dafthunk/runtime/utils/code-mode";
+import { schemaToJsonSchema } from "@dafthunk/runtime/utils/schema-to-json-schema";
 import type { TokenPricing } from "@dafthunk/runtime/utils/usage";
 import { calculateTokenUsage } from "@dafthunk/runtime/utils/usage";
+import type { Schema } from "@dafthunk/types";
 import { GoogleGenAI } from "@google/genai";
 import { Agent } from "agents";
 import OpenAI from "openai";
@@ -74,6 +76,8 @@ export interface AgentRunRequest {
   agentId?: string;
   /** Max number of previous messages to load from conversation history */
   maxHistory?: number;
+  /** Schema to constrain the final output format (structured JSON output) */
+  schema?: Record<string, unknown>;
 }
 
 export interface AgentRunResponse {
@@ -221,6 +225,28 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
       body.maxHistory ?? 50
     );
 
+    // Convert schema if provided
+    const jsonSchema =
+      body.schema &&
+      typeof body.schema === "object" &&
+      "fields" in body.schema
+        ? schemaToJsonSchema(body.schema as unknown as Schema)
+        : undefined;
+
+    // Build callFinalLLM that applies schema constraint on the final turn
+    const callFinalLLM = jsonSchema
+      ? (messages: AgentMessage[], tools: ToolDefinition[]) =>
+          this.callLLM(
+            body.provider,
+            body.model,
+            body.instructions,
+            messages,
+            tools,
+            geminiBuiltInTools,
+            jsonSchema
+          )
+      : undefined;
+
     // Run the agent loop
     const result = await runAgentLoop({
       userMessage,
@@ -235,6 +261,7 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
         tools,
         geminiBuiltInTools
       ),
+      callFinalLLM,
       onStepComplete: async (state) => {
         this.ctx.storage.sql.exec(
           `UPDATE agent_runs SET state = ?, updated_at = datetime('now') WHERE run_id = ?`,
@@ -388,6 +415,28 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
       body.maxHistory ?? 50
     );
 
+    // Convert schema if provided
+    const jsonSchema =
+      body.schema &&
+      typeof body.schema === "object" &&
+      "fields" in body.schema
+        ? schemaToJsonSchema(body.schema as unknown as Schema)
+        : undefined;
+
+    // Build callFinalLLM that applies schema constraint on the final turn
+    const callFinalLLM = jsonSchema
+      ? (messages: AgentMessage[], tools: ToolDefinition[]) =>
+          this.callLLM(
+            body.provider,
+            body.model,
+            body.instructions,
+            messages,
+            tools,
+            geminiBuiltInTools,
+            jsonSchema
+          )
+      : undefined;
+
     const result = await runAgentLoop({
       userMessage,
       tools: toolDefinitions,
@@ -401,6 +450,7 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
         tools,
         geminiBuiltInTools
       ),
+      callFinalLLM,
      onStepComplete: async (state) => {
        this.ctx.storage.sql.exec(
          `UPDATE agent_runs SET state = ?, updated_at = datetime('now') WHERE run_id = ?`,
@@ -639,23 +689,31 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
     instructions: string,
     messages: AgentMessage[],
     tools: ToolDefinition[],
-    builtInTools?: Record<string, unknown>[]
+    builtInTools?: Record<string, unknown>[],
+    schema?: Record<string, unknown>
   ): Promise<LLMResponse> {
     switch (provider) {
       case "anthropic":
-        return this.callAnthropic(model, instructions, messages, tools);
+        return this.callAnthropic(model, instructions, messages, tools, schema);
       case "google":
         return this.callGoogle(
           model,
           instructions,
           messages,
           tools,
-          builtInTools
+          builtInTools,
+          schema
         );
       case "openai":
-        return this.callOpenAI(model, instructions, messages, tools);
+        return this.callOpenAI(model, instructions, messages, tools, schema);
       case "workers-ai":
-        return this.callWorkersAI(model, instructions, messages, tools);
+        return this.callWorkersAI(
+          model,
+          instructions,
+          messages,
+          tools,
+          schema
+        );
       default:
         throw new Error(`Unsupported provider: ${provider}`);
     }
@@ -667,7 +725,8 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
     model: string,
     instructions: string,
     messages: AgentMessage[],
-    tools: ToolDefinition[]
+    tools: ToolDefinition[],
+    schema?: Record<string, unknown>
   ): Promise<LLMResponse> {
     const client = new Anthropic({
       apiKey: "gateway-managed",
@@ -716,11 +775,16 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
       input_schema: t.parameters as Anthropic.Tool.InputSchema,
     }));
 
+    // When schema is provided, append a JSON constraint to the system prompt
+    const systemPrompt = schema
+      ? `${instructions}\n\nYou MUST respond with valid JSON matching this schema:\n${JSON.stringify(schema)}`
+      : instructions;
+
     const response = await client.messages.create({
       model,
       max_tokens: 4096,
       messages: anthropicMessages,
-      ...(instructions && { system: instructions }),
+      ...(systemPrompt && { system: systemPrompt }),
       ...(anthropicTools.length > 0 && { tools: anthropicTools }),
     });
 
@@ -755,7 +819,8 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
     instructions: string,
     messages: AgentMessage[],
     tools: ToolDefinition[],
-    builtInTools?: Record<string, unknown>[]
+    builtInTools?: Record<string, unknown>[],
+    schema?: Record<string, unknown>
   ): Promise<LLMResponse> {
     const ai = new GoogleGenAI({
       apiKey: "gateway-managed",
@@ -815,6 +880,12 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
       config.tools = allTools;
     }
 
+    // Apply schema constraint for structured JSON output
+    if (schema) {
+      config.responseMimeType = "application/json";
+      config.responseSchema = schema;
+    }
+
     const response = await ai.models.generateContent({
       model,
       contents: contents as any,
@@ -859,7 +930,8 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
     model: string,
     instructions: string,
     messages: AgentMessage[],
-    tools: ToolDefinition[]
+    tools: ToolDefinition[],
+    schema?: Record<string, unknown>
   ): Promise<LLMResponse> {
     const client = new OpenAI({
       apiKey: "gateway-managed",
@@ -911,11 +983,24 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
       },
     }));
 
+    // Build response_format when a schema is provided
+    const responseFormat = schema
+      ? {
+          type: "json_schema" as const,
+          json_schema: {
+            name: "response",
+            schema,
+            strict: true,
+          },
+        }
+      : undefined;
+
     const completion = await client.chat.completions.create({
       model,
       max_tokens: 4096,
       messages: openaiMessages,
       ...(openaiTools.length > 0 && { tools: openaiTools }),
+      ...(responseFormat && { response_format: responseFormat }),
     });
 
     const choice = completion.choices[0];
@@ -948,13 +1033,20 @@ export class AgentRunner extends Agent<Bindings, AgentRunnerState> {
     model: string,
     _instructions: string,
     messages: AgentMessage[],
-    tools: ToolDefinition[]
+    tools: ToolDefinition[],
+    schema?: Record<string, unknown>
   ): Promise<LLMResponse> {
     // Workers AI uses OpenAI-compatible chat format
     const aiMessages: Array<{ role: string; content: string }> = [];
 
-    if (_instructions) {
-      aiMessages.push({ role: "system", content: _instructions });
+    // When schema is provided, append a JSON constraint to the system prompt
+    // (Workers AI models don't reliably support response_format)
+    const systemPrompt = schema
+      ? `${_instructions}\n\nYou MUST respond with valid JSON matching this schema:\n${JSON.stringify(schema)}`
+      : _instructions;
+
+    if (systemPrompt) {
+      aiMessages.push({ role: "system", content: systemPrompt });
     }
 
     for (const m of messages) {
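
One caveat for consumers: for the prompt-based providers (Anthropic, Workers AI), the schema is enforced by instruction only, so the final assistant message is a string that should parse as JSON but is not guaranteed to. The commit does not add parsing on the consumer side; a hypothetical defensive guard might look like this.

```typescript
// Hypothetical consumer-side parse of the final assistant message (not part of this commit).
// Prompt-based enforcement can still emit stray text or code fences around the JSON.
function parseStructuredOutput(finalText: string): unknown {
  try {
    return JSON.parse(finalText);
  } catch {
    // Try to extract a JSON object embedded in surrounding text before giving up.
    const match = finalText.match(/\{[\s\S]*\}/);
    if (match) {
      try {
        return JSON.parse(match[0]);
      } catch {
        /* fall through */
      }
    }
    return finalText; // fall back to the raw text
  }
}
```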

packages/runtime/src/nodes/agent/base-agent-node.ts

Lines changed: 12 additions & 2 deletions
@@ -77,6 +77,12 @@ const AGENT_INPUTS: NodeType["inputs"] = [
     hidden: true,
     value: false,
   },
+  {
+    name: "schema",
+    type: "schema",
+    description: "JSON schema to constrain the final output format",
+    hidden: true,
+  },
 ];
 
 /** Extra inputs for Gemini agent nodes — Google built-in tools */
@@ -95,8 +101,8 @@ export const GEMINI_BUILTIN_TOOL_INPUTS: NodeType["inputs"] = [
 const AGENT_OUTPUTS: NodeType["outputs"] = [
   {
     name: "text",
-    type: "string",
-    description: "Final text response from the agent",
+    type: "any",
+    description: "Final text or JSON response from the agent",
   },
   {
     name: "steps",
@@ -227,6 +233,7 @@ export abstract class BaseAgentNode extends ExecutableNode {
       code_mode,
       googleSearch,
       agent_id,
+      schema,
     } = context.inputs;
     const agentContext = context.inputs.context as string | undefined;
     const agentId = agent_id as string | undefined;
@@ -267,6 +274,7 @@ export abstract class BaseAgentNode extends ExecutableNode {
         googleSearch: googleSearch ?? false,
         organizationId: context.organizationId,
         agentId,
+        ...(schema && { schema }),
       }),
     });
 
@@ -316,6 +324,7 @@ export abstract class BaseAgentNode extends ExecutableNode {
       code_mode,
       googleSearch,
       agent_id,
+      schema,
     } = context.inputs;
     const agentContext = context.inputs.context as string | undefined;
     const agentId = agent_id as string | undefined;
@@ -353,6 +362,7 @@ export abstract class BaseAgentNode extends ExecutableNode {
         googleSearch: googleSearch ?? false,
         organizationId: context.organizationId,
         agentId,
+        ...(schema && { schema }),
       }),
     });
 
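
The `"fields" in body.schema` guard in agent-runner.ts suggests the node-level schema value is a field-list object rather than raw JSON Schema. A hypothetical example of that input shape, and one plausible JSON Schema that schemaToJsonSchema could produce from it (the actual conversion lives in @dafthunk/runtime/utils/schema-to-json-schema and is not shown in this commit):

```typescript
// Hypothetical value wired into the new "schema" input port.
const schemaInput = {
  fields: [
    { name: "title", type: "string" },
    { name: "score", type: "number" },
  ],
};

// One plausible JSON Schema equivalent — the real output shape depends on
// schemaToJsonSchema's implementation, which this commit only imports.
const jsonSchema = {
  type: "object",
  properties: {
    title: { type: "string" },
    score: { type: "number" },
  },
  required: ["title", "score"],
  additionalProperties: false,
};
```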

packages/runtime/src/nodes/text/execute-workers-ai-text-model.ts

Lines changed: 15 additions & 8 deletions
@@ -55,16 +55,23 @@ export async function executeWorkersAiTextModel(
     context
   );
 
-  // Build response_format when a schema is provided
+  // When schema is provided, prepend a JSON constraint to messages
+  // (Workers AI models don't reliably support response_format)
   const extraParams: Record<string, unknown> = { ...(config.params ?? {}) };
-  if (schemaInput && typeof schemaInput === "object" && "fields" in schemaInput) {
-    extraParams.response_format = {
-      type: "json_schema",
-      json_schema: {
-        name: "response",
-        schema: schemaToJsonSchema(schemaInput as Schema),
+  if (
+    schemaInput &&
+    typeof schemaInput === "object" &&
+    "fields" in schemaInput &&
+    parsedMessages
+  ) {
+    const jsonSchema = schemaToJsonSchema(schemaInput as Schema);
+    parsedMessages = [
+      {
+        role: "system",
+        content: `You MUST respond with valid JSON matching this schema:\n${JSON.stringify(jsonSchema)}`,
       },
-    };
+      ...parsedMessages,
+    ];
   }
 
   let result: any;

packages/runtime/src/utils/agent-loop.ts

Lines changed: 33 additions & 1 deletion
@@ -61,6 +61,16 @@ export interface AgentLoopConfig {
     tools: ToolDefinition[]
   ) => Promise<LLMResponse>;
 
+  /**
+   * Optional LLM call used for the final output-producing turn.
+   * When provided (e.g. to enforce a JSON schema), this replaces `callLLM`
+   * for the last call that generates the user-facing response.
+   */
+  callFinalLLM?: (
+    messages: AgentMessage[],
+    tools: ToolDefinition[]
+  ) => Promise<LLMResponse>;
+
   /** Called after each iteration so the caller can persist state */
   onStepComplete?: (state: AgentLoopState) => Promise<void>;
 
@@ -95,6 +105,7 @@
   config: AgentLoopConfig
 ): Promise<AgentLoopResult> {
   const { userMessage, tools, maxSteps, callLLM, onStepComplete } = config;
+  const finalLLM = config.callFinalLLM ?? callLLM;
 
   // Initialise or resume state
   const state: AgentLoopState = config.resumeState ?? {
@@ -188,11 +199,32 @@
     }
   }
 
+  // If the model completed normally and a callFinalLLM is provided, make one
+  // additional call with schema constraints to produce structured output.
+  // We pop the last assistant message so the model generates a fresh response
+  // with the schema constraint, rather than seeing its own unformatted reply.
+  if (finishReason === "completed" && config.callFinalLLM) {
+    state.messages.pop();
+
+    const formatResponse = await finalLLM(state.messages, []);
+    state.totalInputTokens += formatResponse.inputTokens;
+    state.totalOutputTokens += formatResponse.outputTokens;
+
+    state.messages.push({
+      role: "assistant",
+      content: formatResponse.content,
+    });
+
+    if (onStepComplete) {
+      await onStepComplete(state);
+    }
+  }
+
   // If we exhausted maxSteps, do one final LLM call without tools to summarise
   if (state.steps.length >= maxSteps) {
     finishReason = "max_steps_reached";
 
-    const finalResponse = await callLLM(state.messages, []);
+    const finalResponse = await finalLLM(state.messages, []);
     state.totalInputTokens += finalResponse.inputTokens;
     state.totalOutputTokens += finalResponse.outputTokens;
 
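
A minimal usage sketch of the new callFinalLLM hook, assuming a caller wires runAgentLoop up directly; `callProviderLLM`, `persistState`, and `jsonSchema` are illustrative stand-ins, not names from this commit.

```typescript
// Hypothetical wiring of runAgentLoop with the new callFinalLLM hook.
// `callProviderLLM` stands in for any provider-specific LLM call.
const result = await runAgentLoop({
  userMessage: "Summarise the three open issues as JSON.",
  tools: toolDefinitions,
  maxSteps: 8,
  // Normal tool-calling turns: no schema constraint.
  callLLM: (messages, tools) => callProviderLLM(messages, tools),
  // Final output-producing turn: same call, but with the JSON schema applied.
  callFinalLLM: (messages, tools) => callProviderLLM(messages, tools, jsonSchema),
  onStepComplete: async (state) => persistState(state),
});
```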
