jmbish04
diff --git a/‎.agent/rules/ai-rules.md‎
Lines changed: 14 additions & 0 deletions b/‎.agent/rules/ai-rules.md‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎AGENTS.md‎
Lines changed: 20 additions & 5 deletions b/‎AGENTS.md‎
Lines changed: 20 additions & 5 deletions
diff --git a/‎backend/src/ai/agents/HealthDiagnostician.ts‎
Lines changed: 57 additions & 6 deletions b/‎backend/src/ai/agents/HealthDiagnostician.ts‎
Lines changed: 57 additions & 6 deletions
diff --git a/‎backend/src/ai/providers/gemini.ts‎
Lines changed: 57 additions & 22 deletions b/‎backend/src/ai/providers/gemini.ts‎
Lines changed: 57 additions & 22 deletions
diff --git a/‎backend/src/ai/providers/index.ts‎
Lines changed: 14 additions & 7 deletions b/‎backend/src/ai/providers/index.ts‎
Lines changed: 14 additions & 7 deletions
@@ -0,0 +1,14 @@
+# Rule: AI Provider & Structured Responses
+
+## 1. Structured Output Mandate
+
+- **CRYSTAL CLEAR RULE**: ANYTIME the AI model is being instructed to respond with a structured response (JSON), you **MUST** use `generateStructuredResponse` or `generateStructuredWithTools` exported from `@/ai/providers`.
+- **FORBIDDEN**: Do not rely on native Agent SDK schemas (e.g. `outputType: MySchema as any` in `@openai/agents`). These frequently fail to map correctly through the Cloudflare AI Gateway or result in brittle string parsing.
+
+## 2. The Extraction Pattern (Agents with Tools)
+
+If you are running an autonomous Agent that requires tool usage (e.g., `HealthDiagnostician` or `ResearchAgent`):
+
+1. Configure the Agent to output standard text/markdown (`outputType` must NOT be explicitly defined).
+2. Await the Agent's `finalOutput` inside the execution loop.
+3. Pass that string into `generateStructuredResponse` along with your Zod schema (converted via `zodToJsonSchema`) to strictly extract and type the final JSON object. This ensures Gateway compatibility while guaranteeing Zod-verified JSON.
@@ -98,18 +98,33 @@ console.log(result.text); // Getter, returns string
 - `generationConfig` (Use `config` property instead)
 - `result.response.text()` (Method call)
 
-## Structured Outputs
+## Structured Outputs (MANDATE)
 
-Always use `zod` and `zod-to-json-schema` to define your `responseSchema`.
+**CRYSTAL CLEAR RULE**: You MUST use `AiProvider.generateStructuredResponse` (or `generateStructuredWithTools` exported from `@/ai/providers`) _anytime_ the AI model is being instructed to respond with a structured JSON response.
+
+**FORBIDDEN**: Do NOT rely on Agent SDK schema enforcements (e.g., passing `outputType: MySchema as any` to `@openai/agents`), as they are prone to brittle string extraction failures or 400 errors via the Cloudflare AI Gateway.
+
+**Correct Pattern (Agent with Tools):**
+
+1. Let the Agent execute its internal tool loop freely (returning markdown text).
+2. Take the Agent's `result.finalOutput` and pass it into `generateStructuredResponse` along with your schema.
 
 ```typescript
-import { z } from "zod";
+import { generateStructuredResponse } from "@/ai/providers";
 import { zodToJsonSchema } from "zod-to-json-schema";
+import { z } from "zod";
 
 const MySchema = z.object({ ... });
 
-// ... inside generateContent config:
-responseSchema: zodToJsonSchema(MySchema) as any
+// 1. Let agent run
+const result = await runner.run(agent, prompt);
+
+// 2. Extract strictly
+const finalData = await generateStructuredResponse<z.infer<typeof MySchema>>(
+  env,
+  `Extract the exact data from the Agent's response:\n\n${result.finalOutput}`,
+  zodToJsonSchema(MySchema as any, "structured_output")
+);
 ```
 
 ## AI Provider Routing & Resolution
 
@@ -91,7 +91,7 @@ export class HealthDiagnostician extends BaseAgent {
         }
 
         // 3. Define the Agent's Instructions
-        const instructions = `You are a Codex Senior Engineer and an autonomous Site Reliability Agent operating on the Cloudflare ecosystem.
+        const instructions = `You are a Senior Engineer and an autonomous Site Reliability Agent operating on the Cloudflare ecosystem.
 Your primary directive is to investigate, diagnose, and remediate system health failures within the repository \`${repoOwner}/${repoName}\`.
 
 CRITICAL PRE-FLIGHT CHECK:
@@ -103,9 +103,48 @@ TRIAGE AND REMEDIATION:
    - IF the fix is SMALL (e.g., typos, simple config adjustments, single-file logic errors under 20 lines): Formulate the fix and use \`create_pull_request\` to submit it immediately.
    - IF the fix is COMPLEX (e.g., multi-file refactoring, architectural changes, deep logic bugs, package upgrades): Do NOT try to fix it yourself. Instead, use the \`delegate_to_jules\` tool to dispatch a deep-reasoning session to Google Jules. Provide Jules with a highly detailed prompt of what needs to be refactored.
 
-Return a JSON response containing the \`severity\`, \`rootCause\`, \`suggestedFix\` (or delegation note), and \`prUrl\` (or Jules Session ID).`;
+Conclude your investigation with a detailed summary containing the severity, rootCause, suggestedFix (or delegation note), and prUrl (or Jules Session ID).`;
 
-        const prompt = `Health Check Failed in category: ${payload.category}\nTarget: ${payload.target}\nError: ${payload.errorName} - ${payload.errorMessage}\nDetails: ${JSON.stringify(payload.errorDetails, null, 2)}\n\nRelevant Cloudflare Docs Context:\nQuery: ${rewritten}\nDocs Result: ${mcpContext}`;
+        const MAX_LOG_LENGTH = 15000;
+        let stringifiedDetails = JSON.stringify(payload.errorDetails, null, 2) || "{}";
+        
+        // Use RAG to fetch relevant chunks if the error details are a large array
+        if (Array.isArray(payload.errorDetails) && stringifiedDetails.length > MAX_LOG_LENGTH) {
+            try {
+                this.logger.info(`Extracting relevant logs via Vectorize RAG...`);
+                const { vectorizeAndStoreLogs } = await import("@/ai/utils/log-vectorizer");
+                const { generateEmbeddings } = await import("@/ai/providers/index");
+                
+                const runId = `diag-${Date.now()}`;
+                await vectorizeAndStoreLogs(this.env, runId, payload.errorDetails);
+                
+                const diagnosticQuery = "Find fatal errors, agent execution failures, timeouts, 400 status codes, crash stack traces, and high severity warnings.";
+                const queryEmbeddings = await generateEmbeddings(this.env, [diagnosticQuery]);
+                const searchVector = queryEmbeddings[0];
+                
+                const vectorMatches = await this.env.VECTORIZE_LOGS.query(searchVector, {
+                    topK: 10,
+                    filter: { runId: runId },
+                    returnValues: false,
+                    returnMetadata: true
+                });
+                
+                const relevantLogs = vectorMatches.matches
+                    .map(match => match.metadata?.content)
+                    .filter(Boolean)
+                    .join("\n\n---\n\n");
+                    
+                stringifiedDetails = `[RAG FETCHED RELEVANT LOG CHUNKS]\n${relevantLogs}`;
+                this.logger.info(`Successfully retrieved ${vectorMatches.matches.length} relevant chunks`);
+            } catch (e: any) {
+                this.logger.error("RAG Log Vectorization failed, falling back to truncation", e);
+                stringifiedDetails = stringifiedDetails.substring(0, MAX_LOG_LENGTH) + "\n...[RAG ERROR, TRUNCATED FOR LENGTH]";
+            }
+        } else if (stringifiedDetails.length > MAX_LOG_LENGTH) {
+            stringifiedDetails = stringifiedDetails.substring(0, MAX_LOG_LENGTH) + "\n...[TRUNCATED FOR LENGTH to prevent 400 payload rejection]";
+        }
+
+        const prompt = `Health Check Failed in category: ${payload.category}\nTarget: ${payload.target}\nError: ${payload.errorName} - ${payload.errorMessage}\nDetails: ${stringifiedDetails}\n\nRelevant Cloudflare Docs Context:\nQuery: ${rewritten}\nDocs Result: ${mcpContext}`;
 
         // 4. Define Tools inline for the BaseAgent to register
         const agentConfig = {
@@ -310,13 +349,25 @@ Return a JSON response containing the \`severity\`, \`rootCause\`, \`suggestedFi
                  instructions: agentConfig.instructions,
                  model: agentConfig.model,
                  tools: agentConfig.tools,
-                 outputType: HealthDiagnosticianOutputSchema as any,
+                 // Removed outputType here to comply with AI standard mandate: let agent run freely, extract structure internally below
              });
 
+             // Diagnostic tracking: monitor actual byte size of the outbound LLM payload
+             const payloadBytes = new TextEncoder().encode(prompt).length;
+             this.logger.info(`[HealthDiagnostician] Outbound Prompt Payload Size: ${payloadBytes} bytes`);
+
              const result = await runner.run(agent, prompt);
 
-             // The SDK guarantees this matches the Zod schema when outputType is provided
-             const finalData = result.finalOutput as HealthDiagnosticianOutput;
+             // Enforce strict JSON output using the globally mandated AiProvider.generateStructuredResponse
+             const { generateStructuredResponse } = await import("@/ai/providers/index");
+             const { zodToJsonSchema } = await import("zod-to-json-schema");
+             
+             const extractPrompt = `Extract the exact diagnosis details from the Agent's final response below. Respond ONLY with valid JSON.\n\nAgent Response:\n${result.finalOutput}`;
+             const finalData = await generateStructuredResponse<HealthDiagnosticianOutput>(
+                this.env, 
+                extractPrompt, 
+                zodToJsonSchema(HealthDiagnosticianOutputSchema as any, "structured_output")
+             );
 
              return new Response(JSON.stringify(finalData), {
                 headers: { "Content-Type": "application/json" }
 
@@ -1,38 +1,73 @@
 // Dynamically imported
 import { getAiGatewayUrl, resolveDefaultAiModel } from "./config";
 import { getAIGatewayUrl as getRawGatewayUrl } from "../utils/ai-gateway";
-import { getGeminiApiKey } from "@utils/secrets";
 import { cleanJsonOutput } from "@/ai/utils/sanitizer";
 import { AIOptions, TextWithToolsResponse, StructuredWithToolsResponse } from "./index";
 
 export async function createGeminiClient(env: Env, model: string) {
   // @ts-ignore
   const aigToken = typeof env.AI_GATEWAY_TOKEN === 'object' && env.AI_GATEWAY_TOKEN?.get ? await env.AI_GATEWAY_TOKEN.get() : env.AI_GATEWAY_TOKEN as string;
 
-  // "Key in Request + Authenticated Gateway" pattern:
-  // - apiKey: REAL Gemini key (SDK sends this as ?key= to Google)
-  // - cf-aig-authorization: gateway token (for gateway auth/logging)
-  // The gateway forwards the real key to upstream; BYOK is NOT used here.
-  const apiKey = await getGeminiApiKey(env);
-
-  if (!apiKey || !env.CLOUDFLARE_ACCOUNT_ID) {
-    throw new Error("Missing GEMINI_API_KEY and CLOUDFLARE_ACCOUNT_ID");
+  if (!aigToken || !env.CLOUDFLARE_ACCOUNT_ID) {
+    throw new Error("Missing AI_GATEWAY_TOKEN and CLOUDFLARE_ACCOUNT_ID required for BYOK configuration");
   }
 
   const { GoogleGenAI } = await import("@google/genai");
   const baseUrl = await getRawGatewayUrl(env, { provider: "google-ai-studio" });
+
+  const originalFetch = globalThis.fetch;
 
-  // Default to v1beta for Gemini 2.5 Flash and newer models
-  const apiVersion = "v1beta";
-
-  return new GoogleGenAI({
-    apiKey: apiKey,
-    httpOptions: {
-      baseUrl,
-      apiVersion,
-      headers: aigToken ? { 'cf-aig-authorization': `Bearer ${aigToken}` } : undefined,
-    },
-  });
+  // Intercept the fetch call to strip dummy keys and inject the Gateway Authorization
+  const wrappedFetch = async (url: any, init: any) => {
+    const newInit = { ...init };
+    if (newInit.headers) {
+      const headers = new Headers(newInit.headers);
+      
+      // Strip the SDK-enforced dummy key so it doesn't override the Gateway's BYOK injection
+      headers.delete("x-goog-api-key");
+      
+      // Apply the AI Gateway token for Gateway auth
+      if (aigToken && !headers.has("cf-aig-authorization")) {
+          headers.set("cf-aig-authorization", `Bearer ${aigToken}`);
+      }
+      
+      const headerObj: Record<string, string> = {};
+      headers.forEach((value, key) => {
+          headerObj[key] = value;
+      });
+      newInit.headers = headerObj;
+    }
+
+    let finalUrl = String(url);
+    try {
+        const u = new URL(finalUrl);
+        // Strip the query parameter ?key= if the SDK appended the dummy key
+        if (u.searchParams.has("key")) {
+            u.searchParams.delete("key");
+            finalUrl = u.toString();
+        }
+    } catch (e) { /* ignore url parsing errors */ }
+
+    return await originalFetch(finalUrl, newInit);
+  };
+  
+  // Monkey-patch temporarily for this instance creation
+  globalThis.fetch = wrappedFetch as unknown as typeof fetch;
+
+  try {
+    const client = new GoogleGenAI({
+      // Pass a dummy key to bypass SDK validation. 
+      // The real key is stored in Cloudflare AI Gateway (BYOK)
+      apiKey: "cf-aig-byok-dummy-key",
+      httpOptions: {
+        baseUrl,
+      },
+    });
+    
+    return client;
+  } finally {
+     // We leave fetch patched currently as the client resolves requests asynchronously later
+  }
 }
 
 export async function verifyApiKey(env: Env): Promise<boolean> {
@@ -42,7 +77,7 @@ export async function verifyApiKey(env: Env): Promise<boolean> {
     await client.models.get({ model: testModel });
     return true;
   } catch (error) {
-    console.error("Gemini Verification Error:", error);
+    console.error("Gemini BYOK Verification Error:", error);
     return false;
   }
 }
@@ -118,7 +153,7 @@ export async function generateTextWithTools(
   });
 
   const toolCalls = response.functionCalls?.map((call, index) => ({
-    id: `call_${index}`, // Gemini does not provide UUIDs for tools natively in the standard layout
+    id: `call_${index}`, 
     function: {
       name: call.name || "unknown",
       arguments: JSON.stringify(call.args || {})
 
@@ -2,7 +2,7 @@ import { resolveDefaultAiProvider, SupportedProvider } from "./config";
 import * as openai from "./openai";
 import * as gemini from "./gemini";
 import * as anthropic from "./anthropic";
-import * as workersAi from "./workers-ai";
+import * as workerAi from "./worker-ai";
 
 export interface AIOptions {
   model?: string;
@@ -40,7 +40,7 @@ export async function verifyApiKey(env: Env, providerOverride?: SupportedProvide
     case 'openai': return openai.verifyApiKey(env);
     case 'gemini': return gemini.verifyApiKey(env);
     case 'anthropic': return anthropic.verifyApiKey(env);
-    default: return workersAi.verifyApiKey(env);
+    default: return workerAi.verifyApiKey(env);
   }
 }
 
@@ -56,7 +56,7 @@ export async function generateText(
     case 'openai': return openai.generateText(env, prompt, systemPrompt, options);
     case 'gemini': return gemini.generateText(env, prompt, systemPrompt, options);
     case 'anthropic': return anthropic.generateText(env, prompt, systemPrompt, options);
-    default: return workersAi.generateText(env, prompt, systemPrompt, options);
+    default: return workerAi.generateText(env, prompt, systemPrompt, options);
   }
 }
 
@@ -73,7 +73,7 @@ export async function generateStructuredResponse<T = any>(
     case 'openai': return openai.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);
     case 'gemini': return gemini.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);
     case 'anthropic': return anthropic.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);
-    default: return workersAi.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);
+    default: return workerAi.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);
   }
 }
 
@@ -90,7 +90,7 @@ export async function generateTextWithTools(
     case 'openai': return openai.generateTextWithTools(env, prompt, tools, systemPrompt, options);
     case 'gemini': return gemini.generateTextWithTools(env, prompt, tools, systemPrompt, options);
     case 'anthropic': return anthropic.generateTextWithTools(env, prompt, tools, systemPrompt, options);
-    default: return workersAi.generateTextWithTools(env, prompt, tools, systemPrompt, options);
+    default: return workerAi.generateTextWithTools(env, prompt, tools, systemPrompt, options);
   }
 }
 
@@ -108,15 +108,22 @@ export async function generateStructuredWithTools<T = any>(
     case 'openai': return openai.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);
     case 'gemini': return gemini.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);
     case 'anthropic': return anthropic.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);
-    default: return workersAi.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);
+    default: return workerAi.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);
   }
 }
 
 export async function generateEmbedding(
   env: Env,
   text: string
 ): Promise<number[]> {
-  return workersAi.generateEmbedding(env, text);
+  return workerAi.generateEmbedding(env, text);
+}
+
+export async function generateEmbeddings(
+  env: Env,
+  text: string | string[]
+): Promise<number[][]> {
+  return workerAi.generateEmbeddings(env, text);
 }
 
 /**
Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@ import { resolveDefaultAiProvider, SupportedProvider } from "./config";`
`2`	`2`	`import * as openai from "./openai";`
`3`	`3`	`import * as gemini from "./gemini";`
`4`	`4`	`import * as anthropic from "./anthropic";`
`5`		`-import * as workersAi from "./workers-ai";`
	`5`	`+import * as workerAi from "./worker-ai";`
`6`	`6`
`7`	`7`	`export interface AIOptions {`
`8`	`8`	`model?: string;`
`@@ -40,7 +40,7 @@ export async function verifyApiKey(env: Env, providerOverride?: SupportedProvide`
`40`	`40`	`case 'openai': return openai.verifyApiKey(env);`
`41`	`41`	`case 'gemini': return gemini.verifyApiKey(env);`
`42`	`42`	`case 'anthropic': return anthropic.verifyApiKey(env);`
`43`		`- default: return workersAi.verifyApiKey(env);`
	`43`	`+ default: return workerAi.verifyApiKey(env);`
`44`	`44`	`}`
`45`	`45`	`}`
`46`	`46`
`@@ -56,7 +56,7 @@ export async function generateText(`
`56`	`56`	`case 'openai': return openai.generateText(env, prompt, systemPrompt, options);`
`57`	`57`	`case 'gemini': return gemini.generateText(env, prompt, systemPrompt, options);`
`58`	`58`	`case 'anthropic': return anthropic.generateText(env, prompt, systemPrompt, options);`
`59`		`- default: return workersAi.generateText(env, prompt, systemPrompt, options);`
	`59`	`+ default: return workerAi.generateText(env, prompt, systemPrompt, options);`
`60`	`60`	`}`
`61`	`61`	`}`
`62`	`62`
`@@ -73,7 +73,7 @@ export async function generateStructuredResponse<T = any>(`
`73`	`73`	`case 'openai': return openai.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);`
`74`	`74`	`case 'gemini': return gemini.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);`
`75`	`75`	`case 'anthropic': return anthropic.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);`
`76`		`- default: return workersAi.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);`
	`76`	`+ default: return workerAi.generateStructuredResponse<T>(env, prompt, schema, systemPrompt, options);`
`77`	`77`	`}`
`78`	`78`	`}`
`79`	`79`
`@@ -90,7 +90,7 @@ export async function generateTextWithTools(`
`90`	`90`	`case 'openai': return openai.generateTextWithTools(env, prompt, tools, systemPrompt, options);`
`91`	`91`	`case 'gemini': return gemini.generateTextWithTools(env, prompt, tools, systemPrompt, options);`
`92`	`92`	`case 'anthropic': return anthropic.generateTextWithTools(env, prompt, tools, systemPrompt, options);`
`93`		`- default: return workersAi.generateTextWithTools(env, prompt, tools, systemPrompt, options);`
	`93`	`+ default: return workerAi.generateTextWithTools(env, prompt, tools, systemPrompt, options);`
`94`	`94`	`}`
`95`	`95`	`}`
`96`	`96`
`@@ -108,15 +108,22 @@ export async function generateStructuredWithTools<T = any>(`
`108`	`108`	`case 'openai': return openai.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);`
`109`	`109`	`case 'gemini': return gemini.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);`
`110`	`110`	`case 'anthropic': return anthropic.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);`
`111`		`- default: return workersAi.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);`
	`111`	`+ default: return workerAi.generateStructuredWithTools<T>(env, prompt, schema, tools, systemPrompt, options);`
`112`	`112`	`}`
`113`	`113`	`}`
`114`	`114`
`115`	`115`	`export async function generateEmbedding(`
`116`	`116`	`env: Env,`
`117`	`117`	`text: string`
`118`	`118`	`): Promise<number[]> {`
`119`		`- return workersAi.generateEmbedding(env, text);`
	`119`	`+ return workerAi.generateEmbedding(env, text);`
	`120`	`+}`
	`121`	`+`
	`122`	`+export async function generateEmbeddings(`
	`123`	`+ env: Env,`
	`124`	`+ text: string \| string[]`
	`125`	`+): Promise<number[][]> {`
	`126`	`+ return workerAi.generateEmbeddings(env, text);`
`120`	`127`	`}`
`121`	`128`
`122`	`129`	`/**`