Merge pull request #9 from lua-ai-global/feat/tool-result-other-adapters

scotty595 · web-flow · commit ad9e68e94eed · 2026-04-30T14:21:20.000+01:00
feat: tool-result scanning for langchain / openai-agents / genkit / llamaindex
diff --git a/packages/governance/CHANGELOG.md b/packages/governance/CHANGELOG.md
@@ -1,5 +1,58 @@
 # Changelog
 
+## [0.15.0] - 2026-04-30 — Tool-result scanning across the framework adapters
+
+0.14 wired tool-result scanning into the Mastra processor and MCP adapter
+only. 0.15 rolls the same protection out to the four other adapters that
+already do tool wrapping at construction time:
+
+- **LangChain** — `tool.invoke` wrap (in both `governTool` and `governTools`)
+- **OpenAI Agents** — `tool.invoke` AND `tool.execute` wraps
+- **Genkit** — `tool.call` wrap
+- **LlamaIndex** — `tool.call` wrap
+
+For each, the wrapped invoke/call/execute now runs the tool's return value
+through `scanToolResult()` (the same shared signal-then-enforce helper
+the Mastra processor uses) at stage `tool_result` before returning. On
+block, a `{ blocked, reason, ruleId }` redacted detail object replaces
+the original output, so the LLM never ingests the poisoned content.
+
+### Added — `scanToolResults` config flag on each adapter
+
+```ts
+const { tools } = await governLangChainTools(gov, [searchTool], {
+  agentName: "my-agent",
+  scanToolResults: true,           // default — opt-out via false
+  toolResultInjectionThreshold: 0.5,
+});
+```
+
+Default `true` (matches the Mastra processor default). Existing callers
+who upgrade to 0.15 get tool-result scanning automatically; set
+`scanToolResults: false` to skip — useful for test environments that
+mock tool returns.
+
+### What didn't change
+
+- **Anthropic / Mistral / Ollama** still use a caller-driven
+  `handleToolUse` / `handleToolCall` pattern. Tool-result scanning here
+  has to be integrated at the call site by the user — the SDK can't
+  intercept transparently. Consider using `gov.scanToolResult()` in
+  your handler manually.
+- **Vercel AI** — no native tool-wrapping path on this adapter today.
+  Tracked as a follow-up; for now use `scanOutput` on model output.
+- **Bedrock** — entry-gate only; tool execution happens inside AWS,
+  no post-execute hook is exposed by Bedrock Agents.
+- **Mastra middleware adapter** (`mastra.ts`, not the processor) — uses
+  a different wrap shape; coverage to follow.
+
+### Migration
+
+Drop-in. No public type breakage. The new config fields are optional
+and additive. Existing tests that mock tool returns may need
+`scanToolResults: false` if they don't expect the helper's policy engine
+to run on their fixtures.
+
 ## [0.14.1] - 2026-04-30 — Field extraction on the `process` stage
 
 `scope_boundary` and `network_allowlist` rules at stage `process` (the
diff --git a/packages/governance/package.json b/packages/governance/package.json
@@ -1,6 +1,6 @@
 {
   "name": "governance-sdk",
-  "version": "0.14.1",
+  "version": "0.15.0",
   "description": "AI Agent Governance for TypeScript — policy enforcement, scoring, compliance, and audit for AI agents",
   "type": "module",
   "main": "./dist/index.js",
diff --git a/packages/governance/src/plugins/genkit-types.ts b/packages/governance/src/plugins/genkit-types.ts
@@ -82,6 +82,20 @@ export interface GovernGenkitConfig {
   onApprovalRequired?: (decision: EnforcementDecision, toolName: string) => void;
   actionMapper?: (toolName: string) => PolicyAction;
   sessionTokenTracker?: () => number;
+  /**
+   * Master switch for tool-result scanning (governance-sdk 0.15+).
+   * Default: `true`. Wrapped tools run their return values through the
+   * policy engine at stage `tool_result` before returning. On block,
+   * the redacted detail object replaces the original output so the
+   * agent never ingests poisoned tool content. Set `false` to skip
+   * — useful for test environments that mock tool returns.
+   */
+  scanToolResults?: boolean;
+  /**
+   * Detection threshold for the local injection signal (0-1) that
+   * `scanToolResult` populates on `ctx.mlInjectionScore`. Default 0.5.
+   */
+  toolResultInjectionThreshold?: number;
 }
 
 // ─── Results ────────────────────────────────────────────────
diff --git a/packages/governance/src/plugins/genkit.ts b/packages/governance/src/plugins/genkit.ts
@@ -38,6 +38,7 @@ export type {
 
 import { handleOutcome, GovernanceBlockedError, GovernanceApprovalRequiredError } from "./outcome-handler.js";
 import type { OutcomeCallbacks } from "./outcome-handler.js";
+import { scanToolResult } from "../tool-result-scan.js";
 
 // ─── Blocked Error ──────────────────────────────────────────
 
@@ -99,10 +100,35 @@ function createAuditor(governance: GovernanceInstance, agentId: string) {
     });
 }
 
+/**
+ * Build a result-scan closure bound to this governance instance + agent.
+ * Returned function: takes the tool's raw output, runs it through the
+ * policy engine at stage="tool_result", returns either the original
+ * output (allow) or a redacted detail object (block / require_approval).
+ *
+ * No-op when `config.scanToolResults === false`. Default-on so any
+ * Genkit user upgrading to SDK 0.15+ gets injection scanning of tool
+ * returns automatically — same default as the Mastra processor.
+ */
+function createResultScanner(
+  governance: GovernanceInstance, agentId: string, config: GovernGenkitConfig,
+) {
+  return async (toolName: string, args: Record<string, unknown> | undefined, output: unknown): Promise<unknown> => {
+    if (config.scanToolResults === false) return output;
+    const scanned = await scanToolResult({
+      governance, agentId, agentName: config.agentName, tool: toolName,
+      args, result: output,
+      injectionThreshold: config.toolResultInjectionThreshold,
+    });
+    return scanned.result;
+  };
+}
+
 function wrapTool(
   tool: GenkitTool,
   enforce: ReturnType<typeof createEnforcer>,
   audit: ReturnType<typeof createAuditor>,
+  scanResult: ReturnType<typeof createResultScanner>,
 ): GenkitTool {
   return {
     ...tool,
@@ -111,8 +137,11 @@ function wrapTool(
       const decision = await enforce(tool.name, inputRecord);
       try {
         const output = await tool.call(input, options);
+        // Scan tool result before returning to the agent loop. On block
+        // the LLM gets a redacted detail object in place of the original.
+        const finalOutput = await scanResult(tool.name, inputRecord, output);
         await audit(tool.name, "success");
-        return output;
+        return finalOutput;
       } catch (error) {
         await audit(tool.name, "failure", { error: error instanceof Error ? error.message : String(error) });
         throw error;
@@ -134,9 +163,10 @@ export async function governGenkitTools(
 
   const enforce = createEnforcer(governance, result.id, config);
   const audit = createAuditor(governance, result.id);
+  const scanResult = createResultScanner(governance, result.id, config);
 
   return {
-    tools: tools.map((tool) => wrapTool(tool, enforce, audit)),
+    tools: tools.map((tool) => wrapTool(tool, enforce, audit, scanResult)),
     agentId: result.id,
     score: result.score,
     level: result.level,
diff --git a/packages/governance/src/plugins/langchain.ts b/packages/governance/src/plugins/langchain.ts
@@ -46,6 +46,7 @@ import type {
 import type { AgentRegistration, AgentFramework } from "../types";
 import { handleOutcome, GovernanceBlockedError, GovernanceApprovalRequiredError } from "./outcome-handler.js";
 import type { OutcomeCallbacks } from "./outcome-handler.js";
+import { scanToolResult } from "../tool-result-scan.js";
 
 // ─── Types ──────────────────────────────────────────────────────
 
@@ -115,6 +116,15 @@ export interface GovernToolConfig {
   onApprovalRequired?: (decision: EnforcementDecision, toolName: string) => void;
   actionMapper?: (toolName: string) => PolicyAction;
   sessionTokenTracker?: () => number;
+  /**
+   * Master switch for tool-result scanning (governance-sdk 0.15+).
+   * Default: `true`. Wrapped tools run their return values through the
+   * policy engine at stage `tool_result` before returning to the agent
+   * loop. On block, the redacted detail object replaces the original.
+   */
+  scanToolResults?: boolean;
+  /** Detection threshold for the local injection signal (0-1). Default 0.5. */
+  toolResultInjectionThreshold?: number;
 }
 
 export interface GovernedResult {
@@ -196,6 +206,32 @@ function createAuditor(governance: GovernanceInstance, agentId: string) {
   };
 }
 
+/**
+ * Build a result-scan closure bound to this governance + agent. Runs
+ * the tool's raw output through the policy engine at stage `tool_result`
+ * and returns either the original (allow) or a redacted detail object
+ * (block / require_approval). No-op when `config.scanToolResults === false`.
+ */
+function createResultScanner(
+  governance: GovernanceInstance,
+  agentId: string,
+  config: GovernToolConfig,
+) {
+  return async (
+    toolName: string,
+    args: Record<string, unknown> | undefined,
+    output: unknown,
+  ): Promise<unknown> => {
+    if (config.scanToolResults === false) return output;
+    const scanned = await scanToolResult({
+      governance, agentId, agentName: config.agentName, tool: toolName,
+      args, result: output,
+      injectionThreshold: config.toolResultInjectionThreshold,
+    });
+    return scanned.result;
+  };
+}
+
 // ─── Govern a Single Tool ───────────────────────────────────────
 
 /**
@@ -211,20 +247,31 @@ export async function governTool<T extends LangChainTool>(
   const result = await registerAgent(governance, config, [tool.name]);
   const enforce = createEnforcer(governance, result.id, result.level, config);
   const audit = createAuditor(governance, result.id);
+  const scanResult = createResultScanner(governance, result.id, config);
 
   const governed = {
     ...tool,
     agentId: result.id,
     score: result.score,
     level: result.level,
     governance,
-    invoke: async (input: unknown, config?: LangChainRunnableConfig): Promise<unknown> => {
+    invoke: async (input: unknown, runConfig?: LangChainRunnableConfig): Promise<unknown> => {
       await enforce(tool.name, input);
 
       try {
-        const output = await tool.invoke(input, config);
+        const output = await tool.invoke(input, runConfig);
+        // Guard the cast — LangChain DynamicTool inputs are commonly
+        // strings. An unchecked cast would set ctx.input to a string
+        // (typed as Record<string, unknown>), and condition evaluators
+        // reading properties off it would silently get undefined and
+        // never match. Mirror the guard createEnforcer uses on its own
+        // input field.
+        const argRecord = typeof input === "object" && input !== null
+          ? input as Record<string, unknown>
+          : undefined;
+        const finalOutput = await scanResult(tool.name, argRecord, output);
         await audit(tool.name, "success");
-        return output;
+        return finalOutput;
       } catch (error) {
         await audit(tool.name, "failure", {
           error: error instanceof Error ? error.message : String(error),
@@ -253,16 +300,27 @@ export async function governTools<T extends LangChainTool>(
   const result = await registerAgent(governance, config, toolNames);
   const enforce = createEnforcer(governance, result.id, result.level, config);
   const audit = createAuditor(governance, result.id);
+  const scanResult = createResultScanner(governance, result.id, config);
 
   const governed = tools.map((tool) => ({
     ...tool,
-    invoke: async (input: unknown, config?: LangChainRunnableConfig): Promise<unknown> => {
+    invoke: async (input: unknown, runConfig?: LangChainRunnableConfig): Promise<unknown> => {
       await enforce(tool.name, input);
 
       try {
-        const output = await tool.invoke(input, config);
+        const output = await tool.invoke(input, runConfig);
+        // Guard the cast — LangChain DynamicTool inputs are commonly
+        // strings. An unchecked cast would set ctx.input to a string
+        // (typed as Record<string, unknown>), and condition evaluators
+        // reading properties off it would silently get undefined and
+        // never match. Mirror the guard createEnforcer uses on its own
+        // input field.
+        const argRecord = typeof input === "object" && input !== null
+          ? input as Record<string, unknown>
+          : undefined;
+        const finalOutput = await scanResult(tool.name, argRecord, output);
         await audit(tool.name, "success");
-        return output;
+        return finalOutput;
       } catch (error) {
         await audit(tool.name, "failure", {
           error: error instanceof Error ? error.message : String(error),
diff --git a/packages/governance/src/plugins/llamaindex-types.ts b/packages/governance/src/plugins/llamaindex-types.ts
@@ -82,6 +82,15 @@ export interface GovernLlamaIndexConfig {
   onApprovalRequired?: (decision: EnforcementDecision, toolName: string) => void;
   actionMapper?: (toolName: string) => PolicyAction;
   sessionTokenTracker?: () => number;
+  /**
+   * Master switch for tool-result scanning (governance-sdk 0.15+).
+   * Default: `true`. Wrapped tools run their return values through the
+   * policy engine at stage `tool_result` before returning to the agent
+   * loop. On block, the redacted detail object replaces the original.
+   */
+  scanToolResults?: boolean;
+  /** Detection threshold for the local injection signal (0-1). Default 0.5. */
+  toolResultInjectionThreshold?: number;
 }
 
 // ─── Results ────────────────────────────────────────────────
diff --git a/packages/governance/src/plugins/llamaindex.ts b/packages/governance/src/plugins/llamaindex.ts
@@ -40,6 +40,7 @@ export type {
 
 import { handleOutcome, GovernanceBlockedError, GovernanceApprovalRequiredError } from "./outcome-handler.js";
 import type { OutcomeCallbacks } from "./outcome-handler.js";
+import { scanToolResult } from "../tool-result-scan.js";
 
 // ─── Pre/post LLM wrapper ───────────────────────────────────
 // See ./llamaindex-llm.ts for docs + examples.
@@ -99,10 +100,42 @@ function createAuditor(governance: GovernanceInstance, agentId: string) {
     });
 }
 
+/**
+ * Build a result-scan closure bound to this governance + agent. Runs the
+ * tool's raw output through the policy engine at stage `tool_result` and
+ * returns either the original (allow) or a redacted detail object (block).
+ * No-op when `config.scanToolResults === false`. Default-on.
+ */
+function createResultScanner(
+  governance: GovernanceInstance, agentId: string, config: GovernLlamaIndexConfig,
+) {
+  return async (toolName: string, args: Record<string, unknown> | undefined, output: LlamaIndexJSONValue): Promise<LlamaIndexJSONValue> => {
+    if (config.scanToolResults === false) return output;
+    const scanned = await scanToolResult({
+      governance, agentId, agentName: config.agentName, tool: toolName,
+      args, result: output,
+      injectionThreshold: config.toolResultInjectionThreshold,
+    });
+    // BlockedToolResult.ruleId is `string | null`, but LlamaIndexJSONValue
+    // explicitly excludes `null` per the SDK contract. Coerce on block so
+    // downstream LlamaIndex JSON walkers don't trip on the null property.
+    if (scanned.blocked) {
+      const blocked = scanned.result as { blocked: true; reason: string; ruleId: string | null };
+      return {
+        blocked: true,
+        reason: blocked.reason,
+        ruleId: blocked.ruleId ?? "unknown",
+      };
+    }
+    return scanned.result as LlamaIndexJSONValue;
+  };
+}
+
 function wrapTool(
   tool: LlamaIndexTool,
   enforce: ReturnType<typeof createEnforcer>,
   audit: ReturnType<typeof createAuditor>,
+  scanResult: ReturnType<typeof createResultScanner>,
 ): LlamaIndexTool {
   if (!tool.call) return tool;
   const toolName = tool.metadata.name;
@@ -112,8 +145,9 @@ function wrapTool(
       const decision = await enforce(toolName, input);
       try {
         const output = await tool.call!(input);
+        const finalOutput = await scanResult(toolName, input, output);
         await audit(toolName, "success");
-        return output;
+        return finalOutput;
       } catch (error) {
         await audit(toolName, "failure", { error: error instanceof Error ? error.message : String(error) });
         throw error;
@@ -135,9 +169,10 @@ export async function governLlamaIndexTools(
 
   const enforce = createEnforcer(governance, result.id, config);
   const audit = createAuditor(governance, result.id);
+  const scanResult = createResultScanner(governance, result.id, config);
 
   return {
-    tools: tools.map((tool) => wrapTool(tool, enforce, audit)),
+    tools: tools.map((tool) => wrapTool(tool, enforce, audit, scanResult)),
     agentId: result.id,
     score: result.score,
     level: result.level,
@@ -160,9 +195,10 @@ export async function governLlamaIndexAgent(
 
   const enforce = createEnforcer(governance, result.id, config);
   const audit = createAuditor(governance, result.id);
+  const scanResult = createResultScanner(governance, result.id, config);
 
   return {
-    agent: { ...agent, tools: agent.tools.map((tool) => wrapTool(tool, enforce, audit)) },
+    agent: { ...agent, tools: agent.tools.map((tool) => wrapTool(tool, enforce, audit, scanResult)) },
     agentId: result.id,
     score: result.score,
     level: result.level,
diff --git a/packages/governance/src/plugins/openai-agents-types.ts b/packages/governance/src/plugins/openai-agents-types.ts
@@ -122,6 +122,15 @@ export interface GovernAgentConfig {
   onApprovalRequired?: (decision: EnforcementDecision, toolName: string) => void;
   actionMapper?: (toolName: string) => PolicyAction;
   sessionTokenTracker?: () => number;
+  /**
+   * Master switch for tool-result scanning (governance-sdk 0.15+).
+   * Default: `true`. Wrapped tools run their return values through the
+   * policy engine at stage `tool_result` before returning to the agent
+   * loop. On block, the redacted detail object replaces the original.
+   */
+  scanToolResults?: boolean;
+  /** Detection threshold for the local injection signal (0-1). Default 0.5. */
+  toolResultInjectionThreshold?: number;
 }
 
 // ─── Results ────────────────────────────────────────────────
diff --git a/packages/governance/src/plugins/openai-agents.ts b/packages/governance/src/plugins/openai-agents.ts

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "governance-sdk",`
`3`		`- "version": "0.14.1",`
	`3`	`+ "version": "0.15.0",`
`4`	`4`	`"description": "AI Agent Governance for TypeScript — policy enforcement, scoring, compliance, and audit for AI agents",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "./dist/index.js",`