refactor(codemode): remove generic agent tool (#35417)

rekram1-node · web-flow · commit f14eafe9db38 · 2026-07-05T09:30:46.000-05:00
diff --git a/packages/codemode/README.md b/packages/codemode/README.md
@@ -4,7 +4,7 @@ Effect-native confined code execution over explicit, schema-described tools.
 
 CodeMode lets a model write a small JavaScript program that can call only the tools supplied by the host. The program can sequence calls, transform plain data, branch, loop, and run independent calls in parallel without receiving ambient filesystem, process, network, module, or application authority.
 
-The package is currently private to this workspace. Its API is designed around three uses:
+The package is currently private to this workspace. Its API is designed around one-shot and reusable execution:
 
 ```ts
 // One execution
@@ -13,9 +13,6 @@ yield * CodeMode.execute({ tools, code })
 // A reusable runtime
 const runtime = CodeMode.make({ tools, limits })
 yield * runtime.execute(code)
-
-// One agent-facing code tool
-const codeTool = runtime.agentTool()
 ```
 
 ## Install
@@ -117,10 +114,9 @@ const runtime = CodeMode.make({
 runtime.catalog() // structured tool descriptions
 runtime.instructions() // model-facing syntax and tool guide
 runtime.execute(source) // ExecuteResult
-runtime.agentTool() // { name, description, input, output, execute }
 ```
 
-`catalog`, `instructions`, and `agentTool` are projections of the same configured tool tree. `agentTool().description` is exactly `instructions()`.
+`CodeMode.Input` and `CodeMode.Result` are Effect schemas for the execution request and result. Hosts can combine them with `runtime.instructions()` and `runtime.execute()` when constructing a framework-specific agent tool.
 
 ### Results
 
@@ -333,8 +329,6 @@ A program cannot gain authority through prose or generated code. It can only exe
 The public contract is guided by these equivalences:
 
 - `CodeMode.execute({ ...options, code })` is equivalent to `CodeMode.make(options).execute(code)`.
-- `CodeMode.make(options).agentTool().execute({ code })` is equivalent to `CodeMode.make(options).execute(code)`.
-- `CodeMode.make(options).agentTool().description` equals `CodeMode.make(options).instructions()`.
 - A tool implementation is not invoked unless its input has decoded successfully.
 - A tool result is not visible to the program unless its output has decoded and crossed the plain-data boundary successfully.
 - Unknown host failures do not become model-visible diagnostics; `ToolError` is the explicit safe-message channel.
diff --git a/packages/codemode/codemode.md b/packages/codemode/codemode.md
@@ -246,7 +246,7 @@ maxOutputBytes? }` (defaults 10_000 / 100 / 32_000). This wave kept the other kn
   serialized values become truncated text + ` [result truncated: N bytes exceeds the M-byte
 output limit; return a smaller value]`; logs keep leading lines within the remaining budget
   - `[logs truncated: showing K of N lines]`; result gains `truncated: true` (also added to
-    `ExecuteResultSchema`). UTF-8-safe truncation (no split code points). (The in-sandbox
+    `CodeMode.Result`). UTF-8-safe truncation (no split code points). (The in-sandbox
     `maxDataBytes` check that used to throw first on oversized raw values died in Fix 5 -
     truncation is now the only result-size mechanism.)
 - **Search polish**: default limit 12 -> **10** (`defaultSearchLimit`); exact-path lookup - a
@@ -575,7 +575,7 @@ configurable knobs; the internal limit system dies):
   `maxCollectionLength` (every array-length/object-field-count check - this knob was
   actively harmful: an MCP tool returning 20k rows failed). The `OperationLimitExceeded`
   and `AuditLimitExceeded` diagnostic kinds are gone from the `DiagnosticKind` union and
-  `ExecuteResultSchema` (fine - the package is unreleased).
+  `CodeMode.Result` (fine - the package is unreleased).
 - **Fixed constants, not knobs**: `TOOL_CALL_CONCURRENCY = 8` (codemode.ts; the fork
   semaphore) and `MAX_VALUE_DEPTH = 32` (tool-runtime.ts; the `copyIn` depth check - kept
   only because it produces a clearer error than a native stack-overflow RangeError; still
diff --git a/packages/codemode/src/codemode.ts b/packages/codemode/src/codemode.ts
@@ -108,14 +108,14 @@ export type ExecuteFailure = {
 /** Result of executing a CodeMode program. Program failures are data, not Effect failures. */
 export type ExecuteResult = ExecuteSuccess | ExecuteFailure
 
-/** Reusable CodeMode configuration shared by `execute` and `agentTool`. */
+/** Configuration shared by `CodeMode.make` and `CodeMode.execute`. */
 export type CodeModeOptions<Tools extends Record<string, unknown> = {}> = Omit<ExecuteOptions<Tools>, "code"> & {
   /** Progressive-disclosure configuration for the agent-facing tool catalog. */
   readonly discovery?: DiscoveryOptions
 }
 
-/** Input schema for the single agent-facing tool produced by `runtime.agentTool()`. */
-export const ExecuteInputSchema = Schema.Struct({ code: Schema.String })
+/** Schema for a CodeMode execution request. */
+const Input = Schema.Struct({ code: Schema.String })
 
 const DiagnosticKindSchema = Schema.Literals([
   "ParseError",
@@ -130,8 +130,8 @@ const DiagnosticKindSchema = Schema.Literals([
   "ExecutionFailure",
 ])
 
-/** Structured success or diagnostic result schema returned by CodeMode execution. */
-export const ExecuteResultSchema = Schema.Union([
+/** Schema for the structured success or diagnostic returned by CodeMode execution. */
+const Result = Schema.Union([
   Schema.Struct({
     ok: Schema.Literal(true),
     value: Schema.Json,
@@ -153,23 +153,12 @@ export const ExecuteResultSchema = Schema.Union([
   }),
 ])
 
-/** Agent-facing projection of a configured CodeMode runtime. */
-export type AgentToolDefinition<R = never> = {
-  readonly name: "code"
-  readonly description: string
-  readonly input: typeof ExecuteInputSchema
-  readonly output: typeof ExecuteResultSchema
-  readonly execute: (input: { readonly code: string }) => Effect.Effect<ExecuteResult, never, R>
-}
-
 /** Reusable confined runtime over one explicit tool tree. */
 export type CodeModeRuntime<R = never> = {
   /** Lists schema-described tool paths provided by the host. */
   readonly catalog: () => ReadonlyArray<ToolDescription>
   /** Builds model-facing syntax guidance and visible tool signatures. */
   readonly instructions: () => string
-  /** Projects the configured runtime as one agent-facing `code` tool. */
-  readonly agentTool: () => AgentToolDefinition<R>
   /** Executes a program using this runtime's configured host tools. */
   readonly execute: (code: string) => Effect.Effect<ExecuteResult, never, R>
 }
@@ -4088,13 +4077,12 @@ export const execute = <const Tools extends Record<string, unknown>>(
 /**
  * Creates an Effect-native runtime over explicit, schema-described tools.
  *
- * Use `execute` for host-driven execution or `agentTool` to expose one confined code tool to an
- * agent framework. Tool requirements remain in the returned Effect environment.
+ * Use `execute` for host-driven execution. Tool requirements remain in the returned Effect environment.
  *
  * @example
  * ```ts
  * const runtime = CodeMode.make({ tools: { orders: { lookup } } })
- * const code = runtime.agentTool()
+ * const result = yield* runtime.execute("return await tools.orders.lookup({ id: 'order_42' })")
  * ```
  */
 export const make = <const Tools extends Record<string, unknown> = {}>(
@@ -4111,16 +4099,9 @@ export const make = <const Tools extends Record<string, unknown> = {}>(
   return {
     catalog: () => catalog,
     instructions: () => instructions,
-    agentTool: () => ({
-      name: "code",
-      description: instructions,
-      input: ExecuteInputSchema,
-      output: ExecuteResultSchema,
-      execute: ({ code }) => executeProgram(code),
-    }),
     execute: executeProgram,
   }
 }
 
-/** Constructors for one-shot and reusable CodeMode execution. */
-export const CodeMode = { make, execute }
+/** Schemas and constructors for one-shot and reusable CodeMode execution. */
+export const CodeMode = { Input, Result, make, execute }
diff --git a/packages/codemode/src/index.ts b/packages/codemode/src/index.ts
@@ -1,10 +1,9 @@
-export { ToolError, CodeMode, ExecuteInputSchema, ExecuteResultSchema, toolError } from "./codemode.js"
+export { ToolError, CodeMode, toolError } from "./codemode.js"
 export { Tool } from "./tool.js"
 export * as OpenAPI from "./openapi/index.js"
 export type { Definition as ToolDefinition, JsonSchema, ToolSchema } from "./tool.js"
 export type { ToolCallEnded, ToolCallHooks } from "./tool-runtime.js"
 export type {
-  AgentToolDefinition,
   CodeModeOptions,
   CodeModeRuntime,
   DataValue,
diff --git a/packages/codemode/test/codemode.test.ts b/packages/codemode/test/codemode.test.ts
@@ -1,13 +1,6 @@
 import { describe, expect, test } from "bun:test"
 import { Cause, Effect, Schema } from "effect"
-import {
-  CodeMode,
-  ExecuteInputSchema,
-  ExecuteResultSchema,
-  Tool,
-  toolError,
-  type ExecutionLimits,
-} from "../src/index.js"
+import { CodeMode, Tool, toolError, type ExecutionLimits } from "../src/index.js"
 import type { Definition } from "../src/tool.js"
 
 const run = (tool: Definition<never>) =>
@@ -235,7 +228,7 @@ describe("CodeMode console capture", () => {
       logs: ['Thread info: {"name":"Demo","count":2}', "[warn] careful"],
       toolCalls: [],
     })
-    expect(Schema.decodeUnknownSync(ExecuteResultSchema)(JSON.parse(JSON.stringify(result)))).toStrictEqual(result)
+    expect(Schema.decodeUnknownSync(CodeMode.Result)(JSON.parse(JSON.stringify(result)))).toStrictEqual(result)
   })
 
   test("keeps logs captured before failures", async () => {
@@ -371,7 +364,7 @@ describe("CodeMode output budget", () => {
     expect(result.value).toMatch(
       /^\{"data":"x+ \[result truncated: \d+ bytes exceeds the 40-byte output limit; return a smaller value\]$/,
     )
-    expect(Schema.decodeUnknownSync(ExecuteResultSchema)(JSON.parse(JSON.stringify(result)))).toStrictEqual(result)
+    expect(Schema.decodeUnknownSync(CodeMode.Result)(JSON.parse(JSON.stringify(result)))).toStrictEqual(result)
   })
 
   test("keeps leading logs within the remaining budget and marks the cut", async () => {
@@ -501,24 +494,16 @@ describe("CodeMode public contract", () => {
   const tools = { orders: { lookup } }
   const source = `return await tools.orders.lookup({ id: "order_42" })`
 
-  test("keeps one-shot, reusable, and agent-tool execution equivalent", async () => {
+  test("keeps one-shot and reusable execution equivalent", async () => {
     const runtime = CodeMode.make({ tools })
-    const agentTool = runtime.agentTool()
-    const [oneShot, reusable, projected] = await Promise.all([
+    const [oneShot, reusable] = await Promise.all([
       Effect.runPromise(CodeMode.execute({ tools, code: source })),
       Effect.runPromise(runtime.execute(source)),
-      Effect.runPromise(agentTool.execute({ code: source })),
     ])
 
     expect(reusable).toStrictEqual(oneShot)
-    expect(projected).toStrictEqual(oneShot)
-    expect(agentTool.name).toBe("code")
-    expect(agentTool.input).toBe(ExecuteInputSchema)
-    expect(agentTool.output).toBe(ExecuteResultSchema)
-    expect(agentTool.description).toBe(runtime.instructions())
-    expect(Schema.decodeUnknownSync(ExecuteResultSchema)(JSON.parse(JSON.stringify(projected)))).toStrictEqual(
-      projected,
-    )
+    expect(Schema.decodeUnknownSync(CodeMode.Input)({ code: source })).toStrictEqual({ code: source })
+    expect(Schema.decodeUnknownSync(CodeMode.Result)(JSON.parse(JSON.stringify(reusable)))).toStrictEqual(reusable)
   })
 
   test("inlines a COMPLETE small catalog and keeps search registered but unadvertised", async () => {
@@ -1035,7 +1020,7 @@ describe("CodeMode public contract", () => {
       value: { top: null, nested: [1, null] },
       toolCalls: [],
     })
-    expect(Schema.decodeUnknownSync(ExecuteResultSchema)(JSON.parse(JSON.stringify(result)))).toStrictEqual(result)
+    expect(Schema.decodeUnknownSync(CodeMode.Result)(JSON.parse(JSON.stringify(result)))).toStrictEqual(result)
   })
 
   test("rejects invalid configuration and discovery limits", async () => {