fix: set meaningful tool titles via tool.execute.after hook (#17)

kuitos · web-flow · commit 6310f248370e · 2026-04-18T19:36:10.000+08:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -20,5 +20,8 @@ jobs:
       - name: Install dependencies
         run: bun install
 
+      - name: Run E2E regression test
+        run: bun test test/tool-titles-e2e.test.ts
+
       - name: Run tests
         run: bun test
diff --git a/src/index.ts b/src/index.ts
@@ -123,10 +123,67 @@ function extractRecentTools(
   return tools
 }
 
+// Result counts recorded by the memory_list / memory_search execute() calls so
+// that tool.execute.after can build a title without re-reading the filesystem.
+// Keyed by callID; buildMemoryToolTitle removes an entry once it has read it.
+const memoryListCountByCallID = new Map<string, number>()
+const memorySearchCountByCallID = new Map<string, number>()
+
+/** Builds the display title for a finished memory_* tool call, or undefined when no title applies. */
+function buildMemoryToolTitle(
+  toolID: string,
+  args: Record<string, unknown> | undefined,
+  callID: string | undefined,
+): string | undefined {
+  // String argument by key, or "" when it is missing or not a string.
+  const text = (key: string): string => {
+    const value = args?.[key]
+    return typeof value === "string" ? value : ""
+  }
+  // Reads the count cached for this call and drops the entry so the map does not keep it.
+  const take = (counts: Map<string, number>): number | undefined => {
+    if (!callID) return undefined
+    const cached = counts.get(callID)
+    counts.delete(callID)
+    return cached
+  }
+  const plural = (n: number, one: string, many: string): string => `${n} ${n === 1 ? one : many}`
+  if (toolID === "memory_save") {
+    const kind = text("type")
+    const label = text("name")
+    if (!label) return undefined
+    return kind ? `${kind}: ${label}` : label
+  }
+  if (toolID === "memory_delete" || toolID === "memory_read") return text("file_name") || undefined
+  if (toolID === "memory_list") {
+    const total = take(memoryListCountByCallID)
+    return total === undefined ? "list memories" : plural(total, "memory", "memories")
+  }
+  if (toolID === "memory_search") {
+    const needle = text("query")
+    const hits = take(memorySearchCountByCallID)
+    if (!needle) return undefined
+    return hits === undefined ? `"${needle}"` : `"${needle}" · ${plural(hits, "match", "matches")}`
+  }
+  return undefined
+}
+
+/** Reads a string `callID` off an arbitrary tool context; undefined when it is absent or not a string. */
+function getCallID(ctx: unknown): string | undefined {
+  const candidate = typeof ctx === "object" && ctx !== null ? (ctx as { callID?: unknown }).callID : undefined
+  return typeof candidate === "string" ? candidate : undefined
+}
+
 export const MemoryPlugin: Plugin = async ({ worktree }) => {
   getMemoryDir(worktree)
 
   return {
+    // Give memory_* tool calls a descriptive title; every other tool keeps its default.
+    "tool.execute.after": async ({ tool: toolID, args, callID }, output) => {
+      const title = toolID.startsWith("memory_") ? buildMemoryToolTitle(toolID, args, callID) : undefined
+      if (title) output.title = title
+    },
+
     "experimental.chat.messages.transform": async (_input, output) => {
       const { query, sessionID } = getLastUserQuery(output.messages)
 
@@ -219,7 +276,7 @@ export const MemoryPlugin: Plugin = async ({ worktree }) => {
               "Memory content. For feedback/project types, structure as: rule/fact, then **Why:** and **How to apply:** lines",
             ),
         },
-        async execute(args) {
+        async execute(args, _ctx) {
           const filePath = saveMemory(worktree, args.file_name, args.name, args.description, args.type, args.content)
           return `Memory saved to ${filePath}`
         },
@@ -230,7 +287,7 @@ export const MemoryPlugin: Plugin = async ({ worktree }) => {
         args: {
           file_name: tool.schema.string().describe("File name of the memory to delete (with or without .md extension)"),
         },
-        async execute(args) {
+        async execute(args, _ctx) {
           const deleted = deleteMemory(worktree, args.file_name)
           return deleted ? `Memory "${args.file_name}" deleted.` : `Memory "${args.file_name}" not found.`
         },
@@ -242,8 +299,10 @@ export const MemoryPlugin: Plugin = async ({ worktree }) => {
           "Use this to check what memories exist before saving a new one (to avoid duplicates) " +
           "or when you need to recall what's been stored.",
         args: {},
-        async execute() {
+        async execute(_args, ctx) {
           const entries = listMemories(worktree)
+          const callID = getCallID(ctx)
+          if (callID) memoryListCountByCallID.set(callID, entries.length)
           if (entries.length === 0) {
             return "No memories saved yet."
           }
@@ -261,8 +320,10 @@ export const MemoryPlugin: Plugin = async ({ worktree }) => {
         args: {
           query: tool.schema.string().describe("Search query — searches across name, description, and content"),
         },
-        async execute(args) {
+        async execute(args, ctx) {
           const results = searchMemories(worktree, args.query)
+          const callID = getCallID(ctx)
+          if (callID) memorySearchCountByCallID.set(callID, results.length)
           if (results.length === 0) {
             return `No memories matching "${args.query}".`
           }
@@ -278,7 +339,7 @@ export const MemoryPlugin: Plugin = async ({ worktree }) => {
         args: {
           file_name: tool.schema.string().describe("File name of the memory to read (with or without .md extension)"),
         },
-        async execute(args) {
+        async execute(args, _ctx) {
           const entry = readMemory(worktree, args.file_name)
           if (!entry) {
             return `Memory "${args.file_name}" not found.`
diff --git a/test/github-actions-ci.test.ts b/test/github-actions-ci.test.ts
@@ -5,7 +5,7 @@ import { join } from "path"
 const workflowPath = join(process.cwd(), ".github", "workflows", "ci.yml")
 
 describe("GitHub Actions CI workflow", () => {
-  test("defines pull request validation that installs dependencies and runs bun test", () => {
+  test("defines pull request validation that runs the dedicated e2e regression test before the full suite", () => {
     expect(existsSync(workflowPath)).toBe(true)
 
     const workflow = readFileSync(workflowPath, "utf-8")
@@ -16,6 +16,8 @@ describe("GitHub Actions CI workflow", () => {
     expect(workflow).toContain("branches: [main]")
     expect(workflow).toContain("oven-sh/setup-bun")
     expect(workflow).toContain("bun install")
-    expect(workflow).toContain("bun test")
+    expect(workflow).toContain("Run E2E regression test")
+    expect(workflow).toContain("bun test test/tool-titles-e2e.test.ts")
+    expect(workflow).toMatch(/run: bun test\s*$/m) // full-suite step; a bare "bun test" substring also matches the e2e command
   })
 })
diff --git a/test/tool-titles-e2e.test.ts b/test/tool-titles-e2e.test.ts
@@ -0,0 +1,137 @@
+import { afterEach, describe, expect, test } from "bun:test"
+import { mkdtempSync, mkdirSync, rmSync } from "fs"
+import { tmpdir } from "os"
+import { join } from "path"
+import { MemoryPlugin } from "../src/index.js"
+
+const tempDirs: string[] = []
+/** Creates a temp directory holding an empty `.git` folder and records it for removal after the test. */
+function makeTempGitRepo(): string {
+  const repoRoot = mkdtempSync(join(tmpdir(), "tool-title-e2e-"))
+  mkdirSync(join(repoRoot, ".git"), { recursive: true })
+  tempDirs.push(repoRoot)
+  return repoRoot
+}
+// Empty the registry after each test, deleting the recorded directories newest-first.
+afterEach(() => {
+  for (let left = tempDirs.length; left > 0; left = tempDirs.length) {
+    const newest = tempDirs.pop()
+    if (newest) rmSync(newest, { recursive: true, force: true })
+  }
+})
+
+type ToolCallContext = { callID?: string } // context object handed to execute as its second argument
+// Signature this test uses to invoke a tool: the arguments plus a context that may carry a callID.
+type ToolExecute<TArgs extends object> = (args: TArgs, ctx: ToolCallContext) => Promise<string>
+// The five memory tools exercised below, each with the argument shape this test passes to execute.
+type MemoryTools = {
+  memory_save: {
+    execute: ToolExecute<{
+      file_name: string
+      name: string
+      description: string
+      type: "user" | "feedback" | "project" | "reference"
+      content: string
+    }>
+  }
+  memory_list: {
+    execute: ToolExecute<Record<string, never>>
+  }
+  memory_search: {
+    execute: ToolExecute<{ query: string }>
+  }
+  memory_read: {
+    execute: ToolExecute<{ file_name: string }>
+  }
+  memory_delete: {
+    execute: ToolExecute<{ file_name: string }>
+  }
+}
+
+type ToolExecuteAfter = (
+  input: { tool: string; args?: Record<string, unknown>; callID?: string },
+  output: { title?: string },
+) => Promise<void>
+/** Runs `execute`, then `afterHook` with the same args and callID; returns the tool result and the hook's title. */
+async function runToolWithAfter<TArgs extends object>(
+  afterHook: ToolExecuteAfter,
+  toolName: keyof MemoryTools,
+  execute: ToolExecute<TArgs>,
+  args: TArgs,
+  callID: string,
+): Promise<{ result: string; title?: string }> {
+  const toolResult = await execute(args, { callID })
+  const hookOutput: { title?: string } = {}
+  await afterHook({ tool: toolName, args: args as Record<string, unknown>, callID }, hookOutput)
+  return { result: toolResult, title: hookOutput.title }
+}
+
+describe("memory tool titles end-to-end", () => {
+  test("persists human-readable titles across the full plugin tool lifecycle", async () => {
+    const repo = makeTempGitRepo()
+    const plugin = await MemoryPlugin({ worktree: repo } as never)
+    const tools = plugin.tool as unknown as MemoryTools
+    const afterHook = plugin["tool.execute.after"] as unknown as ToolExecuteAfter
+    // save: the title is "<type>: <name>" built from the call's arguments.
+    const save = await runToolWithAfter(
+      afterHook,
+      "memory_save",
+      tools.memory_save.execute,
+      {
+        file_name: "title_verification",
+        name: "Title Verification Test",
+        description: "Verifies final tool titles are persisted",
+        type: "reference",
+        content: "Used to validate the completed tool title in end-to-end flow.",
+      },
+      "call-save",
+    )
+
+    expect(save.result).toContain("Memory saved to")
+    expect(save.title).toBe("reference: Title Verification Test")
+    // list: the title is the entry count captured while the tool ran ("1 memory" for a single entry).
+    const list = await runToolWithAfter(afterHook, "memory_list", tools.memory_list.execute, {}, "call-list")
+    expect(list.result).toContain("Title Verification Test")
+    expect(list.title).toBe("1 memory")
+    // search: the title is the quoted query followed by the match count.
+    const search = await runToolWithAfter(
+      afterHook,
+      "memory_search",
+      tools.memory_search.execute,
+      { query: "verification" },
+      "call-search",
+    )
+    expect(search.result).toContain("Title Verification Test")
+    expect(search.title).toBe('"verification" · 1 match')
+    // read: the title is the file_name argument exactly as passed, extension included.
+    const read = await runToolWithAfter(
+      afterHook,
+      "memory_read",
+      tools.memory_read.execute,
+      { file_name: "title_verification.md" },
+      "call-read",
+    )
+    expect(read.result).toContain("# Title Verification Test")
+    expect(read.title).toBe("title_verification.md")
+    // delete: same title rule as read.
+    const remove = await runToolWithAfter(
+      afterHook,
+      "memory_delete",
+      tools.memory_delete.execute,
+      { file_name: "title_verification.md" },
+      "call-delete",
+    )
+    expect(remove.result).toContain('Memory "title_verification.md" deleted.')
+    expect(remove.title).toBe("title_verification.md")
+    // list on an empty store: a zero count takes the plural form.
+    const emptyList = await runToolWithAfter(
+      afterHook,
+      "memory_list",
+      tools.memory_list.execute,
+      {},
+      "call-empty-list",
+    )
+    expect(emptyList.result).toBe("No memories saved yet.")
+    expect(emptyList.title).toBe("0 memories")
+  })
+})