diff --git a/apps/mesh/src/api/routes/decopilot/conversation.ts b/apps/mesh/src/api/routes/decopilot/conversation.ts index c57de0c2d1..6aa16cb3bf 100644 --- a/apps/mesh/src/api/routes/decopilot/conversation.ts +++ b/apps/mesh/src/api/routes/decopilot/conversation.ts @@ -5,8 +5,6 @@ */ import type { ChatMessage } from "./types"; -import type { Memory } from "./memory"; -import type { ThreadMessage } from "@/storage/types"; export { denyPendingApprovals, processConversation, @@ -25,45 +23,3 @@ export function splitRequestMessages(messages: ChatMessage[]): { const requestMessage = messages.find((m) => m.role !== "system")!; return { systemMessages, requestMessage }; } - -async function loadMemory(memory: Memory, windowSize: number) { - const threadMessages = await memory.loadHistory(windowSize); - return threadMessages; -} - -function mergeMessages( - threadMessages: ThreadMessage[], - requestMessage?: ChatMessage, -): ChatMessage[] { - // Filter out messages with empty parts to prevent bricked threads - // (e.g. assistant messages saved after an LLM error before any content was generated) - const validMessages = threadMessages.filter( - (m) => m.parts && m.parts.length > 0, - ); - if (!requestMessage) { - return validMessages as ChatMessage[]; - } - const matchIndex = validMessages.findIndex((m) => m.id === requestMessage.id); - const conversation = - matchIndex >= 0 - ? [...validMessages.slice(0, matchIndex), requestMessage] - : [...validMessages, requestMessage]; - return conversation; -} - -export async function loadAndMergeMessages( - memory: Memory, - requestMessage: ChatMessage | undefined, - systemMessages: ChatMessage[], - windowSize: number, - /** Subagent runs start FRESH: skip the thread's history and send only the - * system prompt + the request (the subtask prompt). */ - skipHistory = false, -): Promise { - const threadMessages = skipHistory - ? [] - : await loadMemory(memory, windowSize); - const conversation = mergeMessages(threadMessages, requestMessage); - const allMessages: ChatMessage[] = [...systemMessages, ...conversation]; - return allMessages; -} diff --git a/apps/mesh/src/api/routes/decopilot/dispatch-run.ts b/apps/mesh/src/api/routes/decopilot/dispatch-run.ts index e410d05ada..18833b4521 100644 --- a/apps/mesh/src/api/routes/decopilot/dispatch-run.ts +++ b/apps/mesh/src/api/routes/decopilot/dispatch-run.ts @@ -30,8 +30,6 @@ import { posthog } from "@/posthog"; import type { UIMessageChunk } from "ai"; import { InProcessSandboxClient } from "@/harnesses/in-process-sandbox-client"; import { - offloadKey, - sha256Hex, shouldOffload, type MessagesRef, } from "@decocms/harness/offload-messages"; @@ -63,21 +61,23 @@ import type { ModelsConfig, } from "@/harnesses"; import { createSecretModelSource } from "@/harnesses"; -import { - WORKSPACE_CWD_DEFAULT, - WORKSPACE_CWD_REPO, -} from "@decocms/harness/workspace-cwd"; -import type { CodingWorkspacePromptInput } from "@decocms/harness/coding-workspace-prompt"; +import { setDecopilotRunContext } from "@decocms/harness/decopilot/run-context"; +import type { + DecopilotHttpMcpSource, + DecopilotObjectStorageSource, + HarnessWorkspace, +} from "@decocms/harness/types"; +import { WORKSPACE_CWD_REPO } from "@decocms/harness/workspace-cwd"; import { createProviderFromSecret } from "@decocms/harness/decopilot/provider-from-secret"; import { classifyStreamError, stringifyError, -} from "@decocms/harness/decopilot/stream-error"; +} from "@decocms/harness/stream-error"; import { isCliHarness } from "@decocms/harness/cli-harness"; import { DEFAULT_WINDOW_SIZE, generateMessageId } from "./constants"; import { mintRunFenceToken } from "./dispatch-fence"; import { synthesizedErrorMessageId } from "./message-ids"; -import { loadAndMergeMessages } from "./conversation"; +import { loadDecopilotContext } from "@/harnesses/decopilot/context-loader"; import { PartEmitter } from "./part-emitter"; import { ProgressBumpThrottle } from "./progress-bump"; import { uploadFileParts, resolveStorageRefs } from "./file-materializer"; @@ -108,7 +108,7 @@ import { resolveThreadStatus } from "./status"; import type { StreamBuffer } from "./stream-buffer"; import type { ChatMessage, ModelsConfig as ClientModelsConfig } from "./types"; import type { CancelBroadcast } from "./cancel-broadcast"; -import { computeCliDelta, resolveCliSessionRef } from "./cli-session-messages"; +import { resolveCliSessionRef } from "./cli-session-messages"; import { getInternalUrl, getPublicUrl } from "@/core/server-constants"; import { mintOrgFsConfigJson } from "@/file-storage/mount/provisioning"; import { meter, traced } from "@/observability"; @@ -307,27 +307,11 @@ export function resolveHarnessId(providerId: string | undefined): HarnessId { return "decopilot"; } -/** Symbolic cwd: "/repo" for repo-backed sandbox dispatch (the daemon rebases - * onto its sandbox root); "default" otherwise (harness uses its SDK default, - * never fails). Hosted/in-pod runs always get "default". */ -function resolveWorkspaceCwd( - virtualMcp: { metadata?: unknown } | null, - sandboxProviderKind: DispatchTarget["sandboxProviderKind"], -): { cwd: string } { - const hasRepo = !!( - virtualMcp?.metadata as { githubRepo?: unknown } | undefined - )?.githubRepo; - if (sandboxProviderKind === "user-desktop" && hasRepo) { - return { cwd: WORKSPACE_CWD_REPO }; - } - return { cwd: WORKSPACE_CWD_DEFAULT }; -} - -export function buildCodingWorkspaceInput(input: { +export function buildHarnessWorkspaceInput(input: { virtualMcp: { metadata?: unknown } | null; branch?: string | null; - workspace: { cwd: string }; -}): CodingWorkspacePromptInput { + cwd?: "/repo" | null; +}): HarnessWorkspace { const githubRepo = ( input.virtualMcp?.metadata as { githubRepo?: unknown } | undefined )?.githubRepo; @@ -344,11 +328,13 @@ export function buildCodingWorkspaceInput(input: { } : undefined; + if (!repo) return { cwd: null }; + if (input.cwd !== WORKSPACE_CWD_REPO) return { cwd: null }; + return { - ...(repo ? { repo } : {}), + cwd: WORKSPACE_CWD_REPO, + repo, branch: input.branch ?? null, - cwd: input.workspace.cwd, - workspaceKind: repo ? "github" : "unknown", }; } @@ -627,7 +613,7 @@ export async function dispatchRunAndWait( * `signal` field. This is exactly what the desktop daemon validates against * `harnessStreamInputSchema` and what the link work item carries. * - * Built eagerly in `prepareRun`'s main body (mcp mint + message + * Built eagerly in `prepareRun`'s main body (mcp mint + userMessage * materialization + field assembly) so it's available without consuming * `uiStream`. The hosted dispatch path layers the signal on top inside the * lazy harness chunk source: @@ -649,7 +635,7 @@ interface PreparedRun { /** Minted by prepareRun for link-dispatched threads. */ runFenceToken: string; /** - * Fully-assembled wire harness input (mcp minted, messages materialized, + * Fully-assembled wire harness input (mcp minted, userMessage materialized, * fence token attached). `dispatchRunAndWait` ignores this (it consumes * `uiStream`); `prepareLinkWorkDispatch` returns it so the gate can publish * it as the link work item's `harnessInput`. @@ -1156,17 +1142,6 @@ async function prepareRun( const pendingOps: Promise[] = []; - // Pre-load conversation (no system messages — those are built separately) - // When resuming, requestMessage is undefined — conversation loads entirely - // from DB via createMemory / loadAndMergeMessages. - const allMessages = await loadAndMergeMessages( - mem, - materializedRequestMessage, - systemMessages, - windowSize, - input.isSubagent === true, - ); - // CLI-harness delta + resume only holds on the long-lived desktop daemon: // the on-disk session survives between turns *only* on user-desktop. On any // other sandbox kind there is no persistent rollout to resume, so fall back @@ -1175,8 +1150,11 @@ async function prepareRun( const cliResumable = isCliHarness(harnessId) && target.sandboxProviderKind === "user-desktop"; + const cliHistory = cliResumable + ? ((await mem.loadHistory(windowSize)) as ChatMessage[]) + : undefined; const resumeSessionRef = cliResumable - ? resolveCliSessionRef(allMessages, harnessId) + ? resolveCliSessionRef(cliHistory ?? [], harnessId) : undefined; const organization = ctx.organization!; @@ -1184,7 +1162,7 @@ async function prepareRun( // ── Build the wire HarnessStreamInput EAGERLY ─────────────────────────── // Everything the daemon's `harnessStreamInputSchema` needs (mcp endpoint, - // materialized messages, virtualMcp, fence token, …) is assembled here, + // materialized userMessage, workspace, fence token, …) is assembled here, // before the lazy harness chunk source runs. Two consumers read it: // - hosted dispatch (`dispatchHarnessChunks` below) layers the // non-serializable `signal` on top: @@ -1192,38 +1170,32 @@ async function prepareRun( // - `prepareLinkWorkDispatch` returns it verbatim so the thread gate can // publish it as the link work item's `harnessInput`. - // Resolve mesh-storage: URIs to fresh presigned URLs every turn. - // Also handles legacy data: URLs from threads predating this pipeline. - // `processConversation` (which depends on the harness-owned tool set for - // `toModelOutput` handlers) runs inside the decopilot harness itself; we - // forward materialized UIMessages so each harness decides how to convert - // them. - // CLI harnesses (codex, claude-code) resume an on-disk session and only - // need the new user message(s); decopilot — and any CLI harness not on - // user-desktop — still gets the full transcript (see `cliResumable`). - const messagesForHarness = cliResumable - ? computeCliDelta(allMessages, harnessId) - : allMessages; - - // A resumable CLI turn must carry at least the new user message(s). An empty - // delta means a resumed turn whose history tail is already a completed - // assistant anchor — there is no new user input to forward. Sending zero - // messages would drive an empty CLI turn (or a downstream "empty prompt" - // crash the stale-session guard would not catch), so surface a defined - // permanent error instead of silently degrading. - if (cliResumable && messagesForHarness.length === 0) { + // Resolve mesh-storage: URIs to fresh presigned URLs for the current user + // message only. The v3 harness contract carries one wire-ready userMessage; + // long-lived CLI context is represented separately by harness.sessionId. + const wireUserMessage = materializedRequestMessage + ? (await resolveStorageRefs([materializedRequestMessage], ctx))[0] + : undefined; + + if (!wireUserMessage || !materializedRequestMessage) { throw new PermanentRunError( "empty_request", - "No new user message to send to the CLI harness (resumed turn with empty delta).", + "No user message found in input — expected at least one non-system message", ); } - const materializedMessages = await resolveStorageRefs( - messagesForHarness, - ctx, - ); - - ensureModelCompatibility(input.models, materializedMessages); + ensureModelCompatibility(input.models, [wireUserMessage]); + const decopilotMessages = + harnessId === "decopilot" + ? await loadDecopilotContext({ + ctx, + threadId: mem.thread.id, + userMessage: materializedRequestMessage, + windowSize, + isSubagent: input.isSubagent === true, + systemMessages, + }) + : undefined; // Build the MCP endpoint for CLI harnesses. Hosted decopilot uses an // in-process passthrough client (no HTTP MCP connection needed), so we @@ -1258,7 +1230,7 @@ async function prepareRun( // Never log `modelSources` (any slot) or `mcp.headers` values. const mcp: HarnessStreamInput["mcp"] = mcpBase; - const mcpSource: HarnessStreamInput["mcpSource"] = + const mcpSource: DecopilotHttpMcpSource | undefined = mcp.expiresAt > 0 ? { kind: "http", @@ -1267,7 +1239,7 @@ async function prepareRun( expiresAt: mcp.expiresAt, } : undefined; - const objectStorageSource: HarnessStreamInput["objectStorageSource"] = + const objectStorageSource: DecopilotObjectStorageSource | undefined = target.sandboxProviderKind === "user-desktop" && organization.slug ? { kind: "http", @@ -1276,51 +1248,55 @@ async function prepareRun( expiresAt: mcp.expiresAt, } : undefined; - const workspace = resolveWorkspaceCwd( - effectiveVirtualMcp, - target.sandboxProviderKind, - ); - const codingWorkspace = buildCodingWorkspaceInput({ + const workspace = buildHarnessWorkspaceInput({ virtualMcp: effectiveVirtualMcp, branch: input.branch, - workspace, + cwd: + target.sandboxProviderKind === "user-desktop" + ? WORKSPACE_CWD_REPO + : null, }); + const agentInstructions = + typeof (effectiveVirtualMcp.metadata as { instructions?: unknown }) + ?.instructions === "string" + ? (effectiveVirtualMcp.metadata as { instructions: string }) + .instructions + : undefined; + const decopilotRunContext = { + taskId: input.taskId, + isSubagent: input.isSubagent, + subtaskJobId: input.subtaskJobId, + resumedFromBackground: input.resumedFromBackground, + virtualMcp: effectiveVirtualMcp, + branch: input.branch, + messages: decopilotMessages, + modelSources, + mcpSource, + objectStorageSource, + userContext, + }; const wireHarnessInput: WireHarnessInput = { harnessId, threadId: mem.thread.id, - runId: mem.thread.id, // RunRegistry keys runs by taskId today - resumeSessionRef, - messages: materializedMessages, + userMessage: wireUserMessage, + harness: { sessionId: resumeSessionRef }, workspace, - codingWorkspace, models, - modelSources, - mcpSource, - objectStorageSource, mcp, mode: input.mode, temperature: input.temperature, toolApprovalLevel: input.toolApprovalLevel, toolAllowlist: input.toolAllowlist ?? null, maxAgentSteps: input.maxAgentSteps, - isSubagent: input.isSubagent, - subtaskJobId: input.subtaskJobId, - resumedFromBackground: input.resumedFromBackground, user: { id: input.userId, email: ctx.auth.user?.email ?? "" }, organizationId: input.organizationId, organizationSlug: organization.slug, - projectSlug: organization.slug, - virtualMcp: effectiveVirtualMcp, - agent: { id: input.agent.id }, - branch: input.branch, - taskId: input.taskId, + agent: { id: input.agent.id, instructions: agentInstructions }, triggerId: input.triggerId, currentThreadTitle: mem.thread.title, runFenceToken, - userContext, }; - // ── LAZY harness dispatch ─────────────────────────────────────────────── // This generator's body — local harness dispatch — runs only when the // kernel pulls the first chunk, which (via `lazyStream` below) happens only @@ -1352,12 +1328,15 @@ async function prepareRun( const dispatchHarnessChunks = async function* (): AsyncIterable { // Layer the non-serializable `signal` onto the eagerly-built wire - // input. Everything else (mcp, materialized messages, fence token, …) + // input. Everything else (mcp, materialized userMessage, fence token, …) // was assembled above and is shared verbatim with the desktop work item. const harnessInput: HarnessStreamInput = { ...wireHarnessInput, signal: registrySignal, }; + if (harnessId === "decopilot") { + setDecopilotRunContext(harnessInput, decopilotRunContext); + } // User-desktop runs are routed by the gate to the link work publisher, // which never consumes this lazy stream. Reaching this generator for a @@ -1781,8 +1760,8 @@ async function resolveLinkSandboxConfig( * fires FINISH. * * The work item's `harnessInput` is the complete `wireHarnessInput` that - * prepareRun builds eagerly (mcp endpoint minted, messages materialized, - * virtualMcp + fence token attached), exactly the shape the daemon validates + * prepareRun builds eagerly (mcp endpoint minted, userMessage materialized, + * workspace + fence token attached), exactly the shape the daemon validates * against `harnessStreamInputSchema`. The non-serializable `signal` member is * intentionally absent; it only exists for the hosted dispatch path. * @@ -1812,69 +1791,33 @@ export async function prepareLinkWorkDispatch( deps, _rootSpan, ); + const failPreparedRun = async (message: string): Promise => { + await deps.runRegistry.execute({ + type: "FINISH", + taskId, + threadStatus: "failed", + }); + throw new Error(message); + }; - // ── Message offload ───────────────────────────────────────────────── - // The work item is published through the tunnel as a NATS request; NATS - // rejects payloads exceeding MAX_PUBLISH_BYTES. The conversation - // `messages` array is the dominant large part — when the encoded - // harnessInput exceeds the budget, offload it to object storage (via the - // shared `offload-messages` helpers), then carry the ref on the work item - // so the daemon can forward it to the sandbox daemon's /_sandbox/dispatch - // (which re-inflates from messagesRef). - let effectiveHarnessInput: WireHarnessInput = wireHarnessInput; - let messagesRef: MessagesRef | null = null; - // `messages` dominates the payload — serialize it once and reuse the same - // bytes for both the offload size-gate and the object-storage body, - // instead of stringifying the whole input and then the messages array - // again on the offload path. - const messagesBytes = new TextEncoder().encode( - JSON.stringify(wireHarnessInput.messages), - ); - const inputByteLength = - messagesBytes.byteLength + - Buffer.byteLength( - JSON.stringify({ ...wireHarnessInput, messages: undefined }), - "utf8", + if (wireHarnessInput.harnessId === "decopilot") { + await failPreparedRun( + "prepareLinkWorkDispatch: decopilot desktop dispatch needs a " + + "private DecopilotRunContext transport outside harnessInput", ); + } + + const effectiveHarnessInput: WireHarnessInput = wireHarnessInput; + const messagesRef: MessagesRef | null = null; + const inputByteLength = Buffer.byteLength( + JSON.stringify(wireHarnessInput), + "utf8", + ); if (shouldOffload(inputByteLength)) { - if (ctx.objectStorage) { - try { - const reqId = crypto.randomUUID(); - const key = offloadKey(reqId); - await ctx.objectStorage.put(key, messagesBytes, { - contentType: "application/json", - }); - const url = await ctx.objectStorage.presignedGetUrl(key, 600, { - requireFetchable: true, - }); - messagesRef = { - url, - bytes: messagesBytes.byteLength, - sha256: await sha256Hex(messagesBytes), - }; - // Replace messages inline with [] — the real messages live at the ref. - effectiveHarnessInput = { ...wireHarnessInput, messages: [] }; - console.log( - `[prepareLinkWorkDispatch] offloaded messages to object storage key=${key} bytes=${messagesBytes.byteLength} runId=${taskId}`, - ); - } catch (err) { - // Offload failed — fall through with the full payload and let - // NATS reject it with MAX_PAYLOAD_EXCEEDED rather than silently - // dropping the ref. The publish will throw and the gate will - // surface the error. - console.error( - `[prepareLinkWorkDispatch] message offload failed, work item may exceed NATS limit runId=${taskId}:`, - err instanceof Error ? err.message : String(err), - ); - } - } else { - // No object storage — fall through with the full payload. Same - // "fail loudly at publish time" approach: better a clear NATS - // MAX_PAYLOAD_EXCEEDED than a silent truncation. - console.warn( - `[prepareLinkWorkDispatch] harnessInput exceeds NATS limit but no object storage configured — work item may be rejected runId=${taskId}`, - ); - } + await failPreparedRun( + "prepareLinkWorkDispatch: harnessInput exceeds the link payload " + + "limit and v3 userMessage offload is not implemented", + ); } // Resolve the sandbox handle for the desktop work item. @@ -1929,8 +1872,9 @@ export async function prepareLinkWorkDispatch( .executeTakeFirst(); orgSlug = orgRow?.slug ?? null; } - if (!orgSlug) { - throw new Error( + const resolvedOrgSlug = orgSlug; + if (!resolvedOrgSlug) { + return await failPreparedRun( `prepareLinkWorkDispatch: could not resolve org slug for organization ${input.organizationId}`, ); } @@ -1941,7 +1885,7 @@ export async function prepareLinkWorkDispatch( harnessInput: effectiveHarnessInput, messagesRef, sandboxConfig, - orgSlug, + orgSlug: resolvedOrgSlug, }; }, dispatchRunSpanAttrs(input), diff --git a/apps/mesh/src/api/routes/decopilot/dispatch-sandbox.test.ts b/apps/mesh/src/api/routes/decopilot/dispatch-sandbox.test.ts index 6f4d46dc37..65cac1ccac 100644 --- a/apps/mesh/src/api/routes/decopilot/dispatch-sandbox.test.ts +++ b/apps/mesh/src/api/routes/decopilot/dispatch-sandbox.test.ts @@ -24,7 +24,7 @@ mock.module("@/tools/sandbox/start", () => ({ })); const { - buildCodingWorkspaceInput, + buildHarnessWorkspaceInput, computeDesktopSandboxHandle, resolveEffectiveVirtualMcpForHarness, } = await import("./dispatch-run"); @@ -82,9 +82,9 @@ describe("computeDesktopSandboxHandle", () => { }); }); -describe("buildCodingWorkspaceInput", () => { +describe("buildHarnessWorkspaceInput", () => { it("returns repo, branch, cwd, and connected GitHub state for repo-backed workspaces", () => { - const result = buildCodingWorkspaceInput({ + const result = buildHarnessWorkspaceInput({ virtualMcp: { metadata: { githubRepo: { @@ -95,23 +95,22 @@ describe("buildCodingWorkspaceInput", () => { }, }, branch: "feature-branch", - workspace: { cwd: "/repo" }, + cwd: "/repo", }); expect(result).toEqual({ + cwd: "/repo", repo: { owner: "deco", name: "site", connectedGithub: true, }, branch: "feature-branch", - cwd: "/repo", - workspaceKind: "github", }); }); it("marks public clone repo workspaces as not connected to GitHub", () => { - const result = buildCodingWorkspaceInput({ + const result = buildHarnessWorkspaceInput({ virtualMcp: { metadata: { githubRepo: { @@ -121,32 +120,48 @@ describe("buildCodingWorkspaceInput", () => { }, }, branch: "main", - workspace: { cwd: "/repo" }, + cwd: "/repo", }); expect(result).toEqual({ + cwd: "/repo", repo: { owner: "deco", name: "public-site", connectedGithub: false, }, branch: "main", - cwd: "/repo", - workspaceKind: "github", }); }); - it("marks default non-repo workspaces as unknown without inventing GitHub metadata", () => { - const result = buildCodingWorkspaceInput({ + it("returns null cwd for non-repo workspaces", () => { + const result = buildHarnessWorkspaceInput({ virtualMcp: { metadata: {} }, branch: null, - workspace: { cwd: "default" }, }); expect(result).toEqual({ - branch: null, - cwd: "default", - workspaceKind: "unknown", + cwd: null, + }); + }); + + it("returns null cwd for repo metadata when no repo checkout is available", () => { + const result = buildHarnessWorkspaceInput({ + virtualMcp: { + metadata: { + githubRepo: { + owner: "deco", + name: "site", + connectionId: "conn-1", + }, + }, + }, + branch: "feature-branch", + cwd: null, + }); + + expect(result).toEqual({ + cwd: null, }); }); }); diff --git a/apps/mesh/src/api/routes/decopilot/memory.ts b/apps/mesh/src/api/routes/decopilot/memory.ts index 77c8bf77e3..1a49e887ca 100644 --- a/apps/mesh/src/api/routes/decopilot/memory.ts +++ b/apps/mesh/src/api/routes/decopilot/memory.ts @@ -33,7 +33,7 @@ export interface MemoryConfig { * - Message history loading * - Pruning for context window management */ -export class Memory { +class Memory { readonly thread: Thread; readonly organization_id: string; diff --git a/apps/mesh/src/api/routes/decopilot/routes.ts b/apps/mesh/src/api/routes/decopilot/routes.ts index 56ece62bf1..56c0838328 100644 --- a/apps/mesh/src/api/routes/decopilot/routes.ts +++ b/apps/mesh/src/api/routes/decopilot/routes.ts @@ -43,7 +43,7 @@ import { StreamRequestSchema } from "./schemas"; import type { ChatMessage, ModelsConfig } from "./types"; import type { DispatchRunInput } from "./dispatch-run"; import { resolveHarnessId } from "./dispatch-run"; -import { stringifyError } from "@decocms/harness/decopilot/stream-error"; +import { stringifyError } from "@decocms/harness/stream-error"; import { enqueueThreadRun } from "@/dispatch-queue"; import { publishRunStatusStage, @@ -228,6 +228,7 @@ async function resolvePerRequestModels( id: entry.modelId, title: entry.label, provider: harnessId, + capabilities: { vision: true, text: true }, }, }; } diff --git a/apps/mesh/src/harnesses/decopilot/context-loader.test.ts b/apps/mesh/src/harnesses/decopilot/context-loader.test.ts new file mode 100644 index 0000000000..e0e2e8fb6a --- /dev/null +++ b/apps/mesh/src/harnesses/decopilot/context-loader.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, test } from "bun:test"; +import type { ChatMessage } from "@/api/routes/decopilot/types"; +import { assembleDecopilotContextForTest } from "./context-loader"; + +describe("assembleDecopilotContextForTest", () => { + test("includes system messages, historical messages, and current user message in order", async () => { + const history = [ + { id: "u1", role: "user", parts: [{ type: "text", text: "old" }] }, + { + id: "a1", + role: "assistant", + parts: [{ type: "text", text: "reply" }], + }, + ] as ChatMessage[]; + const userMessage = { + id: "u2", + role: "user", + parts: [{ type: "text", text: "new" }], + } as ChatMessage; + + const result = await assembleDecopilotContextForTest({ + history, + userMessage, + systemMessages: [ + { + id: "s1", + role: "system", + parts: [{ type: "text", text: "sys" }], + }, + ] as ChatMessage[], + }); + + expect(result.map((m) => m.id)).toEqual(["s1", "u1", "a1", "u2"]); + }); +}); diff --git a/apps/mesh/src/harnesses/decopilot/context-loader.ts b/apps/mesh/src/harnesses/decopilot/context-loader.ts new file mode 100644 index 0000000000..c15f46d004 --- /dev/null +++ b/apps/mesh/src/harnesses/decopilot/context-loader.ts @@ -0,0 +1,57 @@ +import { DEFAULT_WINDOW_SIZE } from "@/api/routes/decopilot/constants"; +import { resolveStorageRefs } from "@/api/routes/decopilot/file-materializer"; +import { createMemory } from "@/api/routes/decopilot/memory"; +import type { ChatMessage } from "@/api/routes/decopilot/types"; +import type { StudioContext } from "@/core/studio-context"; +import type { ThreadMessage } from "@/storage/types"; + +function mergeHistoryWithUserMessage( + history: ThreadMessage[] | ChatMessage[], + userMessage: ChatMessage, +): ChatMessage[] { + const validHistory = history.filter((m) => m.parts && m.parts.length > 0); + const matchIndex = validHistory.findIndex((m) => m.id === userMessage.id); + const conversation = + matchIndex >= 0 + ? [...validHistory.slice(0, matchIndex), userMessage] + : [...validHistory, userMessage]; + return conversation as ChatMessage[]; +} + +export async function assembleDecopilotContextForTest(input: { + history: ChatMessage[]; + userMessage: ChatMessage; + systemMessages: ChatMessage[]; +}): Promise { + return [ + ...input.systemMessages, + ...mergeHistoryWithUserMessage(input.history, input.userMessage), + ]; +} + +export async function loadDecopilotContext(input: { + ctx: StudioContext; + threadId: string; + userMessage: ChatMessage; + windowSize: number | undefined; + isSubagent: boolean; + systemMessages: ChatMessage[]; +}): Promise { + const history = input.isSubagent + ? [] + : await ( + await createMemory(input.ctx.storage.threads, { + thread_id: input.threadId, + organization_id: input.ctx.organization?.id ?? "", + userId: input.ctx.auth.user?.id ?? "", + defaultWindowSize: input.windowSize ?? DEFAULT_WINDOW_SIZE, + }) + ).loadHistory(input.windowSize ?? DEFAULT_WINDOW_SIZE); + + const assembled = await assembleDecopilotContextForTest({ + history: history as ChatMessage[], + userMessage: input.userMessage, + systemMessages: input.systemMessages, + }); + return resolveStorageRefs(assembled, input.ctx); +} diff --git a/apps/mesh/src/harnesses/decopilot/harness-deps.ts b/apps/mesh/src/harnesses/decopilot/harness-deps.ts index 473bd98cdf..1370a3df95 100644 --- a/apps/mesh/src/harnesses/decopilot/harness-deps.ts +++ b/apps/mesh/src/harnesses/decopilot/harness-deps.ts @@ -1,22 +1,13 @@ /** * CLUSTER Decopilot environment-deps assembler (spec §5.2/§9 — "ONE factory"). * - * The Decopilot harness runs ONE orchestration loop (`runDecopilotCore`); the - * only difference between the cluster and the desktop daemon is which - * `DecopilotToolRuntime` + `telemetry` the environment builds. The single - * `decopilotHarnessFactory` (`./index.ts`) selects between this StudioContext- - * backed assembler and the desktop one by inspecting the injected context shape: + * The Decopilot harness runs ONE orchestration loop (`runDecopilotCore`). The + * package factory gets its StudioContext-backed `DecopilotToolRuntime` + + * `telemetry` through this registered cluster assembler: * - * - `buildClusterEnvironmentTools` (here) — the StudioContext-backed branch: - * the in-process virtual-MCP passthrough client + the full cluster tool set + * - the in-process virtual-MCP passthrough client + the full cluster tool set * (web_search / update_interests / Browserless built-ins) + the per-run * HTML-artifact buffer/watcher, plus the ctx-coupled `runAgentLoop` engine. - * - `buildDesktopEnvironmentTools` (`./desktop-runtime.ts`) — the import- - * isolated daemon branch: an HTTP MCP passthrough client + the local-OK - * built-ins + the portable `runNativeAgentLoopCore` engine with a - * cluster-storage-free system prompt. It lives in its own `@/`-free module - * so the desktop daemon factory (`./desktop-factory.ts`) can pull it WITHOUT - * dragging this file's cluster `@/*` imports into the daemon bundle. */ import type { @@ -48,6 +39,7 @@ import type { import { runAgentLoop } from "./run-agent-loop"; import type { DecopilotTelemetry } from "@decocms/harness/decopilot/run-stream"; import { createBackgroundToolDispatcher } from "./background-tool-workflow"; +import { requireDecopilotRunContext } from "@decocms/harness/decopilot/run-context"; /** * Cluster engine adapter: maps the portable `RunEngineArgs` onto the ctx-coupled @@ -130,6 +122,7 @@ export function buildClusterEnvironmentTools(args: { const toolRuntime: DecopilotToolRuntime = { buildEnvironmentTools: async ({ input: streamInput, onChildUsage }) => { + const runContext = requireDecopilotRunContext(streamInput); const toolOutputMap = new Map(); const pendingImages: PendingImage[] = []; const { resolveArgs, onToolCalled } = buildClusterMcpToolHooks(ctx); @@ -152,7 +145,7 @@ export function buildClusterEnvironmentTools(args: { // Cluster-side: `virtualMcp` is the real `VirtualMCPEntity`; // the transport type widens the field to a loose bag so the // daemon can ship without the cluster's storage types. - const vm = streamInput.virtualMcp as VirtualMCPEntity; + const vm = runContext.virtualMcp as VirtualMCPEntity; // Surface the dev sandbox's tools when the user has a running sandbox // for this agent. Cheap local pre-filter ("does the user have a // sandbox entry?"), no repo/pairing flag — agents without a sandbox @@ -168,7 +161,7 @@ export function buildClusterEnvironmentTools(args: { ctx, vm.id, streamInput.user.id, - streamInput.branch ?? undefined, + runContext.branch ?? undefined, ).catch(() => null); } return createVirtualClientFrom( @@ -200,7 +193,7 @@ export function buildClusterEnvironmentTools(args: { agentId: streamInput.agent.id, temperature: streamInput.temperature, toolApprovalLevel: streamInput.toolApprovalLevel, - branch: streamInput.branch ?? null, + branch: runContext.branch ?? null, }), htmlArtifactBuffer, // Roll subtask child usage into the parent run's accumulator diff --git a/apps/mesh/src/harnesses/decopilot/tools.ts b/apps/mesh/src/harnesses/decopilot/tools.ts index 66f9eabf7f..d215b5b43d 100644 --- a/apps/mesh/src/harnesses/decopilot/tools.ts +++ b/apps/mesh/src/harnesses/decopilot/tools.ts @@ -37,6 +37,7 @@ import { type ToolCallAnalytics, } from "@decocms/harness/decopilot/mcp-tools"; import { MCP_TOOL_CALL_TIMEOUT_MS } from "@decocms/harness/decopilot/harness-constants"; +import { requireDecopilotRunContext } from "@decocms/harness/decopilot/run-context"; import type { HarnessStreamInput } from "@decocms/harness/types"; /** Raw MCP tool entries returned by `passthroughClient.listTools()`. */ @@ -215,6 +216,7 @@ export async function assembleDecopilotTools( ctx: StudioContext, extras: AssembleDecopilotToolsExtras, ): Promise { + const runContext = requireDecopilotRunContext(input); const organization = ctx.organization!; const isPlanMode = input.mode === "plan"; // Per-run tool allowlist (model-facing names). Empty array is treated as @@ -281,7 +283,7 @@ export async function assembleDecopilotTools( // Tradeoff: concurrent threads share /app, /home/sandbox, /tmp — // parallel writes to overlapping filenames can race. Fine for // reads and scoped outputs; revisit if it bites. - const vmMetadata = input.virtualMcp.metadata as { + const vmMetadata = runContext.virtualMcp.metadata as { githubRepo?: GithubRepo | null; }; const isEphemeralAgent = !vmMetadata.githubRepo; @@ -290,7 +292,7 @@ export async function assembleDecopilotTools( virtualMcpId: input.agent.id, branch: isEphemeralAgent ? "ephemeral" - : (input.branch ?? `thread:${extras.threadId}`), + : (runContext.branch ?? `thread:${extras.threadId}`), userId: input.user.id, // Used by share_with_user to scope artifacts under // model-outputs//. Cannot be derived from the diff --git a/apps/mesh/src/harnesses/in-process-sandbox-client.test.ts b/apps/mesh/src/harnesses/in-process-sandbox-client.test.ts index c2a74b7dcc..ddd93fbc5f 100644 --- a/apps/mesh/src/harnesses/in-process-sandbox-client.test.ts +++ b/apps/mesh/src/harnesses/in-process-sandbox-client.test.ts @@ -13,9 +13,13 @@ import type { StudioContext } from "../core/studio-context"; const makeInput = (): HarnessStreamInput => ({ threadId: "t1", - runId: "r1", - messages: [], - workspace: { cwd: "default" }, + userMessage: { + id: "m1", + role: "user", + parts: [{ type: "text", text: "hi" }], + }, + harness: {}, + workspace: { cwd: null }, models: { thinking: { id: "m-thinking", title: "Thinking", credentialId: "cred-1" }, } as unknown as HarnessStreamInput["models"], @@ -25,7 +29,6 @@ const makeInput = (): HarnessStreamInput => ({ toolApprovalLevel: "auto", user: { id: "u1", email: "u1@example.com" }, organizationId: "org-1", - virtualMcp: { id: "agent-1" } as HarnessStreamInput["virtualMcp"], agent: { id: "agent-1" }, signal: new AbortController().signal, }); diff --git a/apps/mesh/src/harnesses/index.test.ts b/apps/mesh/src/harnesses/index.test.ts index a0b8bb31e3..154352a26d 100644 --- a/apps/mesh/src/harnesses/index.test.ts +++ b/apps/mesh/src/harnesses/index.test.ts @@ -29,4 +29,10 @@ describe("harness registration", () => { test("codex is registered", () => { expect(getHarnessFactory("codex")?.id).toBe("codex"); }); + + test("cluster harness registration does not register desktop Decopilot builder", async () => { + const source = await Bun.file("apps/mesh/src/harnesses/index.ts").text(); + expect(source).not.toContain("registerDesktopEnvironmentBuilder"); + expect(source).not.toContain("buildDesktopEnvironmentTools"); + }); }); diff --git a/apps/mesh/src/harnesses/index.ts b/apps/mesh/src/harnesses/index.ts index b8b873f92a..55446de2f0 100644 --- a/apps/mesh/src/harnesses/index.ts +++ b/apps/mesh/src/harnesses/index.ts @@ -3,20 +3,15 @@ import { claudeCodeHarnessFactory } from "@decocms/harness/claude-code/index"; import { decopilotHarnessFactory, registerClusterEnvironmentBuilder, - registerDesktopEnvironmentBuilder, } from "@decocms/harness/decopilot/index"; import { codexHarnessFactory } from "@decocms/harness/codex/index"; import { buildClusterEnvironmentTools } from "./decopilot/harness-deps"; -import { buildDesktopEnvironmentTools } from "@decocms/harness/decopilot/desktop-runtime"; import { registerHarnessFactory } from "@decocms/harness/registry"; // Register the environment-deps builders for the unified decopilot factory. -// The factory (`./decopilot`) is environment-agnostic and looks these up at -// dispatch time; this barrel is the sole in-process registration point, so -// registering here guarantees both are present before any cluster/desktop -// dispatch. The cluster builder is `@/`-coupled (StudioContext) and the desktop -// builder reaches `@decocms/sandbox` — keeping the registration here (mesh) lets -// the factory itself stay portable. +// This barrel is the sole in-process registration point for cluster Decopilot +// dispatch. Desktop Decopilot is intentionally not registered; user-desktop +// execution is reserved for CLI harnesses. registerClusterEnvironmentBuilder((args) => { const ctx = args.ctx as StudioContext; return buildClusterEnvironmentTools({ @@ -25,7 +20,6 @@ registerClusterEnvironmentBuilder((args) => { organization: ctx.organization as OrganizationScope, }); }); -registerDesktopEnvironmentBuilder(buildDesktopEnvironmentTools); // Side-effect registration. Importing this module wires up the three // in-tree harnesses. Out-of-tree harnesses register themselves the same way. @@ -37,11 +31,6 @@ registerHarnessFactory(decopilotHarnessFactory); registerHarnessFactory(claudeCodeHarnessFactory); registerHarnessFactory(codexHarnessFactory); -// The import-isolated DESKTOP decopilot factory -// (`@decocms/harness/decopilot/desktop-factory`) is registered DIRECTLY in the -// daemon (packages/sandbox/daemon/entry.ts) — never through this cluster barrel, -// which only handles the in-process cluster dispatch path. - export { localDispatch } from "./local-dispatch"; export { createSecretModelSource } from "@decocms/harness/types"; export type { diff --git a/apps/mesh/src/harnesses/local-dispatch.test.ts b/apps/mesh/src/harnesses/local-dispatch.test.ts index 9d8fc97285..9cd3ff437e 100644 --- a/apps/mesh/src/harnesses/local-dispatch.test.ts +++ b/apps/mesh/src/harnesses/local-dispatch.test.ts @@ -14,9 +14,13 @@ import { localDispatch } from "./local-dispatch"; const makeInput = (): HarnessStreamInput => ({ threadId: "t1", - runId: "r1", - messages: [], - workspace: { cwd: "default" }, + userMessage: { + id: "m1", + role: "user", + parts: [{ type: "text", text: "hi" }], + }, + harness: {}, + workspace: { cwd: null }, models: { thinking: { id: "m-thinking", title: "Thinking", credentialId: "cred-1" }, } as unknown as HarnessStreamInput["models"], @@ -26,7 +30,6 @@ const makeInput = (): HarnessStreamInput => ({ toolApprovalLevel: "auto", user: { id: "u1", email: "u1@example.com" }, organizationId: "org-1", - virtualMcp: { id: "agent-1" } as HarnessStreamInput["virtualMcp"], agent: { id: "agent-1" }, signal: new AbortController().signal, }); diff --git a/apps/mesh/src/link-daemon/handle-local-dispatch.test.ts b/apps/mesh/src/link-daemon/handle-local-dispatch.test.ts index d190d19594..26b040f395 100644 --- a/apps/mesh/src/link-daemon/handle-local-dispatch.test.ts +++ b/apps/mesh/src/link-daemon/handle-local-dispatch.test.ts @@ -553,6 +553,7 @@ describe("handleLocalDispatch", () => { it("forwards messagesRef to the sandbox dispatch when present on the work item", async () => { const fp = fakePublisher(); let capturedDispatchBody: { + runId: string; harnessId: string; input: Record; messagesRef?: unknown; @@ -564,6 +565,7 @@ describe("handleLocalDispatch", () => { ): Promise => { if (url.includes("/_sandbox/dispatch")) { capturedDispatchBody = JSON.parse(init?.body as string) as { + runId: string; harnessId: string; input: Record; messagesRef?: unknown; @@ -604,6 +606,7 @@ describe("handleLocalDispatch", () => { // The dispatch body must carry messagesRef so the sandbox daemon can // re-inflate messages from object storage (same shape the WS path sends). expect(capturedDispatchBody).not.toBeNull(); + expect(capturedDispatchBody!.runId).toBe(workWithRef.runId); expect(capturedDispatchBody!.messagesRef).toEqual(messagesRef); // messages should be the stripped [] (the real ones are at the ref) expect(capturedDispatchBody!.input.messages).toEqual([]); @@ -612,6 +615,7 @@ describe("handleLocalDispatch", () => { it("does not include messagesRef in sandbox dispatch when absent on the work item", async () => { const fp = fakePublisher(); let capturedDispatchBody: { + runId: string; harnessId: string; input: Record; messagesRef?: unknown; @@ -623,6 +627,7 @@ describe("handleLocalDispatch", () => { ): Promise => { if (url.includes("/_sandbox/dispatch")) { capturedDispatchBody = JSON.parse(init?.body as string) as { + runId: string; harnessId: string; input: Record; messagesRef?: unknown; @@ -648,6 +653,7 @@ describe("handleLocalDispatch", () => { await handleLocalDispatch(validWorkItem, deps); expect(capturedDispatchBody).not.toBeNull(); + expect(capturedDispatchBody!.runId).toBe(validWorkItem.runId); expect(capturedDispatchBody!.messagesRef).toBeUndefined(); }); diff --git a/apps/mesh/src/link-daemon/handle-local-dispatch.ts b/apps/mesh/src/link-daemon/handle-local-dispatch.ts index e76c8922ae..4d47a8c0e9 100644 --- a/apps/mesh/src/link-daemon/handle-local-dispatch.ts +++ b/apps/mesh/src/link-daemon/handle-local-dispatch.ts @@ -237,11 +237,13 @@ export async function handleLocalDispatch( // from messagesRef when present. const dispatchBody = work.messagesRef ? JSON.stringify({ + runId: work.runId, harnessId, input: work.harnessInput, messagesRef: work.messagesRef, }) : JSON.stringify({ + runId: work.runId, harnessId, input: work.harnessInput, }); diff --git a/packages/e2e/tests/cli-session-resume.spec.ts b/packages/e2e/tests/cli-session-resume.spec.ts index 7bf3377e8b..49bb0897ed 100644 --- a/packages/e2e/tests/cli-session-resume.spec.ts +++ b/packages/e2e/tests/cli-session-resume.spec.ts @@ -6,12 +6,10 @@ * `codingAgentProvider: "codex"` lands on the persisted assistant message * metadata so the NEXT dispatch can read it back. * (b) A second turn's dispatch work item carries - * `harnessInput.resumeSessionRef` equal to the first turn's session id - * AND `harnessInput.messages` is the DELTA only (user messages after the - * session anchor, not the full transcript). Both together prove the - * codex resume round-trip: `resolveCliSessionRef` finds the session id - * and `computeCliDelta` strips prior history so the CLI doesn't receive - * messages it already knows about (which would cause "ran out of room"). + * `harnessInput.harness.sessionId` equal to the first turn's session id + * AND a single `harnessInput.userMessage` for the current turn. Both + * together prove the codex resume round-trip: `resolveCliSessionRef` + * finds the session id and v3 sends only the new user message. * (c) A "stale session" error relay (what the daemon sends when the codex * harness throws `CliSessionExpiredError`) persists an error part whose * message matches /session expired/i and transitions the run to "failed". @@ -173,7 +171,6 @@ interface WorkItem { userId: string; runFenceToken: string; harnessInput: Record; - messagesRef?: { url: string; bytes: number; sha256: string }; } /** @@ -291,16 +288,16 @@ test.describe("CLI session resume (codex, desktop-link relay)", () => { } }); - test("second turn: dispatch work item carries resumeSessionRef AND only the delta (not full history)", async ({ + test("second turn: dispatch work item carries harness.sessionId AND only the new user message", async ({ authedPage, }) => { // Drives TWO turns on one codex-pinned thread and asserts the second - // work item's harnessInput.resumeSessionRef equals the first turn's - // relayed session id AND harnessInput.messages is the delta only (the - // new user message after the session anchor, not the full transcript). + // work item's harnessInput.harness.sessionId equals the first turn's + // relayed session id AND harnessInput.userMessage is the current turn + // only, not the full transcript. // Together these prove the codex resume round-trip: // - resolveCliSessionRef → correct session id on the work item - // - computeCliDelta → CLI receives only what it doesn't already know + // - v3 userMessage → CLI receives only what it doesn't already know test.setTimeout(CASE_TIMEOUT_MS * 2); const { page, orgSlug, user } = authedPage; const api = page.context().request; @@ -361,21 +358,19 @@ test.describe("CLI session resume (codex, desktop-link relay)", () => { turn2UserText, ); - // (b) resumeSessionRef round-trip - expect(t2.workItem.harnessInput.resumeSessionRef).toBe(sessionId); + // (b) session id round-trip + expect( + (t2.workItem.harnessInput.harness as { sessionId?: string }).sessionId, + ).toBe(sessionId); - // (b2) delta: only the new user message, not turn 1's text - const wireMessages = t2.workItem.harnessInput.messages as Array<{ + // (b2) v3 carries exactly the new user message, not turn 1's text. + const userMessage = t2.workItem.harnessInput.userMessage as { role: string; - }>; - const userMessages = wireMessages.filter((m) => m.role === "user"); - expect( - userMessages.length, - "delta must contain exactly one user message (turn 2 only)", - ).toBe(1); + }; + expect(userMessage.role).toBe("user"); // Materialized UIMessages carry text in `parts`, not a `content` field — // stringify the whole message so the assertion is shape-agnostic. - const contentStr = JSON.stringify(userMessages[0]); + const contentStr = JSON.stringify(userMessage); expect(contentStr).toContain(turn2UserText); expect(contentStr).not.toContain(turn1UserText); } finally { diff --git a/packages/e2e/tests/harness-conformance.spec.ts b/packages/e2e/tests/harness-conformance.spec.ts index 820974c660..26076d7c2a 100644 --- a/packages/e2e/tests/harness-conformance.spec.ts +++ b/packages/e2e/tests/harness-conformance.spec.ts @@ -196,14 +196,12 @@ interface WorkItem { userId: string; runFenceToken: string; harnessInput: Record; - messagesRef?: { url: string; bytes: number; sha256: string }; } /** * Trigger a dispatch by POSTing a user message, then wait for the tunnel daemon * to receive the matching work item. Returns the runId + the work item (whose - * runFenceToken the relay POST presents). `messageText` may be large (offload - * case). + * runFenceToken the relay POST presents). */ async function dispatchAndClaimWorkItem( api: APIRequestContext, @@ -674,13 +672,13 @@ test.describe("harness conformance — relay driver", () => { } }); - test("claude-code session id round-trips into the next turn's resumeSessionRef", async ({ + test("claude-code session id round-trips into the next turn's harness.sessionId", async ({ authedPage, }) => { - // Task 7 fix: a finish-anchor's codingAgentSessionId must survive the v2 - // parts read path so the NEXT turn's dispatch carries it as - // harnessInput.resumeSessionRef. Drive TWO turns on one thread and assert - // the second work item's harnessInput.resumeSessionRef === the first turn's + // A finish-anchor's codingAgentSessionId must survive the parts read path + // so the NEXT turn's dispatch carries it as + // harnessInput.harness.sessionId. Drive TWO turns on one thread and assert + // the second work item's harnessInput.harness.sessionId === the first turn's // relayed session id. test.setTimeout(CASE_TIMEOUT_MS * 2); const { page, orgSlug, user } = authedPage; @@ -737,7 +735,9 @@ test.describe("harness conformance — relay driver", () => { daemon, "second turn", ); - expect(t2.workItem.harnessInput.resumeSessionRef).toBe(sessionId); + expect( + (t2.workItem.harnessInput.harness as { sessionId?: string }).sessionId, + ).toBe(sessionId); } finally { await daemon?.close(); await db.end(); @@ -928,23 +928,15 @@ test.describe("harness conformance — relay endpoint rejections", () => { }); // --------------------------------------------------------------------------- -// Offload conformance — the pull seam's body-offload happy path. -// -// Carry-forward from the deleted link-dispatch-offload.spec.ts (Task 11). A -// conversation whose encoded harnessInput exceeds NATS MAX_PUBLISH_BYTES -// (768 KiB) is offloaded cluster-side in pullDispatch: messages are PUT to -// object storage, `harnessInput.messages` becomes `[]` inline, and a -// `messagesRef` is carried on the work item. The e2e workflow boots MinIO + -// sets S3_* so ctx.objectStorage is the real S3Service path. +// Oversized-payload conformance — v3 has no message-array offload path. // -// Depth: this asserts the CLUSTER-SIDE offload decision (work item shape: ref -// present + messages emptied + ref fetchable). The daemon-side re-inflate -// (parseMessagesRef + splice back into input.messages, with SSRF allowlist) is -// unit-covered in offload-messages.test.ts / handle-local-dispatch.test.ts. +// V3 sends a single `userMessage` and intentionally does not reuse the old +// `messagesRef` array protocol. Oversized desktop payloads fail before publish +// and settle the run as failed instead of delivering a work item. // --------------------------------------------------------------------------- -test.describe("harness conformance — pull-seam body offload", () => { - test("a large message offloads → work item carries messagesRef + empty inline messages, ref is fetchable, then chunks relay + parts persist", async ({ +test.describe("harness conformance — oversized v3 link payload", () => { + test("a large message fails terminally instead of using messagesRef offload", async ({ authedPage, }) => { test.setTimeout(CASE_TIMEOUT_MS); @@ -966,50 +958,30 @@ test.describe("harness conformance — pull-seam body offload", () => { // text is comfortably over the budget even after the rest of the wire // input adds overhead. const bigText = "x".repeat(1_024 * 1_024); - const { runId, workItem } = await dispatchAndClaimWorkItem( - api, - orgSlug, - agentId, - threadId, - daemon, - bigText, + const dispatchRes = await api.post( + `/api/${orgSlug}/decopilot/threads/${threadId}/messages`, + { + data: { + messages: [ + { role: "user", parts: [{ type: "text", text: bigText }] }, + ], + agent: { id: agentId }, + branch: "ephemeral", + temperature: 0.5, + harnessId: "claude-code", + sandboxProviderKind: "user-desktop", + }, + headers: { "content-type": "application/json" }, + }, ); + expect(dispatchRes.status()).toBe(202); + const { taskId: runId } = (await dispatchRes.json()) as { + taskId: string; + }; + expect(runId).toBeTruthy(); - // The cluster-side offload decision: messagesRef present, inline messages - // emptied. (When object storage is unavailable the offload falls through - // and the work item would never publish — so a delivered work item with - // a ref proves the production S3 path ran.) - expect( - workItem.messagesRef, - "work item carries messagesRef for an over-budget conversation", - ).toBeTruthy(); - expect(workItem.messagesRef!.url).toMatch(/^https?:\/\//); - expect(workItem.messagesRef!.bytes).toBeGreaterThan(768 * 1024); - expect(typeof workItem.messagesRef!.sha256).toBe("string"); - // Inline messages are emptied — the real messages live at the ref. - expect(workItem.harnessInput.messages).toEqual([]); - - // The ref is fetchable (presigned GET) and matches the advertised size. - const refRes = await api.get(workItem.messagesRef!.url); - expect(refRes.status()).toBe(200); - const refBytes = (await refRes.body()).byteLength; - expect(refBytes).toBe(workItem.messagesRef!.bytes); - - // The return path still works: relay a normal turn → parts persist + - // run completes. (The daemon re-inflate of the ref is unit-covered; here - // we only prove the ref doesn't break the cluster-side relay/commit.) - const { body } = buildTurnRelayBody({ - messageId: `msg_conf_offload_${Date.now()}`, - text: "offloaded-turn-answer", - }); - await relay(runId, workItem.runFenceToken, body); - // Parts + terminal status are written by the async durable projector, so - // poll until the run projects and completes. await expect(async () => { - const kinds = (await fetchParts(db, runId)).map((p) => p.kind); - expect(kinds).toContain("text"); - expect(kinds).toContain("finish"); - expect(await fetchThreadStatus(db, runId)).toBe("completed"); + expect(await fetchThreadStatus(db, runId)).toBe("failed"); }).toPass({ timeout: 20_000, intervals: [250, 500, 1000, 2000] }); } finally { await daemon?.close(); diff --git a/packages/e2e/tests/link-dispatch-pull.spec.ts b/packages/e2e/tests/link-dispatch-pull.spec.ts index 7a54010462..d79bf977b9 100644 --- a/packages/e2e/tests/link-dispatch-pull.spec.ts +++ b/packages/e2e/tests/link-dispatch-pull.spec.ts @@ -209,6 +209,96 @@ async function createPullThread( // --------------------------------------------------------------------------- test.describe("pull-transport round-trip", () => { + test("work item carries one Claude Code userMessage with image file parts", async ({ + authedPage, + }) => { + test.setTimeout(120_000); + + const { page, orgSlug, user } = authedPage; + const api = page.context().request; + const db = await connectDevDb(); + let daemon: TunnelLinkDaemon | null = null; + + try { + daemon = await createTunnelLinkDaemon(api, user.userId, [ + "body-offload", + "claude-code", + ]); + const orgId = await orgIdForSlug(db, orgSlug); + const { agentId, threadId } = await createPullThread( + api, + db, + orgSlug, + orgId, + ); + + const imageDataUrl = + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="; + const dispatchRes = await api.post( + `/api/${orgSlug}/decopilot/threads/${threadId}/messages`, + { + data: { + messages: [ + { + role: "user", + parts: [ + { type: "text", text: "Describe this image briefly." }, + { + type: "file", + mediaType: "image/png", + filename: "fixture.png", + url: imageDataUrl, + }, + ], + }, + ], + agent: { id: agentId }, + branch: "ephemeral", + temperature: 0.5, + harnessId: "claude-code", + sandboxProviderKind: "user-desktop", + }, + headers: { "content-type": "application/json" }, + }, + ); + + expect(dispatchRes.status()).toBe(202); + const { taskId: runId } = (await dispatchRes.json()) as { + taskId: string; + }; + const workItem = await daemon.nextWorkItem(runId); + + expect(workItem.harnessInput).not.toHaveProperty("messages"); + const userMessage = workItem.harnessInput.userMessage as { + role?: string; + parts?: Array>; + }; + expect(userMessage.role).toBe("user"); + expect(Array.isArray(userMessage.parts)).toBe(true); + const imagePart = userMessage.parts?.find( + (part) => part.type === "file" && part.mediaType === "image/png", + ); + expect(imagePart).toBeTruthy(); + expect(typeof imagePart?.url).toBe("string"); + expect((imagePart?.url as string).length).toBeGreaterThan(0); + + const messageId = `msg_pull_image_e2e_${Date.now()}`; + const relayBody = buildRelayBody(messageId, "image received"); + await publishRelayBody({ + runId, + fenceToken: workItem.runFenceToken, + body: relayBody, + }); + + await expect(async () => { + expect(await fetchThreadStatus(db, threadId)).toBe("completed"); + }).toPass({ timeout: 20_000, intervals: [250, 500, 1000, 2000] }); + } finally { + await daemon?.close(); + await db.end(); + } + }); + test("work item is served over tunnel and ingest commits parts + releases gate", async ({ authedPage, }) => { @@ -281,7 +371,6 @@ test.describe("pull-transport round-trip", () => { // harnessInput must carry at least the fields the daemon validates. expect(workItem.harnessInput).toMatchObject({ threadId, - runId, runFenceToken: workItem.runFenceToken, }); diff --git a/packages/harness/package.json b/packages/harness/package.json index 1f1cf3ca9f..994ebbc27b 100644 --- a/packages/harness/package.json +++ b/packages/harness/package.json @@ -15,7 +15,6 @@ "./registry": "./src/registry.ts", "./claude-code": "./src/claude-code/index.ts", "./codex": "./src/codex/index.ts", - "./decopilot": "./src/decopilot/index.ts", "./sources": "./src/sources.ts", "./*": [ "./src/*.ts", diff --git a/packages/harness/src/claude-code/index.test.ts b/packages/harness/src/claude-code/index.test.ts index e51a5cda18..22eb55eed4 100644 --- a/packages/harness/src/claude-code/index.test.ts +++ b/packages/harness/src/claude-code/index.test.ts @@ -1,12 +1,25 @@ import { describe, expect, test } from "bun:test"; -import { buildClaudeCodeSystemPrompt, claudeCodeHarnessFactory } from "./index"; import { createCliMessageMetadata } from "../cli-stream-metadata"; +import { prepCliMessages } from "../cli-message-prep"; import type { HarnessContext, HarnessStreamInput } from "../types"; +import { + buildClaudeCodeModelOptions, + buildClaudeCodeSystemPrompt, + claudeCodeHarnessFactory, +} from "./index"; -function makeInput(): HarnessStreamInput { +function makeInput( + overrides: Partial = {}, +): HarnessStreamInput { return { - agent: { id: "agent-1" }, - workspace: { cwd: "default" }, + threadId: "thread-1", + userMessage: { + id: "msg-default", + role: "user", + parts: [{ type: "text", text: "default message" }], + }, + harness: {}, + workspace: { cwd: null }, models: { thinking: { id: "claude-code:sonnet", @@ -15,8 +28,21 @@ function makeInput(): HarnessStreamInput { credentialId: "cred-1", }, }, - threadId: "thread-1", - } as unknown as HarnessStreamInput; + mcp: { + url: "https://mcp.example.com", + headers: { authorization: "Bearer token" }, + expiresAt: Date.now() + 60_000, + }, + mode: "default", + temperature: 0, + toolApprovalLevel: "readonly", + user: { id: "user-1", email: "user@example.com" }, + organizationId: "org-1", + agent: { id: "agent-1", instructions: "Prefer small focused patches." }, + currentThreadTitle: "Renamed thread", + signal: new AbortController().signal, + ...overrides, + }; } /** @@ -25,9 +51,7 @@ function makeInput(): HarnessStreamInput { * Exercising the actual streamText loop requires a working `claude` CLI * subprocess (the harness spawns it via `ai-sdk-provider-claude-code`), * so that path is left to end-to-end / resilience tests. The unit tests - * here verify only the factory shape — id, create() return type, and - * stream() being a function. Task 12 will own the integration coverage - * via the shared dispatcher. + * here verify factory shape and pure input/model-option preparation. */ describe("claudeCodeHarnessFactory", () => { test("has id 'claude-code'", () => { @@ -39,11 +63,37 @@ describe("claudeCodeHarnessFactory", () => { expect(harness.id).toBe("claude-code"); expect(typeof harness.stream).toBe("function"); }); + + test("uses single userMessage and harness session id", async () => { + const input = makeInput({ + userMessage: { + id: "msg-1", + role: "user", + parts: [{ type: "text", text: "hello" }], + }, + harness: { sessionId: "session-1" }, + workspace: { cwd: null }, + }); + + expect(input.harness.sessionId).toBe("session-1"); + expect(input.userMessage.parts[0]).toEqual({ type: "text", text: "hello" }); + + const options = buildClaudeCodeModelOptions(input, undefined, undefined); + expect(options).toMatchObject({ + resume: "session-1", + cwd: undefined, + }); + + const messages = await prepCliMessages([input.userMessage]); + expect(messages).toHaveLength(1); + expect(messages[0]!.role).toBe("user"); + expect(messages[0]!.content).toEqual([{ type: "text", text: "hello" }]); + }); }); test("buildClaudeCodeSystemPrompt appends coding workspace and agent instructions to Claude Code preset", () => { const prompt = buildClaudeCodeSystemPrompt({ - codingWorkspace: { + workspace: { repo: { owner: "deco", name: "site", @@ -51,7 +101,6 @@ test("buildClaudeCodeSystemPrompt appends coding workspace and agent instruction }, branch: "main", cwd: "/repo", - workspaceKind: "github", }, agentInstructions: "Prefer small focused patches.", now: new Date("2026-06-18T12:34:00.000Z"), diff --git a/packages/harness/src/claude-code/index.ts b/packages/harness/src/claude-code/index.ts index 5fda732ce0..97b3bfbf49 100644 --- a/packages/harness/src/claude-code/index.ts +++ b/packages/harness/src/claude-code/index.ts @@ -8,18 +8,15 @@ * - Does NOT build a Decopilot-style system prompt or tool catalog; it only * appends CLI-safe workspace/instruction context through the SDK's * `systemPrompt` preset. - * - Supports resume via `input.resumeSessionRef`, derived by the shared - * layer (Task 12) from prior - * `finish-step.providerMetadata["claude-code"].sessionId`. The harness - * just forwards that opaque token to the SDK's `resume` setting. + * - Supports resume via `input.harness.sessionId`, derived by the shared + * layer from prior `finish-step.providerMetadata["claude-code"].sessionId`. + * The harness just forwards that opaque token to the SDK's `resume` + * setting. * * Working-directory resolution: the cluster sends `input.workspace.cwd` as - * a SYMBOLIC value — either "default" (no checkout, use SDK default) or - * "/repo" (repo checkout inside the sandbox). The daemon rebases "/repo" - * onto its own sandbox root before the harness ever sees it, so by the - * time we reach `effectiveCwd()` the value is already a host-absolute path - * or the "default" sentinel. `effectiveCwd("default")` returns `undefined` - * so the CLI subprocess inherits `process.cwd()` from the daemon. + * a symbolic value: null means no SDK cwd override; "/repo" means the repo + * checkout. The desktop daemon rebases "/repo" onto its sandbox checkout path + * before invoking the harness after receiving dispatch input. * * Behavior parity with stream-core: the inline call at lines 888–906 * passes `mcpServers` (single `cms` entry), `toolApprovalLevel`, @@ -39,10 +36,10 @@ import { effectiveCwd } from "../workspace-cwd"; import { extractUserText, prepCliMessages } from "../cli-message-prep"; import { createCliMessageMetadata } from "../cli-stream-metadata"; import { buildCodingWorkspacePrompt } from "../coding-workspace-prompt"; -import { buildCurrentContextPrompt } from "../decopilot/system-prompt"; +import { buildCurrentContextPrompt } from "../current-context-prompt"; import { mergeTitleResult, shouldGenerateTitle } from "../title-merge"; -import { genTitle } from "../decopilot/title-generator"; -import { stringifyError } from "../decopilot/stream-error"; +import { genTitle } from "../title-generator"; +import { stringifyError } from "../stream-error"; import { CliSessionExpiredError, isStaleSessionError, @@ -54,13 +51,37 @@ import type { HarnessStreamInput, } from "../types"; +export function buildClaudeCodeModelOptions( + input: HarnessStreamInput, + cwd: string | undefined, + systemPrompt: ReturnType, +): Parameters[1] { + return { + mcpServers: { + // Server name kept as `cms` for parity with the inline call + // site (stream-core.ts:892). Changing this would alter the + // qualified tool names the CLI emits. + cms: { + type: "http", + url: input.mcp.url, + headers: input.mcp.headers, + }, + }, + toolApprovalLevel: input.toolApprovalLevel, + isPlanMode: input.mode === "plan", + resume: input.harness.sessionId, + cwd, + systemPrompt, + }; +} + export function buildClaudeCodeSystemPrompt(input: { - codingWorkspace?: HarnessStreamInput["codingWorkspace"]; + workspace?: HarnessStreamInput["workspace"]; agentInstructions?: string; now?: Date; }) { const parts = [ - buildCodingWorkspacePrompt(input.codingWorkspace), + buildCodingWorkspacePrompt(input.workspace), input.agentInstructions?.trim() ? `\n${input.agentInstructions.trim()}\n` : null, @@ -86,12 +107,10 @@ export const claudeCodeHarnessFactory: HarnessFactory = { // line 889. const sdkModelId = resolveClaudeCodeModelId(input.models.thinking.id); - // 2. Translate the symbolic workspace.cwd to an SDK option. The daemon - // has already rebased "/repo" onto its sandbox root before the - // harness runs, so we receive either the rebased absolute path or - // the "default" sentinel. `effectiveCwd("default")` → undefined - // (SDK default = process.cwd()); any other value passes through - // as the CLI subprocess working directory. + // 2. Translate the workspace cwd to an SDK option. `null` means no + // override (SDK default = process.cwd()); "/repo" passes through + // unless a desktop daemon has already rebased it to its sandbox + // checkout path before calling the harness. const cwd = effectiveCwd(input.workspace.cwd); // Diagnostics: on the user-desktop path this runs inside the spawned @@ -101,7 +120,7 @@ export const claudeCodeHarnessFactory: HarnessFactory = { // signal — wrong cwd (CLI runs outside the checkout), an // unreachable MCP endpoint, or a bad model id. console.log( - `[claude-code] stream start model=${sdkModelId} cwd=${cwd ?? "(default)"} mcpUrl=${input.mcp.url} mode=${input.mode} resume=${input.resumeSessionRef ? "yes" : "no"}`, + `[claude-code] stream start model=${sdkModelId} cwd=${cwd ?? "(default)"} mcpUrl=${input.mcp.url} mode=${input.mode} resume=${input.harness.sessionId ? "yes" : "no"}`, ); // 3. Build the Claude Code language model. The MCP URL + headers @@ -110,33 +129,13 @@ export const claudeCodeHarnessFactory: HarnessFactory = { // Mirrors stream-core lines 888–906 — the CLI options and // preset append prompt are threaded through. const systemPrompt = buildClaudeCodeSystemPrompt({ - codingWorkspace: input.codingWorkspace, - agentInstructions: - typeof input.virtualMcp.metadata === "object" && - input.virtualMcp.metadata !== null && - typeof (input.virtualMcp.metadata as { instructions?: unknown }) - .instructions === "string" - ? (input.virtualMcp.metadata as { instructions: string }) - .instructions - : undefined, - }); - const languageModel = createClaudeCodeModel(sdkModelId, { - mcpServers: { - // Server name kept as `cms` for parity with the inline call - // site (stream-core.ts:892). Changing this would alter the - // qualified tool names the CLI emits. - cms: { - type: "http", - url: input.mcp.url, - headers: input.mcp.headers, - }, - }, - toolApprovalLevel: input.toolApprovalLevel, - isPlanMode: input.mode === "plan", - resume: input.resumeSessionRef, - cwd, - systemPrompt, + workspace: input.workspace, + agentInstructions: input.agent.instructions, }); + const languageModel = createClaudeCodeModel( + sdkModelId, + buildClaudeCodeModelOptions(input, cwd, systemPrompt), + ); // 4. Convert UIMessages to ModelMessages. The AI SDK's // `streamText` validates the prompt via Zod in @@ -145,7 +144,7 @@ export const claudeCodeHarnessFactory: HarnessFactory = { // `apps/mesh/src/harnesses/cli-message-prep.ts` for details — // a previous `as never` cast hid this mismatch and would have // thrown `InvalidPromptError` at runtime. - const messages = await prepCliMessages(input.messages); + const messages = await prepCliMessages([input.userMessage]); // 4a. Start title generation with Claude Code's fast model. The // cluster interceptor only persists/broadcasts the result chunk. @@ -196,7 +195,7 @@ export const claudeCodeHarnessFactory: HarnessFactory = { // `codingAgentSessionId` / `codingAgentProvider` at the top of // the message metadata so the shared layer persists them onto // the response message's metadata. Subsequent turns read those - // fields back to recover `input.resumeSessionRef`. Matches the + // fields back to recover `input.harness.sessionId`. Matches the // inline original (stream-core.ts:1404–1417 + 1549–1550) // byte-for-byte. // @@ -237,7 +236,7 @@ export const claudeCodeHarnessFactory: HarnessFactory = { `[claude-code] stream error model=${sdkModelId} cwd=${cwd ?? "(default)"}:`, stringifyError(err), ); - if (input.resumeSessionRef && isStaleSessionError(err)) { + if (input.harness.sessionId && isStaleSessionError(err)) { throw new CliSessionExpiredError(err); } throw err; diff --git a/packages/harness/src/cli-import-boundary.test.ts b/packages/harness/src/cli-import-boundary.test.ts new file mode 100644 index 0000000000..9148fffb38 --- /dev/null +++ b/packages/harness/src/cli-import-boundary.test.ts @@ -0,0 +1,10 @@ +import { expect, test } from "bun:test"; + +test("cli harnesses do not import from decopilot namespace", async () => { + const claude = await Bun.file( + "packages/harness/src/claude-code/index.ts", + ).text(); + const codex = await Bun.file("packages/harness/src/codex/index.ts").text(); + expect(claude).not.toContain("../decopilot/"); + expect(codex).not.toContain("../decopilot/"); +}); diff --git a/packages/harness/src/codex/index.test.ts b/packages/harness/src/codex/index.test.ts index 1e0455dd0a..cd975a7104 100644 --- a/packages/harness/src/codex/index.test.ts +++ b/packages/harness/src/codex/index.test.ts @@ -1,7 +1,49 @@ import { describe, expect, test } from "bun:test"; import { createCliMessageMetadata } from "../cli-stream-metadata"; -import type { HarnessContext } from "../types"; -import { buildCodexDeveloperInstructions, codexHarnessFactory } from "./index"; +import { prepCliMessages } from "../cli-message-prep"; +import type { HarnessContext, HarnessStreamInput } from "../types"; +import { + buildCodexDeveloperInstructions, + buildCodexModelOptions, + codexHarnessFactory, +} from "./index"; + +function makeInput( + overrides: Partial = {}, +): HarnessStreamInput { + return { + threadId: "thread-2", + userMessage: { + id: "msg-default", + role: "user", + parts: [{ type: "text", text: "default message" }], + }, + harness: {}, + workspace: { cwd: null }, + models: { + thinking: { + id: "codex:gpt-5.4", + title: "GPT 5.4", + provider: "openai", + credentialId: "cred-2", + }, + }, + mcp: { + url: "https://mcp.example.com", + headers: { authorization: "Bearer token" }, + expiresAt: Date.now() + 60_000, + }, + mode: "default", + temperature: 0, + toolApprovalLevel: "readonly", + user: { id: "user-2", email: "user@example.com" }, + organizationId: "org-2", + agent: { id: "agent-2", instructions: "Prefer tests first." }, + currentThreadTitle: "Renamed thread", + signal: new AbortController().signal, + ...overrides, + }; +} /** * Contract tests for the Codex harness factory. @@ -9,9 +51,8 @@ import { buildCodexDeveloperInstructions, codexHarnessFactory } from "./index"; * Exercising the actual streamText loop requires a working `codex` * app-server subprocess (the harness spawns it via * `ai-sdk-provider-codex-cli`), so that path is left to end-to-end / - * resilience tests. The unit tests here verify only the factory shape — - * id, create() return type, and stream() being a function. Task 12 will - * own the integration coverage via the shared dispatcher. + * resilience tests. The unit tests here verify factory shape and pure + * input/model-option preparation. * * Provider-cleanup correctness (the try/finally around `provider.close()`) * is verified by code review of `index.ts` — exercising the close path @@ -28,11 +69,37 @@ describe("codexHarnessFactory", () => { expect(harness.id).toBe("codex"); expect(typeof harness.stream).toBe("function"); }); + + test("uses single userMessage and harness session id", async () => { + const input = makeInput({ + userMessage: { + id: "msg-1", + role: "user", + parts: [{ type: "text", text: "hello" }], + }, + harness: { sessionId: "session-1" }, + workspace: { cwd: null }, + }); + + expect(input.harness.sessionId).toBe("session-1"); + expect(input.userMessage.parts[0]).toEqual({ type: "text", text: "hello" }); + + const options = buildCodexModelOptions(input, undefined, undefined); + expect(options).toMatchObject({ + resume: "session-1", + cwd: undefined, + }); + + const messages = await prepCliMessages([input.userMessage]); + expect(messages).toHaveLength(1); + expect(messages[0]!.role).toBe("user"); + expect(messages[0]!.content).toEqual([{ type: "text", text: "hello" }]); + }); }); test("buildCodexDeveloperInstructions includes coding workspace and agent instructions only", () => { const instructions = buildCodexDeveloperInstructions({ - codingWorkspace: { + workspace: { repo: { owner: "deco", name: "site", @@ -40,7 +107,6 @@ test("buildCodexDeveloperInstructions includes coding workspace and agent instru }, branch: "main", cwd: "/repo", - workspaceKind: "template", }, agentInstructions: "Prefer tests before implementation.", now: new Date("2026-06-18T12:34:00.000Z"), diff --git a/packages/harness/src/codex/index.ts b/packages/harness/src/codex/index.ts index 915fd1e1e4..084a58204d 100644 --- a/packages/harness/src/codex/index.ts +++ b/packages/harness/src/codex/index.ts @@ -7,7 +7,7 @@ * reaches mesh's MCP endpoint directly). * - Does NOT build a system prompt (the CLI has its own). * - Supports resume: each turn spawns a fresh codex app-server process, but - * `resume: input.resumeSessionRef` reloads the on-disk thread (the app + * `resume: input.harness.sessionId` reloads the on-disk thread (the app * server persists rollouts under HOME, which survives the per-turn * subprocess on the long-lived desktop daemon). `threadMode: "persistent"` * (set in createCodexModel) makes the first turn's thread non-ephemeral so @@ -31,9 +31,8 @@ * * The harness yields raw `UIMessageChunk` — including the * `finish-step.providerMetadata["codex-app-server"]` block. The shared - * stream layer (Task 12) is responsible for inspecting those chunks if - * it needs to surface any codex-specific metadata (e.g., for analytics); - * Codex does NOT use it for resume. + * stream layer extracts the Codex thread id from that metadata and feeds + * it back as `input.harness.sessionId` on the next turn. */ import { streamText, type UIMessageChunk } from "ai"; @@ -47,8 +46,8 @@ import { isStaleSessionError, } from "../cli-session-error"; import { mergeTitleResult, shouldGenerateTitle } from "../title-merge"; -import { buildCurrentContextPrompt } from "../decopilot/system-prompt"; -import { genTitle } from "../decopilot/title-generator"; +import { buildCurrentContextPrompt } from "../current-context-prompt"; +import { genTitle } from "../title-generator"; import type { Harness, HarnessContext, @@ -56,13 +55,37 @@ import type { HarnessStreamInput, } from "../types"; +export function buildCodexModelOptions( + input: HarnessStreamInput, + cwd: string | undefined, + developerInstructions: string | undefined, +): Parameters[1] { + return { + mcpServers: { + // Server name kept as `cms` for parity with the inline call + // site (stream-core.ts:925). Changing this would alter the + // qualified tool names the CLI emits. + cms: { + transport: "http", + url: input.mcp.url, + headers: input.mcp.headers, + }, + }, + toolApprovalLevel: input.toolApprovalLevel, + isPlanMode: input.mode === "plan", + cwd, + developerInstructions, + resume: input.harness.sessionId, + }; +} + export function buildCodexDeveloperInstructions(input: { - codingWorkspace?: HarnessStreamInput["codingWorkspace"]; + workspace?: HarnessStreamInput["workspace"]; agentInstructions?: string; now?: Date; }): string | undefined { const parts = [ - buildCodingWorkspacePrompt(input.codingWorkspace), + buildCodingWorkspacePrompt(input.workspace), input.agentInstructions?.trim() ? `\n${input.agentInstructions.trim()}\n` : null, @@ -82,24 +105,14 @@ export const codexHarnessFactory: HarnessFactory = { // name (e.g. `gpt-5.4`). Mirrors stream-core line 922. const sdkModelId = resolveCodexModelId(input.models.thinking.id); - // 2. Translate the symbolic workspace.cwd to an SDK option. The daemon - // has already rebased "/repo" onto its sandbox root before the - // harness runs, so we receive either the rebased absolute path or - // the "default" sentinel. `effectiveCwd("default")` → undefined - // (SDK default = process.cwd()); any other value passes through - // as the codex app-server subprocess working directory. + // 2. Translate the workspace cwd to an SDK option. `null` means no + // override (SDK default = process.cwd()); "/repo" passes through + // unless a desktop daemon has already rebased it to its sandbox + // checkout path before calling the harness. const cwd = effectiveCwd(input.workspace.cwd); - const agentInstructions = - typeof input.virtualMcp.metadata === "object" && - input.virtualMcp.metadata !== null && - typeof (input.virtualMcp.metadata as { instructions?: unknown }) - .instructions === "string" - ? (input.virtualMcp.metadata as { instructions: string }) - .instructions - : undefined; const developerInstructions = buildCodexDeveloperInstructions({ - codingWorkspace: input.codingWorkspace, - agentInstructions, + workspace: input.workspace, + agentInstructions: input.agent.instructions, }); // Diagnostics: on the user-desktop path this runs inside the spawned @@ -124,23 +137,10 @@ export const codexHarnessFactory: HarnessFactory = { // `apps/mesh/src/ai-providers/coding-agents/codex/index.ts` // line 18, where http servers are normalized to the codex // SDK's `httpHeaders` shape internally. - const { model, provider } = createCodexModel(sdkModelId, { - mcpServers: { - // Server name kept as `cms` for parity with the inline call - // site (stream-core.ts:925). Changing this would alter the - // qualified tool names the CLI emits. - cms: { - transport: "http", - url: input.mcp.url, - headers: input.mcp.headers, - }, - }, - toolApprovalLevel: input.toolApprovalLevel, - isPlanMode: input.mode === "plan", - cwd, - developerInstructions, - resume: input.resumeSessionRef, - }); + const { model, provider } = createCodexModel( + sdkModelId, + buildCodexModelOptions(input, cwd, developerInstructions), + ); try { // 3. Convert UIMessages to ModelMessages. The AI SDK's @@ -150,7 +150,7 @@ export const codexHarnessFactory: HarnessFactory = { // See `apps/mesh/src/harnesses/cli-message-prep.ts` for // details — a previous `as never` cast hid this mismatch // and would have thrown `InvalidPromptError` at runtime. - const messages = await prepCliMessages(input.messages); + const messages = await prepCliMessages([input.userMessage]); // 3a. Start title generation with Codex's fast model. This uses a // separate app-server process so title generation can run in @@ -208,11 +208,8 @@ export const codexHarnessFactory: HarnessFactory = { // 5. Pipe UIMessageChunk through. We surface // `codingAgentSessionId` / `codingAgentProvider` at the top // of the message metadata so the shared layer persists them - // onto the response message's metadata. Codex doesn't use - // these for resume (per the comment at the top of this - // file), but the inline original at stream-core.ts:1411– - // 1417 + 1549–1550 wrote them anyway for parity with - // Claude Code — so we keep the same shape. + // onto the response message's metadata. Subsequent turns read + // those fields back to recover `input.harness.sessionId`. // // We also forward cumulative `usage` (with cache token // breakdown + OpenRouter cost) on both `finish-step` and @@ -243,7 +240,7 @@ export const codexHarnessFactory: HarnessFactory = { yield chunk; } } catch (err) { - if (input.resumeSessionRef && isStaleSessionError(err)) { + if (input.harness.sessionId && isStaleSessionError(err)) { throw new CliSessionExpiredError(err); } throw err; diff --git a/packages/harness/src/current-context-prompt.ts b/packages/harness/src/current-context-prompt.ts new file mode 100644 index 0000000000..c41320fbdf --- /dev/null +++ b/packages/harness/src/current-context-prompt.ts @@ -0,0 +1,14 @@ +/** + * Per-request, non-cached system prompt content. + * + * Anything that varies between requests but is needed in the system layer + * lives here — kept outside cached prefixes so it does not invalidate provider + * prompt caches. + */ +export function buildCurrentContextPrompt(now: Date): string { + const iso = now.toISOString(); + return ` +Current date: ${iso.slice(0, 10)} +Current time: ${iso.slice(11, 16)} UTC +`; +} diff --git a/packages/harness/src/decopilot/desktop-factory.ts b/packages/harness/src/decopilot/desktop-factory.ts deleted file mode 100644 index a135cb9c93..0000000000 --- a/packages/harness/src/decopilot/desktop-factory.ts +++ /dev/null @@ -1,98 +0,0 @@ -/** - * desktop-factory — the import-isolated DESKTOP `HarnessFactory` registered in - * the desktop daemon (`packages/sandbox/daemon/entry.ts`) under the id - * "decopilot". - * - * The desktop runtime and the cluster runtime share ONE orchestration loop - * (`runDecopilotCore`); the only difference is which `DecopilotToolRuntime` + - * `telemetry` the environment builds. The unified cluster factory - * (`./index.ts`) selects between `buildClusterEnvironmentTools` (cluster) and - * `buildDesktopEnvironmentTools` (desktop) by inspecting the injected context — - * but it statically imports the cluster assembler (`harness-deps.ts`, which - * reaches `@/monitoring`, `@/tools/virtual/schema`, StudioContext, …). Pulling - * that into the daemon bundle drags the whole cluster module graph in and the - * bundle/`tsc` overflows. - * - * So the DAEMON imports THIS factory instead: it calls the SAME - * `buildDesktopEnvironmentTools` desktop assembler (`./desktop-runtime.ts`), - * but never references the cluster branch, so the daemon bundle stays free of - * `@/*` / StudioContext. There is no behavioral fork — the desktop tool runtime - * is single-sourced; this file is only the daemon's import-safe entrypoint into - * it. (The cluster cutover onto the unified factory lands with - * `createDesktopContext` in a later slice.) - * - * ⚠️ SECURITY: each `modelSources` slot (kind="secret") carries an org - * chat-completion API key in plaintext over HTTPS. Never log it. - */ - -import type { UIMessageChunk } from "ai"; -import type { - Harness, - HarnessContext, - HarnessFactory, - HarnessStreamInput, -} from "../types"; -import { createProviderFromSecret } from "./provider-from-secret"; -import { createSideChannelWriter } from "../side-channel-writer"; -import { - buildModelRuntimeFromSources, - runDecopilotCore, - type ModelRuntime, -} from "./run-core"; -import { buildDesktopEnvironmentTools } from "./desktop-runtime"; - -export const decopilotDesktopHarnessFactory: HarnessFactory = { - id: "decopilot", - create(_ctx: HarnessContext): Harness { - return { - id: "decopilot", - async *stream(input: HarnessStreamInput): AsyncIterable { - const { mcp } = input; - - // ── Model runtime: providers from the resolved secret sources. Never - // log a modelSource — each carries a provider API key. ─────────── - const modelRuntime: ModelRuntime = buildModelRuntimeFromSources( - { models: input.models, modelSources: input.modelSources }, - createProviderFromSecret, - ); - - // Diagnostics (provider id only, never the key). On the desktop this - // runs inside the spawned daemon, so it surfaces in the link terminal. - const thinkingSource = input.modelSources?.thinking; - const providerId = - thinkingSource?.kind === "secret" ? thinkingSource.providerId : "?"; - console.log( - `[decopilot-desktop] stream start provider=${providerId} ` + - `model=${input.models.thinking.id} mcpUrl=${mcp.url} mode=${input.mode}`, - ); - - // ── Per-run side-channel + MCP-client cleanup (desktop lifecycle). ── - const sideChannel = createSideChannelWriter(); - // Captured inside buildEnvironmentTools so the finally below runs it - // even if the core throws mid-stream. - const cleanup: { close?: () => Promise } = {}; - - const toolRuntime = buildDesktopEnvironmentTools({ - input, - modelRuntime, - sideChannel, - cleanup, - }); - - try { - yield* runDecopilotCore({ - input, - modelRuntime, - toolRuntime, - // Desktop runs stay OTel-invisible this phase (no monitoring sink). - telemetry: undefined, - kind: "main", - }); - } finally { - sideChannel.close(); - await cleanup.close?.().catch(() => {}); - } - }, - }; - }, -}; diff --git a/packages/harness/src/decopilot/desktop-local-tools.ts b/packages/harness/src/decopilot/desktop-local-tools.ts deleted file mode 100644 index 41a39291bd..0000000000 --- a/packages/harness/src/decopilot/desktop-local-tools.ts +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Desktop built-in tool adapter. - * - * The implementation lives in the shared Decopilot portable built-ins module; - * this file only preserves the desktop harness import boundary and parameter - * names. - */ - -import type { Tool, ToolSet, UIMessageStreamWriter } from "ai"; -import { buildPortableBuiltInTools } from "./built-in-tools/portable-built-ins"; -import type { - PortableImageModelInfo, - PortableImageProvider, -} from "./built-in-tools/portable-media-tools"; -import type { VirtualClient } from "./built-in-tools/sandbox"; -import { createVmTools } from "./built-in-tools/vm-tools/index"; -import type { PendingImage } from "./built-in-tools/vm-tools/types"; -import type { SandboxFsHooks } from "./built-in-tools/vm-tools/sandbox-fs-hooks-types"; -import type { ToolApprovalLevel } from "./mcp-tools"; -import type { DesktopToolCtx } from "./desktop-tool-ctx"; -import type { BackgroundDispatcher } from "./built-in-tools/backgroundable"; - -export interface BuildLocalToolsParams { - writer: UIMessageStreamWriter; - toolOutputMap: Map; - passthroughClient: VirtualClient; - toolApprovalLevel: ToolApprovalLevel; - isPlanMode: boolean; - ctx: DesktopToolCtx; - pendingImages: PendingImage[]; - threadId: string; - virtualMcpId: string; - /** - * Flat sandbox filesystem hooks the VM tools run over. Built by the desktop - * glue (`buildDesktopSandboxFs`, which owns the `@decocms/sandbox` provider) - * and injected here so this assembler stays sandbox-free (spec §4.3). - */ - fs: SandboxFsHooks; - imageProvider?: PortableImageProvider; - imageModelInfo?: PortableImageModelInfo; - /** When present, generate_image enqueues a durable cluster job instead of - * running on the daemon (so the turn doesn't block). Built by the daemon - * runtime from the run's cluster callback + auth. */ - imageBackgroundDispatcher?: BackgroundDispatcher | null; - /** The real desktop-local `subtask` tool (built by the harness factory from - * `createLocalSubtaskTool` + the daemon `runSubtask`). Injected here so it - * joins the desktop toolset alongside the VM + portable built-ins. Absent on - * delegated subtask runs (depth-1 — the core strips it anyway). */ - subtask?: Tool; -} - -export function buildLocalTools(params: BuildLocalToolsParams): ToolSet { - const portableTools = buildPortableBuiltInTools({ - writer: params.writer, - toolOutputMap: params.toolOutputMap, - passthroughClient: params.passthroughClient, - toolApprovalLevel: params.toolApprovalLevel, - isPlanMode: params.isPlanMode, - objectStorage: params.ctx.objectStorage, - includeUnavailableClusterOnlyTools: true, - pendingImages: params.pendingImages, - imageTool: - params.imageProvider && params.imageModelInfo - ? { - provider: params.imageProvider, - imageModelInfo: params.imageModelInfo, - } - : undefined, - imageBackgroundDispatcher: params.imageBackgroundDispatcher, - }); - - const vmNeedsApproval = - params.isPlanMode || params.toolApprovalLevel !== "auto"; - const vmTools = createVmTools({ - fs: params.fs, - toolOutputMap: params.toolOutputMap, - needsApproval: vmNeedsApproval, - pendingImages: params.pendingImages, - ctx: params.ctx as never, - threadId: params.threadId, - virtualMcpId: params.virtualMcpId, - }); - - return { - ...portableTools, - ...vmTools, - // Real desktop-local subtask (self + cross-agent) — runs the shared core - // in-process via spawnSubtask. Absent on delegated subtask runs (depth-1). - ...(params.subtask ? { subtask: params.subtask } : {}), - }; -} diff --git a/packages/harness/src/decopilot/desktop-parity.fixtures.ts b/packages/harness/src/decopilot/desktop-parity.fixtures.ts deleted file mode 100644 index 2a1cc2d383..0000000000 --- a/packages/harness/src/decopilot/desktop-parity.fixtures.ts +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Parity fixtures for the desktop Decopilot tool set (§12 "parity test, run - * before 1-last"). - * - * The desktop fork (`harnesses/decopilot-desktop/`) is GONE; the unified - * factory's desktop assembler (`buildDesktopEnvironmentTools`, - * `./desktop-runtime.ts`) is the only desktop path. Both builders below now - * drive that SAME assembler, so the `expect(newKeys).toEqual(oldKeys)` - * assertion in `desktop-parity.test.ts` is a regression LOCK on the unified - * desktop tool-key set (paired with a hardcoded baseline list in the test). - * - * The fakes use a fake MCP `Client` returning empty tool/prompt/resource lists - * (so the passthrough set is empty and the bundle's keys are exactly the desktop - * local built-ins) — the MCP source is opened through the `openHttp` test seam - * so no real network connection is made. The tool bundle is only inspected for - * its KEYS; nothing is executed. - */ - -import type { OpenedMcpSource } from "../sources"; -import { buildModelRuntimeFromSources } from "./run-core"; -import { buildDesktopEnvironmentTools } from "./desktop-runtime"; -import { createProviderFromSecret } from "./provider-from-secret"; -import { createSideChannelWriter } from "../side-channel-writer"; -import { registerDesktopSandboxFsBuilder } from "./desktop-sandbox-fs-registry"; -import type { HarnessStreamInput } from "../types"; - -// The parity fixtures only inspect the assembled tool KEYS (nothing executes), -// so register a no-op desktop sandbox-fs builder — the real one lives in the -// daemon (@decocms/sandbox) and isn't available in a unit test. -registerDesktopSandboxFsBuilder(() => ({ - onRead: async () => "", - onWrite: async () => {}, - onEdit: async () => {}, - onBash: async () => ({ stdout: "", stderr: "", exitCode: 0 }), - onGlob: async () => [], - onGrep: async () => [], - onProxy: async () => ({}), -})); - -/** A fake MCP `Client` returning empty listings — the desktop passthrough set - * is therefore empty, so the assembled tool keys are exactly the desktop - * local built-ins. Cast to the structural shape `buildEnvironmentTools` - * consumes (`listTools`, `getInstructions`, …). */ -function createFakeMcpClient(): OpenedMcpSource { - const client = { - listTools: async () => ({ tools: [] }), - listPrompts: async () => ({ prompts: [] }), - listResources: async () => ({ resources: [] }), - readResource: async () => ({ contents: [] }), - getPrompt: async () => ({ messages: [] }), - callTool: async () => ({ content: [] }), - getInstructions: () => undefined, - close: async () => {}, - } as never; - return { client, close: async () => {} }; -} - -/** Assemble one turn's desktop tool bundle via the unified factory's desktop - * assembler and return its sorted tool-key set. */ -async function buildDesktopToolKeys( - input: HarnessStreamInput, -): Promise { - const modelRuntime = buildModelRuntimeFromSources( - { models: input.models, modelSources: input.modelSources }, - createProviderFromSecret, - ); - const sideChannel = createSideChannelWriter(); - const cleanup: { close?: () => Promise } = {}; - - const toolRuntime = buildDesktopEnvironmentTools({ - input, - modelRuntime, - sideChannel, - cleanup, - openHttp: async () => createFakeMcpClient(), - }); - - try { - const bundle = await toolRuntime.buildEnvironmentTools({ input }); - return Object.keys(bundle.tools).sort(); - } finally { - sideChannel.close(); - await cleanup.close?.().catch(() => {}); - } -} - -/** Builder 1 — historically the legacy desktop fork's wiring; now the unified - * factory's desktop assembler (the fork is deleted). Kept as a separate export - * so the parity test reads as a before/after regression lock. */ -export async function buildDesktopToolKeysViaExistingAdapter( - input: HarnessStreamInput, -): Promise { - return buildDesktopToolKeys(input); -} - -/** Builder 2 — the unified factory's desktop assembler - * (`buildDesktopEnvironmentTools`). Same path as Builder 1; the - * `expect(newKeys).toEqual(oldKeys)` assertion locks the desktop tool set. */ -export async function buildDesktopToolKeysViaUnifiedFactory( - input: HarnessStreamInput, -): Promise { - return buildDesktopToolKeys(input); -} diff --git a/packages/harness/src/decopilot/desktop-parity.test.ts b/packages/harness/src/decopilot/desktop-parity.test.ts deleted file mode 100644 index 237bb1199c..0000000000 --- a/packages/harness/src/decopilot/desktop-parity.test.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import type { HarnessStreamInput } from "../types"; -import { - buildDesktopToolKeysViaExistingAdapter, - buildDesktopToolKeysViaUnifiedFactory, -} from "./desktop-parity.fixtures"; - -const input = { - threadId: "t1", - runId: "r1", - messages: [], - workspace: { cwd: "default" }, - models: { thinking: { id: "gpt-4.1", title: "GPT", credentialId: "c1" } }, - modelSources: { - thinking: { - kind: "secret", - providerId: "anthropic", - apiKey: "sk", - modelId: "gpt-4.1", - }, - }, - mcpSource: { - kind: "http", - url: "https://studio.example/mcp/agent-1", - headers: { Authorization: "Bearer x" }, - expiresAt: 9999999999000, - }, - mcp: { - url: "https://studio.example/mcp/agent-1", - headers: {}, - expiresAt: 9999999999000, - }, - mode: "default", - temperature: 0.5, - toolApprovalLevel: "auto", - user: { id: "u1", email: "u@e.com" }, - organizationId: "org-1", - virtualMcp: { id: "agent-1", metadata: {} }, - agent: { id: "agent-1" }, - signal: new AbortController().signal, -} satisfies HarnessStreamInput; - -// The sorted desktop tool-key baseline captured at the unification cutover. -// The desktop fork is deleted, so this is now a REGRESSION LOCK: any drift in -// the unified factory's desktop tool set must update this list deliberately. -// The cluster-only tools (`web_search`, `update_interests`) appear here only as -// UNAVAILABLE stubs (`includeUnavailableClusterOnlyTools` — see -// portable-built-ins): they surface to the model but throw "only available in -// cluster Decopilot" at execution, so the desktop never runs the real cluster -// path. -const DESKTOP_TOOL_KEYS_BASELINE = [ - "bash", - "edit", - "glob", - "grep", - "propose_plan", - "read", - "read_tool_output", - "skill", - "subtask", - "todo_write", - "update_interests", - "user_ask", - "web_search", - "write", -]; - -describe("decopilot desktop tool-set parity", () => { - it("unified factory yields the hardcoded desktop tool-key baseline", async () => { - const newKeys = await buildDesktopToolKeysViaUnifiedFactory(input); - expect(newKeys).toEqual(DESKTOP_TOOL_KEYS_BASELINE); - // Sanity: desktop set must include the local built-ins (e.g. `read`). - expect(newKeys).toContain("read"); - expect(newKeys).toContain("web_search"); - expect(newKeys).toContain("update_interests"); - }); - - it("both parity builders drive the unified desktop assembler, exactly", async () => { - const oldKeys = await buildDesktopToolKeysViaExistingAdapter(input); - const newKeys = await buildDesktopToolKeysViaUnifiedFactory(input); - expect(newKeys).toEqual(oldKeys); - expect(newKeys).toEqual(DESKTOP_TOOL_KEYS_BASELINE); - }); -}); diff --git a/packages/harness/src/decopilot/desktop-prompt.test.ts b/packages/harness/src/decopilot/desktop-prompt.test.ts deleted file mode 100644 index 1b6da0f7ec..0000000000 --- a/packages/harness/src/decopilot/desktop-prompt.test.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { describe, expect, test } from "bun:test"; -import { buildDesktopPrompt } from "./desktop-prompt"; - -describe("buildDesktopPrompt", () => { - test("includes shared coding workspace context", () => { - const prompt = buildDesktopPrompt({ - agentId: "vir_test", - isDecopilotAgent: false, - connectionsBlockTools: [], - connectionTitleMap: new Map(), - agentInstructions: "Use the repo carefully.", - codingWorkspace: { - repo: { - owner: "deco", - name: "site", - connectedGithub: true, - }, - branch: "main", - cwd: "/repo", - workspaceKind: "github", - }, - }); - - const joined = prompt.systemMessages.map((m) => m.content).join("\n\n"); - expect(joined).toContain(""); - expect(joined).toContain("Repository: deco/site"); - expect(joined).toContain("Use the repo carefully."); - }); -}); diff --git a/packages/harness/src/decopilot/desktop-prompt.ts b/packages/harness/src/decopilot/desktop-prompt.ts deleted file mode 100644 index bb9911a0d1..0000000000 --- a/packages/harness/src/decopilot/desktop-prompt.ts +++ /dev/null @@ -1,81 +0,0 @@ -/** - * local-prompt — minimal system-prompt assembly for the desktop harness. - * - * Shared prompt builders live in `prompt-constants.ts` and - * `../coding-workspace-prompt.ts`. This module assembles the - * desktop-specific prompt shape. - * - * `buildDesktopPrompt` assembles the minimal prompt: - * base platform + coding workspace block + connections block + - * todo-write guidance + agent identity. - * It deliberately SKIPS the cluster-only blocks that need `ctx.storage` - * (agents block via `virtualMcps.list`) or studio-pack resolution. Cache - * markers + the per-request current-context tail come from the portable - * `system-prompt` leaf (reused by relative path). - */ - -import { - buildConnectionsBlock, - type ConnectionsBlockTool, -} from "./connections-block"; -import { buildSystemMessages, type SystemMessage } from "./system-prompt"; -export { PARENT_STEP_LIMIT } from "./prompt-constants"; -import { - buildBasePlatformPrompt, - buildDecopilotAgentPrompt, - buildTodoWritePrompt, -} from "./prompt-constants"; -import { - buildCodingWorkspacePrompt, - type CodingWorkspacePromptInput, -} from "../coding-workspace-prompt"; - -export interface DesktopPromptInput { - /** Active agent (virtual MCP) id — used to decide whether the decopilot - * identity prompt is emitted. */ - agentId: string; - /** True when the active agent is the well-known decopilot agent. */ - isDecopilotAgent: boolean; - /** Connections-block tool list (built from the passthrough MCP listing). */ - connectionsBlockTools: ConnectionsBlockTool[]; - /** connectionId → human title map (best-effort; may be empty on desktop). */ - connectionTitleMap: Map; - /** The active agent's own instructions, when it is NOT decopilot. */ - agentInstructions?: string; - /** Plan-mode prompt fragment, when mode === "plan". */ - planPrompt?: string | null; - /** Shared coding workspace context for repo-aware runs. */ - codingWorkspace?: CodingWorkspacePromptInput; - /** Web-search behaviour hint, when mode === "web-search". */ - webSearchPrompt?: string | null; -} - -/** - * Build the minimal desktop system prompt. Mirrors the cluster's prompt - * ordering for the blocks the desktop can produce, dropping the ones that - * require cluster storage (agents block) or studio-pack resolution. - */ -export function buildDesktopPrompt(input: DesktopPromptInput): { - systemMessages: SystemMessage[]; -} { - const basePrompt = buildBasePlatformPrompt(); - const connectionsBlock = buildConnectionsBlock( - input.connectionsBlockTools, - input.connectionTitleMap, - ); - const agentPrompt = input.isDecopilotAgent - ? buildDecopilotAgentPrompt() - : input.agentInstructions; - - const parts = [ - basePrompt, - input.planPrompt, - buildCodingWorkspacePrompt(input.codingWorkspace), - connectionsBlock, - buildTodoWritePrompt(), - input.webSearchPrompt, - agentPrompt, - ].filter((s): s is string => Boolean(s?.trim())); - - return { systemMessages: buildSystemMessages(parts, new Date()) }; -} diff --git a/packages/harness/src/decopilot/desktop-runtime.test.ts b/packages/harness/src/decopilot/desktop-runtime.test.ts deleted file mode 100644 index a8f179a97e..0000000000 --- a/packages/harness/src/decopilot/desktop-runtime.test.ts +++ /dev/null @@ -1,99 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import type { HarnessStreamInput } from "../types"; -import { - resolveDesktopRuntimeSources, - resolveDesktopSubtaskCodingWorkspace, -} from "./desktop-runtime"; - -const baseInput = { - threadId: "thread-1", - runId: "run-1", - messages: [], - workspace: { cwd: "default" }, - models: { - thinking: { id: "gpt-4.1", title: "GPT", credentialId: "cred-1" }, - }, - mcp: { - url: "https://studio.example.com/mcp/agent-1", - headers: { Authorization: "Bearer test" }, - expiresAt: 9999999999000, - }, - mode: "default", - temperature: 0.5, - toolApprovalLevel: "auto", - user: { id: "user-1", email: "user@example.com" }, - organizationId: "org-1", - virtualMcp: { id: "agent-1", metadata: {} }, - agent: { id: "agent-1" }, - signal: new AbortController().signal, -} satisfies HarnessStreamInput; - -const thinkingSecret = { - kind: "secret", - providerId: "openai", - apiKey: "sk-test", - modelId: "gpt-4.1", -} as const; - -describe("resolveDesktopRuntimeSources", () => { - it("requires a resolved secret thinking model source", () => { - expect(() => resolveDesktopRuntimeSources(baseInput)).toThrow( - /secret thinking model source/, - ); - }); - - it("uses the top-level HTTP MCP source when present", () => { - const result = resolveDesktopRuntimeSources({ - ...baseInput, - modelSources: { thinking: thinkingSecret }, - mcpSource: { - kind: "http", - url: "https://studio.example.com/mcp/source", - headers: { Authorization: "Bearer source" }, - expiresAt: 9999999999001, - }, - }); - - expect(result.mcpSource.url).toBe("https://studio.example.com/mcp/source"); - expect(result.mcpSource.headers.Authorization).toBe("Bearer source"); - }); - - it("falls back to the legacy HTTP mcp envelope when mcpSource is absent", () => { - const result = resolveDesktopRuntimeSources({ - ...baseInput, - modelSources: { thinking: thinkingSecret }, - }); - - expect(result.mcpSource).toEqual({ - kind: "http", - url: baseInput.mcp.url, - headers: baseInput.mcp.headers, - expiresAt: baseInput.mcp.expiresAt, - }); - }); -}); - -describe("resolveDesktopSubtaskCodingWorkspace", () => { - const codingWorkspace = { - repo: { - owner: "deco", - name: "site", - connectedGithub: true, - }, - branch: "main", - cwd: "/repo", - workspaceKind: "github", - } as const; - - it("preserves the parent coding workspace for self-clone subtasks", () => { - expect( - resolveDesktopSubtaskCodingWorkspace({ codingWorkspace }, undefined), - ).toBe(codingWorkspace); - }); - - it("clears the parent coding workspace for cross-agent subtasks", () => { - expect( - resolveDesktopSubtaskCodingWorkspace({ codingWorkspace }, "agent-2"), - ).toBeUndefined(); - }); -}); diff --git a/packages/harness/src/decopilot/desktop-runtime.ts b/packages/harness/src/decopilot/desktop-runtime.ts deleted file mode 100644 index 6cf60c81f4..0000000000 --- a/packages/harness/src/decopilot/desktop-runtime.ts +++ /dev/null @@ -1,654 +0,0 @@ -/** - * desktop-runtime — the import-isolated DESKTOP tool-runtime for the shared - * Decopilot core. Runs INSIDE the desktop daemon (`packages/sandbox/daemon`) and - * is ALSO consumed by the unified cluster factory's desktop branch - * (`decopilot/index.ts` → `buildDesktopEnvironmentTools`). - * - * Like the cluster path, the desktop path builds the environment-specific deps - * and hands them to `runDecopilotCore` (`./run-core`) — ONE loop drives both - * environments. Unlike the cluster (which threads the full `StudioContext`, - * vault, storage, run-registry, and OTel monitoring), this module: - * - activates the chat provider from the injected `modelSources` secrets - * (`buildModelRuntimeFromSources` + `createProviderFromSecret`) instead of - * `ctx.aiProviders.activate` + vault; - * - opens an HTTP MCP `Client` to `mcp.url` and exposes its tools as - * passthrough tools (`toolsFromMCP`); - * - assembles only the LOCAL-OK built-ins (`buildLocalTools`) — the cluster - * built-ins are reached through `mcp.url` as passthrough tools; - * - runs `runEngine` against the PORTABLE `runNativeAgentLoopCore` with a - * DESKTOP system prompt (`buildDesktopPrompt`) — NOT the cluster's - * ctx-coupled `runAgentLoop` / `buildAgentSystemPrompt`, so neither of those - * enters the daemon bundle; - * - passes `telemetry: undefined` — desktop runs stay OTel-invisible this - * phase (no `@/monitoring` sink, no run-registry coupling). The engine still - * returns a no-op OTel span so the shared loop's span attributes are safe. - * - implements a REAL local `subtask` (self + cross-agent) by building - * TARGET-agent core deps and calling `spawnSubtask` — the cluster - * `SUBTASK_MCP` relay is gone. - * - * It imports ONLY portable leaves (relative paths) + `../types`. No `@/*` - * specifier and no `StudioContext` ever enters this graph, so the daemon bundles - * it and `tsc` does not overflow. - * - * ⚠️ SECURITY: each `modelSources` slot (kind="secret") carries an org - * chat-completion API key in plaintext over HTTPS. Never log it. Hardening - * (cluster model-proxy, spec §3.9) is deferred. - */ - -import { stepCountIs, type Tool, type ToolSet } from "ai"; -import type { Client } from "@modelcontextprotocol/sdk/client/index.js"; -import { trace } from "@opentelemetry/api"; -import type { - DecopilotHttpMcpSource, - DecopilotSecretModelSource, - HarnessStreamInput, -} from "../types"; -import { - openMcpSource, - openObjectStorageSource, - type OpenMcpSourceOptions, -} from "../sources"; -import { createLanguageModel } from "./mesh-provider"; -import { toolsFromMCP } from "./mcp-tools"; -import { buildLocalTools } from "./desktop-local-tools"; -import { createMcpBackgroundDispatcher } from "./built-in-tools/mcp-background-dispatcher"; -import type { BackgroundDispatcher } from "./built-in-tools/backgroundable"; -import { - createDesktopSubtaskDispatcher, - type DesktopSubtaskDispatcherOptions, -} from "./built-in-tools/desktop-subtask-dispatcher"; -import { getDesktopSandboxFsBuilder } from "./desktop-sandbox-fs-registry"; -import { buildDesktopPrompt, PARENT_STEP_LIMIT } from "./desktop-prompt"; -import { resolveModeConfig } from "./mode-config"; -import { runNativeAgentLoopCore } from "./native-agent-loop-core"; -import { resolveMaxOutputTokens } from "./harness-constants"; -import { estimateJsonTokens } from "./built-in-tools/read-tool-output"; -import { - spawnSubtask, - type DecopilotToolRuntime, - type ModelRuntime, - type RunDecopilotCoreDeps, - type SubtaskRunResult, -} from "./run-core"; -import type { - AssembledEngineHandle, - HarnessAssembledTools, - RunEngineArgs, -} from "./engine"; -import type { ConnectionsBlockTool } from "./connections-block"; -import type { VirtualClient } from "./built-in-tools/sandbox"; -import type { PendingImage } from "./built-in-tools/vm-tools/types"; -import { - createLocalSubtaskTool, - SubtaskInputSchema, -} from "./built-in-tools/local-subtask"; -import { makeBackgroundable } from "./built-in-tools/backgroundable"; -import type { DesktopToolCtx } from "./desktop-tool-ctx"; -import { - createSideChannelWriter, - type SideChannelWriter, -} from "../side-channel-writer"; -import { swapVirtualMcpAgent } from "./swap-virtual-mcp-agent"; - -function isDesktopToolVisible(tool: { - name: string; - _meta?: Record; -}): boolean { - const ui = tool._meta?.ui as { visibility?: string | string[] } | undefined; - const visibility = ui?.visibility; - if (visibility == null) return true; - if (typeof visibility === "string") return visibility === "model"; - if (Array.isArray(visibility)) return visibility.includes("model"); - return true; -} - -export function resolveDesktopRuntimeSources(input: HarnessStreamInput): { - modelSource: DecopilotSecretModelSource; - mcpSource: DecopilotHttpMcpSource; -} { - const modelSource = - input.modelSources?.thinking?.kind === "secret" - ? input.modelSources.thinking - : null; - if (!modelSource) { - throw new Error( - "decopilot-desktop requires a secret thinking model source. The cluster " + - "must inject the chat-model credential when routing decopilot to " + - "user-desktop.", - ); - } - - const mcpSource = - input.mcpSource?.kind === "http" - ? input.mcpSource - : { - kind: "http" as const, - url: input.mcp.url, - headers: input.mcp.headers, - expiresAt: input.mcp.expiresAt, - }; - - return { modelSource, mcpSource }; -} - -export function resolveDesktopSubtaskCodingWorkspace( - input: Pick, - targetAgentId: string | undefined, -): HarnessStreamInput["codingWorkspace"] { - return targetAgentId ? undefined : input.codingWorkspace; -} - -/** - * Build the studio management-MCP source (`/mcp/self`) for a desktop run, or - * null when the link material is absent (no object-storage source / fence token, - * or the base URL doesn't carry the org-scoped `/object-storage` suffix). Reuses - * the run's bearer (same temp credentials as the object-storage source). The - * org-scoped API base is derived from the object-storage source so the `:org` - * slug exactly matches what the cluster minted. - */ -function buildSelfMcpSource( - input: HarnessStreamInput, -): DecopilotHttpMcpSource | null { - const objectStorageBase = input.objectStorageSource?.baseUrl ?? ""; - const apiBase = objectStorageBase.replace(/\/object-storage\/?$/, ""); - if ( - !input.objectStorageSource || - !input.runFenceToken || - apiBase === objectStorageBase - ) { - return null; - } - return { - kind: "http", - url: `${apiBase}/mcp/self`, - headers: input.objectStorageSource.headers, - expiresAt: input.objectStorageSource.expiresAt, - }; -} - -function snapshotOf(input: HarnessStreamInput) { - return { - agentId: input.agent.id, - temperature: input.temperature, - toolApprovalLevel: input.toolApprovalLevel, - branch: input.branch ?? null, - }; -} - -/** generate_image background dispatcher: enqueue on the cluster (it has no local - * sandbox), via `THREAD_BACKGROUND_TOOL_START` over `/mcp/self`. */ -function buildBackgroundDispatcher( - input: HarnessStreamInput, -): BackgroundDispatcher | null { - const source = buildSelfMcpSource(input); - if (!source || !input.runFenceToken) return null; - return createMcpBackgroundDispatcher({ - source, - threadId: input.threadId, - fenceToken: input.runFenceToken, - snapshot: snapshotOf(input), - }); -} - -/** subtask background dispatcher: runs the subagent HERE (the daemon's real - * sandbox), detached, then delivers its report to the cluster via - * `THREAD_SUBTASK_DELIVER` over `/mcp/self`. Null when link material is absent - * → `background` falls back to inline. */ -function buildSubtaskDispatcher( - input: HarnessStreamInput, - runSubtask: DesktopSubtaskDispatcherOptions["runSubtask"], -): BackgroundDispatcher | null { - const source = buildSelfMcpSource(input); - if (!source || !input.runFenceToken) return null; - return createDesktopSubtaskDispatcher({ - source, - threadId: input.threadId, - fenceToken: input.runFenceToken, - snapshot: snapshotOf(input), - runSubtask, - }); -} - -/** - * Desktop engine adapter: assembles the DESKTOP system prompt - * (`buildDesktopPrompt` — the cluster-storage-free prompt) + the desktop tool - * set and drives the PORTABLE `runNativeAgentLoopCore`. Closes over the - * per-run assembled tool bundle (passthrough tools + local tools + - * connections data, built in `buildEnvironmentTools`) the same way the cluster - * closure captures `ctx`. - * - * The shared loop (`run-stream`) passes the portable `RunEngineArgs`: - * - `messages` / `provider` / `models` / `temperature` / `abortSignal`, - * - `prepareStep` (image injection + plan-mode filter + enabled-tool gating), - * - `extraTools` (the local built-ins + state-dependent `enable_tool`), - * - `additionalSystemMessages` (inline blocks + enabled-tools tail), - * - `connectionsData` / `isDecopilot` / `systemAgentInstructions` / `planMode`. - * The full streamText tool set = passthrough (from the closure) + extraTools. - * - * `stepLimit` (set to `SUBAGENT_STEP_LIMIT` for `kind: "subtask"` core runs) - * overrides the default `PARENT_STEP_LIMIT` stop condition. - * - * No telemetry, no run-registry, no monitoring — the engine returns a no-op - * OTel span so the shared loop's span attribute writes are harmless. - */ -function runDesktopEngine( - closure: { - input: HarnessStreamInput; - passthroughTools: ToolSet; - }, - args: RunEngineArgs, -): AssembledEngineHandle { - const { input } = closure; - // resolveModeConfig is the source of planPrompt/webSearchInstructionPrompt - // strings; args.planMode === (input.mode === "plan") for the boolean guard. - const modeConfig = resolveModeConfig(input.mode, { isCliAgent: false }); - - // ── DESKTOP system prompt (cluster-storage-free) ────────────────────── - const prompt = buildDesktopPrompt({ - agentId: args.virtualMcp.id, - isDecopilotAgent: args.isDecopilot, - connectionsBlockTools: args.connectionsData.tools, - connectionTitleMap: args.connectionsData.connectionTitleMap, - agentInstructions: args.systemAgentInstructions, - planPrompt: modeConfig.planPrompt, - codingWorkspace: input.codingWorkspace, - webSearchPrompt: modeConfig.webSearchInstructionPrompt, - }); - // Append the per-request inline blocks + enabled-tools tail the - // loop reconstructed (mirrors the cluster runAgentLoop's - // `additionalSystemMessages` append after buildAgentSystemPrompt). - const systemMessages = [ - ...prompt.systemMessages, - ...args.additionalSystemMessages, - ]; - - // ── Tool set: passthrough (closure) + extraTools (local built-ins + - // enable_tool). Mirrors run-stream's streamTools ordering. ────────── - const tools: ToolSet = { - ...closure.passthroughTools, - ...args.extraTools, - }; - - // No tracer on the desktop — a getTracer span is a no-op without a - // registered SDK, satisfying the shared loop's `handle.span.setAttribute`. - const span = trace - .getTracer("decopilot-desktop") - .startSpan("decopilot.agent_loop", { - attributes: { - "decopilot.agent.id": args.virtualMcp.id, - "decopilot.agent.kind": args.kind, - "decopilot.organization.id": input.organizationId, - "decopilot.model.id": args.models.thinking.id, - }, - }); - - const model = createLanguageModel(args.provider, args.models.thinking); - const handle = runNativeAgentLoopCore({ - model, - systemMessages, - messages: args.messages, - tools, - prepareStep: args.prepareStep, - temperature: args.temperature, - maxOutputTokens: resolveMaxOutputTokens( - args.models.thinking.limits, - estimateJsonTokens(systemMessages) + - estimateJsonTokens(args.messages) + - estimateJsonTokens(tools), - ), - // Delegated subtask core runs cap at SUBAGENT_STEP_LIMIT (args.stepLimit); - // top-level runs fall back to PARENT_STEP_LIMIT. - stopWhen: stepCountIs(args.stepLimit ?? PARENT_STEP_LIMIT), - abortSignal: args.abortSignal, - onStepFinish: args.onStepFinish, - onError: (_message, error) => { - console.error("[decopilot-desktop] stream error", error); - }, - }); - - Promise.resolve(handle.result.finishReason).finally(() => span.end()); - - return { - result: handle.result, - error: handle.error, - span, - assembledSystemMessages: systemMessages, - }; -} - -/** - * Build a desktop `DecopilotToolRuntime` for one MCP endpoint. Shared by the - * top-level run (parent's `mcpSource`) and a cross-agent subtask (the TARGET - * agent's swapped virtual-MCP URL). Owns the per-runtime MCP-client lifecycle - * (assigned into `cleanup.close`); the caller runs `cleanup.close` in its - * `finally`. - * - * `agentOverride`, when present, makes `buildEnvironmentTools` build the desktop - * built-ins for the TARGET agent id (so `read_resource`/`sandbox`/etc. scope to - * the target) and `runEngine` assemble the prompt with the TARGET's - * server-provided instructions read from the target MCP client - * (`getInstructions()`), and report the target id on the span. The shared core - * passes `subtask: undefined` for these runs anyway (depth-1 strip), so the - * target toolset never re-exposes `subtask`. - */ -function createDesktopToolRuntime(args: { - input: HarnessStreamInput; - mcpSource: DecopilotHttpMcpSource; - modelRuntime: ModelRuntime; - sideChannel: SideChannelWriter; - cleanup: { close?: () => Promise }; - /** Cross-agent subtask: override the agent id the desktop tools + prompt - * scope to. Omitted for the parent run and self-clone subtasks. */ - agentOverride?: { id: string }; - /** The real local `subtask` tool, injected only into the parent run's - * toolset (depth-1 — never into a delegated subtask runtime). */ - subtask?: Tool; - /** Test-only seam: override how the HTTP MCP source is opened so the parity - * test can inject a fake MCP `Client` without a real network connection. - * Production leaves this undefined and `openMcpSource` opens the real - * Streamable-HTTP transport. */ - openHttp?: OpenMcpSourceOptions["openHttp"]; -}): DecopilotToolRuntime { - const { input, mcpSource, modelRuntime, sideChannel, cleanup } = args; - const imageProvider = - modelRuntime.image?.provider ?? modelRuntime.thinking.provider; - // The agent the desktop tools + prompt scope to. The parent uses the run's - // own agent; a cross-agent subtask overrides it with the target id. - const targetAgentId = args.agentOverride?.id ?? input.agent.id; - - // passthroughTools is set by buildEnvironmentTools and consumed by runEngine. - // The sentinel `undefined` (not `{}`) distinguishes "not yet built" from - // "legitimately empty passthrough set", so runEngine can guard against - // temporal coupling if the call order ever breaks. `serverInstructions` is - // captured the same way (target prompt needs the target MCP's instructions). - let builtPassthroughTools: ToolSet | undefined = undefined; - let builtServerInstructions: string | undefined = undefined; - - return { - buildEnvironmentTools: async ({ input: streamInput }) => { - const toolOutputMap = new Map(); - const pendingImages: PendingImage[] = []; - - // 1. Open the MCP client to the (parent or target) virtual-mcp endpoint. - const openedMcp = await openMcpSource(mcpSource, { - clientInfo: { name: "decopilot-desktop", version: "1" }, - openHttp: args.openHttp, - }); - const mcpClient = openedMcp.client as Client; - cleanup.close = openedMcp.close; - - try { - // 2. Passthrough tools from the MCP endpoint. - const { - tools: passthroughTools, - nameMap, - rawTools: passthroughToolList, - } = await toolsFromMCP( - mcpClient, - toolOutputMap, - undefined, - streamInput.toolApprovalLevel, - { - isPlanMode: streamInput.mode === "plan", - isToolVisible: isDesktopToolVisible, - }, - ); - - // 3. Connections-block list + read-only annotations from the raw - // listing (drives enable_tool + the connections prompt block + - // plan-mode gating). Reuses the listing toolsFromMCP already - // fetched — avoids a second full listTools round-trip per run, - // which is costly at high tool counts (100s of tools). - const connectionsBlockTools: ConnectionsBlockTool[] = []; - const toolAnnotations = new Map(); - for (const t of passthroughToolList) { - const safeName = nameMap.get(t.name); - if (!safeName) continue; - const connectionId = - typeof t._meta?.gatewayClientId === "string" - ? t._meta.gatewayClientId - : "unknown"; - connectionsBlockTools.push({ - rawName: t.name, - safeName, - connectionId, - }); - if (t.annotations?.readOnlyHint !== undefined) { - toolAnnotations.set(safeName, { - readOnlyHint: t.annotations.readOnlyHint, - }); - } - } - - // 4. LOCAL-OK built-in tools. - const objectStorage = await openObjectStorageSource( - streamInput.objectStorageSource, - ); - const orgSlug = streamInput.organizationSlug ?? streamInput.projectSlug; - const baseUrl = streamInput.objectStorageSource - ? new URL(streamInput.objectStorageSource.baseUrl).origin - : ""; - const toolCtx: DesktopToolCtx = { - objectStorage, - organization: { id: streamInput.organizationId, slug: orgSlug }, - auth: { user: { id: streamInput.user.id } }, - baseUrl, - }; - // Flat sandbox fs hooks built by the desktop glue (owns the - // `@decocms/sandbox` provider) so buildLocalTools stays sandbox-free. - const fs = getDesktopSandboxFsBuilder()({ - virtualMcpId: targetAgentId, - branch: streamInput.branch, - userId: streamInput.user.id, - }); - // Backgroundable built-ins (generate_image, subtask): enqueue on the - // cluster (DBOS + org credentials) so the daemon's turn doesn't block. - const imageBackgroundDispatcher = - buildBackgroundDispatcher(streamInput); - const localTools = buildLocalTools({ - writer: sideChannel.writer, - toolOutputMap, - passthroughClient: mcpClient as unknown as VirtualClient, - toolApprovalLevel: streamInput.toolApprovalLevel, - isPlanMode: streamInput.mode === "plan", - ctx: toolCtx, - imageProvider, - imageModelInfo: streamInput.models.image, - imageBackgroundDispatcher, - pendingImages, - threadId: streamInput.threadId, - // VM/sandbox + prompt scope to the target agent (parent or subtask). - virtualMcpId: targetAgentId, - fs, - // Real desktop-local subtask, only on the parent run. Absent on - // delegated subtask runtimes (depth-1; the core strips it too). - subtask: args.subtask, - }); - - // Server instructions for the prompt: read from the live MCP client so - // a cross-agent subtask gets the TARGET agent's identity, not the - // parent's. (`getInstructions()` reflects the connected endpoint.) - const serverInstructions = mcpClient.getInstructions() ?? undefined; - - // Stash for runDesktopEngine. The undefined→value transition is the - // type-enforced gate: runEngine throws if these were never assigned. - builtPassthroughTools = passthroughTools; - builtServerInstructions = serverInstructions; - - const bundle: HarnessAssembledTools = { - tools: { ...passthroughTools, ...localTools }, - passthroughTools, - builtInTools: localTools, - connectionsBlockTools, - toolAnnotations, - connectionTitleMap: new Map(), - serverInstructions, - passthroughClient: mcpClient, - writer: sideChannel.writer, - pendingImages, - sideChunks: sideChannel.stream, - closeSideChunks: sideChannel.close, - close: openedMcp.close, - }; - return bundle; - } catch (err) { - // Construction failed mid-way — close the MCP client we already opened - // so the session doesn't leak, then re-throw. - await openedMcp.close().catch(() => {}); - cleanup.close = undefined; - throw err; - } - }, - runEngine: async (engineArgs) => { - if (builtPassthroughTools === undefined) { - throw new Error( - "[decopilot-desktop] runEngine called before buildEnvironmentTools — " + - "passthroughTools not yet assembled. This is a harness wiring bug.", - ); - } - // For a cross-agent subtask, force the engine to assemble the prompt for - // the TARGET agent id + its server instructions (the shared loop derives - // these from `input.agent.id` / `tools.serverInstructions`, which already - // reflect the override here, but the engine's `virtualMcp.id` also drives - // the desktop identity prompt — keep it consistent). - const scopedArgs: RunEngineArgs = args.agentOverride - ? { - ...engineArgs, - virtualMcp: { ...engineArgs.virtualMcp, id: targetAgentId }, - systemAgentInstructions: - engineArgs.systemAgentInstructions ?? builtServerInstructions, - } - : engineArgs; - return runDesktopEngine( - { input, passthroughTools: builtPassthroughTools }, - scopedArgs, - ); - }, - }; -} - -/** - * Build the DESKTOP environment deps: the HTTP-passthrough + local-built-ins - * tool runtime, including the real desktop-local `subtask` tool (self + - * cross-agent). `telemetry` is undefined for the desktop (runs stay - * OTel-invisible this phase). `cleanup.close` is assigned inside - * `buildEnvironmentTools` so the factory's `finally` closes the MCP client. - * - * It builds a parent `createDesktopToolRuntime`, threads the subtask tool's - * `runSubtask` closure (which swaps the virtual-MCP agent path segment for - * cross-agent delegation and runs `spawnSubtask`), and captures the core's - * child-usage sink so child runs fold into the parent accumulator. - * - * `openHttp` is the test seam that lets the parity test inject a fake MCP - * `Client`; production leaves it undefined so `openMcpSource` opens the real - * Streamable-HTTP transport. - */ -export function buildDesktopEnvironmentTools(args: { - input: HarnessStreamInput; - modelRuntime: ModelRuntime; - sideChannel: SideChannelWriter; - cleanup: { close?: () => Promise }; - openHttp?: OpenMcpSourceOptions["openHttp"]; -}): DecopilotToolRuntime { - const { input, modelRuntime, sideChannel, cleanup, openHttp } = args; - const { mcpSource } = resolveDesktopRuntimeSources(input); - - // ── Local subtask — self + cross-agent. Builds TARGET-agent core deps and - // runs the shared core via spawnSubtask. ────────────────────────────── - const runSubtask = async ( - prompt: string, - targetAgentId: string | undefined, - signal: AbortSignal, - ): Promise => { - const targetUrl = swapVirtualMcpAgent(mcpSource.url, targetAgentId); - const targetMcpSource: DecopilotHttpMcpSource = { - kind: "http", - url: targetUrl, - headers: mcpSource.headers, - expiresAt: mcpSource.expiresAt, - }; - const subSideChannel = createSideChannelWriter(); - const subCleanup: { close?: () => Promise } = {}; - const targetInput: HarnessStreamInput = targetAgentId - ? { - ...input, - agent: { id: targetAgentId }, - virtualMcp: { ...input.virtualMcp, id: targetAgentId }, - codingWorkspace: resolveDesktopSubtaskCodingWorkspace( - input, - targetAgentId, - ), - } - : input; - const targetToolRuntime = createDesktopToolRuntime({ - input: targetInput, - mcpSource: targetMcpSource, - modelRuntime, - sideChannel: subSideChannel, - cleanup: subCleanup, - agentOverride: targetAgentId ? { id: targetAgentId } : undefined, - openHttp, - // depth-1: a delegated run NEVER gets its own subtask tool. The - // core also strips it (kind:"subtask"); this is belt-and-braces. - }); - const deps: Omit = { - input: targetInput, - modelRuntime, - toolRuntime: targetToolRuntime, - telemetry: undefined, - }; - try { - return await spawnSubtask({ prompt, deps, signal }); - } finally { - subSideChannel.close(); - await subCleanup.close?.().catch(() => {}); - } - }; - - // The core supplies its usage roll-up sink to buildEnvironmentTools - // (`onChildUsage`, for kind:"main"). Capture it so the locally-built - // subtask tool — created BEFORE buildEnvironmentTools runs — can fold - // each child run's usage into the SAME accumulator that builds the - // parent's final `message-metadata.usage` (parity with the cluster). - let parentOnChildUsage: - | ((usage: SubtaskRunResult["usage"]) => void) - | undefined; - - // Made backgroundable: the model can opt a subtask into a durable cluster - // run (`background: true`) instead of blocking the daemon's turn — same seam - // generate_image uses. Null dispatcher (no link material) → runs inline. - const subtaskTool = makeBackgroundable( - "subtask", - SubtaskInputSchema, - createLocalSubtaskTool({ - writer: sideChannel.writer, - selfAgentId: input.agent.id, - models: input.models, - needsApproval: - input.mode === "plan" || input.toolApprovalLevel !== "auto", - runSubtask, - onChildUsage: (usage) => parentOnChildUsage?.(usage), - }), - // Backgrounded subtask runs on the daemon (real sandbox) detached, then - // delivers — NOT enqueued on the cluster like generate_image. - buildSubtaskDispatcher(input, runSubtask), - ); - - const parentToolRuntime = createDesktopToolRuntime({ - input, - mcpSource, - modelRuntime, - sideChannel, - cleanup, - subtask: subtaskTool, - openHttp, - }); - - return { - buildEnvironmentTools: (buildArgs) => { - parentOnChildUsage = buildArgs.onChildUsage; - return parentToolRuntime.buildEnvironmentTools(buildArgs); - }, - runEngine: parentToolRuntime.runEngine, - }; -} diff --git a/packages/harness/src/decopilot/desktop-sandbox-fs-registry.ts b/packages/harness/src/decopilot/desktop-sandbox-fs-registry.ts deleted file mode 100644 index c37628f95f..0000000000 --- a/packages/harness/src/decopilot/desktop-sandbox-fs-registry.ts +++ /dev/null @@ -1,38 +0,0 @@ -import type { SandboxFsHooks } from "./built-in-tools/vm-tools/sandbox-fs-hooks-types"; - -/** - * Desktop sandbox-fs builder registry. - * - * The portable desktop runtime needs flat `SandboxFsHooks` for its VM tools, but - * the only impl (`buildDesktopSandboxFs`) is `@decocms/sandbox`-coupled — it owns - * the local control-URL `SandboxProvider` + `createSandboxFsHooks`. To keep - * `@decocms/harness` sandbox-free, the impl lives in `@decocms/sandbox`; the - * daemon registers it here at boot and the runtime looks it up. Mirrors the - * cluster/desktop environment-builder registration seam. - */ -export interface DesktopSandboxFsParams { - virtualMcpId: string; - branch?: string | null; - userId?: string; -} -export type DesktopSandboxFsBuilder = ( - p: DesktopSandboxFsParams, -) => SandboxFsHooks; - -let builder: DesktopSandboxFsBuilder | undefined; - -export function registerDesktopSandboxFsBuilder( - b: DesktopSandboxFsBuilder, -): void { - builder = b; -} - -export function getDesktopSandboxFsBuilder(): DesktopSandboxFsBuilder { - if (!builder) { - throw new Error( - "[desktop] sandbox-fs builder not registered — the daemon must call " + - "registerDesktopSandboxFsBuilder(buildDesktopSandboxFs) before dispatching", - ); - } - return builder; -} diff --git a/packages/harness/src/decopilot/desktop-tool-ctx.ts b/packages/harness/src/decopilot/desktop-tool-ctx.ts deleted file mode 100644 index 0e763a4bad..0000000000 --- a/packages/harness/src/decopilot/desktop-tool-ctx.ts +++ /dev/null @@ -1,33 +0,0 @@ -/** - * desktop-tool-ctx — narrow context types for the import-isolated desktop path. - * - * The desktop daemon factory (`./desktop-factory.ts`) is registered in the - * daemon (`packages/sandbox/daemon/entry.ts`) and the daemon-safe desktop leaves - * it pulls (this file, `desktop-runtime`, `desktop-local-tools`, `desktop-prompt`) - * therefore MUST NOT pull cluster code into the bundle. Two rules keep them - * portable: - * - * 1. NEVER import `StudioContext` (`@/core/studio-context`). That type derives - * from Better-Auth's recursive plugin API; threading it into this tree - * makes `tsc` instantiate the deep type and overflow the stack. Use - * `DesktopToolCtx` everywhere a reused leaf would otherwise want a - * `StudioContext`. - * 2. Import portable leaves by RELATIVE path (never `@/*`) so the bundle - * resolves without the apps/mesh tsconfig alias. - * - * `DesktopToolCtx` is the narrow, structurally-typed context the lean tools - * read. It mirrors the small subset of `StudioContext` the LOCAL-OK built-ins - * actually touch (`objectStorage`, `organization`, `auth`, `metadata`), and - * objectStorage may be backed by Studio's HTTP object-storage API on the - * desktop so shared storage-aware built-ins can run without S3 credentials. - */ - -import type { OpenedObjectStorageSource } from "../sources"; - -export interface DesktopToolCtx { - objectStorage?: OpenedObjectStorageSource | null; - organization?: { id: string; slug?: string }; - auth?: { user?: { id: string } }; - baseUrl?: string; - metadata?: { requestId?: string; userAgent?: string | null }; -} diff --git a/packages/harness/src/decopilot/index.test.ts b/packages/harness/src/decopilot/index.test.ts index 5fdf796631..05e5370149 100644 --- a/packages/harness/src/decopilot/index.test.ts +++ b/packages/harness/src/decopilot/index.test.ts @@ -1,13 +1,18 @@ import { describe, expect, it } from "bun:test"; import { decopilotHarnessFactory } from "./index"; import type { HarnessStreamInput } from "../types"; +import { setDecopilotRunContext } from "./run-context"; function makeInput(overrides: Partial): HarnessStreamInput { - return { + const input: HarnessStreamInput = { threadId: "thread-1", - runId: "run-1", - messages: [], - workspace: { cwd: "default" }, + userMessage: { + id: "m1", + role: "user", + parts: [{ type: "text", text: "hi" }], + }, + harness: {}, + workspace: { cwd: null }, models: { thinking: { id: "claude-sonnet-4", @@ -25,12 +30,15 @@ function makeInput(overrides: Partial): HarnessStreamInput { toolApprovalLevel: "auto", user: { id: "user-1", email: "user@example.com" }, organizationId: "org-1", - virtualMcp: { id: "agent-1", metadata: {}, connections: [] }, agent: { id: "agent-1" }, signal: new AbortController().signal, - taskId: "thread-1", ...overrides, }; + setDecopilotRunContext(input, { + taskId: "thread-1", + virtualMcp: { id: "agent-1", metadata: {} }, + }); + return input; } describe("decopilotHarnessFactory", () => { diff --git a/packages/harness/src/decopilot/index.ts b/packages/harness/src/decopilot/index.ts index 0c2485fe59..8a2faff926 100644 --- a/packages/harness/src/decopilot/index.ts +++ b/packages/harness/src/decopilot/index.ts @@ -2,27 +2,18 @@ * Decopilot harness — the UNIFIED factory for the shared Decopilot core. * * The orchestration (processConversation → engine → streamText → title + - * side-channel merge) lives in `runDecopilotCore` (`./run-core`). ONE loop - * drives BOTH environments; this factory only selects which environment-deps - * bag to build: + * side-channel merge) lives in `runDecopilotCore` (`./run-core`). This factory + * builds the cluster environment-deps bag: * * - CLUSTER: when the injected `harnessCtx` carries a full `StudioContext` * (`"storage" in harnessCtx`), build the StudioContext-backed deps via * `buildClusterEnvironmentTools` — in-process virtual-MCP passthrough + the * full cluster tool set (web_search / update_interests / Browserless * built-ins) + the ctx-coupled `runAgentLoop` engine + cluster telemetry. - * - DESKTOP: otherwise (the import-isolated daemon constructs a bare - * `HarnessContext`), build the desktop deps via - * `buildDesktopEnvironmentTools` — HTTP MCP passthrough + local built-ins + - * the portable `runNativeAgentLoopCore` engine, with `telemetry: undefined`. - * * Created per-call (one `Harness` instance per stream) because the underlying * loop is stateful. The factory captures `ctx` so `HarnessStreamInput` stays * serializable for the remote transport. The per-run side-channel + MCP-client - * cleanup is owned here (the `try/finally` below) for both environments. - * - * The desktop-fork factory (`harnesses/decopilot-desktop/`) is collapsed into - * this unified factory; its remaining registration is dropped in a follow-up. + * cleanup is owned here (the `try/finally` below). */ import type { UIMessageChunk } from "ai"; @@ -43,6 +34,7 @@ import { type DecopilotToolRuntime, type ModelRuntime, } from "./run-core"; +import { requireDecopilotRunContext } from "./run-context"; import type { DecopilotTelemetry } from "./run-stream"; /** True when the injected context is a full cluster context (it carries @@ -54,15 +46,10 @@ function isClusterContext(ctx: HarnessContext): boolean { } // ── Environment-deps registration seam ────────────────────────────────────── -// The factory is environment-agnostic: it looks the cluster/desktop deps builder -// up from a module-scoped registry instead of statically importing the -// `@/`-coupled cluster assembler (`./harness-deps`) or the desktop runtime -// (`./desktop-runtime`, which reaches `@decocms/sandbox` via the desktop sandbox -// glue). That keeps this file — the `@decocms/harness/decopilot` entry — free of -// `@/` and `@decocms/sandbox` so it can move into the package. The mesh barrel -// (`apps/mesh/src/harnesses/index.ts`) registers both impls at import time, -// before any dispatch; every cluster dispatch path imports that barrel -// transitively, so the builder is present when `create().stream()` runs. +// The factory looks the cluster deps builder up from a module-scoped registry +// instead of statically importing the `@/`-coupled cluster assembler +// (`apps/mesh/src/harnesses/decopilot/harness-deps`). That keeps this package +// entry free of `@/` imports; the mesh barrel registers the implementation. export interface ClusterEnvironmentBuilderArgs { ctx: HarnessContext; modelRuntime: ModelRuntime; @@ -76,29 +63,13 @@ export type ClusterEnvironmentBuilder = ( telemetry?: DecopilotTelemetry; }; -export interface DesktopEnvironmentBuilderArgs { - input: HarnessStreamInput; - modelRuntime: ModelRuntime; - sideChannel: SideChannelWriter; - cleanup: { close?: () => Promise }; -} -export type DesktopEnvironmentBuilder = ( - args: DesktopEnvironmentBuilderArgs, -) => DecopilotToolRuntime; - let clusterEnvironmentBuilder: ClusterEnvironmentBuilder | undefined; -let desktopEnvironmentBuilder: DesktopEnvironmentBuilder | undefined; export function registerClusterEnvironmentBuilder( builder: ClusterEnvironmentBuilder, ): void { clusterEnvironmentBuilder = builder; } -export function registerDesktopEnvironmentBuilder( - builder: DesktopEnvironmentBuilder, -): void { - desktopEnvironmentBuilder = builder; -} export const decopilotHarnessFactory: HarnessFactory = { id: "decopilot", @@ -106,10 +77,11 @@ export const decopilotHarnessFactory: HarnessFactory = { return { id: "decopilot", async *stream(input: HarnessStreamInput): AsyncIterable { + const runContext = requireDecopilotRunContext(input); // ── Model runtime: providers from resolved secret sources (both // environments use the same secret→provider factory). ──────────── const modelRuntime = buildModelRuntimeFromSources( - { models: input.models, modelSources: input.modelSources }, + { models: input.models, modelSources: runContext.modelSources }, createProviderFromSecret, ); @@ -121,47 +93,29 @@ export const decopilotHarnessFactory: HarnessFactory = { // runs it even if the core throws mid-stream. const cleanup: { close?: () => Promise } = {}; - // ── Select the environment deps bag. ────────────────────────────── - let toolRuntime: DecopilotToolRuntime; - let telemetry: DecopilotTelemetry | undefined; - if (isClusterContext(harnessCtx)) { - if (!clusterEnvironmentBuilder) { - throw new Error( - "[decopilot] cluster environment builder not registered — " + - "apps/mesh/src/harnesses must be imported before dispatching " + - "the decopilot harness in cluster mode", - ); - } - const built = clusterEnvironmentBuilder({ - ctx: harnessCtx, - modelRuntime, - sideChannel, - cleanup, - }); - toolRuntime = built.toolRuntime; - telemetry = built.telemetry; - } else { - if (!desktopEnvironmentBuilder) { - throw new Error( - "[decopilot] desktop environment builder not registered", - ); - } - // Desktop runs stay OTel-invisible this phase (no monitoring sink). - toolRuntime = desktopEnvironmentBuilder({ - input, - modelRuntime, - sideChannel, - cleanup, - }); - telemetry = undefined; + if (!isClusterContext(harnessCtx)) { + throw new Error("[decopilot] desktop dispatch is not supported"); + } + if (!clusterEnvironmentBuilder) { + throw new Error( + "[decopilot] cluster environment builder not registered — " + + "apps/mesh/src/harnesses must be imported before dispatching " + + "the decopilot harness in cluster mode", + ); } + const built = clusterEnvironmentBuilder({ + ctx: harnessCtx, + modelRuntime, + sideChannel, + cleanup, + }); try { yield* runDecopilotCore({ input, modelRuntime, - toolRuntime, - telemetry, + toolRuntime: built.toolRuntime, + telemetry: built.telemetry, kind: "main", }); } finally { diff --git a/packages/harness/src/decopilot/mcp-tools.ts b/packages/harness/src/decopilot/mcp-tools.ts index d689848366..817bbaa8f4 100644 --- a/packages/harness/src/decopilot/mcp-tools.ts +++ b/packages/harness/src/decopilot/mcp-tools.ts @@ -293,9 +293,9 @@ export async function toolsFromMCP( ]; }); - // Return the raw listing too: callers (desktop-runtime's connections-block) - // otherwise re-call client.listTools(), doubling the list+parse pass over the - // full tool surface every run — costly at high tool counts (100s of tools). + // Return the raw listing too: callers otherwise re-call client.listTools(), + // doubling the list+parse pass over the full tool surface every run — costly + // at high tool counts (100s of tools). return { tools: Object.fromEntries(toolEntries), nameMap, diff --git a/packages/harness/src/decopilot/mesh-provider.ts b/packages/harness/src/decopilot/mesh-provider.ts index bdf7fa848a..d61995376d 100644 --- a/packages/harness/src/decopilot/mesh-provider.ts +++ b/packages/harness/src/decopilot/mesh-provider.ts @@ -13,7 +13,7 @@ import type { LanguageModelV3 } from "@ai-sdk/provider"; import type { ProviderV3 } from "@ai-sdk/provider"; import { wrapLanguageModel, type LanguageModelMiddleware } from "ai"; import type { ModelCapability, ProviderId } from "@decocms/mesh-sdk"; -import { isCreditError } from "./stream-error"; +import { isCreditError } from "../stream-error"; import { withThoughtSignatureCodec } from "./thought-signature"; export interface ProviderInfo { diff --git a/packages/harness/src/decopilot/prompt-constants.ts b/packages/harness/src/decopilot/prompt-constants.ts index 045d0fcb59..cce5f0c214 100644 --- a/packages/harness/src/decopilot/prompt-constants.ts +++ b/packages/harness/src/decopilot/prompt-constants.ts @@ -1,4 +1,5 @@ import type { GithubRepo } from "@decocms/mesh-sdk"; +export { DEFAULT_THREAD_TITLE } from "../thread-title"; export const DEFAULT_WINDOW_SIZE = 50; export const PARENT_STEP_LIMIT = 30; @@ -8,12 +9,6 @@ export const PARENT_STEP_LIMIT = 30; * can bundle it; re-exported from the route constants for the cluster. */ export const SUBAGENT_STEP_LIMIT = 15; -/** Title a thread carries until it's auto-titled or the user renames it. The - * producer-side title gate (`needsTitle`) and the cluster's title interceptor - * both compare against this. Lives here (a `@/*`-free harnesses leaf) so the - * daemon can bundle it; re-exported from the route constants for the cluster. */ -export const DEFAULT_THREAD_TITLE = "New chat"; - export function buildBasePlatformPrompt(): string { return ` You are an AI agent running on Deco CMS — a control plane for connecting diff --git a/packages/harness/src/decopilot/run-context.ts b/packages/harness/src/decopilot/run-context.ts new file mode 100644 index 0000000000..18f134dbed --- /dev/null +++ b/packages/harness/src/decopilot/run-context.ts @@ -0,0 +1,53 @@ +import type { + DecopilotHttpMcpSource, + DecopilotObjectStorageSource, + DecopilotSecretModelSources, + ChatMessage, + HarnessStreamInput, + HarnessUserContext, +} from "../types"; + +export interface DecopilotRunContext { + taskId?: string; + isSubagent?: boolean; + subtaskJobId?: string; + resumedFromBackground?: boolean; + virtualMcp: { + id: string; + metadata?: unknown; + }; + branch?: string | null; + messages?: ChatMessage[]; + modelSources?: DecopilotSecretModelSources; + mcpSource?: DecopilotHttpMcpSource; + objectStorageSource?: DecopilotObjectStorageSource; + userContext?: HarnessUserContext; +} + +const runContexts = new WeakMap(); + +export function setDecopilotRunContext( + input: object, + context: DecopilotRunContext, +): void { + runContexts.set(input, context); +} + +export function getDecopilotRunContext( + input: HarnessStreamInput, +): DecopilotRunContext | undefined { + return runContexts.get(input); +} + +export function requireDecopilotRunContext( + input: HarnessStreamInput, +): DecopilotRunContext { + const context = getDecopilotRunContext(input); + if (!context) { + throw new Error( + "Decopilot requires process-local run context. Dispatch must attach " + + "DecopilotRunContext before invoking the Decopilot harness.", + ); + } + return context; +} diff --git a/packages/harness/src/decopilot/run-core.test.ts b/packages/harness/src/decopilot/run-core.test.ts index 40bebf2da7..74997c840b 100644 --- a/packages/harness/src/decopilot/run-core.test.ts +++ b/packages/harness/src/decopilot/run-core.test.ts @@ -16,6 +16,7 @@ import { SUBTASK_MAX_CONCURRENT, type RunDecopilotCoreDeps, } from "./run-core"; +import { setDecopilotRunContext } from "./run-context"; import type { AssembledEngineHandle, HarnessAssembledTools, @@ -115,9 +116,13 @@ function makeToolRuntime(opts: { const baseInput = { threadId: "t1", - runId: "run1", - messages: [], - workspace: { cwd: "default" }, + userMessage: { + id: "m1", + role: "user", + parts: [{ type: "text", text: "hi" }], + }, + harness: {}, + workspace: { cwd: null }, models: { thinking: { id: "m1", credentialId: "c1", limits: {} }, }, @@ -127,12 +132,15 @@ const baseInput = { toolApprovalLevel: "auto", user: { id: "u1", email: "u@x.com" }, organizationId: "org1", - virtualMcp: { id: "vir_1", metadata: {} }, agent: { id: "vir_1" }, currentThreadTitle: "Some existing title", signal: new AbortController().signal, } as RunDecopilotCoreDeps["input"]; +setDecopilotRunContext(baseInput, { + virtualMcp: { id: "vir_1", metadata: {} }, +}); + const modelRuntime = { thinking: { model: { id: "m1", credentialId: "c1" }, @@ -256,6 +264,47 @@ describe("SUBTASK_MAX_CONCURRENT", () => { }); }); +describe("runDecopilotCore conversation input", () => { + test("uses DecopilotRunContext messages so hosted runs keep thread history", async () => { + const input: RunDecopilotCoreDeps["input"] = { + ...baseInput, + signal: new AbortController().signal, + userMessage: { + id: "current", + role: "user", + parts: [{ type: "text", text: "current" }], + }, + }; + setDecopilotRunContext(input, { + virtualMcp: { id: "vir_1", metadata: {} }, + messages: [ + { + id: "previous", + role: "user", + parts: [{ type: "text", text: "previous" }], + }, + input.userMessage, + ], + }); + const captured: { args?: RunEngineArgs } = {}; + + for await (const _ of runDecopilotCore({ + input, + modelRuntime, + toolRuntime: makeToolRuntime({ + chunks: [{ type: "finish" } as UIMessageChunk], + totalUsage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }, + captured, + }), + kind: "main", + })) { + // drain + } + + expect(captured.args?.messages).toHaveLength(2); + }); +}); + describe("usage roll-up (parent final metadata includes child tokens)", () => { test("a main run folds onChildUsage into the parent's final finish usage", async () => { // Holder the test fills from buildEnvironmentTools' onChildUsage. A main @@ -335,9 +384,11 @@ describe("usage roll-up (parent final metadata includes child tokens)", () => { input: { ...baseInput, signal: new AbortController().signal, - messages: [ - { id: "u-1", role: "user", parts: [{ type: "text", text: "hi" }] }, - ] as never, + userMessage: { + id: "u-1", + role: "user", + parts: [{ type: "text", text: "hi" }], + }, }, modelRuntime, toolRuntime: toolRuntime as never, @@ -371,13 +422,11 @@ describe("runDecopilotCore main-run subtask policy", () => { input: { ...baseInput, signal: new AbortController().signal, - messages: [ - { - id: "u-1", - role: "user", - parts: [{ type: "text", text: "hello" }], - }, - ] as never, + userMessage: { + id: "u-1", + role: "user", + parts: [{ type: "text", text: "hello" }], + }, }, modelRuntime, toolRuntime: makeToolRuntime({ diff --git a/packages/harness/src/decopilot/run-core.ts b/packages/harness/src/decopilot/run-core.ts index 080aadc374..dcc5e4aa02 100644 --- a/packages/harness/src/decopilot/run-core.ts +++ b/packages/harness/src/decopilot/run-core.ts @@ -33,6 +33,7 @@ import type { UIMessageChunk } from "ai"; import type { + DecopilotSecretModelSources, DecopilotSecretModelSource, HarnessStreamInput, ModelSelection, @@ -52,6 +53,7 @@ import { processConversation } from "./conversation"; import { DEFAULT_WINDOW_SIZE, SUBAGENT_STEP_LIMIT } from "./prompt-constants"; import { createUsageAccumulator } from "../usage-accumulator"; import { createSemaphore } from "../semaphore"; +import { getDecopilotRunContext, setDecopilotRunContext } from "./run-context"; export type { DecopilotTelemetry } from "./run-stream"; @@ -88,7 +90,7 @@ export interface ModelRuntime { */ export interface ModelRuntimeSources { models: HarnessStreamInput["models"]; - modelSources: HarnessStreamInput["modelSources"]; + modelSources?: DecopilotSecretModelSources; } /** @@ -214,6 +216,7 @@ export async function* runDecopilotCore( deps: RunDecopilotCoreDeps, ): AsyncIterable { const { input, modelRuntime, toolRuntime, telemetry } = deps; + const runContext = getDecopilotRunContext(input); const isSubtask = deps.kind === "subtask"; // The core owns the cumulative-usage accumulator so a MAIN run's `subtask` @@ -243,7 +246,7 @@ export async function* runDecopilotCore( systemMessages: processedSystemMessages, messages: processedMessages, originalMessages, - } = await processConversation(input.messages, { + } = await processConversation(runContext?.messages ?? [input.userMessage], { windowSize: DEFAULT_WINDOW_SIZE, models: input.models, tools: tools.tools, @@ -288,6 +291,7 @@ export async function* runDecopilotCore( // Shared so the subtask tool's child-usage roll-up lands in the same // accumulator that builds the final `message-metadata.usage`. usageAccumulator, + runContext, }, }); } @@ -380,15 +384,20 @@ async function runSubtaskCore( // Chain the parent tool-call signal into the child core run. const subtaskInput: HarnessStreamInput = { ...deps.input, - messages: [ - { - id: "subtask-prompt", - role: "user", - parts: [{ type: "text", text: prompt }], - }, - ], + userMessage: { + id: "subtask-prompt", + role: "user", + parts: [{ type: "text", text: prompt }], + }, signal, }; + const runContext = getDecopilotRunContext(deps.input); + if (runContext) { + setDecopilotRunContext(subtaskInput, { + ...runContext, + messages: [subtaskInput.userMessage], + }); + } try { for await (const chunk of runDecopilotCore({ diff --git a/packages/harness/src/decopilot/run-stream.ts b/packages/harness/src/decopilot/run-stream.ts index b2d8d649da..fa6ad902a3 100644 --- a/packages/harness/src/decopilot/run-stream.ts +++ b/packages/harness/src/decopilot/run-stream.ts @@ -47,14 +47,15 @@ import { resolveModeConfig } from "./mode-config"; import { makeTitleResultChunk } from "../title-chunk"; import { shouldGenerateTitle } from "../title-merge"; import { createLanguageModel } from "./mesh-provider"; -import { genTitle } from "./title-generator"; +import { genTitle } from "../title-generator"; import type { ChatMessage, HarnessStreamInput, ModelSelection } from "../types"; import type { AssembledEngineHandle, HarnessAssembledTools, RunEngine, } from "./engine"; -import { sanitizeStreamError, stringifyError } from "./stream-error"; +import type { DecopilotRunContext } from "./run-context"; +import { sanitizeStreamError, stringifyError } from "../stream-error"; import { isDecopilot } from "@decocms/mesh-sdk"; import { createAgentPrepareStep, @@ -135,7 +136,7 @@ export interface DecopilotTelemetry { * paths, or the `toUIMessageStream({ messageMetadata })` decorator). */ export interface RunDecopilotStreamExtras { - /** Provider reconstructed from `input.modelSources.thinking` (thinking slot). */ + /** Provider reconstructed from DecopilotRunContext.modelSources.thinking. */ provider: MeshProvider; /** Provider/model used only for title generation. Lets Decopilot use the org @@ -196,6 +197,7 @@ export interface RunDecopilotStreamExtras { * includes child tokens (Task 17 roll-up). Omitted by callers that don't * delegate; defaults to a fresh accumulator. */ usageAccumulator?: UsageAccumulator; + runContext?: DecopilotRunContext; } export interface RunDecopilotStreamArgs { @@ -276,6 +278,7 @@ export async function* runDecopilotStream( args: RunDecopilotStreamArgs, ): AsyncGenerator { const { input, tools, runEngine, extras } = args; + const runContext = extras.runContext; const { provider, titleProvider, @@ -358,8 +361,10 @@ export async function* runDecopilotStream( // sees only the always-on built-ins (the "light toolset" bug). Give a subagent // its full toolset up front instead: every passthrough tool active from step 1. // `extras.kind === "subtask"` is the reliable signal on the desktop subagent - // path (it runs through this loop); `input.isSubagent` covers the cluster one. - const isDelegatedSubagent = extras.kind === "subtask" || input.isSubagent; + // path (it runs through this loop); DecopilotRunContext carries the cluster + // top-level subagent flag without widening HarnessStreamInput. + const isDelegatedSubagent = + extras.kind === "subtask" || runContext?.isSubagent === true; const enabledTools = isDelegatedSubagent ? new Set(passthroughToolNames) : reconstructEnabledTools(originalMessages, passthroughToolNames); @@ -414,11 +419,22 @@ export async function* runDecopilotStream( // - `extraTools` → built-ins + enable_tool (state-dependent) // - `prepareStep` → image injection + plan-mode filter // - `additionalSystemMessages` → per-request inline blocks - const vmMetadata = input.virtualMcp.metadata as { - githubRepo?: import("@decocms/mesh-sdk").GithubRepo | null; - }; + const vmMetadata = runContext?.virtualMcp.metadata as + | { + githubRepo?: import("@decocms/mesh-sdk").GithubRepo | null; + } + | undefined; + const codingWorkspace = + input.workspace.cwd === "/repo" + ? { + repo: input.workspace.repo, + branch: input.workspace.branch, + cwd: input.workspace.cwd, + workspaceKind: "github" as const, + } + : undefined; const handle: AssembledEngineHandle = await runEngine({ - kind: input.isSubagent ? "subagent" : "agent", + kind: runContext?.isSubagent ? "subagent" : "agent", virtualMcp: { id: input.agent.id, repo: vmMetadata?.githubRepo ?? undefined, @@ -431,11 +447,11 @@ export async function* runDecopilotStream( temperature: input.temperature, planMode: modeConfig.isPlanMode, isDecopilot: isDecopilot(input.agent.id) !== null, - codingWorkspace: input.codingWorkspace, + codingWorkspace, systemAgentInstructions: tools.serverInstructions, currentThreadId: threadId, user: { id: input.user.id, email: input.user.email }, - userContext: input.userContext, + userContext: runContext?.userContext, writer, prepareStep: parentPrepareStep, onStepFinish: extras.onStepFinish, @@ -711,10 +727,12 @@ export async function* runDecopilotStream( // Set for a backgrounded subtask run: correlates this message to the // originating `subtask` tool call (== its `jobId`) so the UI nests it // inside that card instead of rendering it top-level. - ...(input.subtaskJobId ? { subtaskJobId: input.subtaskJobId } : {}), + ...(runContext?.subtaskJobId + ? { subtaskJobId: runContext.subtaskJobId } + : {}), // Set when this turn resumes the agent after a backgrounded tool // completed — the UI shows a "resumed" indicator on the message. - ...(input.resumedFromBackground + ...(runContext?.resumedFromBackground ? { resumedFromBackground: true } : {}), }; diff --git a/packages/harness/src/decopilot/system-prompt.ts b/packages/harness/src/decopilot/system-prompt.ts index b3cd5d5938..22487a3a4f 100644 --- a/packages/harness/src/decopilot/system-prompt.ts +++ b/packages/harness/src/decopilot/system-prompt.ts @@ -18,8 +18,11 @@ * markers propagate through OpenRouter routes too. */ +import { buildCurrentContextPrompt } from "../current-context-prompt"; import { EPHEMERAL_5M } from "./cache-instrumentation"; +export { buildCurrentContextPrompt } from "../current-context-prompt"; + export interface SystemMessage { role: "system"; content: string; @@ -30,21 +33,6 @@ export interface SystemMessage { }; } -/** - * Per-request, non-cached system prompt content. - * - * Anything that varies between requests but is needed in the system layer - * lives here — kept outside the cached prefix so it doesn't invalidate - * Anthropic cache breakpoints or OpenAI/Gemini automatic prefix caches. - */ -export function buildCurrentContextPrompt(now: Date): string { - const iso = now.toISOString(); - return ` -Current date: ${iso.slice(0, 10)} -Current time: ${iso.slice(11, 16)} UTC -`; -} - const EPHEMERAL_5M_PROVIDER_OPTIONS = { anthropic: { cacheControl: EPHEMERAL_5M }, }; diff --git a/packages/harness/src/decopilot/stream-error.test.ts b/packages/harness/src/stream-error.test.ts similarity index 100% rename from packages/harness/src/decopilot/stream-error.test.ts rename to packages/harness/src/stream-error.test.ts diff --git a/packages/harness/src/decopilot/stream-error.ts b/packages/harness/src/stream-error.ts similarity index 100% rename from packages/harness/src/decopilot/stream-error.ts rename to packages/harness/src/stream-error.ts diff --git a/packages/harness/src/thread-title.ts b/packages/harness/src/thread-title.ts new file mode 100644 index 0000000000..c6be093e2d --- /dev/null +++ b/packages/harness/src/thread-title.ts @@ -0,0 +1,2 @@ +/** Title a thread carries until it is auto-titled or the user renames it. */ +export const DEFAULT_THREAD_TITLE = "New chat"; diff --git a/packages/harness/src/decopilot/title-generator.test.ts b/packages/harness/src/title-generator.test.ts similarity index 100% rename from packages/harness/src/decopilot/title-generator.test.ts rename to packages/harness/src/title-generator.test.ts diff --git a/packages/harness/src/decopilot/title-generator.ts b/packages/harness/src/title-generator.ts similarity index 100% rename from packages/harness/src/decopilot/title-generator.ts rename to packages/harness/src/title-generator.ts diff --git a/packages/harness/src/title-merge.test.ts b/packages/harness/src/title-merge.test.ts index 4aa8937b3e..484df52735 100644 --- a/packages/harness/src/title-merge.test.ts +++ b/packages/harness/src/title-merge.test.ts @@ -6,7 +6,7 @@ import { type TitleHandle, } from "./title-merge"; import { isTitleResultChunk } from "./title-chunk"; -import { DEFAULT_THREAD_TITLE } from "./decopilot/prompt-constants"; +import { DEFAULT_THREAD_TITLE } from "./thread-title"; /** Build an async stream from a fixed list of chunks. */ async function* streamOf( diff --git a/packages/harness/src/title-merge.ts b/packages/harness/src/title-merge.ts index a43117ff42..eb21bf9ca0 100644 --- a/packages/harness/src/title-merge.ts +++ b/packages/harness/src/title-merge.ts @@ -22,8 +22,8 @@ */ import type { UIMessageChunk } from "ai"; import { makeTitleResultChunk } from "./title-chunk"; -import { DEFAULT_THREAD_TITLE } from "./decopilot/prompt-constants"; -import { stringifyError } from "./decopilot/stream-error"; +import { stringifyError } from "./stream-error"; +import { DEFAULT_THREAD_TITLE } from "./thread-title"; /** * Producer-side auto-title gate (decision D13). Auto-title only an *unrenamed* diff --git a/packages/harness/src/types.ts b/packages/harness/src/types.ts index 30772f7061..514d30b48e 100644 --- a/packages/harness/src/types.ts +++ b/packages/harness/src/types.ts @@ -5,12 +5,6 @@ import type { UIMessage, UIMessageChunk } from "ai"; // `ai` dependency — keeping a SINGLE hoisted `ai` instance (avoids the // double-AI-SDK / broken-instanceof hazard). export type { UIMessageChunk } from "ai"; -import type { - DecopilotMcpSource, - DecopilotModelSources, - DecopilotObjectStorageSource, -} from "./sources"; -import type { CodingWorkspacePromptInput } from "./coding-workspace-prompt"; export { createSecretModelSource } from "./sources"; export type { @@ -94,6 +88,25 @@ export interface ModelsConfig { * the AI SDK's generic `UIMessage` already provides. */ export type ChatMessage = UIMessage; +export type HarnessWorkspace = + | { + cwd: "/repo"; + repo: { + owner: string; + name: string; + connectedGithub: boolean; + }; + branch: string | null; + } + | { + cwd: null; + }; + +export interface HarnessAgent { + id: string; + instructions?: string; +} + /** One recent thread, pre-resolved agent-side for the prompt's history block. * `updated_at` is an ISO string (the portable prompt builder formats the date * label). Mirrors the fields `renderRecentThreadsSection` reads. */ @@ -128,139 +141,33 @@ export interface HarnessUserContext { agents?: PromptAgentSummary[]; } -/** Input passed to every Harness.stream() call. Fully serializable except - * AbortSignal — designed so a future remote transport can JSON-serialize it - * over an HTTP+SSE wire (cancel becomes a separate RPC). */ export interface HarnessStreamInput { - // ===== Identity ===== threadId: string; - runId: string; - /** Opaque resume token, restored from prior `finish-message.providerMetadata`. */ - resumeSessionRef?: string; - - // ===== Conversation ===== - messages: ChatMessage[]; - - // ===== Workspace ===== - /** Symbolic, logically-resolved working directory (see workspace-cwd.ts). - * Required. The daemon rebases non-"default" values onto its sandbox root. */ - workspace: { cwd: string }; - /** Pre-renderable coding workspace facts shared by Decopilot and CLI harness prompts. */ - codingWorkspace?: CodingWorkspacePromptInput; - - // ===== Models (already resolved: credential → key/headers, permissions checked) ===== + userMessage: ChatMessage; + harness: { + sessionId?: string; + }; + workspace: HarnessWorkspace; models: ModelsConfig; - /** Resolved Decopilot model sources by slot. `thinking` is the canonical - * primary slot; optional slots let built-ins and auto-title use the - * credential already selected by the cluster without receiving cluster - * provider objects. Secret sources are serializable and may cross the link - * protocol; in-process model sources are local-only and must not cross it. */ - modelSources?: DecopilotModelSources; - /** Resolved MCP source. HTTP sources are serializable; in-process clients are - * local-only and stripped before remote dispatch. */ - mcpSource?: DecopilotMcpSource; - /** HTTP object-storage API source for runtimes that cannot access - * cluster-local object-storage clients. */ - objectStorageSource?: DecopilotObjectStorageSource; - - // ===== Tool gateway ===== - /** Serializable HTTP MCP endpoint the harness should connect to. - * In-process Decopilot may use DecopilotMcpSource(kind="in-process") - * outside the wire schema; such values must never cross the link - * protocol boundary. The Bearer token is a 1h-TTL temp key — - * `expiresAt` carries its absolute deadline so remote daemons can - * refresh proactively. */ mcp: { url: string; headers: Record; expiresAt: number; }; - - // ===== Mode (forwarded; each harness interprets independently) ===== mode: ChatMode; - - // ===== Knobs ===== temperature: number; toolApprovalLevel: ToolApprovalLevel; - /** - * Optional allowlist of model-facing tool names. When set, the assembled - * toolset (MCP + built-ins) is filtered down to just these names before the - * model sees it. `null`/absent = full toolset. Set by automations that pin a - * specific subset of tools. - */ toolAllowlist?: string[] | null; - /** - * Parent agent-loop step cap (AI SDK `stopWhen: stepCountIs(...)`). Absent - * leaves the decopilot harness on its `PARENT_STEP_LIMIT` default. Set by - * automations that pin a custom ceiling. - */ maxAgentSteps?: number; - /** - * Run is a subagent (a backgrounded `subtask` dispatched as its own - * serialized run on the parent thread). Drives `runEngine({ kind: "subagent" })` - * — excludes the nested `subtask`/`user_ask`/`propose_plan` built-ins (depth-1) - * and uses the subagent prompt. The caller also skips history-seeding and caps - * steps via `maxAgentSteps`. - */ - isSubagent?: boolean; - /** - * When this run is a backgrounded subtask, the originating `subtask` tool - * call's job id. Stamped onto the run's assistant-message metadata so the UI - * can nest the run inside that tool-call card. - */ - subtaskJobId?: string; - /** - * This run was auto-enqueued to resume the agent after a backgrounded tool - * (image / subtask) finished. Stamped onto the assistant-message metadata so - * the UI can flag the turn as a background-completion resume. - */ - resumedFromBackground?: boolean; - - // ===== Identity context (for prompts, audit) ===== user: { id: string; email: string }; organizationId: string; organizationSlug?: string; - /** Optional project slug for agents pinned to a project. */ - projectSlug?: string; - - /** Loaded VirtualMcp entity (the agent definition). Decopilot reads metadata, - * connection list, and github-repo info from this; CLI harnesses use `id` - * and may append `metadata.instructions` to their CLI-safe prompt context. - * Typed as a permissive bag in the package — the cluster passes its richer - * `VirtualMCPEntity` shape and TS accepts the widening. */ - virtualMcp: { id: string; metadata?: unknown; [k: string]: unknown }; - /** Convenience: same as `virtualMcp.id`. Kept separate to avoid forcing CLI - * harnesses to destructure the full entity. */ - agent: { id: string }; - - // ===== Optional thread state ===== - branch?: string | null; - taskId?: string; + agent: HarnessAgent; triggerId?: string; - /** Current persisted thread title. Decopilot harness uses this to decide - * whether to run auto-title (only when title still equals the default). */ currentThreadTitle?: string; - - // ===== Lifecycle ===== - /** Aborts when the consumer disconnects or the user cancels. */ signal: AbortSignal; - - // ===== Trace propagation ===== traceparent?: string; - - /** - * Single-writer fence token for this run (spec §3.5). Minted by - * prepareRun (Phase B) and included in every ingest append by the - * desktop daemon. Absent on ws-path runs. - */ runFenceToken?: string; - - // ===== Pre-resolved prompt data (read agent-side before dispatch) ===== - /** Threads / interests / sibling-agents, pre-resolved by `prepareRun` so the - * portable prompt builder renders them without any `ctx.storage` reach-in. - * Absent on runs whose caller didn't pre-resolve (e.g. desktop) ⇒ the - * corresponding prompt blocks are skipped. */ - userContext?: HarnessUserContext; } /** A Harness produces a stream of UI message chunks for a conversation turn. diff --git a/packages/harness/src/workspace-cwd.test.ts b/packages/harness/src/workspace-cwd.test.ts index a2f52bcd9f..bc397a560a 100644 --- a/packages/harness/src/workspace-cwd.test.ts +++ b/packages/harness/src/workspace-cwd.test.ts @@ -1,19 +1,12 @@ import { describe, expect, it } from "bun:test"; -import { - effectiveCwd, - WORKSPACE_CWD_DEFAULT, - WORKSPACE_CWD_REPO, -} from "./workspace-cwd"; +import { effectiveCwd } from "./workspace-cwd"; describe("workspace cwd contract", () => { - it("treats the 'default' sentinel as no SDK cwd override", () => { - expect(effectiveCwd(WORKSPACE_CWD_DEFAULT)).toBeUndefined(); + it("treats null cwd as no SDK cwd override", () => { + expect(effectiveCwd(null)).toBeUndefined(); }); - it("passes symbolic non-default paths through unchanged", () => { - expect(effectiveCwd(WORKSPACE_CWD_REPO)).toBe("/repo"); - expect(effectiveCwd("/data/sandboxes/h1/repo")).toBe( - "/data/sandboxes/h1/repo", - ); + it("passes /repo through as the symbolic repo cwd", () => { + expect(effectiveCwd("/repo")).toBe("/repo"); }); }); diff --git a/packages/harness/src/workspace-cwd.ts b/packages/harness/src/workspace-cwd.ts index cde4c0a099..186cc0721b 100644 --- a/packages/harness/src/workspace-cwd.ts +++ b/packages/harness/src/workspace-cwd.ts @@ -4,15 +4,14 @@ * `workspace.cwd` on the wire is LOGICALLY resolved, never host-absolute: * - "/repo" — repo checkout inside the sandbox; the daemon rebases it * onto its own sandbox root on receipt. - * - "default" — no on-disk checkout; the harness uses its SDK default - * (process.cwd()) and NEVER fails the run on cwd. + * - null — no SDK cwd override; the harness uses its SDK default. */ -export const WORKSPACE_CWD_DEFAULT = "default"; +export type HarnessCwd = "/repo" | null; /** Repo checkout location inside any sandbox (desktop or hosted container). */ -export const WORKSPACE_CWD_REPO = "/repo"; +export const WORKSPACE_CWD_REPO = "/repo" as const; /** Harness-side: translate the wire value into an SDK cwd option. */ -export function effectiveCwd(cwd: string): string | undefined { - return cwd === WORKSPACE_CWD_DEFAULT ? undefined : cwd; +export function effectiveCwd(cwd: HarnessCwd): string | undefined { + return cwd ?? undefined; } diff --git a/packages/sandbox/daemon/daemon.dispatch.e2e.test.ts b/packages/sandbox/daemon/daemon.dispatch.e2e.test.ts index 111ef7db1a..3f6af1ecdf 100644 --- a/packages/sandbox/daemon/daemon.dispatch.e2e.test.ts +++ b/packages/sandbox/daemon/daemon.dispatch.e2e.test.ts @@ -61,13 +61,29 @@ describe("daemon e2e: dispatch", () => { ); }); - it("POST /dispatch with a malformed input envelope → 400 bad_input", async () => { + it("POST /dispatch without runId → 400 missing_run_id", async () => { const res = await fetch(url(d, "/_sandbox/dispatch"), { method: "POST", headers: jsonAuthHeaders(), body: toBody({ harnessId: "claude-code", input: {} }), }); expect(res.status).toBe(400); + expect(((await res.json()) as { error: string }).error).toBe( + "missing_run_id", + ); + }); + + it("POST /dispatch with a malformed input envelope → 400 bad_input", async () => { + const res = await fetch(url(d, "/_sandbox/dispatch"), { + method: "POST", + headers: jsonAuthHeaders(), + body: toBody({ + runId: "run-bad-input", + harnessId: "claude-code", + input: {}, + }), + }); + expect(res.status).toBe(400); expect(((await res.json()) as { error: string }).error).toBe("bad_input"); }); diff --git a/packages/sandbox/daemon/dispatch-registry.test.ts b/packages/sandbox/daemon/dispatch-registry.test.ts index 784cc205b5..9d674a6c42 100644 --- a/packages/sandbox/daemon/dispatch-registry.test.ts +++ b/packages/sandbox/daemon/dispatch-registry.test.ts @@ -11,8 +11,8 @@ import { // @decocms/harness registry and looks them up by id. The registry keys ARE the // factory ids, so a factory.id that drifts from the dispatch protocol's harness // id would silently 404 "unknown harness". Lock the id↔registry-key invariant -// for the package-side CLI factories. (decopilotDesktopHarnessFactory.id = -// "decopilot" lives in apps/mesh — asserted by the desktop-factory definition.) +// for the package-side CLI factories. Decopilot is cluster-only and is not +// registered in the desktop daemon. describe("daemon harness registry (id ↔ key)", () => { it("resolves the CLI harness factories by their dispatch id", () => { resetRegistryForTests(); diff --git a/packages/sandbox/daemon/entry.ts b/packages/sandbox/daemon/entry.ts index 4fbfe151c8..fae8fa2f5b 100644 --- a/packages/sandbox/daemon/entry.ts +++ b/packages/sandbox/daemon/entry.ts @@ -40,29 +40,14 @@ import { makeConfigUpdateHandler, } from "./routes/config"; import { handleCancelRequest, handleDispatchRequest } from "./routes/dispatch"; -// Import harness factories from their subpaths (rather than the barrel -// `apps/mesh/src/harnesses/index.ts`) to avoid pulling in the cluster-only -// unified `decopilotHarnessFactory` (`harnesses/decopilot/index.ts`) and its -// dependency tree (which references cluster modules that cause a TS stack -// overflow in the daemon bundle). -// -// `decopilotDesktopHarnessFactory` is the IMPORT-ISOLATED daemon entrypoint into -// the shared desktop tool-runtime (`harnesses/decopilot/desktop-factory.ts` → -// `desktop-runtime.ts`): it activates its provider from the injected -// `modelSources.thinking`, reaches cluster-coupled tools via `mcp.url`, and -// imports only portable, `@/`-free leaves — so it bundles here without dragging -// in StudioContext / storage / vault. See `desktop-factory.ts` for the -// isolation contract (it deliberately never references the cluster branch in -// `harness-deps.ts`). +// Import harness factories from their subpaths (rather than the mesh barrel). +// The daemon runs desktop CLI harnesses only; Decopilot remains cluster-side. import { claudeCodeHarnessFactory } from "@decocms/harness/claude-code/index"; import { codexHarnessFactory } from "@decocms/harness/codex/index"; -import { decopilotDesktopHarnessFactory } from "@decocms/harness/decopilot/desktop-factory"; import { getHarnessFactory, registerHarnessFactory, } from "@decocms/harness/registry"; -import { registerDesktopSandboxFsBuilder } from "@decocms/harness/decopilot/desktop-sandbox-fs-registry"; -import { buildDesktopSandboxFs } from "../dispatch/desktop-sandbox-fs"; import type { HarnessContext, HarnessId, @@ -453,24 +438,15 @@ const proxyH = makeProxyHandler({ broadcaster, getDevPort }); // loopback when it pulls a work item; the daemon spawns the named factory's // CLI in-process and streams `UIMessageChunk` back as SSE. // -// The CLI factories plus the import-isolated desktop decopilot factory live in -// the daemon. The cluster `decopilotHarnessFactory` (RunRegistry, run-stream -// internals, StudioContext) is NOT here — desktop decopilot runs via -// `decopilotDesktopHarnessFactory`, which activates from `modelSources.thinking` -// and reaches cluster-coupled tools through `mcp.url`. +// The CLI factories live in the daemon. The cluster `decopilotHarnessFactory` +// (RunRegistry, run-stream internals, StudioContext) is NOT here. // Register the daemon's harness factories into the shared @decocms/harness // registry (the same registry the cluster barrel uses, but a separate // module-singleton in the daemon process). Keys are the factory ids -// (claude-code / codex / decopilot — the desktop factory shares the decopilot -// id). Lookup goes through `getHarnessFactory` so the daemon and the cluster -// share one registry abstraction. +// (claude-code / codex). Lookup goes through `getHarnessFactory` so the daemon +// and the cluster share one registry abstraction. registerHarnessFactory(claudeCodeHarnessFactory); registerHarnessFactory(codexHarnessFactory); -registerHarnessFactory(decopilotDesktopHarnessFactory); -// The desktop runtime (decopilot) builds its VM fs hooks via the registered -// sandbox-fs builder — kept in @decocms/sandbox so @decocms/harness stays -// sandbox-free. Register it before any decopilot dispatch. -registerDesktopSandboxFsBuilder(buildDesktopSandboxFs); const dispatchTracer = trace.getTracer("link-daemon"); const dispatchMeter = metrics.getMeter("link-daemon"); const lookupDispatchHarness = (id: string, input: unknown) => { diff --git a/packages/sandbox/daemon/rebase-workspace-cwd.test.ts b/packages/sandbox/daemon/rebase-workspace-cwd.test.ts index cfb8e8979a..6b4aae7b30 100644 --- a/packages/sandbox/daemon/rebase-workspace-cwd.test.ts +++ b/packages/sandbox/daemon/rebase-workspace-cwd.test.ts @@ -4,18 +4,13 @@ import { rebaseWorkspaceCwd } from "./rebase-workspace-cwd"; describe("rebaseWorkspaceCwd", () => { const appRoot = "/data/link/sandboxes/h1"; - it("passes the default sentinel through untouched", () => { - expect(rebaseWorkspaceCwd("default", appRoot)).toBe("default"); + it("passes null cwd through untouched", () => { + expect(rebaseWorkspaceCwd(null, appRoot)).toBeNull(); }); - it("rebases /repo onto the sandbox root", () => { + it("rebases only /repo onto the sandbox root", () => { expect(rebaseWorkspaceCwd("/repo", appRoot)).toBe( "/data/link/sandboxes/h1/repo", ); }); - - it("contains escape attempts — falls back to default, never fails", () => { - expect(rebaseWorkspaceCwd("/../../etc", appRoot)).toBe("default"); - expect(rebaseWorkspaceCwd("/repo/../../..", appRoot)).toBe("default"); - }); }); diff --git a/packages/sandbox/daemon/rebase-workspace-cwd.ts b/packages/sandbox/daemon/rebase-workspace-cwd.ts index 914986f442..44abebddde 100644 --- a/packages/sandbox/daemon/rebase-workspace-cwd.ts +++ b/packages/sandbox/daemon/rebase-workspace-cwd.ts @@ -2,23 +2,21 @@ * Rebase the wire's SYMBOLIC workspace.cwd onto this daemon's sandbox root * (spec decision Q4: containment by construction — the cluster never dictates * a host-absolute path on a user machine). - * - * The sentinel value "default" is the same as `WORKSPACE_CWD_DEFAULT` in - * `apps/mesh/src/harnesses/workspace-cwd.ts`. It is re-declared here rather - * than imported across the packages→apps boundary — same convention as the - * rest of the daemon graph. */ import { resolve, sep } from "node:path"; -const WORKSPACE_CWD_DEFAULT = "default"; +export type WireWorkspaceCwd = "/repo" | null; -export function rebaseWorkspaceCwd(cwd: string, appRoot: string): string { - if (cwd === WORKSPACE_CWD_DEFAULT) return cwd; +export function rebaseWorkspaceCwd( + cwd: WireWorkspaceCwd, + appRoot: string, +): string | null { + if (cwd === null) return null; + if (cwd !== "/repo") return null; const root = resolve(appRoot); const rebased = resolve(root, "." + sep + cwd.replace(/^[/\\]+/, "")); if (rebased !== root && !rebased.startsWith(root + sep)) { - // Escape attempt or malformed value — fall back, never fail (Q5). - return WORKSPACE_CWD_DEFAULT; + return null; } return rebased; } diff --git a/packages/sandbox/daemon/routes/dispatch.test.ts b/packages/sandbox/daemon/routes/dispatch.test.ts index 1849ed154a..eac021d9c7 100644 --- a/packages/sandbox/daemon/routes/dispatch.test.ts +++ b/packages/sandbox/daemon/routes/dispatch.test.ts @@ -56,8 +56,12 @@ async function readSSE(res: Response): Promise { describe("POST /_sandbox/dispatch", () => { it("emits the harness's UIMessageChunks as SSE", async () => { const body = JSON.stringify({ + runId: "run-dispatch-1", harnessId: "fake", - input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId: "run-dispatch-1" }, + input: { + ...fixtures.FIXTURE_MINIMAL_INPUT, + threadId: "thread-dispatch-1", + }, }); const res = await handleDispatchRequest(authedDispatch(body), makeDeps()); expect(res.status).toBe(200); @@ -85,8 +89,9 @@ describe("POST /_sandbox/dispatch", () => { }), }); const body = JSON.stringify({ + runId: "run-prelude", harnessId: "fake", - input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId: "run-prelude" }, + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, threadId: "thread-prelude" }, }); const res = await handleDispatchRequest(authedDispatch(body), deps); @@ -122,8 +127,9 @@ describe("POST /_sandbox/dispatch", () => { it("rejects a bearer token that does not match", async () => { const body = JSON.stringify({ + runId: "run-token-2", harnessId: "fake", - input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId: "run-token-2" }, + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, threadId: "thread-token-2" }, }); const res = await handleDispatchRequest( authedDispatch(body, "wrong-token"), @@ -133,7 +139,20 @@ describe("POST /_sandbox/dispatch", () => { }); it("returns 400 on invalid input shape", async () => { - const body = JSON.stringify({ harnessId: "fake", input: { bogus: true } }); + const body = JSON.stringify({ + runId: "run-invalid-input", + harnessId: "fake", + input: { bogus: true }, + }); + const res = await handleDispatchRequest(authedDispatch(body), makeDeps()); + expect(res.status).toBe(400); + }); + + it("returns 400 when the dispatch envelope has no runId", async () => { + const body = JSON.stringify({ + harnessId: "fake", + input: fixtures.FIXTURE_MINIMAL_INPUT, + }); const res = await handleDispatchRequest(authedDispatch(body), makeDeps()); expect(res.status).toBe(400); }); @@ -148,8 +167,12 @@ describe("POST /_sandbox/dispatch", () => { // Subsequent dispatch with the same runId should be rejected. const body = JSON.stringify({ + runId, harnessId: "fake", - input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId }, + input: { + ...fixtures.FIXTURE_MINIMAL_INPUT, + threadId: "thread-tombstone-1", + }, }); const res = await handleDispatchRequest(authedDispatch(body), makeDeps()); expect(res.status).toBe(410); @@ -196,10 +219,11 @@ describe("POST /_sandbox/dispatch", () => { }); const body = JSON.stringify({ + runId: "run-cancel-midstream", harnessId: "fake", input: { ...fixtures.FIXTURE_MINIMAL_INPUT, - runId: "run-cancel-midstream", + threadId: "thread-cancel-midstream", }, }); const res = await handleDispatchRequest(authedDispatch(body), deps); @@ -251,8 +275,12 @@ describe("POST /_sandbox/dispatch", () => { }); const body = JSON.stringify({ + runId, harnessId: "fake", - input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId }, + input: { + ...fixtures.FIXTURE_MINIMAL_INPUT, + threadId: "thread-signal-inject", + }, }); const res = await handleDispatchRequest(authedDispatch(body), deps); expect(res.status).toBe(200); @@ -273,37 +301,35 @@ describe("POST /_sandbox/dispatch", () => { await reader.cancel(); }); - it("rebases symbolic workspace.cwd and codingWorkspace.cwd onto the daemon's sandbox root before the harness sees it", async () => { + it("rebases symbolic workspace.cwd onto the daemon's sandbox root before the harness sees it", async () => { // The wire carries the symbolic value "/repo"; the daemon must rebase it // onto its own sandbox root (daemonAppRoot()) before handing the input to // the harness. The harness MUST receive the rebased absolute path, not the // wire symbol — so `effectiveCwd(input.workspace.cwd)` yields a real path. const runId = "run-cwd-rebase"; - let capturedInput: - | { workspace?: { cwd: string }; codingWorkspace?: { cwd?: string } } - | undefined; + let capturedInput: { workspace?: { cwd: string | null } } | undefined; const deps = makeDeps({ lookupHarness: (_id, input) => { - capturedInput = input as { - workspace?: { cwd: string }; - codingWorkspace?: { cwd?: string }; - }; + capturedInput = input as { workspace?: { cwd: string | null } }; return makeFakeHarness(); }, }); const body = JSON.stringify({ + runId, harnessId: "fake", input: { ...fixtures.FIXTURE_MINIMAL_INPUT, - runId, - workspace: { cwd: "/repo" }, - codingWorkspace: { - repo: { owner: "deco", name: "site", connectedGithub: true }, - branch: "main", + threadId: "thread-cwd-rebase", + workspace: { cwd: "/repo", - workspaceKind: "github", + repo: { + owner: "deco", + name: "studio", + connectedGithub: true, + }, + branch: "main", }, }, }); @@ -319,14 +345,41 @@ describe("POST /_sandbox/dispatch", () => { expect(rebasedCwd).not.toBe("/repo"); // Must end with /repo (rebased to the daemon's sandbox root) expect(rebasedCwd?.endsWith("/repo")).toBe(true); - expect(capturedInput?.codingWorkspace?.cwd).toBe(rebasedCwd); + }); + + it("passes null workspace.cwd through to the harness", async () => { + const runId = "run-cwd-null"; + let capturedInput: { workspace?: { cwd: string | null } } | undefined; + + const deps = makeDeps({ + lookupHarness: (_id, input) => { + capturedInput = input as { workspace?: { cwd: string | null } }; + return makeFakeHarness(); + }, + }); + + const body = JSON.stringify({ + runId, + harnessId: "fake", + input: { + ...fixtures.FIXTURE_MINIMAL_INPUT, + threadId: "thread-cwd-null", + workspace: { cwd: null }, + }, + }); + const res = await handleDispatchRequest(authedDispatch(body), deps); + expect(res.status).toBe(200); + await readSSE(res); + + expect(capturedInput?.workspace?.cwd).toBeNull(); }); it("wraps harness errors as an error SSE event followed by done", async () => { const harnessId = "throws"; const body = JSON.stringify({ + runId: "run-error-1", harnessId, - input: { ...fixtures.FIXTURE_MINIMAL_INPUT, runId: "run-error-1" }, + input: { ...fixtures.FIXTURE_MINIMAL_INPUT, threadId: "thread-error-1" }, }); const deps = makeDeps({ lookupHarness: () => ({ diff --git a/packages/sandbox/daemon/routes/dispatch.ts b/packages/sandbox/daemon/routes/dispatch.ts index eee2ce8064..c44207dbb3 100644 --- a/packages/sandbox/daemon/routes/dispatch.ts +++ b/packages/sandbox/daemon/routes/dispatch.ts @@ -30,6 +30,7 @@ import { dispatchSSEEventSchema, harnessStreamInputSchema, type DispatchSSEEvent, + type HarnessStreamInputWire, } from "../../dispatch/index"; import type { LinkErrorCode } from "../../dispatch/error-codes"; import { requireToken } from "../auth"; @@ -67,6 +68,25 @@ export interface CancelDeps { daemonToken: string; } +type RebasedHarnessInput = Omit & { + workspace: Omit & { + cwd: string | null; + }; + signal?: AbortSignal; +}; + +function rebaseHarnessInput( + input: HarnessStreamInputWire, +): RebasedHarnessInput { + return { + ...input, + workspace: { + ...input.workspace, + cwd: rebaseWorkspaceCwd(input.workspace.cwd, daemonAppRoot()), + }, + }; +} + const TOMBSTONE_MS = 60_000; /** AbortController per active dispatch, keyed by runId. The cancel route @@ -111,17 +131,6 @@ function writeDispatchAcceptedPrelude( } } -function rebaseInputWorkspace(input: { - workspace: { cwd: string }; - codingWorkspace?: { cwd?: string | null }; -}): void { - const cwd = rebaseWorkspaceCwd(input.workspace.cwd, daemonAppRoot()); - input.workspace = { cwd }; - if (input.codingWorkspace?.cwd != null) { - input.codingWorkspace = { ...input.codingWorkspace, cwd }; - } -} - /** Drive a harness's stream into an SSE controller. Reused verbatim by both * paths so chunk relaying / abort handling / error wrapping / `done` framing * stay identical. Always emits `done` and closes the controller; never hangs. @@ -180,9 +189,13 @@ export async function handleDispatchRequest( const body = await req.text(); - let parsed: { harnessId: unknown; input: unknown }; + let parsed: { runId?: unknown; harnessId?: unknown; input?: unknown }; try { - parsed = JSON.parse(body) as { harnessId: unknown; input: unknown }; + parsed = JSON.parse(body) as { + runId?: unknown; + harnessId?: unknown; + input?: unknown; + }; } catch { return new Response(JSON.stringify({ error: "bad_json" }), { status: 400, @@ -195,7 +208,14 @@ export async function handleDispatchRequest( headers: { "content-type": "application/json" }, }); } + if (typeof parsed.runId !== "string") { + return new Response(JSON.stringify({ error: "missing_run_id" }), { + status: 400, + headers: { "content-type": "application/json" }, + }); + } const harnessId = parsed.harnessId; + const runId = parsed.runId; const encoder = new TextEncoder(); @@ -278,23 +298,22 @@ export async function handleDispatchRequest( } const input = inputParse.data; - // Rebase the symbolic workspace cwd fields onto this daemon's sandbox - // root (spec: "Harness Input Contract" Q4 — containment by - // construction). - rebaseInputWorkspace(input); + // Rebase the symbolic workspace.cwd onto this daemon's sandbox root + // (spec: "Harness Input Contract" Q4 — containment by construction). + const rebasedInput = rebaseHarnessInput(input); // 3. Tombstone check — a cancel may have landed first. Surface it as a // terminal error rather than starting a doomed harness. - const tombstoneExpiry = tombstones.get(input.runId); + const tombstoneExpiry = tombstones.get(runId); if (tombstoneExpiry && tombstoneExpiry > Date.now()) { const code: LinkErrorCode = "tombstoned"; - fail(code, `runId ${input.runId} was cancelled`); + fail(code, `runId ${runId} was cancelled`); return; } else if (tombstoneExpiry) { - tombstones.delete(input.runId); + tombstones.delete(runId); } - activeRuns.set(input.runId, ctrl); + activeRuns.set(runId, ctrl); // Re-inject the per-run AbortSignal. `HarnessStreamInput.signal` is an // AbortSignal and therefore NOT JSON-serializable, so it never survives @@ -304,20 +323,20 @@ export async function handleDispatchRequest( // `genTitle({ abortSignal })` receive a real signal instead of // `undefined` (which crashes genTitle's `addEventListener`) — and so a // DELETE /_sandbox/runs/:id actually aborts the in-flight model call. - (input as { signal?: AbortSignal }).signal = ctrl.signal; + (rebasedInput as { signal?: AbortSignal }).signal = ctrl.signal; console.log( - `[dispatch] received (offload) harness=${harnessId} runId=${input.runId} threadId=${input.threadId} bytes=${messagesRef.bytes}`, + `[dispatch] received (offload) harness=${harnessId} runId=${runId} threadId=${rebasedInput.threadId} bytes=${messagesRef.bytes}`, ); // 4. Look up + run. let harness: DispatchHarness; try { - harness = deps.lookupHarness(harnessId, input); + harness = deps.lookupHarness(harnessId, rebasedInput); } catch (err) { - activeRuns.delete(input.runId); + activeRuns.delete(runId); console.error( - `[dispatch] lookupHarness failed harness=${harnessId} runId=${input.runId}:`, + `[dispatch] lookupHarness failed harness=${harnessId} runId=${runId}:`, err, ); const code: LinkErrorCode = "unknown_harness"; @@ -331,7 +350,7 @@ export async function handleDispatchRequest( harness, ctrl, harnessId, - input.runId, + runId, streamState, ); }, @@ -363,12 +382,12 @@ export async function handleDispatchRequest( // Rebase the symbolic workspace cwd fields onto this daemon's sandbox root // (spec: "Harness Input Contract" Q4 — containment by construction). - rebaseInputWorkspace(input); + const rebasedInput = rebaseHarnessInput(input); // Tombstone check — a cancel landed before this dispatch did. Decline // and let the cluster surface a clear cancellation instead of starting // a CLI process that will be immediately torn down. - const tombstoneExpiry = tombstones.get(input.runId); + const tombstoneExpiry = tombstones.get(runId); if (tombstoneExpiry && tombstoneExpiry > Date.now()) { return new Response(JSON.stringify({ error: "tombstoned" }), { status: 410, @@ -376,28 +395,28 @@ export async function handleDispatchRequest( }); } else if (tombstoneExpiry) { // Expired entry — clean up opportunistically. - tombstones.delete(input.runId); + tombstones.delete(runId); } const ctrl = new AbortController(); - activeRuns.set(input.runId, ctrl); + activeRuns.set(runId, ctrl); // Re-inject the per-run AbortSignal — see the offload path above for the full // rationale. The wire input never carries `signal` (not serializable), so the // harness would otherwise see `input.signal === undefined`. - (input as { signal?: AbortSignal }).signal = ctrl.signal; + (rebasedInput as { signal?: AbortSignal }).signal = ctrl.signal; console.log( - `[dispatch] received harness=${harnessId} runId=${input.runId} threadId=${input.threadId}`, + `[dispatch] received harness=${harnessId} runId=${runId} threadId=${rebasedInput.threadId}`, ); let harness: DispatchHarness; try { - harness = deps.lookupHarness(harnessId, input); + harness = deps.lookupHarness(harnessId, rebasedInput); } catch (err) { - activeRuns.delete(input.runId); + activeRuns.delete(runId); console.error( - `[dispatch] lookupHarness failed harness=${harnessId} runId=${input.runId}:`, + `[dispatch] lookupHarness failed harness=${harnessId} runId=${runId}:`, err, ); return new Response( @@ -438,7 +457,7 @@ export async function handleDispatchRequest( harness, ctrl, harnessId, - input.runId, + runId, streamState, ); }, diff --git a/packages/sandbox/dispatch/desktop-local-tools.test.ts b/packages/sandbox/dispatch/desktop-local-tools.test.ts deleted file mode 100644 index 7432097b33..0000000000 --- a/packages/sandbox/dispatch/desktop-local-tools.test.ts +++ /dev/null @@ -1,190 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import type { SandboxProvider } from "../server/provider"; -import { buildLocalTools } from "@decocms/harness/decopilot/desktop-local-tools"; -import { - buildDesktopSandboxFs, - createDesktopLocalSandboxProvider, -} from "./desktop-sandbox-fs"; - -const writer = { - write: () => {}, - merge: async () => {}, - onError: () => {}, -} as never; - -const passthroughClient = { - readResource: async () => ({ contents: [] }), - getPrompt: async () => ({ messages: [] }), - listTools: async () => ({ tools: [] }), - callTool: async () => ({ content: [] }), - listResources: async () => ({ resources: [] }), - listPrompts: async () => ({ prompts: [] }), -} as never; - -function fakeRunner( - calls: Array<{ - kind: "ensure" | "proxy"; - handle?: string; - path?: string; - body?: string | null; - }>, -) { - return { - kind: "user-desktop", - ensure: async () => { - calls.push({ kind: "ensure" }); - return { handle: "ensured-local", workdir: "/", previewUrl: null }; - }, - delete: async () => {}, - alive: async () => true, - getPreviewUrl: async () => null, - watchClaimLifecycle: async function* () { - yield { kind: "ready" as const }; - }, - proxyDaemonRequest: async (handle, path, init) => { - calls.push({ - kind: "proxy", - handle, - path, - body: typeof init.body === "string" ? init.body : null, - }); - return Response.json({ - kind: "text", - content: "1 hello\n", - lineCount: 1, - }); - }, - } satisfies SandboxProvider; -} - -describe("buildLocalTools", () => { - it("uses shared VM tools for desktop read/write/edit/grep/glob/bash", async () => { - const calls: Array<{ - kind: "ensure" | "proxy"; - handle?: string; - path?: string; - body?: string | null; - }> = []; - const tools = buildLocalTools({ - writer, - toolOutputMap: new Map(), - passthroughClient, - toolApprovalLevel: "auto", - isPlanMode: false, - ctx: { - objectStorage: null, - organization: { id: "org-1" }, - auth: { user: { id: "user-1" } }, - }, - pendingImages: [], - threadId: "thread-1", - virtualMcpId: "agent-1", - // fs is now injected (built by the desktop glue) — drive it through the - // fake runner so the ensure→proxy sequence assertions below still hold. - fs: buildDesktopSandboxFs({ - runner: fakeRunner(calls), - virtualMcpId: "agent-1", - userId: "user-1", - }), - }); - - expect(Object.keys(tools)).toContain("read"); - expect(Object.keys(tools)).toContain("write"); - expect(Object.keys(tools)).toContain("edit"); - expect(Object.keys(tools)).toContain("grep"); - expect(Object.keys(tools)).toContain("glob"); - expect(Object.keys(tools)).toContain("bash"); - - const read = tools.read as unknown as { - execute: (input: { path: string }) => Promise; - }; - const result = await read.execute({ path: "README.md" }); - - expect(calls).toEqual([ - { - kind: "ensure", - }, - { - kind: "proxy", - handle: "ensured-local", - path: "/_sandbox/read", - body: JSON.stringify({ path: "README.md" }), - }, - ]); - expect(result).toEqual({ - kind: "text", - content: "1 hello\n", - lineCount: 1, - }); - }); -}); - -describe("createDesktopLocalSandboxProvider", () => { - it("provisions through the local control URL when configured", async () => { - const originalControlUrl = process.env.DESKTOP_SANDBOX_CONTROL_URL; - const originalHandle = process.env.SANDBOX_HANDLE; - const originalFetch = globalThis.fetch; - const calls: Array<{ url: string; method: string; body?: string | null }> = - []; - - process.env.DESKTOP_SANDBOX_CONTROL_URL = "http://127.0.0.1:7777"; - process.env.SANDBOX_HANDLE = "handle-1"; - globalThis.fetch = (async ( - input: RequestInfo | URL, - init?: RequestInit, - ) => { - calls.push({ - url: String(input), - method: init?.method ?? "GET", - body: typeof init?.body === "string" ? init.body : null, - }); - if (String(input).endsWith("/api/sandboxes")) { - return Response.json({ - sandboxApiUrl: "http://127.0.0.1:9999", - previewUrl: "http://handle-1.localhost:7070", - }); - } - return Response.json({ ok: true }); - }) as typeof fetch; - - try { - const provider = createDesktopLocalSandboxProvider(); - const sandbox = await provider.ensure({ - userId: "user-1", - projectRef: "agent-1", - }); - await provider.proxyDaemonRequest("handle-1", "/_sandbox/read", { - method: "POST", - headers: new Headers({ "content-type": "application/json" }), - body: JSON.stringify({ path: "README.md" }), - }); - - expect(sandbox.handle).toBe("handle-1"); - expect(sandbox.workdir).toBe("http://127.0.0.1:9999"); - expect(calls).toEqual([ - { - url: "http://127.0.0.1:7777/api/sandboxes", - method: "POST", - body: JSON.stringify({ handle: "handle-1" }), - }, - { - url: "http://127.0.0.1:7777/_sandbox/handle-1/read", - method: "POST", - body: JSON.stringify({ path: "README.md" }), - }, - ]); - } finally { - globalThis.fetch = originalFetch; - if (originalControlUrl === undefined) { - delete process.env.DESKTOP_SANDBOX_CONTROL_URL; - } else { - process.env.DESKTOP_SANDBOX_CONTROL_URL = originalControlUrl; - } - if (originalHandle === undefined) { - delete process.env.SANDBOX_HANDLE; - } else { - process.env.SANDBOX_HANDLE = originalHandle; - } - } - }); -}); diff --git a/packages/sandbox/dispatch/desktop-sandbox-fs.ts b/packages/sandbox/dispatch/desktop-sandbox-fs.ts deleted file mode 100644 index 76d5cb07ee..0000000000 --- a/packages/sandbox/dispatch/desktop-sandbox-fs.ts +++ /dev/null @@ -1,133 +0,0 @@ -/** - * DESKTOP sandbox-fs glue (option-b sandbox decoupling). - * - * Isolates the `@decocms/sandbox` imports (the `SandboxProvider` + the - * `createSandboxFsHooks` builder) that the portable desktop tool assembler - * (`desktop-local-tools.ts`) must NOT carry. Owns the local control-URL - * `SandboxProvider` and the desktop fs-hook lifecycle (ensure / invalidate), and - * returns the flat `SandboxFsHooks` the harness VM tools consume. - * - * ASSEMBLER-GLUE: stays `@decocms/sandbox`-coupled; slated to relocate into the - * daemon assembler (`createDesktopContext`) in the package-move phase (spec - * Phase 5). - */ - -import { createSandboxFsHooks, type SandboxProvider } from "../server/provider"; -import type { SandboxFsHooks } from "@decocms/harness/decopilot/built-in-tools/vm-tools/sandbox-fs-hooks-types"; - -export function createDesktopLocalSandboxProvider(): SandboxProvider { - const port = Number( - process.env.DAEMON_PORT ?? process.env.PROXY_PORT ?? 9000, - ); - const token = process.env.DAEMON_TOKEN ?? ""; - const controlUrl = - process.env.DESKTOP_SANDBOX_CONTROL_URL ?? - process.env.SANDBOX_CONTROL_URL ?? - ""; - let sandboxApiUrl = `http://127.0.0.1:${port}`; - const defaultHandle = process.env.SANDBOX_HANDLE ?? "local"; - - return { - kind: "user-desktop", - ensure: async () => { - if (!controlUrl) { - return { - handle: defaultHandle, - workdir: process.cwd(), - previewUrl: null, - }; - } - const res = await fetch(`${controlUrl}/api/sandboxes`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ handle: defaultHandle }), - }); - if (!res.ok) { - throw new Error(`local sandbox ensure failed (${res.status})`); - } - const body = (await res.json()) as { - sandboxApiUrl?: unknown; - previewUrl?: unknown; - }; - if (typeof body.sandboxApiUrl !== "string") { - throw new Error("local sandbox ensure did not return sandboxApiUrl"); - } - sandboxApiUrl = body.sandboxApiUrl; - return { - handle: defaultHandle, - workdir: sandboxApiUrl, - previewUrl: - typeof body.previewUrl === "string" ? body.previewUrl : sandboxApiUrl, - }; - }, - delete: async (handle) => { - if (!controlUrl) return; - await fetch(`${controlUrl}/api/sandboxes/${encodeURIComponent(handle)}`, { - method: "DELETE", - }).catch(() => {}); - }, - alive: async () => true, - getPreviewUrl: async () => null, - watchClaimLifecycle: async function* () { - yield { kind: "ready" as const }; - }, - proxyDaemonRequest: async (handle, path, init) => { - const headers = new Headers(init.headers); - if (token) headers.set("authorization", `Bearer ${token}`); - const target = controlUrl - ? `${controlUrl}/_sandbox/${encodeURIComponent(handle)}${path.startsWith("/_sandbox/") ? path.slice("/_sandbox".length) : path}` - : `${sandboxApiUrl}${path}`; - return fetch(target, { - method: init.method, - headers, - body: init.body, - signal: init.signal, - }); - }, - }; -} - -/** - * Build the desktop flat fs hooks. `runner` defaults to the local control-URL - * provider; tests inject a fake. The lazy `ensureHandle` memoises the first - * `ensure` so later ops reuse the handle; `invalidateHandle` reaps it on - * sandbox death. `userId` falls back to `"desktop"` (preserving the prior - * inline `ctx.auth?.user?.id ?? "desktop"` behavior). - */ -export function buildDesktopSandboxFs(params: { - runner?: SandboxProvider; - virtualMcpId: string; - branch?: string | null; - userId?: string; -}): SandboxFsHooks { - const runner = params.runner ?? createDesktopLocalSandboxProvider(); - let cachedHandle: Promise | null = null; - const ensureHandle = () => { - if (!cachedHandle) { - cachedHandle = runner - .ensure( - { - userId: params.userId ?? "desktop", - projectRef: params.virtualMcpId, - }, - params.branch ? { branch: params.branch } : undefined, - ) - .then((sandbox) => sandbox.handle); - cachedHandle.catch(() => { - cachedHandle = null; - }); - } - return cachedHandle; - }; - return createSandboxFsHooks(runner, { - ensureHandle, - invalidateHandle: async () => { - const handlePromise = cachedHandle; - cachedHandle = null; - if (!handlePromise) return; - const handle = await handlePromise.catch(() => null); - if (handle) await runner.delete(handle); - }, - canAutoRestart: false, - }); -} diff --git a/packages/sandbox/dispatch/fixtures.ts b/packages/sandbox/dispatch/fixtures.ts index 566bbd2694..78669d68a0 100644 --- a/packages/sandbox/dispatch/fixtures.ts +++ b/packages/sandbox/dispatch/fixtures.ts @@ -8,10 +8,13 @@ import type { HarnessStreamInputWire } from "./schemas"; export const FIXTURE_MINIMAL_INPUT: HarnessStreamInputWire = { threadId: "thr-fixture", - runId: "run-fixture", - taskId: "thr-fixture", - messages: [], - workspace: { cwd: "default" }, + userMessage: { + id: "msg-fixture", + role: "user", + parts: [{ type: "text", text: "hello" }], + }, + harness: {}, + workspace: { cwd: null }, models: { thinking: { id: "claude-code:opus", @@ -29,6 +32,5 @@ export const FIXTURE_MINIMAL_INPUT: HarnessStreamInputWire = { toolApprovalLevel: "auto", user: { id: "user-fixture", email: "fixture@example.com" }, organizationId: "org-fixture", - virtualMcp: { id: "agent-fixture" }, agent: { id: "agent-fixture" }, }; diff --git a/packages/sandbox/dispatch/schemas.test.ts b/packages/sandbox/dispatch/schemas.test.ts index 4dbc0f8b64..784af5db22 100644 --- a/packages/sandbox/dispatch/schemas.test.ts +++ b/packages/sandbox/dispatch/schemas.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "bun:test"; +import { describe, expect, it, test } from "bun:test"; import { capabilitySchema, capabilitiesArraySchema, @@ -6,6 +6,7 @@ import { harnessStreamInputSchema, type HarnessStreamInputWire, } from "./schemas"; +import { FIXTURE_MINIMAL_INPUT } from "./fixtures"; describe("dispatchSSEEventSchema", () => { it("accepts ui-message-chunk", () => { @@ -31,35 +32,89 @@ describe("dispatchSSEEventSchema", () => { }); }); -describe("harnessStreamInputSchema (v2)", () => { - const minimalV2: HarnessStreamInputWire = { - threadId: "t1", - runId: "t1", - taskId: "t1", - messages: [], - workspace: { cwd: "/repo" }, - models: { thinking: { id: "m1", title: "M1", credentialId: "cred1" } }, - mcp: { - url: "https://x.test/mcp", - headers: {}, - expiresAt: 1, - }, - mode: "default", - temperature: 1, - toolApprovalLevel: "auto", - user: { id: "u1", email: "u@x.test" }, - organizationId: "o1", - virtualMcp: { id: "vm1" }, - agent: { id: "vm1" }, - }; - - it("accepts a minimal v2 input", () => { - const result = harnessStreamInputSchema.safeParse(minimalV2); +describe("harnessStreamInputSchema (v3)", () => { + test("accepts v3 single-message harness input", () => { + const result = harnessStreamInputSchema.safeParse({ + harnessId: "claude-code", + threadId: "thread-1", + userMessage: { + id: "msg-1", + role: "user", + parts: [{ type: "text", text: "diagnose" }], + }, + harness: { sessionId: "cli-session-1" }, + workspace: { + cwd: "/repo", + repo: { owner: "deco", name: "site", connectedGithub: true }, + branch: "main", + }, + models: { + thinking: { + id: "claude-code:opus", + title: "Opus", + credentialId: "cred-1", + }, + }, + mcp: { + url: "https://mesh.example.com/mcp/virtual-mcp/agent-1", + headers: { Authorization: "Bearer token" }, + expiresAt: 9999999999000, + }, + mode: "default", + temperature: 0.7, + toolApprovalLevel: "auto", + user: { id: "user-1", email: "u@example.com" }, + organizationId: "org-1", + organizationSlug: "acme", + agent: { id: "agent-1", instructions: "Help carefully." }, + }); + + expect(result.success).toBe(true); + }); + + test.each([ + ["runId", "run-1"], + ["taskId", "task-1"], + ["resumeSessionRef", "old-session"], + ["messages", []], + [ + "codingWorkspace", + { cwd: "/repo", branch: "main", workspaceKind: "github" }, + ], + ["projectSlug", "legacy"], + ["virtualMcp", { id: "agent-1" }], + ] as const)("rejects removed shared harness field %s", (field, value) => { + const result = harnessStreamInputSchema.safeParse({ + ...FIXTURE_MINIMAL_INPUT, + [field]: value, + }); + + expect(result.success).toBe(false); + }); + + test.each([ + ["user", { id: "user-fixture", email: "fixture@example.com", admin: true }], + ["agent", { id: "agent-fixture", metadata: {} }], + ] as const)("rejects unknown nested keys on %s", (field, value) => { + const result = harnessStreamInputSchema.safeParse({ + ...FIXTURE_MINIMAL_INPUT, + [field]: value, + }); + + expect(result.success).toBe(false); + }); + + const minimalV3: HarnessStreamInputWire = FIXTURE_MINIMAL_INPUT; + + it("accepts a minimal v3 input", () => { + const result = harnessStreamInputSchema.safeParse(minimalV3); expect(result.success).toBe(true); if (result.success) { - expect(result.data.threadId).toBe("t1"); - expect(result.data.workspace).toEqual({ cwd: "/repo" }); - expect(result.data.models.thinking.credentialId).toBe("cred1"); + expect(result.data.threadId).toBe("thr-fixture"); + expect(result.data.userMessage.id).toBe("msg-fixture"); + expect(result.data.harness).toEqual({}); + expect(result.data.workspace).toEqual({ cwd: null }); + expect(result.data.models.thinking.credentialId).toBe("cred-fixture"); } }); @@ -100,7 +155,7 @@ describe("harnessStreamInputSchema (v2)", () => { it("round-trips all five model slots with per-slot credentialId and harnessId", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, harnessId: "decopilot", models: { thinking: { @@ -178,7 +233,7 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects a slot without credentialId", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, models: { thinking: { id: "m1", title: "M1" } }, }); @@ -187,9 +242,9 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects a title slot inside models (strict object)", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, models: { - ...minimalV2.models, + ...minimalV3.models, title: { id: "gpt-4.1-mini", title: "Mini", credentialId: "cred1" }, }, }); @@ -199,9 +254,9 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects a coding slot inside models (strict object)", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, models: { - ...minimalV2.models, + ...minimalV3.models, coding: { id: "gpt-5-codex", title: "Codex", credentialId: "cred1" }, }, }); @@ -209,52 +264,62 @@ describe("harnessStreamInputSchema (v2)", () => { expect(result.success).toBe(false); }); - it("requires workspace with a non-empty cwd", () => { - const { workspace: _workspace, ...withoutWorkspace } = minimalV2; + it("requires workspace as null cwd or repo facts", () => { + const { workspace: _workspace, ...withoutWorkspace } = minimalV3; expect(harnessStreamInputSchema.safeParse(withoutWorkspace).success).toBe( false, ); expect( harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, workspace: { cwd: "" }, }).success, ).toBe(false); + expect( + harnessStreamInputSchema.safeParse({ + ...minimalV3, + workspace: { cwd: "/tmp" }, + }).success, + ).toBe(false); + expect( + harnessStreamInputSchema.safeParse({ + ...minimalV3, + workspace: { cwd: "/repo", branch: "main" }, + }).success, + ).toBe(false); }); - it("round-trips coding workspace facts for desktop harnesses", () => { + it("round-trips repo workspace facts", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, - codingWorkspace: { + ...minimalV3, + workspace: { + cwd: "/repo", repo: { owner: "deco", name: "site", connectedGithub: false, }, branch: "main", - cwd: "/repo", - workspaceKind: "github", }, }); expect(result.success).toBe(true); if (result.success) { - expect(result.data.codingWorkspace).toEqual({ + expect(result.data.workspace).toEqual({ + cwd: "/repo", repo: { owner: "deco", name: "site", connectedGithub: false, }, branch: "main", - cwd: "/repo", - workspaceKind: "github", }); } }); it("rejects unknown harness ids", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, harnessId: "made-up", }); @@ -263,7 +328,7 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects unknown harness modes", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, mode: "made-up", }); @@ -272,16 +337,16 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects unknown tool approval levels", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, toolApprovalLevel: "danger", }); expect(result.success).toBe(false); }); - it("strips signal and the removed singular modelSource", () => { + it("rejects signal and the removed singular modelSource", () => { const withExtras = { - ...minimalV2, + ...minimalV3, signal: { aborted: false }, modelSource: { kind: "secret", @@ -291,16 +356,12 @@ describe("harnessStreamInputSchema (v2)", () => { }, }; const result = harnessStreamInputSchema.safeParse(withExtras); - expect(result.success).toBe(true); - if (result.success) { - expect("signal" in result.data).toBe(false); - expect("modelSource" in result.data).toBe(false); - } + expect(result.success).toBe(false); }); it("rejects in-process MCP sources at the wire boundary", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, mcpSource: { kind: "in-process", client: {}, @@ -310,9 +371,9 @@ describe("harnessStreamInputSchema (v2)", () => { expect(result.success).toBe(false); }); - it("round-trips an HTTP MCP source", () => { + it("rejects an HTTP MCP source outside the v3 contract", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, mcpSource: { kind: "http", url: "https://mesh.example.com/mcp/virtual-mcp/agent-1", @@ -321,20 +382,12 @@ describe("harnessStreamInputSchema (v2)", () => { }, }); - expect(result.success).toBe(true); - if (result.success) { - expect(result.data.mcpSource).toEqual({ - kind: "http", - url: "https://mesh.example.com/mcp/virtual-mcp/agent-1", - headers: { Authorization: "Bearer fixture" }, - expiresAt: 9999999999000, - }); - } + expect(result.success).toBe(false); }); - it("round-trips an HTTP object-storage source", () => { + it("rejects an HTTP object-storage source outside the v3 contract", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, objectStorageSource: { kind: "http", baseUrl: "https://mesh.example.com/api/acme/object-storage", @@ -343,20 +396,12 @@ describe("harnessStreamInputSchema (v2)", () => { }, }); - expect(result.success).toBe(true); - if (result.success) { - expect(result.data.objectStorageSource).toEqual({ - kind: "http", - baseUrl: "https://mesh.example.com/api/acme/object-storage", - headers: { Authorization: "Bearer fixture" }, - expiresAt: 9999999999000, - }); - } + expect(result.success).toBe(false); }); - it("round-trips slot-keyed resolved Decopilot model sources", () => { + it("rejects slot-keyed resolved Decopilot model sources outside the v3 contract", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, modelSources: { thinking: { kind: "secret", @@ -392,21 +437,12 @@ describe("harnessStreamInputSchema (v2)", () => { }, }); - expect(result.success).toBe(true); - if (result.success) { - expect(result.data.modelSources?.thinking.providerId).toBe("anthropic"); - expect(result.data.modelSources?.fast?.baseUrl).toBe( - "https://litellm.example.com/v1", - ); - expect(result.data.modelSources?.smart?.providerId).toBe("anthropic"); - expect(result.data.modelSources?.image?.providerId).toBe("openrouter"); - expect(result.data.modelSources?.deepResearch?.providerId).toBe("google"); - } + expect(result.success).toBe(false); }); it("rejects a primary slot inside modelSources (strict object)", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, modelSources: { thinking: { kind: "secret", @@ -428,7 +464,7 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects in-process model sources at the wire boundary", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, modelSources: { thinking: { kind: "in-process", @@ -443,9 +479,9 @@ describe("harnessStreamInputSchema (v2)", () => { it("rejects legacy nested mcp model secrets", () => { const result = harnessStreamInputSchema.safeParse({ - ...minimalV2, + ...minimalV3, mcp: { - ...minimalV2.mcp, + ...minimalV3.mcp, modelSecret: { providerId: "anthropic", apiKey: "sk-ant", diff --git a/packages/sandbox/dispatch/schemas.ts b/packages/sandbox/dispatch/schemas.ts index 8916e13f28..6df37ae39e 100644 --- a/packages/sandbox/dispatch/schemas.ts +++ b/packages/sandbox/dispatch/schemas.ts @@ -32,9 +32,6 @@ export type DispatchSSEEvent = z.infer; const chatMessageSchema = z.record(z.string(), z.unknown()); // opaque to link-protocol -// v2 contract: per-slot credentialId (no root credential), no `coding`/`title` -// slots, and `.strict()` objects so old-shape inputs are rejected rather than -// silently accepted. const modelSelectionSchema = z .object({ id: z.string(), @@ -67,77 +64,31 @@ const modelsConfigSchema = z }) .strict(); -const secretModelSourceSchema = z.object({ - kind: z.literal("secret"), - providerId: z.string(), - apiKey: z.string(), - modelId: z.string(), - baseUrl: z.string().optional(), - extraHeaders: z.record(z.string(), z.string()).optional(), -}); - -const modelSourcesSchema = z - .object({ - thinking: secretModelSourceSchema, - fast: secretModelSourceSchema.optional(), - smart: secretModelSourceSchema.optional(), - image: secretModelSourceSchema.optional(), - deepResearch: secretModelSourceSchema.optional(), - }) - .strict(); - -const httpMcpSourceSchema = z.object({ - kind: z.literal("http"), - url: z.string().url(), - headers: z.record(z.string(), z.string()), - expiresAt: z.number().int().positive(), -}); - -const objectStorageSourceSchema = z.object({ - kind: z.literal("http"), - baseUrl: z.string().url(), - headers: z.record(z.string(), z.string()), - expiresAt: z.number().int().positive(), -}); - -const codingWorkspaceSchema = z - .object({ - repo: z - .object({ - owner: z.string(), - name: z.string(), - connectedGithub: z.boolean(), - }) - .strict() - .optional(), - branch: z.string().nullable().optional(), - cwd: z.string().nullable().optional(), - workspaceKind: z - .enum(["github", "template", "local", "unknown"]) - .optional(), - }) - .strict(); +const harnessWorkspaceSchema = z.discriminatedUnion("cwd", [ + z + .object({ + cwd: z.literal("/repo"), + repo: z + .object({ + owner: z.string(), + name: z.string(), + connectedGithub: z.boolean(), + }) + .strict(), + branch: z.string().nullable(), + }) + .strict(), + z.object({ cwd: z.null() }).strict(), +]); export const harnessStreamInputSchema = z .object({ - /** First-class harness id on the wire (v2). */ harnessId: z.enum(["decopilot", "claude-code", "codex"]).optional(), threadId: z.string(), - runId: z.string(), - taskId: z.string(), - resumeSessionRef: z.string().optional(), - messages: z.array(chatMessageSchema), - /** Symbolic, logically-resolved cwd (see harnesses/workspace-cwd.ts). - * Required — its absence rejects pre-v2 inputs. */ - workspace: z.object({ cwd: z.string().min(1) }).strict(), - codingWorkspace: codingWorkspaceSchema.optional(), + userMessage: chatMessageSchema, + harness: z.object({ sessionId: z.string().optional() }).strict(), + workspace: harnessWorkspaceSchema, models: modelsConfigSchema, - modelSources: modelSourcesSchema.optional(), - mcpSource: httpMcpSourceSchema.optional(), - objectStorageSource: objectStorageSourceSchema.optional(), - // Wire input is intentionally HTTP-only. In-process MCP clients are allowed - // only inside local cluster dispatch and must be normalized to this shape - // before remote dispatch to the link daemon. mcp: z .object({ url: z.string().url(), @@ -152,12 +103,12 @@ export const harnessStreamInputSchema = z toolAllowlist: z.array(z.string()).nullable().optional(), // Per-run parent agent-loop step cap. absent = PARENT_STEP_LIMIT default. maxAgentSteps: z.number().int().optional(), - user: z.object({ id: z.string(), email: z.string() }), + user: z.object({ id: z.string(), email: z.string() }).strict(), organizationId: z.string(), organizationSlug: z.string().optional(), - virtualMcp: z.record(z.string(), z.unknown()), - agent: z.object({ id: z.string() }), - branch: z.string().nullable().optional(), + agent: z + .object({ id: z.string(), instructions: z.string().optional() }) + .strict(), triggerId: z.string().optional(), currentThreadTitle: z.string().optional(), traceparent: z.string().optional(), @@ -168,6 +119,6 @@ export const harnessStreamInputSchema = z */ runFenceToken: z.string().optional(), }) - .strip(); + .strict(); export type HarnessStreamInputWire = z.infer; diff --git a/packages/sandbox/dispatch/version.test.ts b/packages/sandbox/dispatch/version.test.ts index 5fcb2721cb..386ba0e917 100644 --- a/packages/sandbox/dispatch/version.test.ts +++ b/packages/sandbox/dispatch/version.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "bun:test"; import { isVersionAcceptable, LINK_PROTOCOL_VERSION, + LINK_PROTOCOL_UPGRADE_MESSAGE, MIN_SUPPORTED_LINK_PROTOCOL, } from "./version"; @@ -29,13 +30,21 @@ describe("link protocol version", () => { it("rejects 0", () => { expect(isVersionAcceptable(0)).toBe(false); }); + + it("exposes a user-actionable upgrade message", () => { + expect(LINK_PROTOCOL_UPGRADE_MESSAGE).toContain( + "Your desktop link is out of date.", + ); + expect(LINK_PROTOCOL_UPGRADE_MESSAGE).toContain("bunx decocms@latest link"); + }); }); -describe("link protocol v2 hard break", () => { - it("pins version 2 and refuses v1 daemons", () => { - expect(LINK_PROTOCOL_VERSION).toBe(2); - expect(MIN_SUPPORTED_LINK_PROTOCOL).toBe(2); +describe("link protocol v3 hard break", () => { + it("pins version 3 and refuses v2 daemons", () => { + expect(LINK_PROTOCOL_VERSION).toBe(3); + expect(MIN_SUPPORTED_LINK_PROTOCOL).toBe(3); expect(isVersionAcceptable(1)).toBe(false); - expect(isVersionAcceptable(2)).toBe(true); + expect(isVersionAcceptable(2)).toBe(false); + expect(isVersionAcceptable(3)).toBe(true); }); }); diff --git a/packages/sandbox/dispatch/version.ts b/packages/sandbox/dispatch/version.ts index 4ec3872cf4..4b966c4017 100644 --- a/packages/sandbox/dispatch/version.ts +++ b/packages/sandbox/dispatch/version.ts @@ -10,22 +10,28 @@ * symbolic `workspace.cwd`, removal of the singular `modelSource` and the * `primary`/`title`/`coding` slots. * + * v3 (hard break): single-message `userMessage` contract, explicit + * `harness.sessionId`, and no shared message-array/offload protocol. + * * The daemon advertises this version on tunnel session registration via the * `x-link-protocol` header. */ -export const LINK_PROTOCOL_VERSION = 2; +export const LINK_PROTOCOL_VERSION = 3; /** * Cluster rejects daemons below this with 426 `protocol_mismatch`. Links MUST * upgrade. Bumped when an older version becomes too costly to support — - * v2 refuses v1 daemons outright (the v2 input contract is unintelligible + * v3 refuses v2 daemons outright (the v3 input contract is unintelligible * to them). Any rejection surfaced to a stale daemon MUST include the * remediation: re-run `bunx decocms@latest link`. * * Enforced by the link session route before a daemon can mint a presence claim * and receive work items it may not be able to parse. */ -export const MIN_SUPPORTED_LINK_PROTOCOL = 2; +export const MIN_SUPPORTED_LINK_PROTOCOL = 3; + +export const LINK_PROTOCOL_UPGRADE_MESSAGE = + "Your desktop link is out of date. Run `bunx decocms@latest link` and try again."; /** * Header the daemon uses to report its protocol version on daemon-facing