Skip to content

Commit b0c0477

Browse files
chore: sync public mirror from internal
1 parent e9efb25 commit b0c0477

454 files changed

Lines changed: 61256 additions & 2014 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

bun.lockb

5.2 KB
Binary file not shown.

evals/tools/surface-smoke-cases.json

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,12 @@
110110
}
111111
},
112112
{
113-
"name": "extract_document reads a local smoke fixture",
113+
"name": "extract_document blocks a local smoke fixture",
114114
"kind": "extractDocument",
115-
"judgeRubric": "extract_document should preserve the text payload, recognize the text format, and report the downloaded filename.",
115+
"judgeRubric": "extract_document should reject private or local document URLs instead of downloading them.",
116116
"expected": {
117-
"text": "Maestro extract document smoke test",
118-
"format": "text",
119-
"fileName": "fixture.txt"
117+
"blocked": true,
118+
"message": "Blocked document URL host: private or local address"
120119
}
121120
},
122121
{

package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,12 @@
5858
"check:workflow-footguns": "node scripts/check-workflow-footguns.mjs",
5959
"check:test-skip-hygiene": "node scripts/check-test-skip-hygiene.mjs",
6060
"check:test-timing-wait-hygiene": "node scripts/check-test-timing-wait-hygiene.mjs",
61+
"check:atomic-write-hygiene": "node scripts/check-atomic-write-hygiene.mjs",
6162
"check:codex-parity": "node scripts/check-codex-parity-conformance.mjs",
6263
"check:codex-operating-layer": "node scripts/check-codex-operating-layer-conformance.mjs",
6364
"check:platform-runtime-conformance": "node scripts/check-platform-runtime-conformance.mjs",
6465
"check:release-surface": "node scripts/check-release-surface-conformance.mjs",
65-
"lint:evals": "bun run lint:headless-proto && node scripts/ensure-deps.js --no-install --workspace @evalops/contracts && node scripts/verify-evals.js && node scripts/verify-tool-versions.js && node scripts/validate-system-paths.js && node scripts/validate-package-boundaries.js && node scripts/validate-public-package-deps.js && node scripts/check-public-surface-boundary.mjs && npm run check:context-manifest && npm run check:session-wire-contract && npm run check:cli-runtime-conformance && npm run check:rpc-protocol-conformance && npm run check:evidence-integrity && npm run check:maestro-release-gate-events && npm run check:session-replay-fixtures && npm run check:agent-trajectory-fixtures && npm run check:agent-trajectory-replay-fixtures && npm run check:agent-trajectory-score-fixtures && npm run check:agent-trajectory-inspection-fixtures && npm run check:agent-trajectory-scenario-fixtures && npm run check:slack-teammate-runtime-scenarios && npm run check:scripted-scenario-fixtures && node scripts/session-wire-format-codegen.mjs --check && node scripts/headless-protocol-codegen.mjs --check && npm run check:app-server-schema && npm run check:drift-surfaces && npm run check:staged-rollout && npm run check:workflow-footguns && npm run check:test-skip-hygiene && npm run check:test-timing-wait-hygiene && npm run check:codex-parity && npm run check:codex-operating-layer && npm run check:platform-runtime-conformance && npm run check:release-surface && npm run developer-surface:check",
66+
"lint:evals": "bun run lint:headless-proto && node scripts/ensure-deps.js --no-install --workspace @evalops/contracts && node scripts/verify-evals.js && node scripts/verify-tool-versions.js && node scripts/validate-system-paths.js && node scripts/validate-package-boundaries.js && node scripts/validate-public-package-deps.js && node scripts/check-public-surface-boundary.mjs && npm run check:context-manifest && npm run check:session-wire-contract && npm run check:cli-runtime-conformance && npm run check:rpc-protocol-conformance && npm run check:evidence-integrity && npm run check:maestro-release-gate-events && npm run check:session-replay-fixtures && npm run check:agent-trajectory-fixtures && npm run check:agent-trajectory-replay-fixtures && npm run check:agent-trajectory-score-fixtures && npm run check:agent-trajectory-inspection-fixtures && npm run check:agent-trajectory-scenario-fixtures && npm run check:slack-teammate-runtime-scenarios && npm run check:scripted-scenario-fixtures && node scripts/session-wire-format-codegen.mjs --check && node scripts/headless-protocol-codegen.mjs --check && npm run check:app-server-schema && npm run check:drift-surfaces && npm run check:staged-rollout && npm run check:workflow-footguns && npm run check:test-skip-hygiene && npm run check:test-timing-wait-hygiene && npm run check:atomic-write-hygiene && npm run check:codex-parity && npm run check:codex-operating-layer && npm run check:platform-runtime-conformance && npm run check:release-surface && npm run developer-surface:check",
6667
"platform:sdk-smoke": "tsx scripts/check-platform-sdk-contract.ts",
6768
"platform:agentruntime-e2e": "tsx scripts/smoke-platform-agentruntime-lifecycle.ts",
6869
"platform:timeline-e2e": "tsx scripts/smoke-platform-timeline-e2e.ts",
@@ -182,6 +183,7 @@
182183
"@bufbuild/protobuf": "^2.11.0",
183184
"@crosscopy/clipboard": "^0.2.8",
184185
"@modelcontextprotocol/sdk": "^1.29.0",
186+
"@napi-rs/keyring": "^1.3.0",
185187
"@openai/codex": "^0.135.0",
186188
"@opentelemetry/api": "^1.9.1",
187189
"@opentelemetry/auto-instrumentations-node": "^0.76.0",
@@ -221,6 +223,7 @@
221223
"postgres": "^3.4.8",
222224
"smol-toml": "^1.6.1",
223225
"string-width": "^8.2.0",
226+
"undici": "^7.25.0",
224227
"uuid": "^14.0.0",
225228
"vscode-jsonrpc": "^8.2.1",
226229
"ws": "^8.20.0",

packages/core/src/sandbox/daytona-sandbox.ts

Lines changed: 295 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,14 @@
88
* Caches the sandbox handle to avoid redundant API calls per operation.
99
*/
1010

11+
import { randomUUID } from "node:crypto";
12+
import { StringDecoder } from "node:string_decoder";
1113
import { Daytona } from "@daytonaio/sdk";
12-
import type { ExecResult, Sandbox } from "../../../../src/sandbox/types.js";
14+
import type {
15+
ExecResult,
16+
ExecWithArgsOptions,
17+
Sandbox,
18+
} from "../../../../src/sandbox/types.js";
1319

1420
export interface DaytonaSandboxConfig {
1521
apiKey: string;
@@ -22,9 +28,244 @@ type SandboxHandle = Awaited<
2228
ReturnType<InstanceType<typeof Daytona>["create"]>
2329
>;
2430

31+
/**
 * Status record for a command started inside a Daytona session.
 *
 * Both fields are optional: `execWithSession` treats a missing `cmdId` on
 * the start response as an error, and treats a numeric `exitCode` as the
 * signal that the command has finished.
 */
type DaytonaSessionCommand = {
  cmdId?: string;
  exitCode?: number;
};

/**
 * Captured output of a session command.
 *
 * `execWithSession` reads `stdout` first and falls back to `output` when
 * `stdout` is absent; `stderr` defaults to the empty string.
 */
type DaytonaSessionLogs = {
  output?: string;
  stdout?: string;
  stderr?: string;
};

/**
 * The sandbox handle's process API, widened with the session methods this
 * module uses. Every session method is declared optional here, so callers
 * check for their presence at runtime (see `hasSessionApi`) before taking
 * the session-based execution path.
 */
type DaytonaProcessApi = SandboxHandle["process"] & {
  createSession?: (sessionId: string) => Promise<void>;
  deleteSession?: (sessionId: string) => Promise<void>;
  executeSessionCommand?: (
    sessionId: string,
    req: {
      command: string;
      runAsync?: boolean;
      suppressInputEcho?: boolean;
    },
    timeout?: number,
  ) => Promise<DaytonaSessionCommand>;
  getSessionCommand?: (
    sessionId: string,
    commandId: string,
  ) => Promise<DaytonaSessionCommand>;
  getSessionCommandLogs?: (
    sessionId: string,
    commandId: string,
  ) => Promise<DaytonaSessionLogs>;
};
63+
64+
/** Pause between two status polls of a running session command, in milliseconds. */
const SESSION_POLL_MS = 100;
/** Time budget for one session command before polling gives up, in milliseconds. */
const SESSION_COMMAND_TIMEOUT_MS = 90_000;
/** Default cap applied to captured command output, in UTF-8 bytes (40 KiB). */
const EXEC_OUTPUT_MAX_BUFFER = 40 * 1024;
67+
68+
/**
 * Build the result returned when an execution is abandoned because the
 * caller's abort signal fired.
 *
 * @returns A fresh result object with empty stdout/stderr and exit code 1.
 */
function cancelledExecResult(): ExecResult {
  return { stdout: "", stderr: "", exitCode: 1 };
}
71+
72+
/**
 * Quote a single argument for inclusion in a shell command line.
 *
 * Values made up solely of characters from the allow-list below are
 * returned unchanged. Everything else, including the empty string, is
 * wrapped in single quotes, with each embedded single quote rewritten as
 * the four-character sequence quote, backslash, quote, quote.
 *
 * @param value - The raw argument text.
 * @returns The argument, quoted only when it needs to be.
 */
function quoteShellArg(value: string): string {
  if (/^[A-Za-z0-9_./:=@%+,-]+$/u.test(value)) {
    return value;
  }
  const escaped = value.replace(/'/g, "'\\''");
  return `'${escaped}'`;
}
78+
79+
/**
 * Cap a string at `maxBuffer` UTF-8 bytes without splitting a character.
 *
 * @param value - The text to cap.
 * @param maxBuffer - Byte limit. `undefined` disables truncation. Negative
 *   or NaN limits are treated as 0; fractional limits are rounded down.
 * @returns `value` itself when it fits, otherwise the longest prefix of
 *   complete characters whose UTF-8 encoding is at most the limit.
 */
function truncateOutput(value: string, maxBuffer?: number): string {
  if (maxBuffer === undefined) {
    return value;
  }
  // Clamp to a non-negative integer. `subarray(0, end)` interprets a
  // negative `end` as an offset from the end of the buffer, so a negative
  // limit would otherwise keep almost the whole output instead of none.
  const limit = maxBuffer > 0 ? Math.floor(maxBuffer) : 0;
  // A UTF-16 code unit never encodes to more than 3 UTF-8 bytes, so a
  // string this short cannot exceed the limit; skip the Buffer copy.
  if (value.length * 3 <= limit) {
    return value;
  }
  const bytes = Buffer.from(value);
  if (bytes.length <= limit) {
    return value;
  }
  // `Buffer#toString("utf-8")` on a raw slice emits U+FFFD when the cut
  // lands inside a multi-byte sequence. `StringDecoder#write` returns only
  // complete characters and keeps any trailing partial bytes buffered
  // internally; the decoder is discarded here, so those bytes are dropped
  // and the result never ends in a replacement character.
  const decoder = new StringDecoder("utf8");
  return decoder.write(bytes.subarray(0, limit));
}
97+
98+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
101+
25102
export class DaytonaSandbox implements Sandbox {
26103
private constructor(private handle: SandboxHandle) {}
27104

105+
/**
 * Report whether the process API exposes every session method that
 * `execWithSession` calls: create, delete, execute, status, and logs.
 *
 * @param processApi - The sandbox handle's process API.
 * @returns `true` only when all five session methods are present.
 */
private hasSessionApi(processApi: DaytonaProcessApi): boolean {
  return !!(
    processApi.createSession &&
    processApi.deleteSession &&
    processApi.executeSessionCommand &&
    processApi.getSessionCommand &&
    processApi.getSessionCommandLogs
  );
}
114+
115+
/**
 * Assemble the shell command line that is sent to the sandbox.
 *
 * When `env` has entries, they are prepended as `NAME='value'` assignments.
 * When `cwd` is given, the whole line is then prefixed with
 * `cd '<cwd>' && `. Values and the directory are single-quoted, with
 * embedded single quotes escaped.
 *
 * NOTE(review): the assignments are a plain prefix on the command text. In
 * a POSIX shell such a prefix applies to the first simple command only, so
 * for a compound `command` (e.g. `a && b`) later commands would not see
 * the variables. Confirm whether callers rely on this.
 *
 * @param command - Shell command text, used verbatim.
 * @param cwd - Optional working directory to `cd` into first.
 * @param env - Optional environment variables for the command.
 * @returns The full command line.
 * @throws Error when an `env` key is not a valid shell variable name.
 */
private buildShellCommand(
  command: string,
  cwd?: string,
  env?: Record<string, string>,
): string {
  let fullCommand = command;
  if (env && Object.keys(env).length > 0) {
    const envPrefix = Object.entries(env)
      .map(([k, v]) => {
        // Names are interpolated unquoted, so reject anything that is
        // not a plain identifier.
        if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(k)) {
          throw new Error(`Invalid environment variable name: ${k}`);
        }
        // Single quotes prevent shell interpretation of the value.
        const escaped = v.replace(/'/g, "'\\''");
        return `${k}='${escaped}'`;
      })
      .join(" ");
    fullCommand = `${envPrefix} ${fullCommand}`;
  }
  if (cwd) {
    const escapedCwd = cwd.replace(/'/g, "'\\''");
    fullCommand = `cd '${escapedCwd}' && ${fullCommand}`;
  }
  return fullCommand;
}
139+
140+
/**
 * Run an already-assembled shell command in a dedicated Daytona session so
 * that it can be abandoned when `options.signal` aborts.
 *
 * Flow: create a uniquely named session, start the command asynchronously,
 * poll its status every `SESSION_POLL_MS` until an exit code appears, then
 * fetch the logs. The session is always deleted on the way out.
 *
 * If the process API lacks the session methods, the command runs through
 * one-shot `executeCommand` instead; that path cannot be interrupted, so
 * it is refused when a signal was supplied.
 *
 * @param command - Full shell command line (already quoted/prefixed).
 * @param options - Reads `signal` (abort) and `maxBuffer` (byte cap applied
 *   to stdout and stderr separately).
 * @returns The command's output and exit code, or the cancelled result
 *   (empty output, exit code 1) when the signal aborted at any point.
 * @throws Error when a signal is given without session API support, when
 *   the start response carries no command id, when polling exceeds
 *   `SESSION_COMMAND_TIMEOUT_MS`, or when a Daytona call fails while the
 *   signal is not aborted.
 */
private async execWithSession(
  command: string,
  options: ExecWithArgsOptions = {},
): Promise<ExecResult> {
  const processApi = this.handle.process as DaytonaProcessApi;
  if (!this.hasSessionApi(processApi)) {
    if (options.signal?.aborted) {
      return cancelledExecResult();
    }
    if (options.signal) {
      throw new Error(
        "Daytona abortable execution requires session API support",
      );
    }
    const result = await processApi.executeCommand(command);
    return {
      stdout: truncateOutput(result.result, options.maxBuffer),
      stderr: "",
      exitCode: result.exitCode,
    };
  }

  const sessionId = `maestro-exec-${randomUUID()}`;
  let sessionDeleted = false;
  let sessionDeletePromise: Promise<void> | undefined;
  // Deletes the session at most once successfully. It may be invoked from
  // both the abort listener and the `finally` block: a second caller first
  // waits for the delete already in flight and only retries if that
  // attempt failed.
  const deleteSession = async (): Promise<void> => {
    if (sessionDeleted) {
      return;
    }
    if (sessionDeletePromise) {
      await sessionDeletePromise;
      if (sessionDeleted) {
        return;
      }
    }
    sessionDeletePromise = (async () => {
      try {
        await processApi.deleteSession!(sessionId);
        sessionDeleted = true;
      } catch {
        // The session may not exist yet during setup cancellation.
      } finally {
        sessionDeletePromise = undefined;
      }
    })();
    await sessionDeletePromise;
  };
  // Tracks whether the async session command was started but never
  // observed to complete. We use this to warn loudly if the caller
  // aborts mid-execution: Daytona's `deleteSession` is documented to
  // terminate the associated process (see
  // `deleteSessionDeprecated`: "Delete a PTY session and terminate the
  // associated process"), but the SDK exposes no direct
  // command-cancellation endpoint, so the in-flight remote process
  // outliving the session would be invisible to us without this log.
  let inflightCmdId: string | null = null;
  const abortSession = (): void => {
    void deleteSession();
  };
  options.signal?.addEventListener("abort", abortSession, { once: true });

  try {
    if (options.signal?.aborted) {
      return cancelledExecResult();
    }
    await processApi.createSession(sessionId);
    if (options.signal?.aborted) {
      return cancelledExecResult();
    }

    const response = await processApi.executeSessionCommand(sessionId, {
      command,
      runAsync: true,
      suppressInputEcho: true,
    });
    if (!response.cmdId) {
      throw new Error("Daytona session command did not return a command id");
    }
    inflightCmdId = response.cmdId;

    const startedAt = Date.now();
    while (!options.signal?.aborted) {
      if (Date.now() - startedAt >= SESSION_COMMAND_TIMEOUT_MS) {
        throw new Error("Daytona session command timed out");
      }
      const commandState = await processApi.getSessionCommand(
        sessionId,
        response.cmdId,
      );
      if (options.signal?.aborted) {
        return cancelledExecResult();
      }
      // A numeric exit code is the completion signal.
      if (typeof commandState.exitCode === "number") {
        inflightCmdId = null;
        const logs = await processApi.getSessionCommandLogs(
          sessionId,
          response.cmdId,
        );
        if (options.signal?.aborted) {
          return cancelledExecResult();
        }
        return {
          stdout: truncateOutput(
            logs.stdout ?? logs.output ?? "",
            options.maxBuffer,
          ),
          stderr: truncateOutput(logs.stderr ?? "", options.maxBuffer),
          exitCode: commandState.exitCode,
        };
      }
      await sleep(SESSION_POLL_MS);
    }

    return cancelledExecResult();
  } catch (err) {
    // The abort listener deletes the session concurrently, which can make
    // a session call that is already in flight reject. Every other abort
    // path reports the cancelled result, so a failure observed after the
    // abort is reported the same way instead of leaking a teardown error.
    if (options.signal?.aborted) {
      return cancelledExecResult();
    }
    throw err;
  } finally {
    options.signal?.removeEventListener("abort", abortSession);
    await deleteSession();
    if (options.signal?.aborted && inflightCmdId) {
      // Surface the residual-process risk so a stuck/long-lived
      // remote command after an aborted session is at least
      // observable. The Daytona session API does not currently
      // expose a way for us to verify termination ourselves.
      console.warn(
        `[daytona] Session ${sessionId} aborted with command ${inflightCmdId} still in flight; relying on Daytona's documented deleteSession-terminates-process contract.`,
      );
    }
  }
}
268+
28269
/**
29270
* Create a new Daytona sandbox. This is async because it provisions
30271
* a remote sandbox environment.
@@ -49,30 +290,65 @@ export class DaytonaSandbox implements Sandbox {
49290
command: string,
50291
cwd?: string,
51292
env?: Record<string, string>,
293+
signal?: AbortSignal,
52294
): Promise<ExecResult> {
53295
try {
54-
// Build command with env vars and cwd if provided
55-
let fullCommand = command;
56-
if (env && Object.keys(env).length > 0) {
57-
const envPrefix = Object.entries(env)
58-
.map(([k, v]) => {
59-
if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(k)) {
60-
throw new Error(`Invalid environment variable name: ${k}`);
61-
}
62-
// Use single quotes to prevent shell interpretation
63-
const escaped = v.replace(/'/g, "'\\''");
64-
return `${k}='${escaped}'`;
65-
})
66-
.join(" ");
67-
fullCommand = `${envPrefix} ${fullCommand}`;
296+
const fullCommand = this.buildShellCommand(command, cwd, env);
297+
const processApi = this.handle.process as DaytonaProcessApi;
298+
if (signal?.aborted) {
299+
return cancelledExecResult();
300+
}
301+
if (signal && this.hasSessionApi(processApi)) {
302+
return await this.execWithSession(fullCommand, {
303+
signal,
304+
maxBuffer: EXEC_OUTPUT_MAX_BUFFER,
305+
});
68306
}
69-
if (cwd) {
70-
const escapedCwd = cwd.replace(/'/g, "'\\''");
71-
fullCommand = `cd '${escapedCwd}' && ${fullCommand}`;
307+
const result = await processApi.executeCommand(fullCommand);
308+
// Apply the same `EXEC_OUTPUT_MAX_BUFFER` cap as the session
309+
// path so a single sandbox can't accidentally load unbounded
310+
// log output through one entry point but not the other
311+
// (Cursor Bugbot rounds 4–5 on PR #2748).
312+
return {
313+
stdout: truncateOutput(result.result, EXEC_OUTPUT_MAX_BUFFER),
314+
stderr: "",
315+
exitCode: result.exitCode,
316+
};
317+
} catch (err) {
318+
return {
319+
stdout: "",
320+
stderr: err instanceof Error ? err.message : String(err),
321+
exitCode: 1,
322+
};
323+
}
324+
}
325+
326+
async execWithArgs(
327+
command: string,
328+
args: string[] = [],
329+
options: ExecWithArgsOptions = {},
330+
): Promise<ExecResult> {
331+
try {
332+
const fullCommand = this.buildShellCommand(
333+
[command, ...args].map(quoteShellArg).join(" "),
334+
options.cwd,
335+
options.env,
336+
);
337+
// Default `maxBuffer` to `EXEC_OUTPUT_MAX_BUFFER` so both the
338+
// signal/session path and the plain executeCommand path apply
339+
// the same cap. Without this default the caller could omit
340+
// `maxBuffer` and load unbounded stdout — the inconsistency
341+
// Cursor Bugbot flagged on PR #2748.
342+
const maxBuffer = options.maxBuffer ?? EXEC_OUTPUT_MAX_BUFFER;
343+
if (options.signal) {
344+
return await this.execWithSession(fullCommand, {
345+
...options,
346+
maxBuffer,
347+
});
72348
}
73349
const result = await this.handle.process.executeCommand(fullCommand);
74350
return {
75-
stdout: result.result,
351+
stdout: truncateOutput(result.result, maxBuffer),
76352
stderr: "",
77353
exitCode: result.exitCode,
78354
};

0 commit comments

Comments
 (0)