Skip to content

Commit bfdab6a

Browse files
Merge branch 'main' into docs/architecture-docker-driver
2 parents 52920d9 + 11b1937 commit bfdab6a

12 files changed

Lines changed: 1145 additions & 49 deletions

File tree

src/lib/state/sandbox.ts

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
// Credentials are stripped from backups using shared credential-filter.ts.
1111

12-
import { spawnSync } from "child_process";
12+
import { createHash } from "node:crypto";
1313
import {
1414
chmodSync,
1515
existsSync,
@@ -21,17 +21,17 @@ import {
2121
rmSync,
2222
writeFileSync,
2323
} from "node:fs";
24-
import { createHash } from "node:crypto";
2524
import os from "node:os";
2625
import path from "node:path";
26+
import { spawnSync } from "child_process";
2727

28-
import * as registry from "./registry.js";
29-
import { loadAgent } from "../agent/defs.js";
30-
import type { AgentStateFile } from "../agent/defs.js";
31-
import { resolveOpenshell } from "../adapters/openshell/resolve.js";
3228
import { captureOpenshellCommand } from "../adapters/openshell/client.js";
33-
import { sanitizeConfigFile, isSensitiveFile } from "../security/credential-filter.js";
29+
import { resolveOpenshell } from "../adapters/openshell/resolve.js";
30+
import type { AgentStateFile } from "../agent/defs.js";
31+
import { loadAgent } from "../agent/defs.js";
3432
import { shellQuote } from "../runner.js";
33+
import { isSensitiveFile, sanitizeConfigFile } from "../security/credential-filter.js";
34+
import * as registry from "./registry.js";
3535

3636
const HOME_DIR = path.resolve(process.env.HOME || os.homedir());
3737
const REBUILD_BACKUPS_DIR = path.join(HOME_DIR, ".nemoclaw", "rebuild-backups");
@@ -318,8 +318,7 @@ function auditExtractedSymlinks(dirPath: string, allowedRoots: string[]): string
318318
// path with a tampered target falls through to the normal
319319
// containment check.
320320
const relFromDir = path.relative(dirPath, fullPath).split(path.sep).join("/");
321-
const expectedTarget = AUDIT_SYMLINK_WHITELIST.get(relFromDir);
322-
if (expectedTarget !== undefined && expectedTarget === linkTarget) {
321+
if (isAllowedStateSymlink(relFromDir, linkTarget)) {
323322
continue;
324323
}
325324

@@ -565,17 +564,12 @@ function sanitizeBackupDirectory(dirPath: string): void {
565564

566565
const _verbose = () => process.env.NEMOCLAW_REBUILD_VERBOSE === "1";
567566

568-
// Symlinks baked into the base image at build time (Dockerfile.base) by
569-
// `openclaw plugins install`. npm creates these as part of its standard
570-
// install layout — peer-dependency links and .bin shortcuts — and the
571-
// pre-backup audit would otherwise treat them as agent-planted exfil
572-
// attempts. Source paths are relative to the agent state-dir root (e.g.
573-
// for OpenClaw, /sandbox/.openclaw); targets are matched exactly against
574-
// the value of `readlink(source)`. Source-only matching is unsafe: a
575-
// compromised agent could repoint one of these to /etc/passwd and the
576-
// audit would still let it through. Keep in lockstep with
577-
// WECHAT_PLUGIN_VERSION in Dockerfile.base — bump together if the plugin
578-
// install layout changes.
567+
// Exact symlinks baked into the base image at build time (Dockerfile.base) by
568+
// `openclaw plugins install`. Source paths are relative to the agent state-dir
569+
// root (e.g. for OpenClaw, /sandbox/.openclaw); targets are matched exactly
570+
// against the value of `readlink(source)`. Source-only matching is unsafe: a
571+
// compromised agent could repoint one of these to /etc/passwd and the audit
572+
// would still let it through.
579573
const AUDIT_SYMLINK_WHITELIST: ReadonlyMap<string, string> = new Map([
580574
[
581575
"extensions/openclaw-weixin/node_modules/.bin/qrcode-terminal",
@@ -586,6 +580,33 @@ const AUDIT_SYMLINK_WHITELIST: ReadonlyMap<string, string> = new Map([
586580
"/usr/local/lib/node_modules/openclaw",
587581
],
588582
]);
583+
584+
const EXTENSION_NPM_BIN_RE = /^extensions\/[^/]+\/node_modules\/\.bin\/[^/]+$/;
585+
586+
function isAllowedExtensionNpmBinSymlink(relPath: string, linkTarget: string): boolean {
587+
const normalizedRelPath = relPath.split(path.sep).join("/");
588+
if (!EXTENSION_NPM_BIN_RE.test(normalizedRelPath)) return false;
589+
if (linkTarget.length === 0 || path.posix.isAbsolute(linkTarget)) return false;
590+
591+
const binDir = path.posix.dirname(normalizedRelPath);
592+
const nodeModulesDir = path.posix.dirname(binDir);
593+
const resolvedTarget = path.posix.normalize(path.posix.join(binDir, linkTarget));
594+
const targetWithinNodeModules = path.posix.relative(nodeModulesDir, resolvedTarget);
595+
596+
return (
597+
targetWithinNodeModules.length > 0 &&
598+
!targetWithinNodeModules.startsWith("../") &&
599+
!path.posix.isAbsolute(targetWithinNodeModules) &&
600+
!targetWithinNodeModules.startsWith(".bin/")
601+
);
602+
}
603+
604+
function isAllowedStateSymlink(relPath: string, linkTarget: string): boolean {
605+
const exactTarget = AUDIT_SYMLINK_WHITELIST.get(relPath.split(path.sep).join("/"));
606+
if (exactTarget !== undefined) return exactTarget === linkTarget;
607+
return isAllowedExtensionNpmBinSymlink(relPath, linkTarget);
608+
}
609+
589610
function _log(msg: string): void {
590611
if (_verbose()) console.error(` [sandbox-state ${new Date().toISOString()}] ${msg}`);
591612
}
@@ -1059,9 +1080,7 @@ export function backupSandboxState(sandboxName: string, options: BackupOptions =
10591080
const relPath = absPath.startsWith(dirPrefix)
10601081
? absPath.slice(dirPrefix.length)
10611082
: absPath;
1062-
const expectedTarget =
1063-
type === "l" ? AUDIT_SYMLINK_WHITELIST.get(relPath) : undefined;
1064-
if (expectedTarget !== undefined && expectedTarget === linkTarget) {
1083+
if (type === "l" && isAllowedStateSymlink(relPath, linkTarget)) {
10651084
whitelisted.push(entry);
10661085
} else {
10671086
violations.push(entry);

test/e2e/nemoclaw_scenarios/expected-states.yaml

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,11 @@ expected_states:
130130
policy_engine: supported
131131
shields: supported
132132

133-
# Negative preflight state. Introduced alongside its first consumer,
134-
# `ubuntu-no-docker-preflight-negative` (deferred from Phase 1).
135-
# Setup is expected to fail, and the runner must confirm that no
136-
# gateway or sandbox ghost state was left behind.
133+
# Negative preflight state. Setup is expected to fail and the runner
134+
# must confirm that no gateway or sandbox ghost state was left behind.
135+
# The `expected_failure` block (added for #3608) is the structured
136+
# contract the runner matches against; the legacy `failure` block is
137+
# retained as a drift guard while scenarios migrate.
137138
preflight-failure-no-sandbox:
138139
cli:
139140
installed: true
@@ -144,6 +145,15 @@ expected_states:
144145
failure:
145146
expected: true
146147
stage: preflight
148+
expected_failure:
149+
phase: preflight
150+
error_class: docker-missing
151+
# Docker, container, daemon, socket, or preflight - case insensitive.
152+
message_pattern: "(?i)docker|container|daemon|socket|preflight"
153+
forbidden_side_effects:
154+
- sandbox-created
155+
- gateway-started
156+
- credentials-written
147157

148158
onboarding-failure-invalid-nvidia-key:
149159
cli:
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
/**
5+
* Expected-failure matcher.
6+
*
7+
* Negative scenarios declare an `expected_failure` contract on their
8+
* expected state. The runner captures the failed setup's log plus a small
9+
* side-effect inventory (sandbox-created, gateway-started, credentials-written)
10+
* and asks this module whether the observation matches the contract.
11+
*
12+
* The contract has four parts:
13+
* - phase: which setup stage produced the failure (informational; the
14+
* runner is responsible for invoking the matcher only when that phase
15+
* actually ran).
16+
* - error_class: stable identifier for the failure mode.
17+
* - message_pattern: regex applied to the captured log when present.
18+
* - forbidden_side_effects: effects that MUST NOT be observed.
19+
*
20+
* Match result is structured (`ExpectedFailureReport`) so the runner can
21+
* write `expected-vs-actual.json` and surface a useful diff in CI.
22+
*/
23+
24+
import { compileMessagePattern } from "./load.ts";
25+
import type {
26+
ExpectedFailure,
27+
ExpectedFailurePhase,
28+
ExpectedFailureErrorClass,
29+
ExpectedFailureSideEffect,
30+
} from "./schema.ts";
31+
32+
export interface ObservedFailure {
33+
/** Phase the runner attempted; matched against `expected_failure.phase`. */
34+
phase: ExpectedFailurePhase;
35+
/**
36+
* Structured reason if the runner could derive one (preferred). When
37+
* absent, matching falls back to log-content heuristics in the runner.
38+
*/
39+
error_class?: ExpectedFailureErrorClass;
40+
/** Captured setup log; matched against `expected_failure.message_pattern`. */
41+
log: string;
42+
/**
43+
* Side effects the runner positively observed after the failure. Each
44+
* effect in `expected_failure.forbidden_side_effects` is checked against
45+
* this set; presence is a failure.
46+
*/
47+
observed_side_effects: ExpectedFailureSideEffect[];
48+
}
49+
50+
export interface ExpectedFailureCheck {
51+
name: "phase" | "error_class" | "message_pattern" | "forbidden_side_effects";
52+
ok: boolean;
53+
expected: string;
54+
actual: string;
55+
message?: string;
56+
}
57+
58+
export interface ExpectedFailureReport {
59+
ok: boolean;
60+
expected: ExpectedFailure;
61+
observed: ObservedFailure;
62+
checks: ExpectedFailureCheck[];
63+
}
64+
65+
export function matchExpectedFailure(
66+
expected: ExpectedFailure,
67+
observed: ObservedFailure,
68+
): ExpectedFailureReport {
69+
const checks: ExpectedFailureCheck[] = [];
70+
71+
const phaseOk = expected.phase === observed.phase;
72+
checks.push({
73+
name: "phase",
74+
ok: phaseOk,
75+
expected: expected.phase,
76+
actual: observed.phase,
77+
message: phaseOk
78+
? undefined
79+
: `phase mismatch: expected '${expected.phase}' but observed '${observed.phase}'`,
80+
});
81+
82+
if (observed.error_class !== undefined) {
83+
const classOk = expected.error_class === observed.error_class;
84+
checks.push({
85+
name: "error_class",
86+
ok: classOk,
87+
expected: expected.error_class,
88+
actual: observed.error_class,
89+
message: classOk
90+
? undefined
91+
: `error_class mismatch: expected '${expected.error_class}' but observed '${observed.error_class}'`,
92+
});
93+
} else {
94+
// No structured class from the runner; defer to message_pattern as
95+
// the discriminator. Record a SKIPPED entry so the report makes it
96+
// obvious that the class was not asserted structurally.
97+
checks.push({
98+
name: "error_class",
99+
ok: true,
100+
expected: expected.error_class,
101+
actual: "<unobserved>",
102+
message: "skipped: runner did not derive a structured error_class",
103+
});
104+
}
105+
106+
if (expected.message_pattern) {
107+
let regex: RegExp;
108+
try {
109+
regex = compileMessagePattern(expected.message_pattern);
110+
} catch (err) {
111+
checks.push({
112+
name: "message_pattern",
113+
ok: false,
114+
expected: expected.message_pattern,
115+
actual: "<invalid regex>",
116+
message: `message_pattern is not a valid regex: ${(err as Error).message}`,
117+
});
118+
return finalize(expected, observed, checks);
119+
}
120+
const ok = regex.test(observed.log);
121+
checks.push({
122+
name: "message_pattern",
123+
ok,
124+
expected: expected.message_pattern,
125+
actual: ok ? "<match>" : "<no match>",
126+
message: ok
127+
? undefined
128+
: `message_pattern '${expected.message_pattern}' did not match captured log`,
129+
});
130+
}
131+
132+
if (expected.forbidden_side_effects?.length) {
133+
const observedSet = new Set(observed.observed_side_effects);
134+
const found = expected.forbidden_side_effects.filter((e) => observedSet.has(e));
135+
const ok = found.length === 0;
136+
checks.push({
137+
name: "forbidden_side_effects",
138+
ok,
139+
expected: expected.forbidden_side_effects.join(","),
140+
actual: observed.observed_side_effects.join(",") || "<none>",
141+
message: ok
142+
? undefined
143+
: `forbidden side effects observed after failure: ${found.join(", ")}`,
144+
});
145+
}
146+
147+
return finalize(expected, observed, checks);
148+
}
149+
150+
function finalize(
151+
expected: ExpectedFailure,
152+
observed: ObservedFailure,
153+
checks: ExpectedFailureCheck[],
154+
): ExpectedFailureReport {
155+
return { ok: checks.every((c) => c.ok), expected, observed, checks };
156+
}
157+
158+
export function formatExpectedFailureReport(report: ExpectedFailureReport): string {
159+
const lines: string[] = [];
160+
lines.push(`expected-failure: ${report.ok ? "OK" : "FAILED"}`);
161+
for (const c of report.checks) {
162+
const status = c.ok ? "PASS" : "FAIL";
163+
lines.push(` ${status} ${c.name} expected=${c.expected} actual=${c.actual}`);
164+
if (c.message) lines.push(` ${c.message}`);
165+
}
166+
return lines.join("\n");
167+
}

0 commit comments

Comments
 (0)