Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
20d128b
fix(recovery): add connect probe recovery path
ericksoa Apr 30, 2026
d73c0eb
Merge branch 'main' into fix/gateway-recovery-probe-observability
ericksoa Apr 30, 2026
05bfe89
test(recovery): address CodeRabbit cleanup
ericksoa Apr 30, 2026
356b9c8
Merge branch 'main' into fix/gateway-recovery-probe-observability
ericksoa Apr 30, 2026
e6eb1af
test(recovery): assert probe fallback stays non-interactive
ericksoa Apr 30, 2026
f04fc23
Merge branch 'main' into fix/gateway-recovery-probe-observability
ericksoa Apr 30, 2026
8a55af9
Merge remote-tracking branch 'origin/main' into fix/gateway-recovery-…
ericksoa May 1, 2026
a0da39a
Merge branch 'main' into fix/gateway-recovery-probe-observability
ericksoa May 1, 2026
d1b9145
fix(recovery): stop on log hardening setup failure
ericksoa May 1, 2026
c4d0f63
fix(ci): keep PR checks green after main merge
ericksoa May 1, 2026
69e6291
Merge remote-tracking branch 'origin/main' into fix/gateway-recovery-…
ericksoa May 1, 2026
3bbf345
Merge branch 'main' into fix/gateway-recovery-probe-observability
cv May 1, 2026
cd5be30
test(wsl): simulate mutable OpenClaw config owner
ericksoa May 1, 2026
849c98e
Merge remote-tracking branch 'origin/main' into fix/gateway-recovery-…
ericksoa May 1, 2026
c15a003
test(wsl): avoid slow shields subprocess
ericksoa May 2, 2026
7cff948
Merge remote-tracking branch 'origin/main' into fix/gateway-recovery-…
ericksoa May 2, 2026
1d137c6
fix(recovery): address coderabbit follow-ups
ericksoa May 2, 2026
61bb845
fix(recovery): stabilize e2e follow-ups
ericksoa May 2, 2026
d74d5f3
Merge remote-tracking branch 'origin/main' into fix/gateway-recovery-…
ericksoa May 2, 2026
b8e27d5
fix(shields): keep lock-down write stripping mandatory
ericksoa May 2, 2026
69a2656
fix(onboard): accept configured agent binary path
ericksoa May 2, 2026
92cebda
fix(connect): wait for recovered gateway readiness
ericksoa May 2, 2026
46584a7
fix(status): bound sandbox version probe
ericksoa May 2, 2026
da85514
Merge remote-tracking branch 'origin/main' into fix/gateway-recovery-…
ericksoa May 2, 2026
71bfe6d
fix(status): bound nim status probes
ericksoa May 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/nemoclaw-start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ normalize_mutable_config_perms() {
# Detect shields-up. Config dir owned by root means shields are
# currently locked; normalizing would weaken the contract.
local config_dir_owner
config_dir_owner="$(stat -c '%U' "$config_dir" 2>/dev/null || echo unknown)"
config_dir_owner="$(stat -c '%U' "$config_dir" 2>/dev/null || stat -f '%Su' "$config_dir" 2>/dev/null || echo unknown)"
if [ "$config_dir_owner" = "root" ]; then
return 0
fi
Expand Down
42 changes: 37 additions & 5 deletions src/lib/agent-onboard.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { describe, it, expect, beforeEach, afterEach, afterAll, vi } from "vites
import fs from "node:fs";
import path from "node:path";
// Import from compiled dist/ so coverage is attributed correctly.
import { printDashboardUi } from "../../dist/lib/agent-onboard";
import { printDashboardUi, verifyAgentBinaryAvailable } from "../../dist/lib/agent-onboard";
import type { AgentDefinition } from "./agent-defs";

function makeAgent(overrides: Partial<AgentDefinition> = {}): AgentDefinition {
Expand Down Expand Up @@ -128,10 +128,10 @@ describe("handleAgentSetup guards", () => {
const source = fs.readFileSync(path.join(import.meta.dirname, "agent-onboard.ts"), "utf-8");

expect(source).toContain("verifyAgentBinaryAvailable");
expect(source).toContain(
'resolved="$(command -v ${shellQuote(executable)} 2>/dev/null || true)"',
);
expect(source).toContain('[ "$resolved" = ${shellQuote(binaryPath)} ]');
expect(source).toContain("[ -e ${shellQuote(binaryPath)} ]");
expect(source).toContain("[ -x ${shellQuote(binaryPath)} ]");
expect(source).not.toContain('[ -n "$resolved" ] || { echo not_found');
expect(source).not.toContain("path_mismatch");
expect(source).toMatch(
/"sandbox",\s*"exec",\s*"-n",\s*sandboxName,\s*"--",\s*"sh",\s*"-lc",\s*script/,
);
Expand All @@ -150,4 +150,36 @@ describe("handleAgentSetup guards", () => {
expect(source).toContain('parsed.status === "ok"');
expect(source).not.toContain('.includes("ok")');
});

it("accepts an executable configured binary path when PATH lookup is empty", () => {
  // Agent with an explicit binary_path: the guard is expected to probe that
  // path directly rather than resolving the executable name through PATH.
  const configuredAgent = makeAgent({ name: "hermes", binary_path: "/usr/local/bin/hermes" });
  let capturedScript = "";

  const availability = verifyAgentBinaryAvailable("alpha", configuredAgent, (args) => {
    // Index 7 is where this test expects the shell script in the argv
    // passed to the capture runner.
    capturedScript = String(args[7] || "");
    return "ok";
  });

  expect(availability).toEqual({ available: true });

  // Existence and executability checks must target the configured path.
  const configuredPathProbes = ["[ -e '/usr/local/bin/hermes' ]", "[ -x '/usr/local/bin/hermes' ]"];
  for (const probe of configuredPathProbes) {
    expect(capturedScript).toContain(probe);
  }

  // A PATH lookup of the bare executable name must not be part of the script.
  expect(capturedScript).not.toContain("command -v 'hermes'");
});

it("does not reject a configured binary when PATH resolves the symlink target", () => {
  let script = "";
  const result = verifyAgentBinaryAvailable(
    "alpha",
    makeAgent({ name: "hermes", binary_path: "/usr/local/bin/hermes" }),
    (args) => {
      // Capture the shell script handed to the sandbox runner (argv index 7).
      script = String(args[7] || "");
      return "ok";
    },
  );

  expect(result).toEqual({ available: true });

  // Positive anchor: proves the script was actually captured. Without it the
  // negative assertions below would pass vacuously against an empty string.
  expect(script).toContain("[ -x '/usr/local/bin/hermes' ]");

  // The configured path is authoritative: the script must not resolve the
  // executable through PATH or compare a resolved path against binary_path,
  // because a symlinked install would make those two values differ.
  expect(script).not.toContain("command -v");
  expect(script).not.toContain("$resolved");
  expect(script).not.toContain("path_mismatch");
});
});
22 changes: 4 additions & 18 deletions src/lib/agent-onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ type AgentBinaryAvailability =
| { available: true }
| {
available: false;
reason: "not_found" | "not_executable" | "path_mismatch";
reason: "not_found" | "not_executable";
binaryPath?: string;
resolvedPath?: string;
};

function verifyAgentBinaryAvailable(
// Exported for unit coverage of the sandbox-side guard without running onboarding.
export function verifyAgentBinaryAvailable(
sandboxName: string,
agent: AgentDefinition,
runCaptureOpenshell: OnboardContext["runCaptureOpenshell"],
Expand All @@ -138,10 +138,8 @@ function verifyAgentBinaryAvailable(
const binaryPath = typeof agent.binary_path === "string" ? agent.binary_path.trim() : "";
const script = binaryPath
? [
`resolved="$(command -v ${shellQuote(executable)} 2>/dev/null || true)"`,
`[ -n "$resolved" ] || { echo not_found; exit 1; }`,
`[ -e ${shellQuote(binaryPath)} ] || { echo not_found; exit 1; }`,
`[ -x ${shellQuote(binaryPath)} ] || { echo not_executable; exit 1; }`,
`[ "$resolved" = ${shellQuote(binaryPath)} ] || { printf 'path_mismatch:%s\\n' "$resolved"; exit 1; }`,
"echo ok",
].join(" && ")
: `command -v ${shellQuote(executable)} >/dev/null 2>&1 && echo ok || echo not_found`;
Expand All @@ -156,15 +154,6 @@ function verifyAgentBinaryAvailable(
return { available: true };
}
if (binaryPath && result) {
const mismatch = result.match(/path_mismatch:([^\n]+)/);
if (mismatch) {
return {
available: false,
reason: "path_mismatch",
binaryPath,
resolvedPath: mismatch[1].trim(),
};
}
if (result.includes("not_executable")) {
return { available: false, reason: "not_executable", binaryPath };
}
Expand All @@ -178,9 +167,6 @@ function describeAgentBinaryFailure(
result: Exclude<AgentBinaryAvailability, { available: true }>,
): string {
const executable = agentExecutableName(agent);
if (result.reason === "path_mismatch") {
return `${agent.displayName} binary '${executable}' resolves to '${result.resolvedPath}', expected '${result.binaryPath}' inside sandbox '${sandboxName}'`;
}
if (result.reason === "not_executable") {
return `${agent.displayName} configured binary '${result.binaryPath}' is not executable inside sandbox '${sandboxName}'`;
}
Expand Down
97 changes: 86 additions & 11 deletions src/lib/agent-runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import { describe, it, expect } from "vitest";
// Import from compiled dist/ so coverage is attributed correctly.
import { buildRecoveryScript } from "../../dist/lib/agent-runtime";
import { buildOpenClawRecoveryScript, buildRecoveryScript } from "../../dist/lib/agent-runtime";
import type { AgentDefinition } from "./agent-defs";

function makeAgent(overrides: Partial<AgentDefinition> = {}): AgentDefinition {
Expand Down Expand Up @@ -60,13 +60,13 @@ describe("buildRecoveryScript", () => {
it("launches the default gateway command through the validated agent binary", () => {
const script = buildRecoveryScript(minimalAgent, 19000);
expect(script).toContain("command -v 'test-agent'");
expect(script).toContain('nohup "$AGENT_BIN" gateway run --port 19000');
expect(script).toContain('"$AGENT_BIN" gateway run --port 19000');
});

it("falls back to openclaw gateway run when gateway_command is absent", () => {
const agent = makeAgent({ gateway_command: undefined });
const script = buildRecoveryScript(agent, 19000);
expect(script).toContain('nohup "$AGENT_BIN" gateway run --port 19000');
expect(script).toContain('"$AGENT_BIN" gateway run --port 19000');
});

it("validates and launches custom gateway commands explicitly", () => {
Expand Down Expand Up @@ -123,26 +123,101 @@ describe("buildRecoveryScript", () => {

it("writes the warning to gateway.log so it persists for sysadmin tail", () => {
const script = buildRecoveryScript(minimalAgent, 19000);
// Both warnings must end up in /tmp/gateway.log, not just stderr —
// Both warnings must end up in the selected gateway log, not just stderr —
// executeSandboxCommand silently discards stderr from the recovery
// script, so a warning that only goes to stderr is invisible to
// anyone debugging a crash-loop. (#2478)
expect(script).toContain('echo "$_W" >> /tmp/gateway.log');
expect(script).toContain('echo "$_W" >> "$_GATEWAY_LOG"');
// And the warning must be deferred until AFTER gateway.log is
// freshly touched/chmod'd, otherwise the redirect targets a stale
// file that gets removed seconds later.
const touchIdx = script!.indexOf("touch /tmp/gateway.log");
const warnIdx = script!.indexOf('echo "$_W" >> /tmp/gateway.log');
expect(touchIdx).toBeLessThan(warnIdx);
// safely opened with O_NOFOLLOW, otherwise the redirect targets a
// stale or attacker-controlled file.
const gatewayPrepIdx = script!.indexOf(" /tmp/gateway.log || exit 1;");
const logSelectionIdx = script!.indexOf("_GATEWAY_LOG=/tmp/gateway.log");
const warnIdx = script!.indexOf('echo "$_W" >> "$_GATEWAY_LOG"');
expect(gatewayPrepIdx).toBeGreaterThanOrEqual(0);
expect(logSelectionIdx).toBeGreaterThanOrEqual(0);
expect(warnIdx).toBeGreaterThanOrEqual(0);
expect(gatewayPrepIdx).toBeLessThan(logSelectionIdx);
expect(logSelectionIdx).toBeLessThan(warnIdx);
});

it("stops recovery when hardened log setup fails", () => {
  const recoveryScript = buildOpenClawRecoveryScript(18789);

  // Both log-preparation invocations must be followed by `|| exit 1` so a
  // failed preparation aborts the recovery script.
  const guardedPreparations = [
    " /tmp/gateway.log 'gateway' || exit 1;",
    " /tmp/auto-pair.log 'sandbox' || exit 1;",
  ];
  for (const preparation of guardedPreparations) {
    expect(recoveryScript).toContain(preparation);
  }
});

it("appends (not truncates) gateway.log on launch so warnings survive", () => {
const script = buildRecoveryScript(minimalAgent, 19000);
// Truncating with `>` wipes the [gateway-recovery] WARNING that the
// recovery script wrote moments earlier — meaning a sysadmin tailing
// gateway.log would see the eventual crash without the explanation.
expect(script).toContain(">> /tmp/gateway.log 2>&1 &");
expect(script).toContain('>> "$_GATEWAY_LOG" 2>&1 &');
expect(script).not.toMatch(/[^>]> \/tmp\/gateway\.log 2>&1 &/);
});

it("preserves an existing gateway.log and has a writable fallback log", () => {
  const recoveryScript = buildOpenClawRecoveryScript(18789);

  // The existing log must never be deleted.
  expect(recoveryScript).not.toContain("rm -f /tmp/gateway.log");

  // The script selects a log path into $_GATEWAY_LOG (primary or fallback)
  // and routes warnings and the tail through that variable.
  const requiredFragments = [
    "_GATEWAY_LOG=/tmp/gateway.log",
    "_GATEWAY_LOG=/tmp/gateway-recovery.log",
    'echo "$_W" >> "$_GATEWAY_LOG"',
    'tail -5 "$_GATEWAY_LOG"',
  ];
  for (const fragment of requiredFragments) {
    expect(recoveryScript).toContain(fragment);
  }

  // Hard-coded references to the primary path would bypass the fallback.
  const hardCodedFragments = ['echo "$_W" >> /tmp/gateway.log', "cat /tmp/gateway.log"];
  for (const fragment of hardCodedFragments) {
    expect(recoveryScript).not.toContain(fragment);
  }
});

it("rejects a symlinked gateway.log before preparing the log", () => {
  const recoveryScript = buildOpenClawRecoveryScript(18789);

  // Offsets of the three steps whose relative order matters:
  // O_NOFOLLOW flag -> open -> fchown on the opened descriptor.
  const noFollowAt = recoveryScript.indexOf("O_NOFOLLOW");
  const openAt = recoveryScript.indexOf("os.open(path, flags, 0o644)");
  const fchownAt = recoveryScript.indexOf("os.fchown(fd");

  // A symlinked log is refused with a message and a non-zero exit.
  expect(recoveryScript).toContain("refusing to prepare symlinked /tmp/gateway.log");
  expect(recoveryScript).toContain("sys.exit(1)");

  // Path-based truncation and chown must not appear.
  for (const pathBasedOp of [": > /tmp/gateway.log", "chown 'gateway:gateway' /tmp/gateway.log"]) {
    expect(recoveryScript).not.toContain(pathBasedOp);
  }

  // All three steps are present...
  expect(noFollowAt).toBeGreaterThanOrEqual(0);
  expect(openAt).toBeGreaterThanOrEqual(0);
  expect(fchownAt).toBeGreaterThanOrEqual(0);

  // ...and occur in the required order.
  expect(noFollowAt).toBeLessThan(openAt);
  expect(openAt).toBeLessThan(fchownAt);
});

it("prepares gateway.log for the real gateway-owned sandbox log", () => {
  const recoveryScript = buildOpenClawRecoveryScript(18789);

  // Each entry pairs a script fragment with whether it must be present.
  // Order mirrors the sequence of checks this test has always performed.
  const fragmentChecks: ReadonlyArray<readonly [fragment: string, present: boolean]> = [
    ["os.fchown(fd", true],
    ["pw.pw_gid", true],
    ["grp.getgrnam", false],
    ["owner_mode = 0o644", true],
    ["os.fchmod(fd, owner_mode)", true],
    ["/tmp/gateway.log 'gateway'", true],
    ["gosu 'gateway'", true],
  ];

  for (const [fragment, present] of fragmentChecks) {
    if (present) {
      expect(recoveryScript).toContain(fragment);
    } else {
      expect(recoveryScript).not.toContain(fragment);
    }
  }
});

it("terminates the conditional launch branch before capturing the gateway pid", () => {
  const recoveryScript = buildOpenClawRecoveryScript(18789);

  // The `fi` closing the launch conditional must be followed by `;` before
  // the pid capture; the unseparated form must never be emitted.
  const terminatedCapture = " fi; GPID=$!";
  const unterminatedCapture = " fi GPID=$!";

  expect(recoveryScript).toContain(terminatedCapture);
  expect(recoveryScript).not.toContain(unterminatedCapture);
});

it("prepares auto-pair.log without unlinking or following symlinks", () => {
  const recoveryScript = buildOpenClawRecoveryScript(18789);

  // Hardened preparation: symlink refusal, 'sandbox' owner argument, 0o600 mode.
  const hardenedFragments = [
    "refusing to prepare symlinked /tmp/auto-pair.log",
    "/tmp/auto-pair.log 'sandbox'",
    "owner_mode = 0o600",
  ];
  for (const fragment of hardenedFragments) {
    expect(recoveryScript).toContain(fragment);
  }

  // Path-based shell operations on the log must not be emitted.
  const pathBasedOps = [
    "rm -f /tmp/auto-pair.log",
    ": > /tmp/auto-pair.log",
    "touch /tmp/auto-pair.log",
    "chown sandbox:sandbox /tmp/auto-pair.log",
    "chmod 600 /tmp/auto-pair.log",
  ];
  for (const op of pathBasedOps) {
    expect(recoveryScript).not.toContain(op);
  }
});

it("does not force non-OpenClaw agents to run as the gateway user", () => {
  const recoveryScript = buildRecoveryScript(minimalAgent, 19000);

  // Neither the quoted nor the unquoted form of the gateway-user chown/gosu
  // may appear in the generic (non-OpenClaw) recovery script.
  const gatewayUserFragments = [
    "chown gateway:gateway /tmp/gateway.log",
    "chown 'gateway:gateway' /tmp/gateway.log",
    "gosu gateway",
    "gosu 'gateway'",
  ];
  for (const fragment of gatewayUserFragments) {
    expect(recoveryScript).not.toContain(fragment);
  }
});
});
});
Loading
Loading