Skip to content

Commit cf3fd66

Browse files
laitingshengjyaunchesprekshivyas
authored
fix(onboard): preserve disabled channels through rebuild so channels start can recover (#3395)
## Summary `nemoclaw <sandbox> channels start <name>` reported success but left the bot offline whenever it ran after a `channels stop <name>` + rebuild. The stop path correctly disabled the bridge, but the subsequent rebuild silently dropped the channel from the registry's configured set, leaving a later `channels start` with nothing to re-enable. ## Related Issue Fixes #3381 ## Changes - `src/lib/onboard.ts`: persist the operator's configured channel set on the new registry entry (input `enabledChannels`), not the post-disabled-filter `activeMessagingChannels`. `disabledChannels` remains the runtime modifier; the baked image and the gateway provider attachments still respect it. - `test/onboard.test.ts`: regression test that drives `createSandbox` with `disabledChannels: ["telegram"]` already in the registry and asserts the new `registerSandbox` payload keeps `messagingChannels: ["telegram"]` while the Dockerfile build arg and `--provider` flags continue to exclude the disabled channel. ## Type of Change - [x] Code change (feature, bug fix, or refactor) - [ ] Code change with doc updates - [ ] Doc only (prose changes, no code sample modifications) - [ ] Doc only (includes code sample changes) ## Verification - [x] `npx prek run --all-files` passes - [x] `npm test` passes - [x] Tests added or updated for new or changed behavior - [x] No secrets, API keys, or credentials committed - [ ] Docs updated for user-facing behavior changes - [ ] `make docs` builds without warnings (doc changes only) - [ ] Doc pages follow the [style guide](https://github.com/NVIDIA/NemoClaw/blob/main/docs/CONTRIBUTING.md) (doc changes only) - [ ] New doc pages include SPDX header and frontmatter (new pages only) --- Signed-off-by: Tinson Lai <tinsonl@nvidia.com> <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Messaging channels you configure are now preserved when recreating a sandbox, allowing them to be re-enabled in subsequent rebuilds. * Disabled messaging channels no longer incorrectly prevent other sandboxes from using the same credential token. [![Review Change Stack](https://storage.googleapis.com/coderabbit_public_assets/review-stack-in-coderabbit-ui.svg)](https://app.coderabbit.ai/change-stack/NVIDIA/NemoClaw/pull/3395) <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Tinson Lai <tinsonl@nvidia.com> Co-authored-by: J. Yaunches <jyaunches@nvidia.com> Co-authored-by: Prekshi Vyas <34834085+prekshivyas@users.noreply.github.com>
1 parent 7f6da90 commit cf3fd66

4 files changed

Lines changed: 212 additions & 2 deletions

File tree

src/lib/messaging-conflict.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,24 @@ describe("findChannelConflicts", () => {
9393
const registry = makeRegistry([{ name: "alice", messagingChannels: ["telegram"] }]);
9494
expect(findChannelConflicts("bob", [], registry)).toEqual([]);
9595
});
96+
97+
it("ignores a stopped (disabled) channel — its credential is not in use (#3381)", () => {
98+
const registry = makeRegistry([
99+
{
100+
name: "alice",
101+
messagingChannels: ["telegram"],
102+
disabledChannels: ["telegram"],
103+
providerCredentialHashes: { TELEGRAM_BOT_TOKEN: "hash-a" },
104+
},
105+
]);
106+
expect(
107+
findChannelConflicts(
108+
"bob",
109+
[{ channel: "telegram", credentialHashes: { TELEGRAM_BOT_TOKEN: "hash-a" } }],
110+
registry,
111+
),
112+
).toEqual([]);
113+
});
96114
});
97115

98116
describe("findAllOverlaps", () => {
@@ -161,6 +179,23 @@ describe("findAllOverlaps", () => {
161179
]);
162180
expect(findAllOverlaps(registry)).toEqual([]);
163181
});
182+
183+
it("ignores stopped (disabled) channels so nemoclaw status does not report phantom overlaps (#3381)", () => {
184+
const registry = makeRegistry([
185+
{
186+
name: "alice",
187+
messagingChannels: ["telegram"],
188+
disabledChannels: ["telegram"],
189+
providerCredentialHashes: { TELEGRAM_BOT_TOKEN: "hash-a" },
190+
},
191+
{
192+
name: "bob",
193+
messagingChannels: ["telegram"],
194+
providerCredentialHashes: { TELEGRAM_BOT_TOKEN: "hash-a" },
195+
},
196+
]);
197+
expect(findAllOverlaps(registry)).toEqual([]);
198+
});
164199
});
165200

166201
describe("backfillMessagingChannels", () => {

src/lib/messaging-conflict.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,13 @@ function getTokenKeys(channel: string): string[] {
7070
}
7171

7272
function hasStoredChannel(entry: SandboxEntry, channel: string): boolean {
73-
return Array.isArray(entry.messagingChannels) && entry.messagingChannels.includes(channel);
73+
if (!Array.isArray(entry.messagingChannels) || !entry.messagingChannels.includes(channel)) {
74+
return false;
75+
}
76+
// A `channels stop` sandbox keeps the channel in messagingChannels so a later
77+
// `channels start` can recover, but its bridge is paused — the credential is
78+
// not in use, so it must not block another sandbox from claiming the token.
79+
return !(Array.isArray(entry.disabledChannels) && entry.disabledChannels.includes(channel));
7480
}
7581

7682
function conflictReasonForRequest(
@@ -198,7 +204,11 @@ export function findAllOverlaps(registry: ConflictRegistry): Array<{
198204
const byChannel = new Map<string, SandboxEntry[]>();
199205
for (const entry of sandboxes) {
200206
if (!Array.isArray(entry.messagingChannels)) continue;
207+
const disabled = new Set(
208+
Array.isArray(entry.disabledChannels) ? entry.disabledChannels : [],
209+
);
201210
for (const channel of entry.messagingChannels) {
211+
if (disabled.has(channel)) continue;
202212
const list = byChannel.get(channel) || [];
203213
list.push(entry);
204214
byChannel.set(channel, list);

src/lib/onboard.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6324,7 +6324,13 @@ async function createSandbox(
63246324
providerCredentialHashes:
63256325
Object.keys(providerCredentialHashes).length > 0 ? providerCredentialHashes : undefined,
63266326
policies: initialSandboxPolicy.appliedPresets,
6327-
messagingChannels: activeMessagingChannels,
6327+
// Persist the operator's configured channel set, not the post-disabled-filter
6328+
// active set. After `channels stop X` + rebuild, activeMessagingChannels drops
6329+
// X, but X is still configured — losing it here means a later `channels start
6330+
// X` has nothing to re-enable (the next rebuild sees an empty channel set and
6331+
// never reattaches the gateway bridge). See #3381.
6332+
messagingChannels:
6333+
enabledChannels != null ? [...new Set(enabledChannels)] : activeMessagingChannels,
63286334
messagingChannelConfig: messagingChannelConfig || undefined,
63296335
disabledChannels: disabledChannels.length > 0 ? [...disabledChannels] : undefined,
63306336
dashboardPort: actualDashboardPort,

test/onboard.test.ts

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6271,6 +6271,165 @@ const { createSandbox } = require(${onboardPath});
62716271
},
62726272
);
62736273

6274+
it(
6275+
"preserves disabled channels in the registry after a recreate so `channels start` can re-enable them (#3381)",
6276+
{ timeout: 60_000 },
6277+
async () => {
6278+
const repoRoot = path.join(import.meta.dirname, "..");
6279+
const tmpDir = fs.mkdtempSync(
6280+
path.join(os.tmpdir(), "nemoclaw-onboard-disabled-channels-preserve-"),
6281+
);
6282+
const fakeBin = path.join(tmpDir, "bin");
6283+
const scriptPath = path.join(tmpDir, "disabled-channels-preserve.js");
6284+
const onboardPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "onboard.js"));
6285+
const runnerPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "runner.js"));
6286+
const registryPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "state", "registry.js"));
6287+
const preflightPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "onboard", "preflight.js"));
6288+
const credentialsPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "credentials", "store.js"));
6289+
6290+
fs.mkdirSync(fakeBin, { recursive: true });
6291+
fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", {
6292+
mode: 0o755,
6293+
});
6294+
6295+
const script = String.raw`
6296+
const runner = require(${runnerPath});
6297+
const _n = (c) => (Array.isArray(c) ? c.join(" ") : String(c)).replace(/'/g, "");
6298+
const registry = require(${registryPath});
6299+
const preflight = require(${preflightPath});
6300+
const credentials = require(${credentialsPath});
6301+
const childProcess = require("node:child_process");
6302+
const { EventEmitter } = require("node:events");
6303+
const fs = require("node:fs");
6304+
6305+
const commands = [];
6306+
const registerCalls = [];
6307+
registry.registerSandbox({
6308+
name: "my-assistant",
6309+
messagingChannels: ["telegram"],
6310+
disabledChannels: ["telegram"],
6311+
providerCredentialHashes: { TELEGRAM_BOT_TOKEN: "hash-telegram" },
6312+
});
6313+
runner.run = (command, opts = {}) => {
6314+
const normalized = _n(command);
6315+
commands.push({ command: normalized, env: opts.env || null });
6316+
if (normalized.includes("provider get my-assistant-telegram-bridge")) return { status: 0 };
6317+
if (normalized.includes("provider get")) return { status: 1 };
6318+
return { status: 0 };
6319+
};
6320+
runner.runCapture = (command) => {
6321+
if (_n(command).includes("sandbox get my-assistant")) return "";
6322+
if (_n(command).includes("sandbox list")) return "my-assistant Ready";
6323+
{
6324+
const sandboxExecCurl = require(${onboardScriptMocksPath}).mockSandboxExecCurl(command);
6325+
if (sandboxExecCurl !== null) return sandboxExecCurl;
6326+
}
6327+
if (_n(command).includes("forward list")) return "my-assistant 127.0.0.1 18789 12345 running";
6328+
return "";
6329+
};
6330+
registry.registerSandbox = (entry) => {
6331+
registerCalls.push(entry);
6332+
return true;
6333+
};
6334+
registry.updateSandbox = () => true;
6335+
registry.setDefault = () => true;
6336+
registry.removeSandbox = () => true;
6337+
preflight.checkPortAvailable = async () => ({ ok: true });
6338+
credentials.prompt = async () => "";
6339+
6340+
childProcess.spawn = (...args) => {
6341+
const child = new EventEmitter();
6342+
child.stdout = new EventEmitter();
6343+
child.stderr = new EventEmitter();
6344+
const command = _n(args[1][1]);
6345+
const entry = { command, env: args[2]?.env || null };
6346+
const dockerfileMatch = command.match(/--from ([^ ]+Dockerfile)/);
6347+
if (dockerfileMatch) {
6348+
try {
6349+
entry.dockerfileContent = fs.readFileSync(dockerfileMatch[1], "utf-8");
6350+
} catch (error) {
6351+
entry.dockerfileReadError = String(error);
6352+
}
6353+
}
6354+
commands.push(entry);
6355+
process.nextTick(() => {
6356+
child.stdout.emit("data", Buffer.from("Created sandbox: my-assistant\n"));
6357+
child.emit("close", 0);
6358+
});
6359+
return child;
6360+
};
6361+
6362+
const { createSandbox } = require(${onboardPath});
6363+
6364+
(async () => {
6365+
process.env.OPENSHELL_GATEWAY = "nemoclaw";
6366+
delete process.env.TELEGRAM_BOT_TOKEN;
6367+
const sandboxName = await createSandbox(
6368+
null, "gpt-5.4", "nvidia-prod", null, "my-assistant", null, ["telegram"],
6369+
);
6370+
console.log(JSON.stringify({ sandboxName, commands, registerCalls }));
6371+
})().catch((error) => {
6372+
console.error(error);
6373+
process.exit(1);
6374+
});
6375+
`;
6376+
fs.writeFileSync(scriptPath, script);
6377+
6378+
const result = spawnSync(process.execPath, [scriptPath], {
6379+
cwd: repoRoot,
6380+
encoding: "utf-8",
6381+
env: {
6382+
...process.env,
6383+
HOME: tmpDir,
6384+
PATH: `${fakeBin}:${process.env.PATH || ""}`,
6385+
NEMOCLAW_NON_INTERACTIVE: "1",
6386+
TELEGRAM_BOT_TOKEN: "",
6387+
},
6388+
});
6389+
6390+
assert.equal(result.status, 0, result.stderr);
6391+
const payloadLine = result.stdout
6392+
.trim()
6393+
.split("\n")
6394+
.slice()
6395+
.reverse()
6396+
.find((line) => line.startsWith("{") && line.endsWith("}"));
6397+
assert.ok(payloadLine, `expected JSON payload in stdout:\n${result.stdout}`);
6398+
const payload = JSON.parse(payloadLine);
6399+
6400+
const createCommand = payload.commands.find((entry: CommandEntry) =>
6401+
entry.command.includes("sandbox create"),
6402+
);
6403+
assert.ok(createCommand, "expected sandbox create command");
6404+
assert.equal(createCommand.dockerfileReadError, undefined);
6405+
6406+
const channelsLine = createCommand.dockerfileContent
6407+
?.split("\n")
6408+
.find((line: string) => line.startsWith("ARG NEMOCLAW_MESSAGING_CHANNELS_B64="));
6409+
assert.ok(channelsLine, "expected messaging build arg in Dockerfile");
6410+
const bakedChannels = JSON.parse(
6411+
Buffer.from(channelsLine.split("=")[1], "base64").toString(),
6412+
);
6413+
assert.deepEqual(bakedChannels, [], "disabled channel must not be baked into the image");
6414+
assert.doesNotMatch(
6415+
createCommand.command,
6416+
/--provider my-assistant-telegram-bridge/,
6417+
"disabled channel's bridge must not be attached to the new sandbox",
6418+
);
6419+
6420+
assert.deepEqual(
6421+
payload.registerCalls[0]?.messagingChannels,
6422+
["telegram"],
6423+
"registry.messagingChannels must keep the disabled-but-configured channel so `channels start` can recover it",
6424+
);
6425+
assert.deepEqual(
6426+
payload.registerCalls[0]?.disabledChannels,
6427+
["telegram"],
6428+
"registry.disabledChannels must round-trip through the rebuild",
6429+
);
6430+
},
6431+
);
6432+
62746433
it("aborts onboard when a messaging provider upsert fails", { timeout: 60_000 }, async () => {
62756434
const repoRoot = path.join(import.meta.dirname, "..");
62766435
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-provider-fail-"));

0 commit comments

Comments
 (0)