Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion dispatcher-strategies.js
Original file line number Diff line number Diff line change
Expand Up @@ -1206,11 +1206,16 @@ export async function executeAgent(job, ctx, deps) {
const {
waitForGateway, updateRunSession, setAgentStatus,
buildJobPrompt, runAgentTurnWithActivityTimeout,
// Sanctioned isolated dispatch primitive. Falls back to the activity-aware
// runner when callers (e.g. tests) wire only the older name -- both helpers
// share the same HTTP-only contract, no subprocess spawn.
runIsolatedAgentTurn,
updateContextSummary, releaseDispatch, releaseIdempotencyKey,
updateJob, matchesSentinel, detectTransientError,
listSessions,
sqliteNow, log,
} = deps;
const dispatchAgentTurn = runIsolatedAgentTurn || runAgentTurnWithActivityTimeout;
const result = makeDefaultResult();

// Gateway health check
Expand Down Expand Up @@ -1304,7 +1309,12 @@ export async function executeAgent(job, ctx, deps) {
}
}

const turnResult = await runAgentTurnWithActivityTimeout({
// Isolated dispatch primitive: HTTP-only chat completions call. The
// scheduler must never fork a sibling `openclaw` process to spawn an
// isolated session -- that variant has historically SIGTERM'd the
// launchd-tracked gateway parent and orphaned a node process on port
// 18789 (see ISOLATED_DISPATCH_PRIMITIVE in gateway.js).
const turnResult = await dispatchAgentTurn({
message: prompt,
agentId: job.agent_id || 'main',
sessionKey,
Expand Down
7 changes: 6 additions & 1 deletion dispatcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ import {
import { buildRetrievalContext } from './retrieval.js';
import { upsertAgent, setAgentStatus } from './agents.js';
import {
runAgentTurnWithActivityTimeout, sendSystemEvent, getAllSubAgentSessions, listSessions,
runAgentTurnWithActivityTimeout, runIsolatedAgentTurn,
sendSystemEvent, getAllSubAgentSessions, listSessions,
deliverMessage, checkGatewayHealth, waitForGateway, resolveDeliveryAlias,
applyAuthProfileToSessionStore,
syncAuthStoreToSession,
Expand Down Expand Up @@ -306,6 +307,10 @@ function buildDispatchDeps() {
// Agent
waitForGateway, updateRunSession, setAgentStatus,
buildJobPrompt, runAgentTurnWithActivityTimeout,
// Isolated cron-dispatch primitive: HTTP-only wrapper around the
// chat-completions API; never forks a sibling openclaw process that
// could SIGTERM the launchd-tracked gateway parent.
runIsolatedAgentTurn,
updateContextSummary, releaseIdempotencyKey,
matchesSentinel, detectTransientError,
listSessions,
Expand Down
39 changes: 39 additions & 0 deletions gateway.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@ const GATEWAY_URL = process.env.OPENCLAW_GATEWAY_URL || 'http://127.0.0.1:18789'
const HOME_DIR = process.env.HOME || homedir();
export const TELEGRAM_MAX_MESSAGE_LENGTH = 4096;

// -- Isolated dispatch primitive contract --------------------
//
// Cron jobs with session_target=isolated must reach the gateway via the
// public HTTP API only. Forking a sibling `openclaw` process to spawn the
// session is rejected: in production that primitive has SIGTERM'd the
// launchd-tracked gateway parent (the child inherits the parent's listening
// socket on port 18789 and the parent dies), leaving an orphan node process
// holding the port. See rh-bot.lan zombie-cascade incident report.
//
// runIsolatedAgentTurn is the only sanctioned dispatch primitive for
// session_target=isolated cron jobs. It MUST NOT spawn, fork, or exec any
// child process. Any future change that needs subprocess execution belongs
// behind a different, explicitly-named helper so reviewers can keep this
// contract intact.
export const ISOLATED_DISPATCH_PRIMITIVE = 'http-chat-completions';

let _cachedToken;
let _tokenLoaded = false;

Expand Down Expand Up @@ -246,6 +262,29 @@ export async function runAgentTurnWithActivityTimeout(opts) {
}
}

// -- Isolated dispatch primitive -----------------------------

/**
* Sanctioned dispatch primitive for session_target=isolated cron jobs.
*
* This is a thin wrapper around runAgentTurnWithActivityTimeout that names
* the contract: HTTP-only request to the gateway, no child process spawn.
* The scheduler routes every session_target=isolated job through this
* helper so the no-fork invariant is reviewable at one call site and
* testable in isolation (see the no-subprocess regression test in test.js).
*
* Why a named wrapper instead of calling runAgentTurnWithActivityTimeout
* directly: the dispatch primitive is the load-bearing surface that the
* rh-bot.lan zombie-on-port outage cascaded through. A named entry point
* gives operators and reviewers a single grep target ("runIsolatedAgentTurn")
* to audit the no-spawn invariant.
*
* Accepts the same options as runAgentTurnWithActivityTimeout.
*/
export async function runIsolatedAgentTurn(opts) {
return await runAgentTurnWithActivityTimeout(opts);
}

// -- System Events (main session) ----------------------------

/**
Expand Down
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

165 changes: 161 additions & 4 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3625,12 +3625,27 @@ console.log('\n-- Sync Auth Store Integration with executeAgent --');
const strategiesSrc = readFileSync(join(import.meta.dirname || '.', 'dispatcher-strategies.js'), 'utf-8');
const syncCallIdx = strategiesSrc.indexOf('syncAuthStoreToSession: syncAuth');
assert(syncCallIdx > -1, 'dispatcher-strategies.js destructures syncAuthStoreToSession');
// Find the actual syncAuth() invocation and the actual turnResult = await runAgentTurnWithActivityTimeout() call
// Find the actual syncAuth() invocation and the dispatch-primitive invocation.
// executeAgent calls the sanctioned isolated dispatch primitive via a local
// alias `dispatchAgentTurn` that resolves to runIsolatedAgentTurn (or its
// legacy alias runAgentTurnWithActivityTimeout) -- accept either name so the
// ordering invariant stays meaningful as the helper is renamed.
const syncInvokeIdx = strategiesSrc.indexOf('syncAuth(job.agent_id');
const turnInvokeIdx = strategiesSrc.indexOf('await runAgentTurnWithActivityTimeout(');
const turnInvokeIdx = (() => {
const candidates = [
'await dispatchAgentTurn(',
'await runIsolatedAgentTurn(',
'await runAgentTurnWithActivityTimeout(',
];
for (const needle of candidates) {
const idx = strategiesSrc.indexOf(needle);
if (idx > -1) return idx;
}
return -1;
})();
assert(syncInvokeIdx > -1, 'syncAuth invocation found');
assert(turnInvokeIdx > -1, 'runAgentTurnWithActivityTimeout invocation found');
assert(turnInvokeIdx > syncInvokeIdx, 'syncAuth is called before runAgentTurnWithActivityTimeout');
assert(turnInvokeIdx > -1, 'isolated dispatch primitive invocation found');
assert(turnInvokeIdx > syncInvokeIdx, 'syncAuth is called before the isolated dispatch primitive');

// Verify the sync call is NOT inside an if(resolvedAuthProfile) guard
// Extract the syncAuth call context (the ~20 lines around it)
Expand All @@ -3647,6 +3662,148 @@ console.log('\n-- Sync Auth Store Integration with executeAgent --');
console.log(' sync auth store integration with executeAgent: pass');
}

console.log('\n-- Isolated dispatch primitive: no subprocess spawn --');
{
// Regression guard. The session_target=isolated cron dispatch path must
// reach the gateway via the HTTP /v1/chat/completions endpoint only. A
// prior variant forked a sibling `openclaw` CLI to spawn the session and
// SIGTERM'd the launchd-tracked gateway parent (rh-bot.lan zombie-on-port
// outage, ~30 cascades per week). This test pins the no-fork contract two
// ways:
// 1. Source check -- the isolated dispatch primitive defined in
// gateway.js (runIsolatedAgentTurn) and the executeAgent strategy
// neither import nor reference child_process.
// 2. Behavior check -- executeAgent dispatched against the real gateway
// HTTP client hits /v1/chat/completions via fetch and surfaces the
// stubbed assistant reply.

const { executeAgent } = await import('./dispatcher-strategies.js');
const gateway = await import('./gateway.js');

// 1. Source-level check: the isolated dispatch primitive must not pull in
// child_process. Note: gateway.js still imports execFileSync for the
// legacy executeMain/fire-and-forget sendSystemEvent path, so the assertion
// narrows to the primitive's own definition rather than the whole module.
const gatewaySrc = readFileSync(join(import.meta.dirname || '.', 'gateway.js'), 'utf-8');
const strategiesSrc = readFileSync(join(import.meta.dirname || '.', 'dispatcher-strategies.js'), 'utf-8');

const isolatedFnStart = gatewaySrc.indexOf('export async function runIsolatedAgentTurn');
assert(isolatedFnStart > -1, 'runIsolatedAgentTurn is defined in gateway.js');
// Capture the function body bounded by the next top-level export so the
// check is local to the primitive rather than the surrounding module.
const isolatedFnEnd = (() => {
const after = gatewaySrc.indexOf('\nexport ', isolatedFnStart + 1);
return after === -1 ? gatewaySrc.length : after;
})();
const isolatedFnSrc = gatewaySrc.slice(isolatedFnStart, isolatedFnEnd);
for (const banned of ['child_process', 'execFile', 'spawn(', 'fork(', 'execSync']) {
assert(!isolatedFnSrc.includes(banned),
`runIsolatedAgentTurn body does not reference ${banned}`);
}

const executeAgentStart = strategiesSrc.indexOf('export async function executeAgent');
assert(executeAgentStart > -1, 'executeAgent is defined in dispatcher-strategies.js');
const executeAgentEnd = (() => {
const after = strategiesSrc.indexOf('\nexport ', executeAgentStart + 1);
return after === -1 ? strategiesSrc.length : after;
})();
const executeAgentSrc = strategiesSrc.slice(executeAgentStart, executeAgentEnd);
for (const banned of ['child_process', 'execFile', 'spawn(', 'fork(', 'execSync']) {
assert(!executeAgentSrc.includes(banned),
`executeAgent body does not reference ${banned}`);
}

// 2. Behavior check: stub global fetch so the dispatch primitive can run
// without contacting a real gateway. The stub records every URL so we can
// assert /v1/chat/completions was hit and no other dispatch transport was
// used. Set a sentinel gateway token before triggering the first fetch so
// gateway.js seeds its module-level token cache with a real value rather
// than null; otherwise downstream tests that swap globalThis.fetch later
// see the cached null token and stop emitting the auth/scope headers they
// assert on.
if (!process.env.OPENCLAW_GATEWAY_TOKEN) {
process.env.OPENCLAW_GATEWAY_TOKEN = 'isolated-dispatch-test-token';
}
Comment on lines +3724 to +3726
const fetchCalls = [];
const originalFetch = globalThis.fetch;
globalThis.fetch = async (url, init) => {
fetchCalls.push({ url: String(url), method: init?.method || 'GET' });
return {
ok: true,
status: 200,
headers: { get: () => null },
json: async () => ({
choices: [{ message: { role: 'assistant', content: 'isolated-ok' } }],
usage: { total_tokens: 7 },
}),
text: async () => '',
};
};

let executionError = null;
let executionResult = null;
try {
const job = {
id: 'isolated-no-spawn',
name: 'isolated-no-spawn',
agent_id: 'main',
session_target: 'isolated',
payload_kind: 'agentTurn',
payload_message: 'noop',
delivery_mode: 'none',
run_timeout_ms: 10_000,
payload_timeout_seconds: 5,
};
const ctx = {
run: { id: 'run-isolated-no-spawn', started_at: new Date().toISOString() },
dispatchRecord: null,
idemKey: null,
v02Outcomes: null,
};
const deps = {
waitForGateway: async () => true,
updateRunSession: () => {},
setAgentStatus: () => {},
buildJobPrompt: () => ({ prompt: 'noop', contextMeta: {} }),
// Using the real exported helper proves the production code path goes
// through HTTP, not a test-local mock.
runIsolatedAgentTurn: gateway.runIsolatedAgentTurn,
runAgentTurnWithActivityTimeout: gateway.runAgentTurnWithActivityTimeout,
updateContextSummary: () => {},
releaseDispatch: () => {},
releaseIdempotencyKey: () => {},
updateJob: () => {},
matchesSentinel: () => false,
detectTransientError: () => false,
listSessions: async () => ({ result: { sessions: [] } }),
sqliteNow: (offsetMs = 0) => new Date(Date.now() + offsetMs).toISOString(),
log: () => {},
finishRun: () => {},
syncAuthStoreToSession: () => ({ ok: true }),
applyAuthProfileToSessionStore: () => ({ ok: true }),
};
executionResult = await executeAgent(job, ctx, deps);
} catch (err) {
executionError = err;
} finally {
globalThis.fetch = originalFetch;
}
Comment on lines +3788 to +3790

assert(executionError === null, `executeAgent ran without error (got: ${executionError?.message || 'none'})`);
const chatCall = fetchCalls.find(c => c.url.includes('/v1/chat/completions'));
assert(chatCall !== undefined, 'isolated dispatch primitive hit /v1/chat/completions via HTTP');
assert(chatCall?.method === 'POST', 'isolated dispatch primitive used HTTP POST');
assert(executionResult?.content === 'isolated-ok', 'executeAgent surfaced the assistant reply');
assert(executionResult?.status === 'ok', 'executeAgent reported ok status for the stubbed reply');

// Module-level contract marker is exported and stable so reviewers grepping
// for the primitive name find the dispatch path quickly.
assert(gateway.ISOLATED_DISPATCH_PRIMITIVE === 'http-chat-completions',
'gateway.ISOLATED_DISPATCH_PRIMITIVE names the HTTP primitive');
assert(typeof gateway.runIsolatedAgentTurn === 'function',
'gateway.runIsolatedAgentTurn is exported as the sanctioned isolated dispatch helper');
}

console.log('\n-- Migration Guard --');
{
const legacyDir = mkdtempSync(join(tmpdir(), 'scheduler-migrate-'));
Expand Down