Skip to content

Commit 5d65115

Browse files
committed
fix: harden node runtime cleanup
1 parent 83042cc commit 5d65115

21 files changed

Lines changed: 617 additions & 522 deletions

.github/actions/setup-node-pnpm/action.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ inputs:
55
node-version:
66
description: 'Node.js version'
77
required: false
8-
default: '22'
8+
default: '22.19'
99
pnpm-version:
1010
description: 'pnpm version'
1111
required: false
12-
default: '9.12.2'
12+
default: '10.33.2'
1313
install-deps:
1414
description: 'Whether to install dependencies with pnpm install --frozen-lockfile'
1515
required: false

package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"url": "https://github.com/callstackincubator/agent-device/issues"
1414
},
1515
"type": "module",
16+
"packageManager": "pnpm@10.33.2",
1617
"main": "dist/src/index.js",
1718
"types": "dist/src/index.d.ts",
1819
"exports": {
@@ -58,7 +59,7 @@
5859
}
5960
},
6061
"engines": {
61-
"node": ">=22"
62+
"node": ">=22.19"
6263
},
6364
"bin": {
6465
"agent-device": "bin/agent-device.mjs"
@@ -75,7 +76,7 @@
7576
"build:all": "pnpm build:node && pnpm build:xcuitest",
7677
"ad": "node bin/agent-device.mjs",
7778
"lint": "oxlint . --deny-warnings",
78-
"format": "oxfmt --write src test skills package.json tsconfig.json .oxlintrc.json .oxfmtrc.json '!test/skillgym/.skillgym-results/**'",
79+
"format": "oxfmt --write src test skills package.json tsconfig.json tsconfig.lib.json rslib.config.ts vitest.config.ts .github/actions/setup-node-pnpm/action.yml .oxlintrc.json .oxfmtrc.json '!test/skillgym/.skillgym-results/**'",
7980
"fallow": "fallow --summary",
8081
"fallow:baseline": "(fallow dead-code --save-baseline fallow-baselines/dead-code.json --summary || true) && (fallow dupes --save-baseline fallow-baselines/dupes.json --summary || true) && (fallow health --save-baseline fallow-baselines/health.json --summary || true)",
8182
"check:fallow": "fallow audit",

src/__tests__/client-companion-tunnel-worker.test.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,15 @@ function createDeferred<T>(): Deferred<T> {
3838
}
3939

4040
function waitFor<T>(promise: Promise<T>, timeoutMs: number, label: string): Promise<T> {
41-
return Promise.race([
42-
promise,
43-
delay(timeoutMs).then(() => {
44-
throw new Error(`Timed out waiting for ${label}.`);
45-
}),
46-
]);
41+
let timeout: NodeJS.Timeout | null = null;
42+
const timeoutPromise = new Promise<never>((_, reject) => {
43+
timeout = setTimeout(() => {
44+
reject(new Error(`Timed out waiting for ${label}.`));
45+
}, timeoutMs);
46+
});
47+
return Promise.race([promise, timeoutPromise]).finally(() => {
48+
if (timeout) clearTimeout(timeout);
49+
});
4750
}
4851

4952
function encodeTextFrame(text: string): Buffer {

src/__tests__/client-metro.test.ts

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import os from 'node:os';
1010
import path from 'node:path';
1111
import { prepareMetroRuntime, reloadMetro } from '../client-metro.ts';
1212
import { AppError } from '../utils/errors.ts';
13+
import { isProcessAlive, waitForProcessExit } from '../utils/process-identity.ts';
1314

1415
const TEST_TOKEN = 'agent-device-proxy-test-token';
1516

@@ -163,11 +164,7 @@ test('prepareMetroRuntime starts Metro, bridges through proxy, and writes runtim
163164
socket.destroy();
164165
}
165166
await closeServer(proxyServer);
166-
if (pid) {
167-
try {
168-
process.kill(pid);
169-
} catch {}
170-
}
167+
await stopProcess(pid);
171168
rmSync(tempRoot, { recursive: true, force: true });
172169
}
173170
});
@@ -326,3 +323,19 @@ async function closeServer(server: ReturnType<typeof createServer>): Promise<voi
326323
});
327324
});
328325
}
326+
327+
async function stopProcess(pid: number): Promise<void> {
328+
if (!pid || !isProcessAlive(pid)) return;
329+
try {
330+
process.kill(pid, 'SIGTERM');
331+
} catch {
332+
return;
333+
}
334+
if (await waitForProcessExit(pid, 1_500)) return;
335+
try {
336+
process.kill(pid, 'SIGKILL');
337+
} catch {
338+
return;
339+
}
340+
await waitForProcessExit(pid, 1_500);
341+
}

src/daemon-client.ts

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import http from 'node:http';
33
import https from 'node:https';
44
import fs from 'node:fs';
55
import path from 'node:path';
6+
import { pipeline } from 'node:stream/promises';
67
import { sleep } from './utils/timeouts.ts';
78
import { AppError, toAppErrorCode } from './utils/errors.ts';
89
import type {
@@ -1334,13 +1335,6 @@ export async function downloadRemoteArtifact(params: {
13341335
});
13351336
return;
13361337
}
1337-
const output = fs.createWriteStream(params.destinationPath);
1338-
output.on('error', (error) => {
1339-
settle(error instanceof Error ? error : new Error(String(error)));
1340-
});
1341-
res.on('error', (error) => {
1342-
settle(error instanceof Error ? error : new Error(String(error)));
1343-
});
13441338
res.on('aborted', () => {
13451339
settle(
13461340
new AppError('COMMAND_FAILED', 'Remote artifact download was interrupted', {
@@ -1349,10 +1343,10 @@ export async function downloadRemoteArtifact(params: {
13491343
}),
13501344
);
13511345
});
1352-
output.on('finish', () => {
1353-
output.close(() => settle());
1354-
});
1355-
res.pipe(output);
1346+
void pipeline(res, fs.createWriteStream(params.destinationPath)).then(
1347+
() => settle(),
1348+
(error: unknown) => settle(error instanceof Error ? error : new Error(String(error))),
1349+
);
13561350
},
13571351
);
13581352
const timeoutHandle = setTimeout(() => {

src/daemon.ts

Lines changed: 136 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ import { createDaemonHttpServer } from './daemon/http-server.ts';
88
import { trackDownloadableArtifact } from './daemon/artifact-tracking.ts';
99
import { LeaseRegistry } from './daemon/lease-registry.ts';
1010
import { createRequestHandler } from './daemon/request-router.ts';
11+
import { teardownSessionResources } from './daemon/handlers/session-close.ts';
12+
import {
13+
emitDiagnostic,
14+
flushDiagnosticsToSessionFile,
15+
withDiagnosticsScope,
16+
} from './utils/diagnostics.ts';
1117
import {
1218
acquireDaemonLock,
1319
parseIntegerEnv,
@@ -18,7 +24,14 @@ import {
1824
resolveDaemonCodeSignature,
1925
writeInfo,
2026
} from './daemon/server-lifecycle.ts';
21-
import { createSocketServer, listenHttpServer, listenNetServer } from './daemon/transport.ts';
27+
import {
28+
createSocketServer,
29+
listenHttpServer,
30+
listenNetServer,
31+
type DaemonServer,
32+
} from './daemon/transport.ts';
33+
34+
const DAEMON_SHUTDOWN_TIMEOUT_MS = 5_000;
2235

2336
const daemonPaths = resolveDaemonPaths(process.env.AGENT_DEVICE_STATE_DIR);
2437
const { baseDir, infoPath, lockPath, logPath, sessionsDir } = daemonPaths;
@@ -45,6 +58,115 @@ const handleRequest = createRequestHandler({
4558
trackDownloadableArtifact,
4659
});
4760

61+
function forceCloseServer(server: DaemonServer): void {
62+
server.destroyConnections?.();
63+
const closeAllConnections =
64+
'closeAllConnections' in server ? server.closeAllConnections : undefined;
65+
if (typeof closeAllConnections === 'function') {
66+
closeAllConnections.call(server);
67+
return;
68+
}
69+
const closeIdleConnections =
70+
'closeIdleConnections' in server ? server.closeIdleConnections : undefined;
71+
if (typeof closeIdleConnections === 'function') closeIdleConnections.call(server);
72+
}
73+
74+
async function closeDaemonServers(servers: DaemonServer[]): Promise<void> {
75+
await Promise.all(
76+
servers.map(async (server) => {
77+
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
78+
await new Promise<void>((resolve) => {
79+
timeoutHandle = setTimeout(() => {
80+
forceCloseServer(server);
81+
resolve();
82+
}, DAEMON_SHUTDOWN_TIMEOUT_MS);
83+
try {
84+
server.close(() => resolve());
85+
} catch {
86+
resolve();
87+
}
88+
});
89+
if (timeoutHandle) clearTimeout(timeoutHandle);
90+
}),
91+
);
92+
}
93+
94+
async function emitFatalDiagnostic(error: unknown): Promise<void> {
95+
await withDiagnosticsScope(
96+
{ command: 'daemon', session: 'daemon', logPath, debug: true },
97+
async () => {
98+
emitDiagnostic({
99+
level: 'error',
100+
phase: 'daemon_fatal',
101+
data: {
102+
error: error instanceof Error ? error.message : String(error),
103+
},
104+
});
105+
flushDiagnosticsToSessionFile({ force: true });
106+
},
107+
);
108+
}
109+
110+
async function teardownDaemonSessions(): Promise<void> {
111+
const sessionsToStop = sessionStore.toArray();
112+
for (const session of sessionsToStop) {
113+
await teardownSessionResources(session, session.name).catch((error) => {
114+
process.stderr.write(
115+
`Daemon session teardown error (${session.name}): ${
116+
error instanceof Error ? error.message : String(error)
117+
}\n`,
118+
);
119+
});
120+
sessionStore.writeSessionLog(session);
121+
sessionStore.delete(session.name);
122+
}
123+
}
124+
125+
async function openDaemonServers(): Promise<{
126+
servers: DaemonServer[];
127+
socketPort?: number;
128+
httpPort?: number;
129+
}> {
130+
const servers: DaemonServer[] = [];
131+
let socketPort: number | undefined;
132+
let httpPort: number | undefined;
133+
const startSocketServer = daemonServerMode !== 'http';
134+
const startHttpServer = daemonServerMode !== 'socket';
135+
if (startSocketServer) {
136+
const socketServer = createSocketServer(handleRequest);
137+
servers.push(socketServer);
138+
socketPort = await listenNetServer(socketServer);
139+
}
140+
141+
if (startHttpServer) {
142+
const httpServer = await createDaemonHttpServer({ handleRequest, token });
143+
servers.push(httpServer);
144+
httpPort = await listenHttpServer(httpServer);
145+
}
146+
return { servers, socketPort, httpPort };
147+
}
148+
149+
function publishDaemonInfo(socketPort: number | undefined, httpPort: number | undefined): void {
150+
writeInfo(baseDir, infoPath, logPath, {
151+
socketPort,
152+
httpPort,
153+
token,
154+
version,
155+
codeSignature: daemonCodeSignature,
156+
processStartTime: daemonProcessStartTime,
157+
});
158+
if (socketPort) process.stdout.write(`AGENT_DEVICE_DAEMON_PORT=${socketPort}\n`);
159+
if (httpPort) process.stdout.write(`AGENT_DEVICE_DAEMON_HTTP_PORT=${httpPort}\n`);
160+
}
161+
162+
function closeServersBestEffort(servers: DaemonServer[]): void {
163+
for (const server of servers) {
164+
try {
165+
server.close(() => {});
166+
} catch {}
167+
}
168+
}
169+
48170
async function start(): Promise<void> {
49171
const lockData = {
50172
pid: process.pid,
@@ -58,73 +180,34 @@ async function start(): Promise<void> {
58180
return;
59181
}
60182

61-
const servers: Array<{ close: (cb: (err?: Error) => void) => void }> = [];
62-
let socketPort: number | undefined;
63-
let httpPort: number | undefined;
64-
183+
let servers: DaemonServer[] = [];
65184
try {
66-
if (daemonServerMode === 'socket' || daemonServerMode === 'dual') {
67-
const socketServer = createSocketServer(handleRequest);
68-
servers.push(socketServer);
69-
socketPort = await listenNetServer(socketServer);
70-
}
71-
72-
if (daemonServerMode === 'http' || daemonServerMode === 'dual') {
73-
const httpServer = await createDaemonHttpServer({ handleRequest, token });
74-
servers.push(httpServer);
75-
httpPort = await listenHttpServer(httpServer);
76-
}
77-
78-
writeInfo(baseDir, infoPath, logPath, {
79-
socketPort,
80-
httpPort,
81-
token,
82-
version,
83-
codeSignature: daemonCodeSignature,
84-
processStartTime: daemonProcessStartTime,
85-
});
86-
if (socketPort) process.stdout.write(`AGENT_DEVICE_DAEMON_PORT=${socketPort}\n`);
87-
if (httpPort) process.stdout.write(`AGENT_DEVICE_DAEMON_HTTP_PORT=${httpPort}\n`);
185+
const opened = await openDaemonServers();
186+
servers = opened.servers;
187+
publishDaemonInfo(opened.socketPort, opened.httpPort);
88188
} catch (error) {
89189
const appErr = asAppError(error);
90190
process.stderr.write(`Daemon error: ${appErr.message}\n`);
91-
for (const server of servers) {
92-
try {
93-
server.close(() => {});
94-
} catch {}
95-
}
191+
closeServersBestEffort(servers);
96192
removeInfo(infoPath);
97193
releaseDaemonLock(lockPath);
98194
process.exit(1);
99195
return;
100196
}
101197

102198
let shuttingDown = false;
103-
const closeServers = async (): Promise<void> => {
104-
await Promise.all(
105-
servers.map(async (server) => {
106-
await new Promise<void>((resolve) => {
107-
try {
108-
server.close(() => resolve());
109-
} catch {
110-
resolve();
111-
}
112-
});
113-
}),
114-
);
115-
};
116-
const shutdown = async () => {
199+
const shutdown = async (options: { exitCode?: number; cause?: unknown } = {}) => {
117200
if (shuttingDown) return;
118201
shuttingDown = true;
119-
await closeServers();
120-
const sessionsToStop = sessionStore.toArray();
121-
for (const session of sessionsToStop) {
122-
sessionStore.writeSessionLog(session);
202+
if (options.cause) {
203+
await emitFatalDiagnostic(options.cause);
123204
}
205+
await closeDaemonServers(servers);
206+
await teardownDaemonSessions();
124207
await stopAllIosRunnerSessions();
125208
removeInfo(infoPath);
126209
releaseDaemonLock(lockPath);
127-
process.exit(0);
210+
process.exit(options.exitCode ?? 0);
128211
};
129212

130213
process.on('SIGINT', () => {
@@ -139,13 +222,13 @@ async function start(): Promise<void> {
139222
process.on('uncaughtException', (err) => {
140223
const appErr = err instanceof AppError ? err : asAppError(err);
141224
process.stderr.write(`Daemon error: ${appErr.message}\n`);
142-
void shutdown();
225+
void shutdown({ exitCode: 1, cause: err });
143226
});
144227
process.on('unhandledRejection', (reason) => {
145228
const err = reason instanceof Error ? reason : new Error(String(reason));
146229
const appErr = err instanceof AppError ? err : asAppError(err);
147230
process.stderr.write(`Daemon error: ${appErr.message}\n`);
148-
void shutdown();
231+
void shutdown({ exitCode: 1, cause: err });
149232
});
150233
}
151234

0 commit comments

Comments
 (0)