Skip to content

Commit 26e8ef5

Browse files
Merge pull request #1039 from heygen-com/fix/orphaned-child-processes
fix: clean up orphaned Chrome/ffmpeg on preview exit
2 parents f2e2311 + 7e4ce96 commit 26e8ef5

13 files changed

Lines changed: 396 additions & 26 deletions

File tree

packages/cli/src/commands/preview.ts

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import {
2828
killActiveServers,
2929
type FindPortResult,
3030
} from "../server/portUtils.js";
31+
import { killOrphanedProcesses, killProcessTree } from "../utils/orphanCleanup.js";
3132

3233
export default defineCommand({
3334
meta: { name: "preview", description: "Start the studio for previewing compositions" },
@@ -96,6 +97,14 @@ export default defineCommand({
9697
return;
9798
}
9899

100+
// Kill orphaned Chrome processes (chrome-headless-shell and Puppeteer-launched Chrome) left behind by previously crashed sessions.
101+
const orphansKilled = killOrphanedProcesses();
102+
if (orphansKilled > 0) {
103+
console.log(
104+
` ${c.dim(`Cleaned up ${orphansKilled} orphaned process${orphansKilled === 1 ? "" : "es"} from a previous session.`)}`,
105+
);
106+
}
107+
99108
const rawArg = args.dir;
100109
const dir = resolve(rawArg ?? ".");
101110

@@ -249,8 +258,18 @@ async function runDevMode(
249258
});
250259
}
251260

252-
// Wait for child to exit. Ctrl+C sends SIGINT to the entire process group,
253-
// so the child (Vite) receives it directly — no need to intercept or forward.
261+
// Kill the child's entire process tree on SIGTERM/SIGINT. Ctrl+C sends
262+
// SIGINT to the foreground process group (covers the common case), but
263+
// `kill <pid>` only targets this process — the child tree (Vite + Chrome)
264+
// would survive without explicit cleanup.
265+
// On Windows, killProcessTree is a no-op (pgrep/ps unavailable); Ctrl+C
266+
// propagates via the console process group instead.
267+
const shutdown = () => {
268+
if (child.pid) killProcessTree(child.pid);
269+
};
270+
process.once("SIGINT", shutdown);
271+
process.once("SIGTERM", shutdown);
272+
254273
return new Promise<void>((resolve) => {
255274
child.on("close", () => resolve());
256275
});
@@ -349,6 +368,13 @@ async function runLocalStudioMode(
349368
});
350369
}
351370

371+
// Same tree-kill handler as dev mode. No-op on Windows (see comment above).
372+
const shutdown = () => {
373+
if (child.pid) killProcessTree(child.pid);
374+
};
375+
process.once("SIGINT", shutdown);
376+
process.once("SIGTERM", shutdown);
377+
352378
return new Promise<void>((resolve) => {
353379
child.on("close", () => resolve());
354380
});
@@ -477,21 +503,42 @@ async function runEmbeddedMode(
477503
shuttingDown = true;
478504
process.off("SIGINT", shutdown);
479505
process.off("SIGTERM", shutdown);
480-
// Close the readline interface so a second Ctrl+C during the grace
481-
// period below doesn't re-emit SIGINT and trigger Node's default
482-
// exit-130 behaviour, contradicting our intent to exit cleanly.
483506
rl?.close();
484-
// `server.close()` can take a second or two to drain keep-alive
485-
// connections; surface progress so the terminal doesn't look frozen.
486507
console.log();
487508
console.log(` ${c.dim("Shutting down studio...")}`);
488-
result.server.close(() => resolveRun());
489-
// If close() hangs on an open connection, force exit after a short
490-
// grace period. Exit 0 because user-initiated Ctrl+C isn't an error
491-
// — a non-zero code makes pnpm / npm print ELIFECYCLE.
492-
setTimeout(() => process.exit(0), 2000).unref();
509+
510+
// Hard deadline: if cleanup hangs (e.g. dead Chrome never responds to
511+
// browser.close()), force exit. Armed before awaiting cleanup so it
512+
// can't be blocked by a stuck drainBrowserPool().
513+
setTimeout(() => process.exit(0), 3000).unref();
514+
515+
// Kill ffmpeg first (sync, fast), then drain browsers (async, slower).
516+
const cleanup = async () => {
517+
const { closeThumbnailBrowser } = await import("../server/studioServer.js");
518+
const { drainBrowserPool, killTrackedProcesses } = await import("@hyperframes/engine");
519+
killTrackedProcesses();
520+
await closeThumbnailBrowser().catch(() => {});
521+
await drainBrowserPool().catch(() => {});
522+
};
523+
524+
cleanup()
525+
.catch(() => {})
526+
.finally(() => {
527+
result.server.close(() => resolveRun());
528+
});
493529
};
494530
process.once("SIGINT", shutdown);
495531
process.once("SIGTERM", shutdown);
532+
533+
// Last-resort cleanup for crash paths (unhandled exceptions/rejections)
534+
// that bypass the signal handlers. Eagerly resolve the sync killer so
535+
// the 'exit' handler (which is synchronous) can call it directly.
536+
import("@hyperframes/engine")
537+
.then(({ killTrackedProcesses }) => {
538+
process.once("exit", () => {
539+
if (!shuttingDown) killTrackedProcesses();
540+
});
541+
})
542+
.catch(() => {});
496543
});
497544
}

packages/cli/src/server/studioServer.ts

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -148,16 +148,6 @@ async function getThumbnailBrowser(): Promise<import("puppeteer-core").Browser |
148148
_thumbnailBrowser = null;
149149
_thumbnailBrowserInitializing = null;
150150
});
151-
// Release the pool ref on process exit so the browser closes cleanly.
152-
const onExit = async () => {
153-
const { releaseBrowser } = await import("@hyperframes/engine");
154-
if (_thumbnailBrowser) {
155-
await releaseBrowser(_thumbnailBrowser).catch(() => {});
156-
_thumbnailBrowser = null;
157-
}
158-
};
159-
process.once("SIGTERM", () => void onExit());
160-
process.once("SIGINT", () => void onExit());
161151
return _thumbnailBrowser;
162152
} catch (err) {
163153
console.warn(
@@ -172,6 +162,15 @@ async function getThumbnailBrowser(): Promise<import("puppeteer-core").Browser |
172162
return _thumbnailBrowserInitializing;
173163
}
174164

165+
/**
 * Release the cached thumbnail browser, if one is currently held.
 *
 * The module-level cache (`_thumbnailBrowser`) and the in-flight
 * initialisation slot (`_thumbnailBrowserInitializing`) are cleared before the
 * first `await`, so a re-entrant call returns early instead of releasing the
 * same browser twice. Errors from `releaseBrowser` are swallowed: this runs on
 * shutdown paths where best-effort cleanup is all that is wanted.
 */
export async function closeThumbnailBrowser(): Promise<void> {
  const held = _thumbnailBrowser;
  if (held) {
    _thumbnailBrowser = null;
    _thumbnailBrowserInitializing = null;

    const { releaseBrowser } = await import("@hyperframes/engine");
    try {
      await releaseBrowser(held);
    } catch {
      // Best-effort: a failed release must not abort shutdown.
    }
  }
}
173+
175174
// ── Server factory ──────────────────────────────────────────────────────────
176175

177176
export interface StudioServerOptions {
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { describe, it, expect } from "vitest";
2+
import { spawn } from "node:child_process";
3+
import { killProcessTree, killOrphanedProcesses } from "./orphanCleanup.js";
4+
5+
const IS_UNIX = process.platform !== "win32";
6+
7+
describe.skipIf(!IS_UNIX)("killProcessTree", () => {
  it("kills a process and all its children", async () => {
    // stdout is piped and inherited by both background `sleep` children.
    // Node emits "close" only after the process has ended AND its stdio
    // streams have closed, so "close" cannot fire while any child still holds
    // the pipe open. If only the parent were killed, this test would hit the
    // 5s timeout instead of passing.
    const parent = spawn("bash", ["-c", "sleep 60 & sleep 60 & echo ready; wait"], {
      stdio: ["ignore", "pipe", "ignore"],
    });
    const closed = new Promise<void>((resolve) => parent.on("close", () => resolve()));

    // "ready" is printed after both background jobs were forked, which
    // replaces a fixed (and racy) start-up sleep.
    await new Promise<void>((resolve) => parent.stdout.once("data", () => resolve()));
    parent.stdout.resume();

    killProcessTree(parent.pid!);

    await closed;

    // Verify parent is dead
    expect(() => process.kill(parent.pid!, 0)).toThrow();
  }, 5000);

  it("handles non-existent PID gracefully", () => {
    // Should not throw for a PID that doesn't exist
    expect(() => killProcessTree(999999999)).not.toThrow();
  });

  it("escalates to SIGKILL after grace period", async () => {
    // Ignore SIGTERM, announce readiness, then `exec` so the ignored
    // disposition is inherited by a single long-running process. Waiting for
    // "ready" guarantees the trap is installed before the signal is sent;
    // otherwise plain SIGTERM could end the process and the test would pass
    // without exercising the escalation path.
    const proc = spawn("bash", ["-c", "trap '' TERM; echo ready; exec sleep 60"], {
      stdio: ["ignore", "pipe", "ignore"],
    });
    const exited = new Promise<NodeJS.Signals | null>((resolve) =>
      proc.on("exit", (_code, signal) => resolve(signal)),
    );
    await new Promise<void>((resolve) => proc.stdout.once("data", () => resolve()));
    proc.stdout.resume();

    killProcessTree(proc.pid!);

    // SIGTERM is ignored, so the only thing that can end the process is the
    // SIGKILL sent after the 500ms grace period.
    expect(await exited).toBe("SIGKILL");
    expect(() => process.kill(proc.pid!, 0)).toThrow();
  }, 5000);
});
41+
42+
describe.skipIf(!IS_UNIX)("killOrphanedProcesses", () => {
  it("returns a non-negative integer count", () => {
    // The exact number depends on what is running on the host (a developer
    // machine may legitimately have real orphans), so only the contract of
    // the return value is asserted here.
    const killed = killOrphanedProcesses();
    expect(Number.isInteger(killed)).toBe(true);
    expect(killed).toBeGreaterThanOrEqual(0);
  });

  it("does not kill non-orphaned processes that match the name patterns", async () => {
    // Decoy: a live child of this test process whose command line carries the
    // Puppeteer profile marker. Its PPID is this process, not 1, so it must
    // be left alone.
    // NOTE(review): if the test runner itself is PID 1 (e.g. a container
    // without an init process) the decoy's PPID is 1 and it is treated as an
    // orphan. Confirm CI does not run this way.
    const decoy = spawn(
      process.execPath,
      ["-e", "setTimeout(() => {}, 60000)", "puppeteer_dev_chrome_profile-test"],
      { stdio: "ignore" },
    );
    try {
      await new Promise((r) => setTimeout(r, 200));

      killOrphanedProcesses();

      // Any SIGTERM would have been sent synchronously; allow time for it to
      // be delivered and the exit to be observed before checking.
      await new Promise((r) => setTimeout(r, 200));
      expect(decoy.exitCode).toBeNull();
      expect(decoy.signalCode).toBeNull();
    } finally {
      decoy.kill("SIGKILL");
    }
  }, 5000);
});
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import { execSync } from "node:child_process";
2+
3+
/**
 * Command-line substrings that identify Chrome instances to clean up:
 * chrome-headless-shell (production/CI, both spellings) and Google Chrome
 * launched by Puppeteer (dev mode), recognised by the
 * `puppeteer_dev_chrome_profile` marker in its user-data-dir argument.
 *
 * Every entry is interpolated unquoted into a shell command, so entries must
 * stay free of shell metacharacters.
 */
const ORPHAN_COMMAND_PATTERNS: readonly string[] = [
  "chrome-headless-shell",
  "chrome_headless_shell",
  "puppeteer_dev_chrome_profile",
];

/**
 * Find and kill orphaned Chrome processes from previous crashed sessions.
 *
 * An orphan is a process whose PPID=1 (reparented to init/launchd after
 * its parent died). The orphan's entire subtree is killed so child helper
 * processes (GPU, renderer, network, etc.) are also cleaned up.
 *
 * Scoped to the current user via `pgrep -u` to avoid touching other
 * users' processes on shared machines.
 *
 * Candidate PIDs are collected across all patterns before anything is killed.
 * A single process can match more than one pattern (for example a
 * chrome-headless-shell started with a `puppeteer_dev_chrome_profile-*` data
 * dir); collecting first ensures it is signalled and counted exactly once.
 *
 * @returns The count of killed process trees. Always 0 on Windows.
 */
export function killOrphanedProcesses(): number {
  if (process.platform === "win32") return 0;

  const candidates = new Set<number>();
  for (const pattern of ORPHAN_COMMAND_PATTERNS) {
    for (const pid of findPidsByCommandLine(pattern)) {
      candidates.add(pid);
    }
  }

  let killed = 0;
  for (const pid of candidates) {
    // Never target ourselves, even if our own command line happens to match.
    if (pid === process.pid || !isOrphan(pid)) continue;
    killProcessTree(pid);
    killed++;
  }
  return killed;
}

/**
 * Kill an entire process tree rooted at `pid`. Descendants are signalled
 * before their ancestors (the root goes last), preventing re-adoption races.
 *
 * Unless `signal` is already SIGKILL, every PID in the tree is sent SIGKILL
 * after a 500ms grace period. That timer is unref'd, so it does not by itself
 * keep the event loop alive.
 *
 * No-op on Windows — process groups are managed differently and
 * the pgrep/ps utilities are not available. Also a no-op for PIDs that are
 * not positive integers: on POSIX, pid 0 and negative pids address whole
 * process groups (pid 0 is the caller's own group) rather than one process.
 *
 * @param pid Root of the tree to kill.
 * @param signal Signal sent first; defaults to SIGTERM.
 */
export function killProcessTree(pid: number, signal: NodeJS.Signals = "SIGTERM"): void {
  if (process.platform === "win32") return;
  if (!Number.isSafeInteger(pid) || pid <= 0) return;

  // getDescendants lists each parent before its own children; reversing
  // yields children-before-parents, and the root is appended last.
  const targets = [...getDescendants(pid).reverse(), pid];

  signalAll(targets, signal);

  // Escalate to SIGKILL after a short grace period for any survivors.
  if (signal !== "SIGKILL") {
    setTimeout(() => signalAll(targets, "SIGKILL"), 500).unref();
  }
}

/** Send `signal` to every PID, ignoring processes that have already exited. */
function signalAll(pids: readonly number[], signal: NodeJS.Signals): void {
  for (const target of pids) {
    try {
      process.kill(target, signal);
    } catch {
      // Already exited.
    }
  }
}

/**
 * Run a shell command and return its trimmed stdout, or `null` when the
 * command fails, exits non-zero, or exceeds `timeoutMs`.
 *
 * stderr is discarded. With execSync's default stdio the child's stderr is
 * forwarded to this process's stderr, which would print shell errors to the
 * user's terminal whenever pgrep/ps is unavailable.
 */
function captureStdout(command: string, timeoutMs: number): string | null {
  try {
    return execSync(command, {
      encoding: "utf-8",
      timeout: timeoutMs,
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();
  } catch {
    return null;
  }
}

/** Parse newline-separated PIDs, dropping anything that is not a positive integer. */
function parsePidList(raw: string): number[] {
  return raw
    .split("\n")
    .map((line) => parseInt(line, 10))
    .filter((n) => !isNaN(n) && n > 0);
}

/**
 * List all descendants of `pid` via repeated `pgrep -P`, each parent
 * immediately followed by its own subtree.
 *
 * `seen` guards against visiting a PID twice (e.g. if a PID is recycled
 * between successive pgrep calls), which would otherwise recurse endlessly.
 */
function getDescendants(pid: number, seen: Set<number> = new Set([pid])): number[] {
  const raw = captureStdout(`pgrep -P ${pid}`, 2000);
  if (!raw) return [];

  const found: number[] = [];
  for (const child of parsePidList(raw)) {
    if (seen.has(child)) continue;
    seen.add(child);
    found.push(child, ...getDescendants(child, seen));
  }
  return found;
}

/**
 * PIDs whose full command line matches `pattern` (`pgrep -f`), restricted to
 * the current user when the uid is available. Returns an empty list when
 * nothing matches or pgrep cannot be run.
 */
function findPidsByCommandLine(pattern: string): number[] {
  const uid = getUid();
  const userFlag = uid !== null ? `-u ${uid} ` : "";
  const raw = captureStdout(`pgrep ${userFlag}-f ${pattern}`, 3000);
  return raw ? parsePidList(raw) : [];
}

/**
 * Effective uid of this process as a string, or `null` where the platform
 * does not provide `process.geteuid`. This is the same value `id -u` prints,
 * obtained without spawning a subprocess.
 */
function getUid(): string | null {
  const euid = process.geteuid?.();
  return euid === undefined ? null : String(euid);
}

/**
 * Whether `pid` has been reparented to PID 1. Returns false when `ps` fails,
 * which includes the process no longer existing.
 */
function isOrphan(pid: number): boolean {
  return captureStdout(`ps -p ${pid} -o ppid=`, 2000) === "1";
}

packages/engine/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ export {
186186
type RunFfmpegResult,
187187
} from "./utils/runFfmpeg.js";
188188

189+
export { trackChildProcess, killTrackedProcesses } from "./utils/processTracker.js";
190+
189191
export {
190192
decodePng,
191193
decodePngToRgb48le,

packages/engine/src/services/chunkEncoder.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import { spawn } from "child_process";
99
import { copyFileSync, existsSync, mkdirSync, readdirSync, statSync, writeFileSync } from "fs";
1010
import { join, dirname } from "path";
11+
import { trackChildProcess } from "../utils/processTracker.js";
1112
import { DEFAULT_CONFIG, type EngineConfig } from "../config.js";
1213
import {
1314
type GpuEncoder,
@@ -404,6 +405,7 @@ export async function encodeFramesFromDir(
404405

405406
return new Promise((resolve) => {
406407
const ffmpeg = spawn("ffmpeg", args);
408+
trackChildProcess(ffmpeg);
407409
let stderr = "";
408410
const onAbort = () => {
409411
ffmpeg.kill("SIGTERM");
@@ -535,6 +537,7 @@ export async function encodeFramesChunkedConcat(
535537
const args = buildEncoderArgs(options, inputArgs, chunkPath, gpuEncoder);
536538
const chunkResult = await new Promise<{ success: boolean; error?: string }>((resolve) => {
537539
const ffmpeg = spawn("ffmpeg", args);
540+
trackChildProcess(ffmpeg);
538541
let stderr = "";
539542
ffmpeg.stderr.on("data", (d) => {
540543
stderr += d.toString();
@@ -578,6 +581,7 @@ export async function encodeFramesChunkedConcat(
578581
];
579582
const concatResult = await new Promise<{ success: boolean; error?: string }>((resolve) => {
580583
const ffmpeg = spawn("ffmpeg", concatArgs);
584+
trackChildProcess(ffmpeg);
581585
let stderr = "";
582586
ffmpeg.stderr.on("data", (d) => {
583587
stderr += d.toString();

packages/engine/src/services/streamingEncoder.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
*/
1414

1515
import { spawn, type ChildProcess } from "child_process";
16+
import { trackChildProcess } from "../utils/processTracker.js";
1617
import { existsSync, mkdirSync, statSync } from "fs";
1718
import { dirname } from "path";
1819

@@ -375,6 +376,7 @@ export async function spawnStreamingEncoder(
375376
const ffmpeg: ChildProcess = spawn("ffmpeg", args, {
376377
stdio: ["pipe", "pipe", "pipe"],
377378
});
379+
trackChildProcess(ffmpeg);
378380

379381
let exitStatus: "running" | "success" | "error" = "running";
380382
let stderr = "";

0 commit comments

Comments
 (0)