diff --git a/CHANGELOG.md b/CHANGELOG.md
index 340a4c9be7..15bc157680 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ After a release, run `/update-changelog` in Claude Code to analyze commits, writ
 #### Fixed
 
 - **[Pro]** **Fixed TanStack Router SSR hydration mismatches in the async path**: Client hydration now restores server match data before first render, uses `RouterProvider` directly to match the server-rendered tree, and stops the post-hydration load when a custom `router.options.hydrate` callback fails instead of continuing with partially hydrated client state. [PR 2932](https://github.com/shakacode/react_on_rails/pull/2932) by [justin808](https://github.com/justin808).
+- **[Pro]** **Fixed infinite fork loop when a node renderer worker fails to bind its port**: When a worker failed during `app.listen()` (e.g., `EADDRINUSE`), the master previously reforked unconditionally, causing an infinite fork/crash loop that consumed CPU and filled logs. Workers now send a `WORKER_STARTUP_FAILURE` IPC message to the master before exiting; the master sets an abort flag and exits with a clear error message instead of reforking. Scheduled restarts and runtime crashes continue to refork as before. [PR 2881](https://github.com/shakacode/react_on_rails/pull/2881) by [justin808](https://github.com/justin808).
 
 ### [16.6.0.rc.0] - 2026-04-01
 
diff --git a/packages/react-on-rails-pro-node-renderer/src/master.ts b/packages/react-on-rails-pro-node-renderer/src/master.ts
index 3030d1480d..4c804263d8 100644
--- a/packages/react-on-rails-pro-node-renderer/src/master.ts
+++ b/packages/react-on-rails-pro-node-renderer/src/master.ts
@@ -10,6 +10,7 @@ import { buildConfig, Config, logSanitizedConfig } from './shared/configBuilder.
 import restartWorkers from './master/restartWorkers.js';
 import * as errorReporter from './shared/errorReporter.js';
 import { getLicenseStatus } from './shared/licenseValidator.js';
+import { isWorkerStartupFailureMessage, type WorkerStartupFailureMessage } from './shared/workerMessages.js';
 
 const MILLISECONDS_IN_MINUTE = 60000;
 // How often to scan for orphaned upload directories.
@@ -77,22 +78,81 @@ export default function masterRun(runningConfig?: Partial<Config>) {
     })();
   }, ORPHAN_CLEANUP_INTERVAL_MS);
 
+  let isAbortingForStartupFailure = false;
+  let fatalStartupFailure: { workerId: number; failure: WorkerStartupFailureMessage } | null = null;
+  let hasInitiatedShutdown = false;
+
+  const abortForStartupFailure = (): boolean => {
+    if (!(isAbortingForStartupFailure && fatalStartupFailure)) return false;
+
+    if (!hasInitiatedShutdown) {
+      hasInitiatedShutdown = true;
+      // Note: the exiting worker may differ from the one that sent the
+      // failure message if multiple workers exit in rapid succession.
+      // We always report the first failure received.
+      const { failure, workerId: failedWorkerId } = fatalStartupFailure;
+      const msg =
+        failure.code === 'EADDRINUSE'
+          ? `Node renderer startup failed: ${failure.host}:${failure.port} is already in use`
+          : `Node renderer startup failed in worker ${failedWorkerId}: ${failure.message}`;
+
+      errorReporter.message(msg);
+      // Disconnect all live workers so they release their ports before the
+      // master exits. cluster.disconnect() is async — the callback fires
+      // once every worker has disconnected. A hard-deadline timer guarantees
+      // the master still exits if a worker is stuck (leaked handle, blocking
+      // syscall, etc.), following the same pattern as restartWorkers.ts.
+      const MASTER_SHUTDOWN_TIMEOUT_MS = 5000;
+      const shutdownTimer = setTimeout(() => process.exit(1), MASTER_SHUTDOWN_TIMEOUT_MS);
+      if (typeof shutdownTimer.unref === 'function') shutdownTimer.unref();
+      cluster.disconnect(() => {
+        clearTimeout(shutdownTimer);
+        process.exit(1);
+      });
+    }
+
+    return true;
+  };
+
+  cluster.on('message', (worker, message) => {
+    // Check the abort flag first to short-circuit the type-guard on every
+    // ordinary IPC message once we are already aborting.
+    if (isAbortingForStartupFailure || !isWorkerStartupFailureMessage(message)) return;
+
+    isAbortingForStartupFailure = true;
+    fatalStartupFailure = { workerId: worker.id, failure: message };
+  });
+
   for (let i = 0; i < workersCount; i += 1) {
     cluster.fork();
   }
 
   // Listen for dying workers:
   cluster.on('exit', (worker) => {
+    // Once a startup failure has been detected, abort regardless of whether
+    // this particular exit was from the failing worker, a scheduled restart,
+    // or an unrelated crash. Don't fork any more workers.
+    if (abortForStartupFailure()) {
+      return;
+    }
+
     if (worker.isScheduledRestart) {
       log.info('Restarting worker #%d on schedule', worker.id);
-    } else {
+      cluster.fork();
+      return;
+    }
+
+    // Give in-flight startup-failure IPC messages one event-loop turn to be
+    // processed before classifying this as an ordinary runtime crash.
+    setImmediate(() => {
+      if (abortForStartupFailure()) return;
+
       // TODO: Track last rendering request per worker.id
       // TODO: Consider blocking a given rendering request if it kills a worker more than X times
       const msg = `Worker ${worker.id} died UNEXPECTEDLY :(, restarting`;
       errorReporter.message(msg);
-    }
-    // Replace the dead worker:
-    cluster.fork();
+      cluster.fork();
+    });
   });
 
   // Schedule regular restarts of workers
diff --git a/packages/react-on-rails-pro-node-renderer/src/shared/configBuilder.ts b/packages/react-on-rails-pro-node-renderer/src/shared/configBuilder.ts
index 3ede56a3e9..949673bf03 100644
--- a/packages/react-on-rails-pro-node-renderer/src/shared/configBuilder.ts
+++ b/packages/react-on-rails-pro-node-renderer/src/shared/configBuilder.ts
@@ -142,6 +142,13 @@ function logLevel(level: string): LevelWithSilent {
   }
 }
 
+function validatePort(port: number): string | null {
+  if (!Number.isInteger(port) || !Number.isFinite(port) || port < 0 || port > 65535) {
+    return `RENDERER_PORT must be an integer between 0 and 65535. Received: ${String(port)}`;
+  }
+  return null;
+}
+
 function normalizedRuntimeEnvs() {
   return [env.RAILS_ENV, env.NODE_ENV]
     .filter((value): value is string => Boolean(value))
@@ -380,6 +387,17 @@ export function buildConfig(providedUserConfig?: Partial<Config>): Config {
     }
   });
 
+  // Coerce port to a number — user configs frequently pass env-derived strings
+  // (e.g. `port: env.RENDERER_PORT || 3800` yields the string "3800").
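+  // Note: Number(undefined) is NaN, which validatePort below rejects, while
+  // Number('') coerces to 0 and falls through as a valid ephemeral port.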
+  // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-conversion -- runtime value may be string despite the type
+  config.port = Number(config.port);
+
+  const portValidationError = validatePort(config.port);
+  if (portValidationError) {
+    log.error(portValidationError);
+    process.exit(1);
+  }
+
   if (
     'honeybadgerApiKey' in config ||
     'sentryDsn' in config ||
diff --git a/packages/react-on-rails-pro-node-renderer/src/shared/workerMessages.ts b/packages/react-on-rails-pro-node-renderer/src/shared/workerMessages.ts
new file mode 100644
index 0000000000..6b8e1c1bc4
--- /dev/null
+++ b/packages/react-on-rails-pro-node-renderer/src/shared/workerMessages.ts
@@ -0,0 +1,34 @@
+export const WORKER_STARTUP_FAILURE = 'NODE_RENDERER_WORKER_STARTUP_FAILURE' as const;
+
+export interface WorkerStartupFailureMessage {
+  type: typeof WORKER_STARTUP_FAILURE;
+  stage: 'listen';
+  code?: string;
+  errno?: number;
+  syscall?: string;
+  host: string;
+  port: number;
+  message: string;
+}
+
+export function isWorkerStartupFailureMessage(value: unknown): value is WorkerStartupFailureMessage {
+  if (typeof value !== 'object' || value === null) {
+    return false;
+  }
+
+  const message = value as Partial<WorkerStartupFailureMessage>;
+
+  // stage: 'listen' is the only supported stage today. To handle pre-listen
+  // failures (e.g. plugin registration), add a new stage value here and
+  // update the master handler accordingly.
+  return (
+    message.type === WORKER_STARTUP_FAILURE &&
+    message.stage === 'listen' &&
+    typeof message.host === 'string' &&
+    typeof message.port === 'number' &&
+    Number.isInteger(message.port) &&
+    message.port >= 0 &&
+    message.port <= 65535 &&
+    typeof message.message === 'string'
+  );
+}
diff --git a/packages/react-on-rails-pro-node-renderer/src/worker.ts b/packages/react-on-rails-pro-node-renderer/src/worker.ts
index eb2d0ec9f9..a08a7c91e4 100644
--- a/packages/react-on-rails-pro-node-renderer/src/worker.ts
+++ b/packages/react-on-rails-pro-node-renderer/src/worker.ts
@@ -23,6 +23,7 @@ import {
   type ProvidedNewBundle,
 } from './worker/handleRenderRequest.js';
 import handleGracefulShutdown from './worker/handleGracefulShutdown.js';
+import { handleStartupListenError } from './worker/startupErrorHandler.js';
 import {
   badRequestResponseResult,
   errorResponseResult,
@@ -510,8 +511,8 @@ export default function run(config: Partial<Config>) {
   if (workersCount === 0 || cluster.isWorker) {
     app.listen({ port, host }, (err, address) => {
       if (err) {
-        log.error({ err, host, port }, 'Node renderer failed to start');
-        process.exit(1);
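+        // handleStartupListenError may exit asynchronously (once the IPC send
+        // completes), so return instead of falling through to the success log.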
+        handleStartupListenError({ err, host, port });
+        return;
       }
 
       const workerName = worker ? `worker #${worker.id}` : 'master (single-process)';
       log.info({ workerName, address }, 'Node renderer listening');
diff --git a/packages/react-on-rails-pro-node-renderer/src/worker/startupErrorHandler.ts b/packages/react-on-rails-pro-node-renderer/src/worker/startupErrorHandler.ts
new file mode 100644
index 0000000000..622775e9fa
--- /dev/null
+++ b/packages/react-on-rails-pro-node-renderer/src/worker/startupErrorHandler.ts
@@ -0,0 +1,65 @@
+import cluster from 'cluster';
+import log from '../shared/log.js';
+import { WORKER_STARTUP_FAILURE, type WorkerStartupFailureMessage } from '../shared/workerMessages.js';
+
+export type StartupListenErrorHandlerOptions = {
+  err: Error;
+  host: string;
+  port: number;
+  isWorker?: boolean;
+  send?: NodeJS.Process['send'];
+  exit?: NodeJS.Process['exit'];
+};
+
+export function handleStartupListenError({
+  err,
+  host,
+  port,
+  isWorker = cluster.isWorker,
+  send,
+  exit,
+}: StartupListenErrorHandlerOptions) {
+  const sendFn = send ?? process.send?.bind(process);
+  const exitFn = exit ?? ((code?: number) => process.exit(code));
+
+  log.error({ err, host, port }, 'Node renderer failed to start');
+
+  if (isWorker) {
+    if (!sendFn) {
+      log.error('Cluster worker has no IPC channel; cannot notify master of startup failure');
+      exitFn(1);
+      return;
+    }
+
+    const startupFailure: WorkerStartupFailureMessage = {
+      type: WORKER_STARTUP_FAILURE,
+      stage: 'listen',
+      code: (err as NodeJS.ErrnoException).code,
+      errno: (err as NodeJS.ErrnoException).errno,
+      syscall: (err as NodeJS.ErrnoException).syscall,
+      host,
+      port,
+      message: err.message,
+    };
+    try {
+      let exited = false;
+      const doExit = (sendErr?: Error | null) => {
+        if (exited) return;
+        exited = true;
+        if (sendErr) log.error({ err: sendErr }, 'Failed to send startup failure message to master');
+        exitFn(1);
+      };
+      // process.send(message, sendHandle, options, callback): pass undefined
+      // for handle/options so the callback lands in the correct argument slot.
+      sendFn(startupFailure, undefined, undefined, doExit);
+      // Safety net: if the IPC channel is half-broken the callback may never
+      // fire, leaving this worker alive indefinitely. Force exit after a timeout.
+      const IPC_SEND_TIMEOUT_MS = 2000;
+      const timer = setTimeout(() => doExit(), IPC_SEND_TIMEOUT_MS);
+      if (typeof timer.unref === 'function') timer.unref();
+    } catch (sendErr) {
+      log.error({ err: sendErr as Error }, 'Failed to send startup failure message to master');
+      exitFn(1);
+    }
+  } else {
+    exitFn(1);
+  }
+}
diff --git a/packages/react-on-rails-pro-node-renderer/tests/configBuilder.test.ts b/packages/react-on-rails-pro-node-renderer/tests/configBuilder.test.ts
index 277f1295c2..3ebd53a775 100644
--- a/packages/react-on-rails-pro-node-renderer/tests/configBuilder.test.ts
+++ b/packages/react-on-rails-pro-node-renderer/tests/configBuilder.test.ts
@@ -1,6 +1,7 @@
 describe('configBuilder', () => {
   const envVarsToRestore = [
     'RENDERER_HOST',
+    'RENDERER_PORT',
     'NODE_ENV',
     'RENDERER_PASSWORD',
     'RAILS_ENV',
@@ -113,6 +114,39 @@ describe('configBuilder', () => {
     expect(finalSettings.password).toBe('');
   });
 
+  describe('port validation', () => {
+    it('throws when configured port is outside the valid TCP range', () => {
+      process.env.NODE_ENV = 'development';
+      process.env.RAILS_ENV = 'development';
+      const processExit = mockProcessExit();
+      const { buildConfig, error } = loadConfigBuilderWithMockedLogger();
+
+      expect(() => buildConfig({ port: 70000 })).toThrow('process.exit: 1');
+      expect(processExit).toHaveBeenCalledWith(1);
+      expect(error).toHaveBeenCalledWith(
+        'RENDERER_PORT must be an integer between 0 and 65535. Received: 70000',
+      );
+    });
+
+    it('allows port 0 for ephemeral-port test setups', () => {
+      process.env.NODE_ENV = 'development';
+      process.env.RAILS_ENV = 'development';
+      const { buildConfig } = loadConfigBuilderWithMockedLogger();
+
+      expect(buildConfig({ port: 0 }).port).toBe(0);
+    });
+
+    it('coerces a string port from env vars to a number', () => {
+      process.env.NODE_ENV = 'development';
+      process.env.RAILS_ENV = 'development';
+      const { buildConfig } = loadConfigBuilderWithMockedLogger();
+
+      // Simulates `port: env.RENDERER_PORT || 3800` where env var is the string "3800"
+      const config = buildConfig({ port: '3800' as unknown as number });
+      expect(config.port).toBe(3800);
+    });
+  });
+
   describe('password validation in production-like environments', () => {
     it('throws when no password is set in production', () => {
       process.env.NODE_ENV = 'production';
diff --git a/packages/react-on-rails-pro-node-renderer/tests/masterStartupFailure.test.ts b/packages/react-on-rails-pro-node-renderer/tests/masterStartupFailure.test.ts
new file mode 100644
index 0000000000..c3cf8ddb5b
--- /dev/null
+++ b/packages/react-on-rails-pro-node-renderer/tests/masterStartupFailure.test.ts
@@ -0,0 +1,295 @@
+import { WORKER_STARTUP_FAILURE, type WorkerStartupFailureMessage } from '../src/shared/workerMessages';
+
+type MockWorker = {
+  id: number;
+  isScheduledRestart?: boolean;
+  process: { exitCode: number | null };
+};
+
+type ClusterHandlers = {
+  message: (worker: MockWorker, message: unknown) => void;
+  exit: (worker: MockWorker) => void;
+};
+
+type MockCluster = {
+  on: jest.Mock<MockCluster, [event: string, handler: (...args: unknown[]) => void]>;
+  fork: jest.Mock;
+  disconnect: jest.Mock<void, [callback?: () => void]>;
+};
+
+function buildStartupFailureMessage(
+  overrides: Partial<WorkerStartupFailureMessage> = {},
+): WorkerStartupFailureMessage {
+  return {
+    type: WORKER_STARTUP_FAILURE,
+    stage: 'listen',
+    code: 'EADDRINUSE',
+    errno: -48,
+    syscall: 'listen',
+    host: 'localhost',
+    port: 3800,
+    message: 'listen EADDRINUSE: address already in use :::3800',
+    ...overrides,
+  };
+}
+
+function setupMasterRunHarness() {
+  const operations: string[] = [];
+  const clusterHandlers: Partial<ClusterHandlers> = {};
+  const mockFork = jest.fn(() => {
+    operations.push('fork');
+    return {};
+  });
+  const mockCluster = {} as MockCluster;
+  mockCluster.disconnect = jest.fn((callback?: () => void) => {
+    if (callback) callback();
+  });
+  mockCluster.on = jest.fn((event: string, handler: (...args: unknown[]) => void) => {
+    operations.push(`on:${event}`);
+    if (event === 'message') {
+      clusterHandlers.message = handler as ClusterHandlers['message'];
+    } else if (event === 'exit') {
+      clusterHandlers.exit = handler as ClusterHandlers['exit'];
+    }
+    return mockCluster;
+  });
+  mockCluster.fork = mockFork;
+  const mockErrorReporterMessage = jest.fn();
+  const mockLog = {
+    info: jest.fn(),
+    warn: jest.fn(),
+    error: jest.fn(),
+    fatal: jest.fn(),
+  };
+  const mockBuildConfig = jest.fn(() => ({
+    workersCount: 2,
+    allWorkersRestartInterval: undefined,
+    delayBetweenIndividualWorkerRestarts: undefined,
+    gracefulWorkerRestartTimeout: 0,
+    serverBundleCachePath: '/tmp/react-on-rails-pro-node-renderer-bundles',
+  }));
+  const mockLogSanitizedConfig = jest.fn();
+  const mockGetLicenseStatus = jest.fn(() => 'valid');
+  const setIntervalSpy = jest.spyOn(global, 'setInterval').mockReturnValue(0 as unknown as NodeJS.Timeout);
+  const setTimeoutSpy = jest
+    .spyOn(global, 'setTimeout')
+    .mockReturnValue({ unref: jest.fn() } as unknown as NodeJS.Timeout);
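+  // process.exit is mocked to throw so a test can assert the abort path
+  // synchronously with expect(...).toThrow('process.exit:1').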
+  const processExitSpy = jest.spyOn(process, 'exit').mockImplementation(((code?: number) => {
+    throw new Error(`process.exit:${code}`);
+  }) as typeof process.exit);
+
+  jest.doMock('cluster', () => ({
+    __esModule: true,
+    default: mockCluster,
+  }));
+  jest.doMock('../src/shared/log', () => ({
+    __esModule: true,
+    default: mockLog,
+  }));
+  jest.doMock('../src/shared/errorReporter', () => ({
+    __esModule: true,
+    message: mockErrorReporterMessage,
+    error: jest.fn(),
+    addMessageNotifier: jest.fn(),
+    addErrorNotifier: jest.fn(),
+    addNotifier: jest.fn(),
+  }));
+  jest.doMock('../src/shared/configBuilder', () => ({
+    __esModule: true,
+    buildConfig: mockBuildConfig,
+    logSanitizedConfig: mockLogSanitizedConfig,
+  }));
+  jest.doMock('../src/shared/licenseValidator', () => ({
+    __esModule: true,
+    getLicenseStatus: mockGetLicenseStatus,
+  }));
+  jest.doMock('../src/master/restartWorkers', () => ({
+    __esModule: true,
+    default: jest.fn(),
+  }));
+
+  let masterRun: typeof import('../src/master').default | undefined;
+  jest.isolateModules(() => {
+    // eslint-disable-next-line global-require
+    masterRun = require('../src/master').default as typeof import('../src/master').default;
+  });
+
+  if (!masterRun) {
+    throw new Error('Failed to load masterRun');
+  }
+
+  masterRun();
+
+  if (!clusterHandlers.message || !clusterHandlers.exit) {
+    throw new Error('Failed to register cluster handlers');
+  }
+
+  return {
+    operations,
+    clusterHandlers: clusterHandlers as ClusterHandlers,
+    mockFork,
+    mockCluster,
+    mockErrorReporterMessage,
+    setIntervalSpy,
+    setTimeoutSpy,
+    processExitSpy,
+  };
+}
+
+async function waitForSetImmediate() {
+  await new Promise((resolve) => {
+    setImmediate(resolve);
+  });
+}
+
+describe('master startup failure handling via masterRun wiring', () => {
+  afterEach(() => {
+    jest.restoreAllMocks();
+    jest.resetModules();
+  });
+
+  it.each([
+    {
+      testName: 'EADDRINUSE startup failure',
+      failureWorker: { id: 1, process: { exitCode: 1 } },
+      exitingWorker: { id: 1, process: { exitCode: 1 } },
+      failure: buildStartupFailureMessage(),
+      expectedMessage: 'Node renderer startup failed: localhost:3800 is already in use',
+    },
+    {
+      testName: 'generic startup failure from one worker while another exits first',
+      failureWorker: { id: 2, process: { exitCode: 1 } },
+      exitingWorker: { id: 1, process: { exitCode: 1 } },
+      failure: buildStartupFailureMessage({
+        code: 'EACCES',
+        message: 'listen EACCES: permission denied 0.0.0.0:80',
+      }),
+      expectedMessage:
+        'Node renderer startup failed in worker 2: listen EACCES: permission denied 0.0.0.0:80',
+    },
+  ])('registers listeners before forking and aborts without reforking on $testName', (scenario) => {
+    const harness = setupMasterRunHarness();
+
+    expect(harness.operations).toEqual(['on:message', 'fork', 'fork', 'on:exit']);
+    expect(harness.mockFork).toHaveBeenCalledTimes(2);
+    expect(harness.setIntervalSpy).toHaveBeenCalledTimes(1);
+
+    harness.clusterHandlers.message(scenario.failureWorker, scenario.failure);
+
+    expect(() => harness.clusterHandlers.exit(scenario.exitingWorker)).toThrow('process.exit:1');
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledWith(scenario.expectedMessage);
+    expect(harness.mockCluster.disconnect).toHaveBeenCalledTimes(1);
+    expect(harness.processExitSpy).toHaveBeenCalledWith(1);
+    expect(harness.mockFork).toHaveBeenCalledTimes(2);
+  });
+
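+  // First failure wins: the master records the first WORKER_STARTUP_FAILURE it
+  // receives and ignores any later ones.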
+  it('keeps the first startup-failure details when multiple workers report failures', () => {
+    const harness = setupMasterRunHarness();
+    const firstFailure = buildStartupFailureMessage({
+      code: 'EACCES',
+      message: 'listen EACCES: permission denied 0.0.0.0:80',
+    });
+    const secondFailure = buildStartupFailureMessage({
+      code: 'ECONNREFUSED',
+      message: 'listen ECONNREFUSED: connection refused 127.0.0.1:3800',
+    });
+
+    harness.clusterHandlers.message({ id: 1, process: { exitCode: 1 } }, firstFailure);
+    harness.clusterHandlers.message({ id: 2, process: { exitCode: 1 } }, secondFailure);
+
+    expect(() => harness.clusterHandlers.exit({ id: 2, process: { exitCode: 1 } })).toThrow('process.exit:1');
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledWith(
+      'Node renderer startup failed in worker 1: listen EACCES: permission denied 0.0.0.0:80',
+    );
+  });
+
+  it('reports error only once when multiple workers exit during abort', () => {
+    const harness = setupMasterRunHarness();
+    // Use a disconnect mock that does NOT invoke the callback, so process.exit
+    // is not called and subsequent exit events can be observed.
+    harness.mockCluster.disconnect.mockImplementation(() => {});
+
+    harness.clusterHandlers.message({ id: 1, process: { exitCode: 1 } }, buildStartupFailureMessage());
+
+    // First exit triggers the error report and disconnect.
+    harness.clusterHandlers.exit({ id: 1, process: { exitCode: 1 } });
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledTimes(1);
+    expect(harness.mockCluster.disconnect).toHaveBeenCalledTimes(1);
+
+    // Second worker exit during abort — no duplicate report, no refork.
+    harness.clusterHandlers.exit({ id: 2, process: { exitCode: 1 } });
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledTimes(1);
+    expect(harness.mockCluster.disconnect).toHaveBeenCalledTimes(1);
+    expect(harness.mockFork).toHaveBeenCalledTimes(2);
+  });
+
+  it('does not refork a scheduled-restart worker when aborting for startup failure', () => {
+    const harness = setupMasterRunHarness();
+
+    harness.clusterHandlers.message({ id: 1, process: { exitCode: 1 } }, buildStartupFailureMessage());
+
+    // A scheduled-restart worker exiting during abort should NOT be reforked.
+    expect(() =>
+      harness.clusterHandlers.exit({ id: 2, isScheduledRestart: true, process: { exitCode: 0 } }),
+    ).toThrow('process.exit:1');
+    expect(harness.mockFork).toHaveBeenCalledTimes(2);
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledTimes(1);
+  });
+
+  it('restarts scheduled-restart workers without reporting an error', () => {
+    const harness = setupMasterRunHarness();
+    const worker: MockWorker = { id: 1, isScheduledRestart: true, process: { exitCode: 0 } };
+
+    harness.clusterHandlers.exit(worker);
+
+    expect(harness.mockFork).toHaveBeenCalledTimes(3);
+    expect(harness.mockErrorReporterMessage).not.toHaveBeenCalled();
+    expect(harness.processExitSpy).not.toHaveBeenCalled();
+  });
+
+  it('waits one tick for startup-failure IPC before classifying an unexpected crash', async () => {
+    const harness = setupMasterRunHarness();
+    harness.mockCluster.disconnect.mockImplementation(() => {});
+    const worker = { id: 1, process: { exitCode: 1 } };
+
+    // Exit arrives first.
+    harness.clusterHandlers.exit(worker);
+    // Startup-failure message arrives before the deferred crash classification runs.
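+    // (master.ts defers crash classification by one setImmediate tick to allow
+    // for exactly this race.)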
+    harness.clusterHandlers.message(worker, buildStartupFailureMessage());
+    await waitForSetImmediate();
+
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledWith(
+      'Node renderer startup failed: localhost:3800 is already in use',
+    );
+    expect(harness.mockCluster.disconnect).toHaveBeenCalledTimes(1);
+    expect(harness.mockFork).toHaveBeenCalledTimes(2);
+    expect(harness.processExitSpy).not.toHaveBeenCalled();
+  });
+
+  it('reforks on unexpected runtime crash when no startup failure was received', async () => {
+    const harness = setupMasterRunHarness();
+
+    harness.clusterHandlers.exit({ id: 3, process: { exitCode: 1 } });
+    await waitForSetImmediate();
+
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledWith(
+      'Worker 3 died UNEXPECTEDLY :(, restarting',
+    );
+    expect(harness.mockFork).toHaveBeenCalledTimes(3);
+    expect(harness.processExitSpy).not.toHaveBeenCalled();
+  });
+
+  it('ignores malformed startup-failure messages and treats exit as runtime crash', async () => {
+    const harness = setupMasterRunHarness();
+
+    harness.clusterHandlers.message({ id: 1, process: { exitCode: 1 } }, { type: WORKER_STARTUP_FAILURE });
+    harness.clusterHandlers.exit({ id: 1, process: { exitCode: 1 } });
+    await waitForSetImmediate();
+
+    expect(harness.mockErrorReporterMessage).toHaveBeenCalledWith(
+      'Worker 1 died UNEXPECTEDLY :(, restarting',
+    );
+    expect(harness.mockFork).toHaveBeenCalledTimes(3);
+    expect(harness.processExitSpy).not.toHaveBeenCalled();
+  });
+});
diff --git a/packages/react-on-rails-pro-node-renderer/tests/workerStartupFailure.test.ts b/packages/react-on-rails-pro-node-renderer/tests/workerStartupFailure.test.ts
new file mode 100644
index 0000000000..14c6ebc2d5
--- /dev/null
+++ b/packages/react-on-rails-pro-node-renderer/tests/workerStartupFailure.test.ts
@@ -0,0 +1,250 @@
+import {
+  WORKER_STARTUP_FAILURE,
+  isWorkerStartupFailureMessage,
+  type WorkerStartupFailureMessage,
+} from '../src/shared/workerMessages';
+import { handleStartupListenError } from '../src/worker/startupErrorHandler';
+
+describe('isWorkerStartupFailureMessage', () => {
+  it('returns true for a valid startup failure message', () => {
+    const msg: WorkerStartupFailureMessage = {
+      type: WORKER_STARTUP_FAILURE,
+      stage: 'listen',
+      code: 'EADDRINUSE',
+      errno: -48,
+      syscall: 'listen',
+      host: 'localhost',
+      port: 3800,
+      message: 'listen EADDRINUSE: address already in use :::3800',
+    };
+    expect(isWorkerStartupFailureMessage(msg)).toBe(true);
+  });
+
+  it('returns false for null', () => {
+    expect(isWorkerStartupFailureMessage(null)).toBe(false);
+  });
+
+  it('returns false for a string', () => {
+    expect(isWorkerStartupFailureMessage('hello')).toBe(false);
+  });
+
+  it('returns false for an object with a different type', () => {
+    expect(isWorkerStartupFailureMessage({ type: 'OTHER' })).toBe(false);
+  });
+
+  it('returns false for an object without type', () => {
+    expect(isWorkerStartupFailureMessage({ stage: 'listen' })).toBe(false);
+  });
+
+  it('returns false for a non-integer port', () => {
+    expect(
+      isWorkerStartupFailureMessage({
+        type: WORKER_STARTUP_FAILURE,
+        stage: 'listen',
+        host: 'localhost',
+        port: 3800.5,
+        message: 'some error',
+      }),
+    ).toBe(false);
+  });
+
+  it('returns false for an out-of-range port', () => {
+    expect(
+      isWorkerStartupFailureMessage({
+        type: WORKER_STARTUP_FAILURE,
+        stage: 'listen',
+        host: 'localhost',
+        port: 70000,
+        message: 'some error',
+      }),
+    ).toBe(false);
+  });
+
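+  // The port bounds here mirror validatePort in configBuilder: integers 0–65535.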
+  it('returns false for a negative port', () => {
+    expect(
+      isWorkerStartupFailureMessage({
+        type: WORKER_STARTUP_FAILURE,
+        stage: 'listen',
+        host: 'localhost',
+        port: -1,
+        message: 'some error',
+      }),
+    ).toBe(false);
+  });
+});
+
+describe('worker startup listen error handling', () => {
+  const buildListenError = () =>
+    Object.assign(new Error('listen EADDRINUSE: address already in use :::3800'), {
+      code: 'EADDRINUSE',
+      errno: -48,
+      syscall: 'listen',
+    });
+
+  afterEach(() => {
+    jest.restoreAllMocks();
+  });
+
+  it('sends WORKER_STARTUP_FAILURE message in clustered mode via the production handler', () => {
+    const sentMessages: unknown[] = [];
+    const exitCalls: number[] = [];
+    const send = ((msg: unknown, _handle?: unknown, _options?: unknown, callback?: () => void) => {
+      sentMessages.push(msg);
+      callback?.();
+      return true;
+    }) as NodeJS.Process['send'];
+    const exit = ((code?: number) => {
+      exitCalls.push(code ?? 0);
+    }) as NodeJS.Process['exit'];
+
+    handleStartupListenError({
+      err: buildListenError(),
+      host: 'localhost',
+      port: 3800,
+      isWorker: true,
+      send,
+      exit,
+    });
+
+    expect(sentMessages).toHaveLength(1);
+    expect(isWorkerStartupFailureMessage(sentMessages[0])).toBe(true);
+    expect(sentMessages[0]).toMatchObject({
+      type: WORKER_STARTUP_FAILURE,
+      stage: 'listen',
+      host: 'localhost',
+      port: 3800,
+      code: 'EADDRINUSE',
+    });
+    expect(exitCalls).toEqual([1]);
+  });
+
+  it('exits without IPC in single-process mode via the production handler', () => {
+    const send = jest.fn();
+    const exitCalls: number[] = [];
+    const exit = ((code?: number) => {
+      exitCalls.push(code ?? 0);
+    }) as NodeJS.Process['exit'];
+
+    handleStartupListenError({
+      err: buildListenError(),
+      host: 'localhost',
+      port: 3800,
+      isWorker: false,
+      send: send as unknown as NodeJS.Process['send'],
+      exit,
+    });
+
+    expect(send).not.toHaveBeenCalled();
+    expect(exitCalls).toEqual([1]);
+  });
+
+  it('logs a warning and exits when cluster worker has no IPC channel', () => {
+    // Temporarily remove process.send so the fallback `send ?? process.send?.bind(process)`
+    // resolves to undefined, simulating a worker whose IPC channel has been destroyed.
+    const originalSend = process.send;
+    delete (process as { send?: typeof process.send }).send;
+
+    const exitCalls: number[] = [];
+    const exit = ((code?: number) => {
+      exitCalls.push(code ?? 0);
+    }) as NodeJS.Process['exit'];
+
+    try {
+      handleStartupListenError({
+        err: buildListenError(),
+        host: 'localhost',
+        port: 3800,
+        isWorker: true,
+        send: undefined,
+        exit,
+      });
+
+      expect(exitCalls).toEqual([1]);
+    } finally {
+      process.send = originalSend;
+    }
+  });
+
+  it('exits when process.send throws synchronously', () => {
+    const send = (() => {
+      throw new Error('ERR_IPC_CHANNEL_CLOSED');
+    }) as NodeJS.Process['send'];
+    const exitCalls: number[] = [];
+    const exit = ((code?: number) => {
+      exitCalls.push(code ?? 0);
+    }) as NodeJS.Process['exit'];
+
+    handleStartupListenError({
+      err: buildListenError(),
+      host: 'localhost',
+      port: 3800,
+      isWorker: true,
+      send,
+      exit,
+    });
+
+    expect(exitCalls).toEqual([1]);
+  });
+
0); + }) as NodeJS.Process['exit']; + + handleStartupListenError({ + err: buildListenError(), + host: 'localhost', + port: 3800, + isWorker: true, + send, + exit, + }); + + // Callback was never invoked, so exit hasn't been called yet. + expect(exitCalls).toEqual([]); + + // Advance past the IPC_SEND_TIMEOUT_MS (2000ms) fallback timer. + jest.advanceTimersByTime(2000); + expect(exitCalls).toEqual([1]); + + jest.useRealTimers(); + }); + + it('does not exit twice when callback fires after timeout', () => { + jest.useFakeTimers(); + const exitCalls: number[] = []; + let savedCallback: (() => void) | undefined; + const send = ((_msg: unknown, _handle?: unknown, _options?: unknown, callback?: () => void) => { + savedCallback = callback; + return true; + }) as NodeJS.Process['send']; + const exit = ((code?: number) => { + exitCalls.push(code ?? 0); + }) as NodeJS.Process['exit']; + + handleStartupListenError({ + err: buildListenError(), + host: 'localhost', + port: 3800, + isWorker: true, + send, + exit, + }); + + // Timeout fires first. + jest.advanceTimersByTime(2000); + expect(exitCalls).toEqual([1]); + + // Late callback — should be a no-op. + savedCallback?.(); + expect(exitCalls).toEqual([1]); + + jest.useRealTimers(); + }); +});