Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/claude-code-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ on:
jobs:
claude-review:
runs-on: ubuntu-latest
env:
HAS_CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN != '' }}
permissions:
contents: read
pull-requests: write
Expand All @@ -21,6 +23,7 @@ jobs:

- name: Run Claude Code Review
id: claude-review
if: env.HAS_CLAUDE_CODE_OAUTH_TOKEN == 'true'
uses: anthropics/claude-code-action@v1
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
Expand All @@ -43,3 +46,8 @@ jobs:

claude_args: |
--allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)"

- name: Skip Claude review when token is unavailable
if: env.HAS_CLAUDE_CODE_OAUTH_TOKEN != 'true'
run: |
echo "Skipping Claude Code Review because CLAUDE_CODE_OAUTH_TOKEN is not available for this workflow context."
1 change: 1 addition & 0 deletions .lychee.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ exclude = [
'^https://devchat\.tv', # Ruby Rogues podcast - times out in CI
'^https?://chlg\.co', # URL shortener - times out in CI
'^https?://contributor-covenant\.org', # Timeout in CI
'^https://www\.diva-portal\.org/smash/get/diva2:1903931/FULLTEXT01\.pdf$', # Intermittent timeout in CI

# ============================================================================
# CHANGELOG COMPARE LINKS
Expand Down
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ _A history of the news. A few bullets at the top will also show on the [README.m
- 2016-11-03: Spoke at [LA Ruby: "React on Rails: Why, What, and How?"](http://www.meetup.com/laruby/events/234825187/). [Video and pictures in this article](https://blog.shakacode.com/my-react-on-rails-talk-at-the-la-ruby-rails-meetup-november-10-2016-eaaa83aff800#.ej6h4eglp).
- 2016-12-20: New Video on Egghead.io: [Creating a component with React on Rails](https://egghead.io/lessons/react-creating-a-component-with-react-on-rails)
- 2016-11-03: Spoke at [LA Ruby, 7pm, Thursday, November 10 in Venice, CA: "React on Rails: Why, What, and How?"](http://www.meetup.com/laruby/events/234825187/). [Video and pictures in this article](https://blog.shakacode.com/my-react-on-rails-talk-at-the-la-ruby-rails-meetup-november-10-2016-eaaa83aff800#.ej6h4eglp).
- 2016-08-27: We now have a [Documentation Gitbook](https://shakacode.gitbooks.io/react-on-rails/content/) for improved readability & reference.
- 2016-08-27: We now have a [Documentation Gitbook](https://reactonrails.com/docs/) for improved readability & reference.
- 2016-08-21: v6.1 ships with several new features and bug fixes. See the [Changelog](CHANGELOG.md).
- 2016-07-28: If you're doing server rendering, be sure to use mini_racer! See [issues/428](https://github.com/shakacode/react_on_rails/issues/428). It's supposedly much faster than `therubyracer`.

- 2016-08-27: We now have a [Documentation Gitbook](https://shakacode.gitbooks.io/react-on-rails/content/) for improved readability & reference.
- 2016-08-27: We now have a [Documentation Gitbook](https://reactonrails.com/docs/) for improved readability & reference.
- 2016-08-21: v6.1 ships with several new features and bug fixes. See the [Changelog](CHANGELOG.md).
- 2016-06-13: 6.0.4 shipped with a critical fix regarding a missing polyfill for `clearTimeout`, used by babel-polyfill.
- 2016-06-06: 6.0.2 shipped with a critical fix if you are fragment caching the server generated React.
Expand Down
34 changes: 29 additions & 5 deletions packages/react-on-rails-pro-node-renderer/src/master.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { buildConfig, Config, logSanitizedConfig } from './shared/configBuilder.
import restartWorkers from './master/restartWorkers.js';
import * as errorReporter from './shared/errorReporter.js';
import { getLicenseStatus } from './shared/licenseValidator.js';
import { isWorkerStartupFailureMessage, type WorkerStartupFailureMessage } from './shared/workerMessages.js';

const MILLISECONDS_IN_MINUTE = 60000;
// How often to scan for orphaned upload directories.
Expand Down Expand Up @@ -77,21 +78,44 @@ export default function masterRun(runningConfig?: Partial<Config>) {
})();
}, ORPHAN_CLEANUP_INTERVAL_MS);

let isAbortingForStartupFailure = false;
let fatalStartupFailure: { workerId: number; failure: WorkerStartupFailureMessage } | null = null;

for (let i = 0; i < workersCount; i += 1) {
cluster.fork();
}

cluster.on('message', (worker, message) => {
if (!isWorkerStartupFailureMessage(message) || isAbortingForStartupFailure) return;

isAbortingForStartupFailure = true;
fatalStartupFailure = { workerId: worker.id, failure: message };
});

// Listen for dying workers:
cluster.on('exit', (worker) => {
if (worker.isScheduledRestart) {
log.info('Restarting worker #%d on schedule', worker.id);
} else {
// TODO: Track last rendering request per worker.id
// TODO: Consider blocking a given rendering request if it kills a worker more than X times
const msg = `Worker ${worker.id} died UNEXPECTEDLY :(, restarting`;
cluster.fork();
return;
}

if (isAbortingForStartupFailure) {
const failure = fatalStartupFailure?.failure;
const msg =
failure?.code === 'EADDRINUSE'
? `Node renderer startup failed: port ${failure.port} is already in use`
: `Node renderer startup failed in worker ${worker.id}: ${failure?.message || `exit code ${worker.process.exitCode}`}`;
Comment on lines +103 to +108
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Use the recorded failing worker ID in the generic error path.

Once isAbortingForStartupFailure is set, the next exit event is not guaranteed to come from the same worker that sent the IPC message. Using worker.id here can misreport which worker actually failed even though fatalStartupFailure.workerId already captured it.

🐛 Proposed fix
     if (isAbortingForStartupFailure) {
       const failure = fatalStartupFailure?.failure;
+      const failedWorkerId = fatalStartupFailure?.workerId ?? worker.id;
       const msg =
         failure?.code === 'EADDRINUSE'
           ? `Node renderer startup failed: port ${failure.port} is already in use`
-          : `Node renderer startup failed in worker ${worker.id}: ${failure?.message || `exit code ${worker.process.exitCode}`}`;
+          : `Node renderer startup failed in worker ${failedWorkerId}: ${failure?.message || `exit code ${worker.process.exitCode}`}`;
 
       errorReporter.message(msg);
       process.exit(1);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (isAbortingForStartupFailure) {
const failure = fatalStartupFailure?.failure;
const msg =
failure?.code === 'EADDRINUSE'
? `Node renderer startup failed: port ${failure.port} is already in use`
: `Node renderer startup failed in worker ${worker.id}: ${failure?.message || `exit code ${worker.process.exitCode}`}`;
if (isAbortingForStartupFailure) {
const failure = fatalStartupFailure?.failure;
const failedWorkerId = fatalStartupFailure?.workerId ?? worker.id;
const msg =
failure?.code === 'EADDRINUSE'
? `Node renderer startup failed: port ${failure.port} is already in use`
: `Node renderer startup failed in worker ${failedWorkerId}: ${failure?.message || `exit code ${worker.process.exitCode}`}`;
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/react-on-rails-pro-node-renderer/src/master.ts` around lines 103 -
108, The generic startup-failure message currently uses worker.id which may not
be the recorded failing worker; update the message construction in the
isAbortingForStartupFailure branch to prefer fatalStartupFailure.workerId (when
present) instead of worker.id, and keep using failure?.message or `exit code
${worker.process.exitCode}` for the error details; ensure you still handle the
EADDRINUSE case using failure.port and fall back appropriately if
fatalStartupFailure or its workerId is undefined.


errorReporter.message(msg);
process.exit(1);
return;
Comment on lines +111 to +112
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Dead code after process.exit()

process.exit(1) terminates the Node.js process synchronously, so the return on the next line is unreachable dead code. It can be removed.

Suggested change
process.exit(1);
return;
errorReporter.message(msg);
process.exit(1);

}
Comment on lines +88 to 113
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Potential IPC race condition between message and exit events

Node.js cluster does not guarantee that a worker's message event is processed before its exit event on the master side. If the OS delivers SIGCHLD (triggering exit) before the IPC socket data is read from the event loop (triggering message), the exit handler will observe isAbortingForStartupFailure === false and call cluster.fork() — spawning a replacement worker that will also fail.

In practice this is rare because the worker only calls process.exit(1) inside the process.send() callback (after the data has been flushed to the kernel buffer), so the timing window is very small. If it does trigger, it will cause one extra fork-and-crash cycle before the master eventually receives the deferred message event and shuts down cleanly — so it won't create an infinite loop, but it can still produce a confusing intermediate restart.

One mitigation would be to listen for the worker's IPC channel disconnect event and delay the refork decision briefly, or to cross-check the dead worker's id against any buffered-but-unprocessed WORKER_STARTUP_FAILURE messages. This is a known Node.js cluster caveat worth documenting if not addressed now.

// Replace the dead worker:

// TODO: Track last rendering request per worker.id
// TODO: Consider blocking a given rendering request if it kills a worker more than X times
const msg = `Worker ${worker.id} died UNEXPECTEDLY :(, restarting`;
errorReporter.message(msg);
cluster.fork();
});

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Discriminator tag for the IPC message a worker sends to the master when it
// fails to start (see worker.ts: sent from the app.listen error callback).
export const WORKER_STARTUP_FAILURE = 'NODE_RENDERER_WORKER_STARTUP_FAILURE' as const;

/**
 * Payload a worker process sends over the cluster IPC channel when its
 * startup `listen()` call fails, letting the master abort instead of
 * endlessly reforking a worker that can never bind its port.
 */
export interface WorkerStartupFailureMessage {
  /** Discriminator; always `WORKER_STARTUP_FAILURE`. */
  type: typeof WORKER_STARTUP_FAILURE;
  /** Startup phase that failed; only the 'listen' stage is reported today. */
  stage: 'listen';
  /** Node error code from the listen failure, e.g. 'EADDRINUSE', when present. */
  code?: string;
  /** Numeric errno from the underlying syscall, when present. */
  errno?: number;
  /** Failing syscall name (e.g. 'listen'), when present. */
  syscall?: string;
  /** Host the renderer attempted to bind. */
  host: string;
  /** Port the renderer attempted to bind. */
  port: number;
  /** Human-readable error message from the listen failure. */
  message: string;
}

export function isWorkerStartupFailureMessage(value: unknown): value is WorkerStartupFailureMessage {
return (
typeof value === 'object' &&
value !== null &&
(value as { type?: string }).type === WORKER_STARTUP_FAILURE
);
Comment on lines +14 to +19
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Validate the full IPC payload shape in the type guard.

packages/react-on-rails-pro-node-renderer/src/master.ts reads stage, port, and message immediately after this check. Right now any { type: WORKER_STARTUP_FAILURE } payload passes, so a malformed IPC message can trigger a fatal abort and produce undefined diagnostics.

🛡️ Proposed fix
 export function isWorkerStartupFailureMessage(value: unknown): value is WorkerStartupFailureMessage {
-  return (
-    typeof value === 'object' &&
-    value !== null &&
-    (value as { type?: string }).type === WORKER_STARTUP_FAILURE
-  );
+  if (typeof value !== 'object' || value === null) {
+    return false;
+  }
+
+  const message = value as Partial<WorkerStartupFailureMessage>;
+  return (
+    message.type === WORKER_STARTUP_FAILURE &&
+    message.stage === 'listen' &&
+    typeof message.host === 'string' &&
+    typeof message.port === 'number' &&
+    typeof message.message === 'string' &&
+    (message.code === undefined || typeof message.code === 'string') &&
+    (message.errno === undefined || typeof message.errno === 'number') &&
+    (message.syscall === undefined || typeof message.syscall === 'string')
+  );
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/react-on-rails-pro-node-renderer/src/shared/workerMessages.ts`
around lines 14 - 19, The current isWorkerStartupFailureMessage type guard only
checks the top-level type field so malformed IPC payloads slip through; update
the function isWorkerStartupFailureMessage to fully validate the IPC shape by
confirming the object has a type === WORKER_STARTUP_FAILURE and also contains
the expected fields with correct types (e.g., stage is a string, port is a
number, and message is a string) before narrowing to
WorkerStartupFailureMessage; reference the WORKER_STARTUP_FAILURE constant and
the isWorkerStartupFailureMessage function when making the change so master.ts
can safely read stage, port, and message without risking undefined values.

}
19 changes: 19 additions & 0 deletions packages/react-on-rails-pro-node-renderer/src/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import checkProtocolVersion from './worker/checkProtocolVersionHandler.js';
import authenticate from './worker/authHandler.js';
import { handleRenderRequest, type ProvidedNewBundle } from './worker/handleRenderRequest.js';
import handleGracefulShutdown from './worker/handleGracefulShutdown.js';
import { WORKER_STARTUP_FAILURE, type WorkerStartupFailureMessage } from './shared/workerMessages.js';
import {
errorResponseResult,
formatExceptionMessage,
Expand Down Expand Up @@ -465,6 +466,24 @@ export default function run(config: Partial<Config>) {
app.listen({ port, host }, (err, address) => {
if (err) {
log.error({ err, host, port }, 'Node renderer failed to start');

if (cluster.isWorker && process.send) {
const startupFailure: WorkerStartupFailureMessage = {
type: WORKER_STARTUP_FAILURE,
stage: 'listen',
code: (err as NodeJS.ErrnoException).code,
errno: (err as NodeJS.ErrnoException).errno,
syscall: (err as NodeJS.ErrnoException).syscall,
host,
port,
message: err.message,
};
process.send(startupFailure, undefined, undefined, () => {
process.exit(1);
});
return;
}

process.exit(1);
}
const workerName = worker ? `worker #${worker.id}` : 'master (single-process)';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import {
WORKER_STARTUP_FAILURE,
isWorkerStartupFailureMessage,
type WorkerStartupFailureMessage,
} from '../src/shared/workerMessages';

/**
* These tests verify that the master process correctly handles the
* WORKER_STARTUP_FAILURE IPC message by aborting instead of reforking,
* while preserving the existing behavior for scheduled restarts and
* runtime crashes.
*
* We test the decision logic in isolation by simulating the cluster events
* that the master process listens to, rather than forking real workers.
*/

/**
 * Builds a representative WORKER_STARTUP_FAILURE payload for tests.
 * Defaults model an EADDRINUSE failure binding localhost:3800; any field
 * can be replaced through `overrides`.
 */
function buildStartupFailureMessage(
  overrides: Partial<WorkerStartupFailureMessage> = {},
): WorkerStartupFailureMessage {
  const defaults: WorkerStartupFailureMessage = {
    type: WORKER_STARTUP_FAILURE,
    stage: 'listen',
    code: 'EADDRINUSE',
    errno: -48,
    syscall: 'listen',
    host: 'localhost',
    port: 3800,
    message: 'listen EADDRINUSE: address already in use :::3800',
  };
  return { ...defaults, ...overrides };
}

describe('master startup failure handling', () => {
let isAbortingForStartupFailure: boolean;
let fatalStartupFailure: { workerId: number; failure: WorkerStartupFailureMessage } | null;
let forkCount: number;
let exitCode: number | null;
let reportedMessages: string[];

// Simulated handlers matching master.ts logic
// Mirrors the master's cluster 'message' handler: record only the FIRST
// startup-failure message; anything else (or later duplicates) is ignored.
function handleMessage(worker: { id: number }, message: unknown) {
  if (isAbortingForStartupFailure) return;
  if (!isWorkerStartupFailureMessage(message)) return;

  isAbortingForStartupFailure = true;
  fatalStartupFailure = { workerId: worker.id, failure: message };
}

// Mirrors the master's cluster 'exit' handler: refork on scheduled restarts
// and runtime crashes, but abort the whole process on a startup failure.
function handleExit(worker: {
  id: number;
  isScheduledRestart?: boolean;
  process: { exitCode: number | null };
}) {
  // Scheduled restarts refork quietly, with no error report.
  if (worker.isScheduledRestart) {
    forkCount += 1; // cluster.fork()
    return;
  }

  if (!isAbortingForStartupFailure) {
    // Runtime crash: report it and replace the dead worker.
    reportedMessages.push(`Worker ${worker.id} died UNEXPECTEDLY :(, restarting`);
    forkCount += 1; // cluster.fork()
    return;
  }

  // Startup failure: emit a descriptive message and abort instead of reforking.
  const failure = fatalStartupFailure?.failure;
  let msg: string;
  if (failure?.code === 'EADDRINUSE') {
    msg = `Node renderer startup failed: port ${failure.port} is already in use`;
  } else {
    msg = `Node renderer startup failed in worker ${worker.id}: ${failure?.message || `exit code ${worker.process.exitCode}`}`;
  }
  reportedMessages.push(msg);
  exitCode = 1;
}
Comment on lines +41 to +73
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

These tests duplicate packages/react-on-rails-pro-node-renderer/src/master.ts instead of testing it.

handleMessage and handleExit are local copies of the production branches, so the assertions below can stay green even if masterRun() stops registering the cluster handlers or changes their control flow. Please capture the real cluster.on('message') / cluster.on('exit') callbacks from masterRun() under mocks.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/react-on-rails-pro-node-renderer/tests/masterStartupFailure.test.ts`
around lines 41 - 73, The tests currently duplicate the production logic in
local functions handleMessage and handleExit; instead, mock or spy on cluster.on
to capture the actual callbacks that masterRun registers for 'message' and
'exit', then invoke those captured callbacks with test fixtures so assertions
validate the real handlers from masterRun; change the test to call masterRun(),
grab the registered handlers from your cluster mock (the callbacks passed to
cluster.on('message', ...) and cluster.on('exit', ...)), and drive those
captured callbacks with the same worker/message scenarios used today to assert
reportedMessages, forkCount and exitCode.


beforeEach(() => {
  // Reset the simulated master-process state before each scenario.
  reportedMessages = [];
  exitCode = null;
  forkCount = 0;
  fatalStartupFailure = null;
  isAbortingForStartupFailure = false;
});

it('aborts with clear message on EADDRINUSE startup failure', () => {
  const failedWorker = { id: 1, process: { exitCode: 1 } };

  // The worker reports that its listen() call failed with EADDRINUSE.
  handleMessage(failedWorker, buildStartupFailureMessage());
  expect(isAbortingForStartupFailure).toBe(true);

  // When that worker exits, the master aborts rather than reforking.
  handleExit(failedWorker);

  expect(reportedMessages).toEqual(['Node renderer startup failed: port 3800 is already in use']);
  expect(exitCode).toBe(1);
  expect(forkCount).toBe(0); // No refork
});

it('aborts with generic message on non-EADDRINUSE startup failure', () => {
  const failedWorker = { id: 2, process: { exitCode: 1 } };
  const permissionFailure = buildStartupFailureMessage({
    code: 'EACCES',
    message: 'listen EACCES: permission denied 0.0.0.0:80',
  });

  handleMessage(failedWorker, permissionFailure);
  handleExit(failedWorker);

  expect(reportedMessages).toEqual([
    'Node renderer startup failed in worker 2: listen EACCES: permission denied 0.0.0.0:80',
  ]);
  expect(exitCode).toBe(1);
  expect(forkCount).toBe(0);
});

it('deduplicates multiple workers failing simultaneously', () => {
  const firstWorker = { id: 1, process: { exitCode: 1 } };
  const secondWorker = { id: 2, process: { exitCode: 1 } };
  const failure = buildStartupFailureMessage();

  // Both workers report a startup failure, but only the first is recorded.
  handleMessage(firstWorker, failure);
  handleMessage(secondWorker, failure);
  expect(fatalStartupFailure?.workerId).toBe(1);

  // The first worker's exit triggers the abort; nothing is reforked.
  handleExit(firstWorker);
  expect(exitCode).toBe(1);
  expect(forkCount).toBe(0);
});

it('reforks on scheduled restart', () => {
  const restartingWorker = { id: 1, isScheduledRestart: true, process: { exitCode: 0 } };

  handleExit(restartingWorker);

  // A scheduled restart reforks quietly: no error report, no abort.
  expect(reportedMessages).toHaveLength(0);
  expect(exitCode).toBeNull();
  expect(forkCount).toBe(1);
});

it('reforks on unexpected runtime crash without startup failure', () => {
  const crashedWorker = { id: 3, process: { exitCode: 1 } };

  // No startup-failure IPC message preceded this exit — treat it as a runtime crash.
  handleExit(crashedWorker);

  expect(reportedMessages).toEqual(['Worker 3 died UNEXPECTEDLY :(, restarting']);
  expect(exitCode).toBeNull();
  expect(forkCount).toBe(1);
});

it('ignores non-startup-failure messages', () => {
  const worker = { id: 1 };

  // None of these payloads match the startup-failure message shape.
  for (const payload of [{ type: 'SOME_OTHER_MESSAGE' }, 'a string message', null]) {
    handleMessage(worker, payload);
  }

  expect(fatalStartupFailure).toBeNull();
  expect(isAbortingForStartupFailure).toBe(false);
});
});
Loading
Loading