Skip to content

Commit 6a984aa

Browse files
committed
fix(rivetkit): use keepAwake for websocket callback tracking to prevent c.vars crash after grace deadline
1 parent accd683 commit 6a984aa

4 files changed

Lines changed: 137 additions & 1 deletion

File tree

rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/registry-static.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ import {
113113
sleepRawWsDelayedSendOnSleep,
114114
sleepWithWaitUntilInOnWake,
115115
sleepAbortListenerVarsActor,
116+
sleepRawWsVarsExceedsGrace,
116117
} from "./sleep";
117118
import {
118119
sleepWithDb,
@@ -210,6 +211,7 @@ export const registry = setup({
210211
sleepRawWsDelayedSendOnSleep,
211212
sleepWithWaitUntilInOnWake,
212213
sleepAbortListenerVarsActor,
214+
sleepRawWsVarsExceedsGrace,
213215
counterWaitUntilProbe,
214216
// From sleep-db.ts
215217
sleepWithDb,

rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/sleep.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,3 +618,71 @@ export const sleepWithNoSleepOption = actor({
618618
noSleep: true,
619619
},
620620
});
621+
622+
// Reproduces a production crash where c.vars becomes undefined after the
623+
// grace deadline expires and clearNativeRuntimeState unrefs the NAPI
624+
// runtime state object. An async message handler accesses c.vars after an
625+
// await that outlasts the grace period.
626+
//
627+
// The close-handler variant cannot reproduce the bug because the tracked
628+
// websocket callback region blocks can_arm_sleep_timer. Instead we use a
629+
// message handler that starts slow async work, then tell the actor to
630+
// sleep programmatically (via the triggerSleep action) while the handler is still running.
631+
export const VARS_EXCEEDS_GRACE_DELAY = 2000;
632+
export const VARS_EXCEEDS_GRACE_PERIOD = 200;
633+
export const VARS_EXCEEDS_GRACE_SLEEP_TIMEOUT = 100;
634+
635+
export const sleepRawWsVarsExceedsGrace = actor({
636+
state: {
637+
startCount: 0,
638+
sleepCount: 0,
639+
handlerStarted: 0,
640+
handlerFinished: 0,
641+
},
642+
createVars: () => ({
643+
dirty: false,
644+
}),
645+
onWake: (c) => {
646+
c.state.startCount += 1;
647+
},
648+
onSleep: (c) => {
649+
c.state.sleepCount += 1;
650+
},
651+
onWebSocket: (c, websocket: UniversalWebSocket) => {
652+
websocket.addEventListener("message", async (event: any) => {
653+
if (event.data !== "slow-vars-work") return;
654+
655+
c.state.handlerStarted += 1;
656+
websocket.send(JSON.stringify({ type: "started" }));
657+
658+
// Wait longer than the grace period so the runtime state
659+
// gets cleared while this handler is still running.
660+
await new Promise((resolve) =>
661+
setTimeout(resolve, VARS_EXCEEDS_GRACE_DELAY),
662+
);
663+
// This c.vars access crashes with TypeError in prod because
664+
// the NAPI runtime state reference has been unreffed.
665+
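// Do NOT wrap in try/catch: c.state also breaks after cleanup, so a catch
666+
// block could not record the failure there. The error needs to propagate to the process level, where the test looks for it in the runtime output.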
667+
c.vars.dirty = true;
668+
c.state.handlerFinished += 1;
669+
});
670+
671+
websocket.send(JSON.stringify({ type: "connected" }));
672+
},
673+
actions: {
674+
triggerSleep: (c) => {
675+
c.sleep();
676+
},
677+
getStatus: (c) => ({
678+
startCount: c.state.startCount,
679+
sleepCount: c.state.sleepCount,
680+
handlerStarted: c.state.handlerStarted,
681+
handlerFinished: c.state.handlerFinished,
682+
}),
683+
},
684+
options: {
685+
sleepTimeout: VARS_EXCEEDS_GRACE_SLEEP_TIMEOUT,
686+
sleepGracePeriod: VARS_EXCEEDS_GRACE_PERIOD,
687+
},
688+
});

rivetkit-typescript/packages/rivetkit/src/registry/native.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2335,7 +2335,7 @@ class TrackedWebSocketHandleAdapter implements UniversalWebSocket {
23352335
return;
23362336
}
23372337
const callbackRegionId = this.#ctx.beginWebSocketCallback();
2338-
this.#ctx.waitUntil(
2338+
this.#ctx.keepAwake(
23392339
Promise.resolve(result)
23402340
.catch((error) => {
23412341
logger().error({

rivetkit-typescript/packages/rivetkit/tests/driver/actor-sleep.test.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import {
55
RAW_WS_HANDLER_DELAY,
66
RAW_WS_HANDLER_SLEEP_TIMEOUT,
77
SLEEP_TIMEOUT,
8+
VARS_EXCEEDS_GRACE_DELAY,
9+
VARS_EXCEEDS_GRACE_SLEEP_TIMEOUT,
810
} from "../../fixtures/driver-test-suite/sleep";
911
import { describeDriverMatrix } from "./shared-matrix";
1012
import { setupDriverTest, waitFor } from "./shared-utils";
@@ -962,5 +964,69 @@ describeDriverMatrix("Actor Sleep", (driverTestConfig) => {
962964
expect(startCount).toBe(2);
963965
}
964966
});
967+
968+
test(
969+
"c.vars access in ws handler should not crash after grace deadline",
970+
async (c) => {
971+
const { client, getRuntimeOutput } = await setupDriverTest(
972+
c,
973+
driverTestConfig,
974+
);
975+
976+
const actor =
977+
client.sleepRawWsVarsExceedsGrace.getOrCreate([
978+
"ws-vars-exceeds-grace",
979+
]);
980+
const ws = await connectRawWebSocket(actor);
981+
982+
// Send a message that starts slow async work (the handler waits
983+
// VARS_EXCEEDS_GRACE_DELAY = 2000ms before accessing c.vars).
984+
ws.send("slow-vars-work");
985+
986+
// Wait for the handler to confirm it started.
987+
await new Promise<void>((resolve) => {
988+
const onMessage = (event: MessageEvent) => {
989+
const data = JSON.parse(String(event.data));
990+
if (data.type === "started") {
991+
ws.removeEventListener("message", onMessage);
992+
resolve();
993+
}
994+
};
995+
ws.addEventListener("message", onMessage);
996+
});
997+
998+
// Trigger sleep while the handler is still doing slow work.
999+
// The grace period (200ms) is much shorter than the handler
1000+
// delay (2000ms), so the grace deadline expires (and, before the fix,
1001+
// the native runtime state was cleared) while the handler is still running.
1002+
await actor.triggerSleep();
1003+
1004+
// Wait for the handler to finish and the actor to complete
1005+
// its sleep cycle.
1006+
await waitFor(
1007+
driverTestConfig,
1008+
VARS_EXCEEDS_GRACE_DELAY +
1009+
VARS_EXCEEDS_GRACE_SLEEP_TIMEOUT +
1010+
500,
1011+
);
1012+
1013+
// Wake the actor and check what happened.
1014+
const status = await actor.getStatus();
1015+
expect(status.sleepCount).toBeGreaterThanOrEqual(1);
1016+
expect(status.handlerStarted).toBe(1);
1017+
// The runtime must not crash with TypeError when the
1018+
// handler accesses c.vars after the grace deadline.
1019+
// The deferred cleanup keeps the runtime state alive
1020+
// until the websocket callback region drains.
1021+
const output = getRuntimeOutput();
1022+
expect(output).not.toContain(
1023+
"Cannot set properties of undefined",
1024+
);
1025+
expect(output).not.toContain(
1026+
"Cannot read properties of undefined",
1027+
);
1028+
},
1029+
{ timeout: 15_000 },
1030+
);
9651031
});
9661032
});

0 commit comments

Comments
 (0)