Skip to content

Commit 7072a94

Browse files
committed
fix(rivetkit): recover stale dynamic actors across sleep and wake
1 parent 13072b7 commit 7072a94

5 files changed

Lines changed: 99 additions & 35 deletions

File tree

rivetkit-typescript/packages/rivetkit-napi/src/napi_actor_events.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1768,7 +1768,6 @@ mod tests {
17681768
rivetkit_core::ActorContext::new("actor-serialize-clean", "actor", Vec::new(), "local");
17691769
let ctx = ActorContext::new(core_ctx);
17701770
let dirty = AtomicBool::new(false);
1771-
17721771
let deltas = maybe_serialize(&bindings, &ctx, &dirty, SerializeStateReason::Save)
17731772
.await
17741773
.expect("clean save serialize should not fail");

rivetkit-typescript/packages/rivetkit/src/client/actor-conn.ts

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -236,10 +236,14 @@ export class ActorConnRaw {
236236
* re-resolves to a fresh actor. Returns true if the identity was
237237
* invalidated.
238238
*/
239-
#invalidateActorIfStale(group: string, code: string): boolean {
239+
#invalidateActorIfStale(
240+
group: string,
241+
code: string,
242+
message?: string,
243+
): boolean {
240244
if (
241245
!isDynamicActorQuery(this.#actorResolutionState) ||
242-
!isStaleResolvedActorError(group, code)
246+
!isStaleResolvedActorError(group, code, message)
243247
) {
244248
return false;
245249
}
@@ -248,10 +252,14 @@ export class ActorConnRaw {
248252
return true;
249253
}
250254

251-
#shouldReconnectForStaleActor(group: string, code: string): boolean {
255+
#shouldReconnectForStaleActor(
256+
group: string,
257+
code: string,
258+
message?: string,
259+
): boolean {
252260
return (
253261
isDynamicActorQuery(this.#actorResolutionState) &&
254-
isStaleResolvedActorError(group, code) &&
262+
isStaleResolvedActorError(group, code, message) &&
255263
this.#onOpenPromise !== undefined &&
256264
this.#connStatus !== "connected"
257265
);
@@ -667,7 +675,7 @@ export class ActorConnRaw {
667675

668676
if (actionId !== null) {
669677
const inFlight = this.#takeActionInFlight(Number(actionId));
670-
this.#invalidateActorIfStale(group, code);
678+
this.#invalidateActorIfStale(group, code, message);
671679

672680
logger().warn({
673681
msg: "action error",
@@ -691,7 +699,7 @@ export class ActorConnRaw {
691699
metadata,
692700
});
693701

694-
if (this.#shouldReconnectForStaleActor(group, code)) {
702+
if (this.#shouldReconnectForStaleActor(group, code, message)) {
695703
this.#clearResolvedActorIdentity();
696704
this.#onOpenPromise?.reject(
697705
new errors.ActorError(group, code, message, metadata),
@@ -724,7 +732,7 @@ export class ActorConnRaw {
724732
this.#onOpenPromise.reject(errorToThrow);
725733
}
726734

727-
this.#invalidateActorIfStale(group, code);
735+
this.#invalidateActorIfStale(group, code, message);
728736

729737
// Reject any in-flight requests
730738
for (const [id, inFlight] of this.#actionsInFlight.entries()) {

rivetkit-typescript/packages/rivetkit/src/client/actor-handle.ts

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ import { rawHttpFetch, rawWebSocket } from "./raw-utils";
5050
import { resolveGatewayTarget } from "./resolve-gateway-target";
5151
import { sendHttpRequest } from "./utils";
5252

53+
const DYNAMIC_LIFECYCLE_RETRY_WINDOW_MS = 100;
54+
const GET_FOR_KEY_DYNAMIC_QUERY_MAX_ATTEMPTS = 24;
55+
const GET_OR_CREATE_DYNAMIC_QUERY_MAX_ATTEMPTS = 180;
56+
5357
/**
5458
* Provides underlying functions for stateless {@link ActorHandle} for action calls.
5559
* Similar to ActorConnRaw but doesn't maintain a connection.
@@ -153,7 +157,7 @@ export class ActorHandleRaw {
153157
maxAttempts,
154158
)
155159
) {
156-
useQueryTarget = true;
160+
useQueryTarget = this.#shouldRetryViaQueryTarget();
157161
await this.#waitForRetryWindow();
158162
continue;
159163
}
@@ -164,18 +168,24 @@ export class ActorHandleRaw {
164168
code,
165169
attempt,
166170
maxAttempts,
171+
message,
167172
)
168173
) {
169174
this.#clearResolvedActorId();
170-
useQueryTarget = true;
175+
useQueryTarget = this.#shouldRetryViaQueryTarget();
171176
await this.#waitForRetryWindow();
172177
continue;
173178
}
174179

175-
const invalidated = this.#invalidateResolvedActorId(group, code);
180+
const invalidated = this.#invalidateResolvedActorId(
181+
group,
182+
code,
183+
message,
184+
);
176185
if (invalidated && attempt < maxAttempts - 1) {
177186
useQueryTarget =
178-
code === "stopping" || code.startsWith("destroyed_");
187+
this.#shouldRetryViaQueryTarget() &&
188+
(code === "stopping" || code.startsWith("destroyed_"));
179189
if (useQueryTarget) {
180190
await this.#waitForRetryWindow();
181191
}
@@ -309,7 +319,7 @@ export class ActorHandleRaw {
309319
maxAttempts,
310320
)
311321
) {
312-
useQueryTarget = true;
322+
useQueryTarget = this.#shouldRetryViaQueryTarget();
313323
await this.#waitForRetryWindow();
314324
continue;
315325
}
@@ -321,10 +331,11 @@ export class ActorHandleRaw {
321331
code,
322332
attempt,
323333
maxAttempts,
334+
message,
324335
)
325336
) {
326337
this.#clearResolvedActorId();
327-
useQueryTarget = true;
338+
useQueryTarget = this.#shouldRetryViaQueryTarget();
328339
await this.#waitForRetryWindow();
329340
continue;
330341
}
@@ -342,10 +353,14 @@ export class ActorHandleRaw {
342353
);
343354
}
344355

345-
const invalidated = this.#invalidateResolvedActorId(group, code);
356+
const invalidated = this.#invalidateResolvedActorId(
357+
group,
358+
code,
359+
message,
360+
);
346361
if (invalidated && attempt < maxAttempts - 1) {
347362
if (group === "actor" && code === "stopping") {
348-
useQueryTarget = true;
363+
useQueryTarget = this.#shouldRetryViaQueryTarget();
349364
await new Promise((resolve) => setTimeout(resolve, 100));
350365
}
351366
continue;
@@ -371,22 +386,35 @@ export class ActorHandleRaw {
371386
}
372387

373388
async #waitForRetryWindow(): Promise<void> {
374-
await new Promise((resolve) => setTimeout(resolve, 100));
389+
await new Promise((resolve) =>
390+
setTimeout(resolve, DYNAMIC_LIFECYCLE_RETRY_WINDOW_MS),
391+
);
375392
}
376393

377394
#getDynamicQueryMaxAttempts(): number {
378395
if (!isDynamicActorQuery(this.#actorResolutionState)) {
379396
return 1;
380397
}
381398

382-
return "getOrCreateForKey" in this.#actorResolutionState ? 60 : 24;
399+
// `getOrCreateForKey` can race an actor that is still finishing the
400+
// runtime's sleep grace period. Keep retrying long enough for the
401+
// stop-to-wake handoff to settle instead of surfacing a stale
402+
// `actor/stopping` error.
403+
return "getOrCreateForKey" in this.#actorResolutionState
404+
? GET_OR_CREATE_DYNAMIC_QUERY_MAX_ATTEMPTS
405+
: GET_FOR_KEY_DYNAMIC_QUERY_MAX_ATTEMPTS;
406+
}
407+
408+
#shouldRetryViaQueryTarget(): boolean {
409+
return isDynamicActorQuery(this.#actorResolutionState);
383410
}
384411

385412
#shouldRetryDynamicLifecycleError(
386413
group: string,
387414
code: string,
388415
attempt: number,
389416
maxAttempts: number,
417+
message?: string,
390418
): boolean {
391419
if (
392420
!isDynamicActorQuery(this.#actorResolutionState) ||
@@ -399,6 +427,9 @@ export class ActorHandleRaw {
399427
return (
400428
code === "not_found" ||
401429
code === "stopping" ||
430+
(code === "not_configured" &&
431+
message ===
432+
"Actor capability 'actor event inbox' is not configured.") ||
402433
code === "destroying" ||
403434
code.startsWith("destroyed_")
404435
);
@@ -441,10 +472,14 @@ export class ActorHandleRaw {
441472
return true;
442473
}
443474

444-
#invalidateResolvedActorId(group: string, code: string): boolean {
475+
#invalidateResolvedActorId(
476+
group: string,
477+
code: string,
478+
message?: string,
479+
): boolean {
445480
if (
446481
!isDynamicActorQuery(this.#actorResolutionState) ||
447-
!isStaleResolvedActorError(group, code)
482+
!isStaleResolvedActorError(group, code, message)
448483
) {
449484
return false;
450485
}
@@ -570,7 +605,7 @@ export class ActorHandleRaw {
570605
maxAttempts,
571606
)
572607
) {
573-
useQueryTarget = true;
608+
useQueryTarget = this.#shouldRetryViaQueryTarget();
574609
await this.#waitForRetryWindow();
575610
continue;
576611
}
@@ -581,18 +616,24 @@ export class ActorHandleRaw {
581616
code,
582617
attempt,
583618
maxAttempts,
619+
message,
584620
)
585621
) {
586622
this.#clearResolvedActorId();
587-
useQueryTarget = true;
623+
useQueryTarget = this.#shouldRetryViaQueryTarget();
588624
await this.#waitForRetryWindow();
589625
continue;
590626
}
591627

592-
const invalidated = this.#invalidateResolvedActorId(group, code);
628+
const invalidated = this.#invalidateResolvedActorId(
629+
group,
630+
code,
631+
message,
632+
);
593633
if (invalidated && attempt < maxAttempts - 1) {
594634
useQueryTarget =
595-
code === "stopping" || code.startsWith("destroyed_");
635+
this.#shouldRetryViaQueryTarget() &&
636+
(code === "stopping" || code.startsWith("destroyed_"));
596637
if (useQueryTarget) {
597638
await this.#waitForRetryWindow();
598639
}
@@ -627,7 +668,7 @@ export class ActorHandleRaw {
627668
return null;
628669
}
629670

630-
const { group, code } = error;
671+
const { group, code, message } = error;
631672

632673
if (
633674
await this.#shouldRetrySchedulingError(
@@ -639,7 +680,7 @@ export class ActorHandleRaw {
639680
)
640681
) {
641682
return {
642-
useQueryTarget: true,
683+
useQueryTarget: this.#shouldRetryViaQueryTarget(),
643684
waitForRetryWindow: true,
644685
};
645686
}
@@ -650,19 +691,25 @@ export class ActorHandleRaw {
650691
code,
651692
attempt,
652693
maxAttempts,
694+
message,
653695
)
654696
) {
655697
this.#clearResolvedActorId();
656698
return {
657-
useQueryTarget: true,
699+
useQueryTarget: this.#shouldRetryViaQueryTarget(),
658700
waitForRetryWindow: true,
659701
};
660702
}
661703

662-
const invalidated = this.#invalidateResolvedActorId(group, code);
704+
const invalidated = this.#invalidateResolvedActorId(
705+
group,
706+
code,
707+
message,
708+
);
663709
if (invalidated && attempt < maxAttempts - 1) {
664710
const useQueryTarget =
665-
code === "stopping" || code.startsWith("destroyed_");
711+
this.#shouldRetryViaQueryTarget() &&
712+
(code === "stopping" || code.startsWith("destroyed_"));
666713
return {
667714
useQueryTarget,
668715
waitForRetryWindow: useQueryTarget,

rivetkit-typescript/packages/rivetkit/src/client/actor-query.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,14 @@ export function getGatewayTarget(state: ActorResolutionState): GatewayTarget {
4646
export function isStaleResolvedActorError(
4747
group: string,
4848
code: string,
49+
message?: string,
4950
): boolean {
5051
return (
5152
group === "actor" &&
5253
(code === "not_found" ||
5354
code === "stopping" ||
55+
(code === "not_configured" &&
56+
message === "Actor capability 'actor event inbox' is not configured.") ||
5457
code.startsWith("destroyed_"))
5558
);
5659
}

rivetkit-typescript/packages/rivetkit/src/registry/native.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,15 +148,17 @@ type NativeActorRuntimeState = {
148148
persistState?: NativePersistActorState;
149149
};
150150

151-
// Keep JS-only actor caches on the NAPI ActorContext runtime-state bag instead
152-
// of actorId-keyed module globals so same-key recreates start from a fresh
153-
// generation.
151+
const nativeRuntimeStateByActorId = new Map<string, NativeActorRuntimeState>();
152+
154153
function getNativeRuntimeState(
155154
ctx: NativeActorContext,
156155
): NativeActorRuntimeState {
157-
const runtimeState = callNativeSync(() =>
158-
ctx.runtimeState(),
159-
) as NativeActorRuntimeState;
156+
const actorId = callNativeSync(() => ctx.actorId());
157+
let runtimeState = nativeRuntimeStateByActorId.get(actorId);
158+
if (!runtimeState) {
159+
runtimeState = {};
160+
nativeRuntimeStateByActorId.set(actorId, runtimeState);
161+
}
160162
if (!runtimeState.destroyGate) {
161163
runtimeState.destroyGate = {};
162164
}
@@ -170,6 +172,10 @@ function getNativeRuntimeState(
170172
return runtimeState;
171173
}
172174

175+
function deleteNativeRuntimeState(ctx: NativeActorContext) {
176+
nativeRuntimeStateByActorId.delete(callNativeSync(() => ctx.actorId()));
177+
}
178+
173179
function getNativePersistState(ctx: NativeActorContext): NativePersistActorState {
174180
return getNativeRuntimeState(ctx).persistState!;
175181
}
@@ -3730,6 +3736,7 @@ export function buildNativeFactory(
37303736
} finally {
37313737
resolveNativeDestroy(ctx);
37323738
await actorCtx.closeDatabase(true);
3739+
deleteNativeRuntimeState(ctx);
37333740
await actorCtx.dispose();
37343741
}
37353742
},

0 commit comments

Comments
 (0)