Skip to content

Commit fccc091

Browse files
committed
feat(workflow-engine): add retryOnTimeout opt-in for step timeout retries
1 parent 8f91d91 commit fccc091

4 files changed

Lines changed: 62 additions & 6 deletions

File tree

rivetkit-typescript/packages/workflow-engine/docs/retries.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ await ctx.step({
2424

2525
- `maxRetries` defaults to 3.
2626
- `retryBackoffBase` and `retryBackoffMax` control exponential delay.
27-
- `timeout` limits how long the step can run; timeouts are treated as critical failures.
27+
- `timeout` limits how long the step can run; by default a timeout is treated as a critical failure with no retry. Set `retryOnTimeout: true` on the step to retry timeouts like any other error.
2828

2929
## Unrecoverable Errors
3030

@@ -46,7 +46,7 @@ await ctx.step("halt", async () => {
4646
});
4747
```
4848

49-
`StepTimeoutError` is also treated as critical, so timeouts bypass retries.
49+
`StepTimeoutError` is treated as critical by default and bypasses retries. Opt into normal retry behavior with `retryOnTimeout: true`; when retries exhaust on a timeout the `try`-step failure `kind` is `"timeout"`.
5050

5151
## Exhaustion and Recovery
5252

rivetkit-typescript/packages/workflow-engine/src/context.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -943,8 +943,9 @@ export class WorkflowContextImpl implements WorkflowContextInterface {
943943
}
944944
entry.dirty = true;
945945

946-
// Timeout errors are treated as critical (no retry)
947-
if (error instanceof StepTimeoutError) {
946+
// Timeout errors are treated as critical by default. Steps opt
947+
// into retrying on timeout with retryOnTimeout: true.
948+
if (error instanceof StepTimeoutError && !config.retryOnTimeout) {
948949
metadata.status = "exhausted";
949950
metadata.error = String(error);
950951
await this.notifyStepError(config, metadata.attempts, error, {
@@ -1010,7 +1011,10 @@ export class WorkflowContextImpl implements WorkflowContextInterface {
10101011
attachTryStepFailure(
10111012
new StepExhaustedError(config.name, String(error)),
10121013
{
1013-
kind: "exhausted",
1014+
kind:
1015+
error instanceof StepTimeoutError
1016+
? "timeout"
1017+
: "exhausted",
10141018
stepName: config.name,
10151019
attempts: metadata.attempts,
10161020
error: extractErrorInfo(error),
@@ -1029,7 +1033,9 @@ export class WorkflowContextImpl implements WorkflowContextInterface {
10291033
*
10301034
* Note: This does NOT cancel the underlying operation. JavaScript Promises
10311035
* cannot be cancelled once started. When a timeout occurs:
1032-
* - The step is marked as failed with StepTimeoutError
1036+
* - The step is rejected with StepTimeoutError. By default this is treated
1037+
* as a critical failure with no retry. Set retryOnTimeout: true on the
1038+
* step config to retry timeouts like any other error.
10331039
* - The underlying async operation continues running in the background
10341040
* - Any side effects from the operation may still occur
10351041
*

rivetkit-typescript/packages/workflow-engine/src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ export interface StepConfig<T> {
399399
retryBackoffMax?: number;
400400
/** Timeout in ms for step execution (default: 30000). Set to 0 to disable. */
401401
timeout?: number;
402+
/** If true, step timeouts retry like any other error instead of failing immediately as critical. Default: false. */
403+
retryOnTimeout?: boolean;
402404
}
403405

404406
export type TryStepCatchKind =

rivetkit-typescript/packages/workflow-engine/tests/steps.test.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,54 @@ for (const mode of modes) {
520520
).rejects.toThrow(CriticalError);
521521
});
522522

523+
it("should retry steps that exceed timeout when retryOnTimeout is set", async () => {
524+
let attempts = 0;
525+
const workflow = async (ctx: WorkflowContextInterface) => {
526+
return await ctx.step({
527+
name: "timeout-step",
528+
timeout: 5,
529+
retryOnTimeout: true,
530+
maxRetries: 2,
531+
retryBackoffBase: 1,
532+
retryBackoffMax: 1,
533+
run: async () => {
534+
attempts++;
535+
await new Promise((resolve) => setTimeout(resolve, 25));
536+
return "late";
537+
},
538+
});
539+
};
540+
541+
if (mode === "yield") {
542+
const firstResult = await runWorkflow(
543+
"wf-1",
544+
workflow,
545+
undefined,
546+
driver,
547+
{ mode },
548+
).result;
549+
expect(firstResult.state).toBe("sleeping");
550+
await new Promise((resolve) => setTimeout(resolve, 10));
551+
552+
const secondResult = await runWorkflow(
553+
"wf-1",
554+
workflow,
555+
undefined,
556+
driver,
557+
{ mode },
558+
).result;
559+
expect(secondResult.state).toBe("sleeping");
560+
await new Promise((resolve) => setTimeout(resolve, 10));
561+
}
562+
563+
await expect(
564+
runWorkflow("wf-1", workflow, undefined, driver, { mode })
565+
.result,
566+
).rejects.toThrow(StepExhaustedError);
567+
568+
expect(attempts).toBe(3);
569+
});
570+
523571
it("should fail when a step is not awaited", async () => {
524572
const workflow = async (ctx: WorkflowContextInterface) => {
525573
const first = ctx.step("step-a", async () => "a");

0 commit comments

Comments
 (0)