Skip to content

Commit ea20605

Browse files
committed
feat(workflow-engine): add retryOnTimeout opt-in for step timeout retries
1 parent f83acdc commit ea20605

4 files changed

Lines changed: 62 additions & 6 deletions

File tree

rivetkit-typescript/packages/workflow-engine/docs/retries.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ await ctx.step({
2424

2525
- `maxRetries` defaults to 3.
2626
- `retryBackoffBase` and `retryBackoffMax` control exponential delay.
27-
- `timeout` limits how long the step can run; timeouts are treated as critical failures.
27+
- `timeout` limits how long the step can run; by default a timeout is treated as a critical failure with no retry. Set `retryOnTimeout: true` on the step to retry timeouts like any other error.
2828

2929
## Unrecoverable Errors
3030

@@ -46,7 +46,7 @@ await ctx.step("halt", async () => {
4646
});
4747
```
4848

49-
`StepTimeoutError` is also treated as critical, so timeouts bypass retries.
49+
`StepTimeoutError` is treated as critical by default and bypasses retries. Opt into normal retry behavior with `retryOnTimeout: true`; when retries exhaust on a timeout the `try`-step failure `kind` is `"timeout"`.
5050

5151
## Exhaustion and Recovery
5252

rivetkit-typescript/packages/workflow-engine/src/context.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -941,8 +941,9 @@ export class WorkflowContextImpl implements WorkflowContextInterface {
941941
}
942942
entry.dirty = true;
943943

944-
// Timeout errors are treated as critical (no retry)
945-
if (error instanceof StepTimeoutError) {
944+
// Timeout errors are treated as critical by default. Steps opt
945+
// into retrying on timeout with retryOnTimeout: true.
946+
if (error instanceof StepTimeoutError && !config.retryOnTimeout) {
946947
metadata.status = "exhausted";
947948
metadata.error = String(error);
948949
await this.notifyStepError(config, metadata.attempts, error, {
@@ -1008,7 +1009,10 @@ export class WorkflowContextImpl implements WorkflowContextInterface {
10081009
attachTryStepFailure(
10091010
new StepExhaustedError(config.name, String(error)),
10101011
{
1011-
kind: "exhausted",
1012+
kind:
1013+
error instanceof StepTimeoutError
1014+
? "timeout"
1015+
: "exhausted",
10121016
stepName: config.name,
10131017
attempts: metadata.attempts,
10141018
error: extractErrorInfo(error),
@@ -1027,7 +1031,9 @@ export class WorkflowContextImpl implements WorkflowContextInterface {
10271031
*
10281032
* Note: This does NOT cancel the underlying operation. JavaScript Promises
10291033
* cannot be cancelled once started. When a timeout occurs:
1030-
* - The step is marked as failed with StepTimeoutError
1034+
* - The step is rejected with StepTimeoutError. By default this is treated
1035+
* as a critical failure with no retry. Set retryOnTimeout: true on the
1036+
* step config to retry timeouts like any other error.
10311037
* - The underlying async operation continues running in the background
10321038
* - Any side effects from the operation may still occur
10331039
*

rivetkit-typescript/packages/workflow-engine/src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ export interface StepConfig<T> {
399399
retryBackoffMax?: number;
400400
/** Timeout in ms for step execution (default: 30000). Set to 0 to disable. */
401401
timeout?: number;
402+
/** If true, step timeouts retry like any other error instead of failing immediately as critical. Default: false. */
403+
retryOnTimeout?: boolean;
402404
}
403405

404406
export type TryStepCatchKind =

rivetkit-typescript/packages/workflow-engine/tests/steps.test.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,54 @@ for (const mode of modes) {
519519
).rejects.toThrow(CriticalError);
520520
});
521521

522+
it("should retry steps that exceed timeout when retryOnTimeout is set", async () => {
523+
let attempts = 0;
524+
const workflow = async (ctx: WorkflowContextInterface) => {
525+
return await ctx.step({
526+
name: "timeout-step",
527+
timeout: 5,
528+
retryOnTimeout: true,
529+
maxRetries: 2,
530+
retryBackoffBase: 1,
531+
retryBackoffMax: 1,
532+
run: async () => {
533+
attempts++;
534+
await new Promise((resolve) => setTimeout(resolve, 25));
535+
return "late";
536+
},
537+
});
538+
};
539+
540+
if (mode === "yield") {
541+
const firstResult = await runWorkflow(
542+
"wf-1",
543+
workflow,
544+
undefined,
545+
driver,
546+
{ mode },
547+
).result;
548+
expect(firstResult.state).toBe("sleeping");
549+
await new Promise((resolve) => setTimeout(resolve, 10));
550+
551+
const secondResult = await runWorkflow(
552+
"wf-1",
553+
workflow,
554+
undefined,
555+
driver,
556+
{ mode },
557+
).result;
558+
expect(secondResult.state).toBe("sleeping");
559+
await new Promise((resolve) => setTimeout(resolve, 10));
560+
}
561+
562+
await expect(
563+
runWorkflow("wf-1", workflow, undefined, driver, { mode })
564+
.result,
565+
).rejects.toThrow(StepExhaustedError);
566+
567+
expect(attempts).toBe(3);
568+
});
569+
522570
it("should fail when a step is not awaited", async () => {
523571
const workflow = async (ctx: WorkflowContextInterface) => {
524572
const first = ctx.step("step-a", async () => "a");

0 commit comments

Comments
 (0)