Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 74 additions & 15 deletions packages/sdk/src/__tests__/workflow-runner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -916,23 +916,82 @@ agents:
expect(spawnCalls[0][0].task).toContain('STEP_COMPLETE:step-1');
});

it('should use the full remaining timeout as the review safety backstop', async () => {
const config = makeConfig({
workflows: [
{
name: 'default',
steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1', timeoutMs: 90_000 }],
},
],
it('should force interactive Codex reviewers onto the non-interactive review path', async () => {
const spawnAndWait = vi
.spyOn(runner as any, 'spawnAndWait')
.mockImplementation(async (agentDef: any, _step: any, timeoutMs: number | undefined, options: any) => {
expect(agentDef.cli).toBe('codex');
expect(agentDef.interactive).toBe(false);
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
expect(timeoutMs).toBe(300_000);
options?.onChunk?.({
chunk: 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: subprocess review\n',
});
return {
output: 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: subprocess review\n',
exitCode: 0,
};
});

vi.spyOn(runner as any, 'postToChannel').mockImplementation(() => undefined);
vi.spyOn(runner as any, 'recordStepToolSideEffect').mockImplementation(() => undefined);
(runner as any).trajectory = {
registerAgent: vi.fn().mockResolvedValue(undefined),
reviewCompleted: vi.fn().mockResolvedValue(undefined),
};

const reviewOutput = await (runner as any).runStepReviewGate(
{ name: 'step-1', type: 'agent', agent: 'specialist', task: 'Do step 1' },
'Do step 1',
'worker finished\n',
'STEP_COMPLETE:step-1\n',
{ name: 'team-lead', cli: 'claude', role: 'lead coordinator' },
{ name: 'reviewer-1', cli: 'codex', role: 'reviewer' },
undefined
);

expect(reviewOutput).toContain('REVIEW_DECISION: APPROVE');
expect(spawnAndWait).toHaveBeenCalledTimes(1);
});

it('should cap default review timeout at 5 minutes in executor mode', async () => {
const executeAgentStep = vi.fn(
async (
_step: { name: string },
agentDef: { cli: string; interactive?: boolean },
_resolvedTask: string,
timeoutMs?: number
) => {
expect(agentDef.cli).toBe('codex');
expect(agentDef.interactive).toBe(false);
expect(timeoutMs).toBe(300_000);
return 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: executor review\n';
}
);

const localRunner = new WorkflowRunner({
db,
workspaceId: 'ws-test',
executor: { executeAgentStep },
});
const run = await runner.execute(config, 'default');
vi.spyOn(localRunner as any, 'postToChannel').mockImplementation(() => undefined);
vi.spyOn(localRunner as any, 'recordStepToolSideEffect').mockImplementation(() => undefined);
(localRunner as any).trajectory = {
registerAgent: vi.fn().mockResolvedValue(undefined),
reviewCompleted: vi.fn().mockResolvedValue(undefined),
};

const reviewOutput = await (localRunner as any).runStepReviewGate(
{ name: 'step-1', type: 'agent', agent: 'specialist', task: 'Do step 1' },
'Do step 1',
'worker finished\n',
'STEP_COMPLETE:step-1\n',
{ name: 'team-lead', cli: 'claude', role: 'lead coordinator' },
{ name: 'reviewer-1', cli: 'codex', role: 'reviewer' },
undefined
);

expect(run.status).toBe('completed');
const waitCalls = (waitForExitFn as any).mock?.calls ?? [];
expect(waitCalls.length).toBeGreaterThanOrEqual(2);
// first call: owner timeout; second call: review timeout
expect(waitCalls[1][0]).toBeGreaterThan(60_000);
expect(waitCalls[1][0]).toBeLessThanOrEqual(90_000);
expect(reviewOutput).toContain('REVIEW_DECISION: APPROVE');
expect(executeAgentStep).toHaveBeenCalledTimes(1);
});
});

Expand Down
16 changes: 14 additions & 2 deletions packages/sdk/src/workflows/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4595,7 +4595,10 @@ export class WorkflowRunner {
`REVIEW_REASON: <one sentence>\n` +
`Then output /exit.`;

const safetyTimeoutMs = timeoutMs ?? 600_000;
// Cap review timeout at 5 minutes. Review tasks are self-contained
// read-and-judge operations and should not inherit long owner step timeouts.
const REVIEW_TIMEOUT_CAP_MS = 300_000;
const safetyTimeoutMs = Math.min(timeoutMs ?? 600_000, REVIEW_TIMEOUT_CAP_MS);
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
const reviewStep: WorkflowStep = {
name: `${step.name}-review`,
type: 'agent',
Expand Down Expand Up @@ -4670,7 +4673,7 @@ export class WorkflowRunner {
};

try {
await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
const spawnResult = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
evidenceStepName: step.name,
evidenceRole: 'reviewer',
logicalName: reviewerDef.name,
Expand All @@ -4686,6 +4689,15 @@ export class WorkflowRunner {
}
},
});
// If onChunk never fired (e.g. non-interactive reviewer path), fall back
// to the accumulated output returned by spawnAndWait.
if (!reviewOutput && spawnResult.output) {
reviewOutput = spawnResult.output;
const parsed = this.parseReviewDecision(reviewOutput);
if (parsed) {
startReviewCompletion(parsed);
}
}
await reviewCompletionPromise;
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
Expand Down
Loading