Skip to content

Commit 04c4e76

Browse files
timvisher-dd and claude committed
test: add edge case coverage for stopped status, error results, and back-to-back internal turns
Three remaining gaps from codex review:

1. A stopped task_notification status clears pendingTaskIds (same as failed/completed — verifies that all terminal statuses work).
2. An error_during_execution result with pending bg tasks does NOT drain internal turns (documents a known limitation — the drain only runs for result/success).
3. Multiple back-to-back background task completions that produce two consecutive internal turns are both consumed in a single prompt() call.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c3288c6 commit 04c4e76

1 file changed

Lines changed: 224 additions & 0 deletions

File tree

src/tests/bg-task-leak.test.ts

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,230 @@ describe("Background task notification leak", () => {
555555
expect(result.stopReason).toBe("end_turn");
556556
});
557557

558+
it("clears pendingTaskIds when task_notification reports stopped status", async () => {
  // Counterpart of the "failed" status scenario, using the "stopped"
  // terminal status instead: a task is started and then reported as
  // stopped inside a single turn, before the turn's result message.
  // NOTE(review): only the stop reason is asserted below; the pending-task
  // set is not inspected directly — confirm that is sufficient coverage.
  const turnMessages = makeNormalTurnMessages("Stopped.");

  // Inserts a message directly ahead of the turn's "result" message. The
  // result is re-located on every call because earlier inserts shift it.
  const insertBeforeResult = (message: any): void => {
    const at = turnMessages.findIndex((m: any) => m.type === "result");
    turnMessages.splice(at, 0, message);
  };

  const taskId = "stop-task-1";
  const toolUseId = "toolu_stop_1";

  insertBeforeResult({
    type: "system",
    subtype: "task_started",
    task_id: taskId,
    tool_use_id: toolUseId,
    description: "Will be stopped",
    task_type: "local_bash",
    session_id: SESSION_ID,
  });
  insertBeforeResult({
    type: "system",
    subtype: "task_notification",
    task_id: taskId,
    tool_use_id: toolUseId,
    status: "stopped",
    summary: "Task was cancelled",
    session_id: SESSION_ID,
  });

  const query = createMockQuery(turnMessages);
  const { client } = createMockClient();
  const agent = createAgentWithSession(query, client);

  const outcome = await agent.prompt({
    sessionId: SESSION_ID,
    prompt: [{ type: "text", text: "go" }],
  });

  // The prompt ends normally once the task has been reported as stopped.
  expect(outcome.stopReason).toBe("end_turn");
});
594+
595+
it("error_during_execution result does NOT drain internal turns (known limitation)", async () => {
  // Documents current behavior rather than asserting a fix: the internal
  // turn drain only runs for result/success. When the prompt errors while
  // a background task is still pending, the internal turn messages that
  // follow can still leak.
  const messages = [
    { type: "system", subtype: "init", session_id: SESSION_ID },
    // A background task starts and never receives a terminal notification
    // before the erroring result arrives.
    {
      type: "system",
      subtype: "task_started",
      task_id: "err-task-1",
      tool_use_id: "toolu_err_1",
      description: "bg task during error",
      task_type: "local_bash",
      session_id: SESSION_ID,
    },
    {
      type: "result",
      subtype: "error_during_execution",
      is_error: true,
      stop_reason: null,
      duration_ms: 50,
      result: "something broke",
      errors: ["tool execution failed"],
      session_id: SESSION_ID,
      total_cost_usd: 0.001,
      usage: {
        input_tokens: 5,
        output_tokens: 2,
        cache_read_input_tokens: 0,
        cache_creation_input_tokens: 0,
        server_tool_use: { web_search_requests: 0, web_fetch_requests: 0 },
        service_tier: "standard",
      },
      modelUsage: {
        "test-model": {
          inputTokens: 5,
          outputTokens: 2,
          cacheReadInputTokens: 0,
          cacheCreationInputTokens: 0,
          webSearchRequests: 0,
          costUSD: 0.001,
          contextWindow: 200000,
          maxOutputTokens: 4096,
        },
      },
    },
    // These would be the internal turn — left in the queue.
    ...makeBgTaskInternalTurnMessages(),
  ];

  const mockQuery = createMockQuery(messages);
  const { client } = createMockClient();
  const agent = createAgentWithSession(mockQuery, client);

  // error_during_execution with is_error makes prompt() reject; the
  // rejection carries the text from the result's `errors` entry.
  await expect(
    agent.prompt({
      sessionId: SESSION_ID,
      prompt: [{ type: "text", text: "go" }],
    }),
  ).rejects.toThrow("tool execution failed");

  // The internal turn messages are still in the queue (known limitation).
  // Asserting on the length itself (not a pre-computed boolean) means a
  // failure reports the actual queue length.
  const queue = (mockQuery as any).inputStream.queue as any[];
  expect(queue.length).toBeGreaterThan(0);
});
662+
663+
it("multiple back-to-back bg task internal turns are all consumed", async () => {
  // Two background tasks are started during the first turn; after its
  // result, each one produces its own internal turn
  // (task_notification → assistant → result).
  const firstTurn = makeNormalTurnMessages("Two tasks launched.");

  // Inserts a message directly ahead of the turn's "result" message. The
  // result is re-located on every call because earlier inserts shift it.
  const insertBeforeResult = (message: any): void => {
    const at = firstTurn.findIndex((m: any) => m.type === "result");
    firstTurn.splice(at, 0, message);
  };

  // Both task_started messages land before the first result, A then B.
  insertBeforeResult({
    type: "system",
    subtype: "task_started",
    task_id: "bg-task-a",
    tool_use_id: "toolu_a",
    description: "First bg task",
    task_type: "local_bash",
    session_id: SESSION_ID,
  });
  insertBeforeResult({
    type: "system",
    subtype: "task_started",
    task_id: "bg-task-b",
    tool_use_id: "toolu_b",
    description: "Second bg task",
    task_type: "local_bash",
    session_id: SESSION_ID,
  });

  // First internal turn (task A completes). Only task_id is overridden on
  // the helper's first message (presumably its task_notification); every
  // other field stays as the helper produced it.
  const internalTurnA = makeBgTaskInternalTurnMessages();
  (internalTurnA[0] as any).task_id = "bg-task-a";

  // Second internal turn (task B completes), spelled out by hand.
  const bgTextB = "\n\nSecond background task also completed.";

  // Wraps a raw streaming event in the envelope shared by every
  // stream_event message of this turn.
  const streamEvent = (event: any) => ({
    type: "stream_event",
    event,
    parent_tool_use_id: null,
    session_id: SESSION_ID,
  });

  const internalTurnB = [
    {
      type: "system",
      subtype: "task_notification",
      task_id: "bg-task-b",
      tool_use_id: "toolu_b",
      status: "completed",
      output_file: "/tmp/tasks/bg-task-b.output",
      summary: "Second background command completed",
      session_id: SESSION_ID,
    },
    { type: "system", subtype: "init", cwd: "/test", session_id: SESSION_ID, tools: [], model: "test" },
    streamEvent({
      type: "message_start",
      message: { model: "test", role: "assistant", content: [], id: "msg_b" },
    }),
    streamEvent({ type: "content_block_start", index: 0, content_block: { type: "text", text: "" } }),
    streamEvent({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: bgTextB } }),
    {
      type: "assistant",
      message: {
        role: "assistant",
        content: [{ type: "text", text: bgTextB }],
        model: "test",
        id: "msg_b",
        type: "message",
        stop_reason: null,
        stop_sequence: null,
        usage: { input_tokens: 3, output_tokens: 10, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 },
      },
      parent_tool_use_id: null,
      session_id: SESSION_ID,
    },
    streamEvent({ type: "content_block_stop", index: 0 }),
    streamEvent({ type: "message_stop" }),
    makeResultMessage(bgTextB, 3, 10),
  ];

  // Normal turn first, then the two internal turns back to back.
  const allMessages = [...firstTurn, ...internalTurnA, ...internalTurnB];
  const mockQuery = createMockQuery(allMessages);
  const { client, updates } = createMockClient();
  const agent = createAgentWithSession(mockQuery, client);

  const outcome = await agent.prompt({
    sessionId: SESSION_ID,
    prompt: [{ type: "text", text: "launch both" }],
  });

  expect(outcome.stopReason).toBe("end_turn");

  // Both internal turns should have been consumed and forwarded:
  // concatenate the text of every agent_message_chunk update the client saw.
  const messageChunks = updates.filter(
    (u: any) => u.update?.sessionUpdate === "agent_message_chunk",
  );
  const chunkTexts = messageChunks.map((u: any) => u.update?.content?.text ?? "");
  const forwardedText = chunkTexts.join("");

  expect(forwardedText).toContain("background task from the subagent completed");
  expect(forwardedText).toContain("Second background task also completed");

  // Queue should be empty (all consumed).
  const remaining = (mockQuery as any).inputStream.queue as any[];
  expect(remaining.length).toBe(0);
});
781+
558782
it("normal turns without bg tasks should be unaffected", async () => {
559783
const messages = makeNormalTurnMessages("Hello");
560784
const mockQuery = createMockQuery(messages);

0 commit comments

Comments
 (0)