Skip to content

Commit 379928f

Browse files
author
unraid
committed
fix: prevent agent communication bounds from hiding CI regressions
Tighten the UDS auth, framing, and response-reader boundaries while keeping the AgentSummary lifecycle covered so Codecov and CI fail on real regressions instead of missing coverage. The poorMode settings mock mirrors unrelated real settings defaults to avoid Bun mock retention changing later permission tests. Constraint: PR #369 must fix Codecov/CI precisely without warning suppression, fallback masking, or mock pollution Rejected: Delete AgentSummary lifecycle coverage | would hide Codecov loss and stale-summary behavior Rejected: Store inline UDS rejection in a hidden input sentinel | cloned observable inputs can drop it and bypass rejection Rejected: Ignore malformed UDS frames until timeout | leaves client slots and SendMessage calls open to exhaustion Confidence: high Scope-risk: moderate Directive: Keep empty #token= markers rejected; do not require a non-empty token value in hasInlineUdsToken Tested: bun test packages/builtin-tools/src/tools/SendMessageTool/__tests__/udsRecipientSanitization.test.ts src/utils/__tests__/udsMessaging.test.ts src/utils/__tests__/udsResponseReader.test.ts src/utils/__tests__/ndjsonFramer.test.ts Tested: bunx tsc --noEmit --pretty false Tested: bun run lint Tested: bun test --coverage --coverage-reporter lcov --coverage-dir coverage Tested: bun run test:all Tested: bun audit Tested: bun run build Tested: bun run build:vite Not-tested: GitHub-hosted Codecov upload until pushed PR checks rerun
1 parent ee0d788 commit 379928f

21 files changed

Lines changed: 1727 additions & 337 deletions
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import { describe, expect, test } from 'bun:test'
2+
import type { Message } from 'src/types/message.js'
3+
import { filterIncompleteToolCalls } from '../filterIncompleteToolCalls.js'
4+
5+
describe('filterIncompleteToolCalls', () => {
6+
test('drops assistant tool uses that do not have matching results', () => {
7+
const messages = [
8+
{
9+
type: 'assistant',
10+
uuid: 'a1',
11+
message: {
12+
role: 'assistant',
13+
content: [{ type: 'tool_use', id: 'missing', name: 'Read' }],
14+
},
15+
},
16+
{
17+
type: 'user',
18+
uuid: 'u1',
19+
message: { role: 'user', content: 'continue' },
20+
},
21+
] as unknown as Message[]
22+
23+
expect(
24+
filterIncompleteToolCalls(messages).map(message => String(message.uuid)),
25+
).toEqual(['u1'])
26+
})
27+
28+
test('preserves assistant text when dropping orphan tool uses', () => {
29+
const messages = [
30+
{
31+
type: 'assistant',
32+
uuid: 'a1',
33+
message: {
34+
role: 'assistant',
35+
content: [
36+
{ type: 'text', text: 'I will read the file.' },
37+
{ type: 'tool_use', id: 'missing', name: 'Read' },
38+
],
39+
},
40+
},
41+
] as unknown as Message[]
42+
43+
const filtered = filterIncompleteToolCalls(messages)
44+
expect(filtered).toHaveLength(1)
45+
const first = filtered[0]!
46+
const content = first.message!.content
47+
expect(
48+
Array.isArray(content) ? content.map(block => block.type) : [],
49+
).toEqual(['text'])
50+
})
51+
52+
test('keeps completed parallel tool calls when dropping an orphan', () => {
53+
const messages = [
54+
{
55+
type: 'assistant',
56+
uuid: 'a1',
57+
message: {
58+
role: 'assistant',
59+
content: [
60+
{ type: 'tool_use', id: 'done', name: 'Read' },
61+
{ type: 'tool_use', id: 'missing', name: 'Grep' },
62+
],
63+
},
64+
},
65+
{
66+
type: 'user',
67+
uuid: 'u1',
68+
message: {
69+
role: 'user',
70+
content: [{ type: 'tool_result', tool_use_id: 'done', content: 'ok' }],
71+
},
72+
},
73+
] as unknown as Message[]
74+
75+
const filtered = filterIncompleteToolCalls(messages)
76+
expect(filtered.map(message => String(message.uuid))).toEqual(['a1', 'u1'])
77+
const first = filtered[0]!
78+
const content = first.message!.content
79+
expect(
80+
Array.isArray(content)
81+
? content.map(block =>
82+
block.type === 'tool_use' ? block.id : block.type,
83+
)
84+
: [],
85+
).toEqual(['done'])
86+
})
87+
88+
test('keeps assistant tool uses that have matching results', () => {
89+
const messages = [
90+
{
91+
type: 'assistant',
92+
uuid: 'a1',
93+
message: {
94+
role: 'assistant',
95+
content: [{ type: 'tool_use', id: 'done', name: 'Read' }],
96+
},
97+
},
98+
{
99+
type: 'user',
100+
uuid: 'u1',
101+
message: {
102+
role: 'user',
103+
content: [{ type: 'tool_result', tool_use_id: 'done', content: 'ok' }],
104+
},
105+
},
106+
] as unknown as Message[]
107+
108+
expect(
109+
filterIncompleteToolCalls(messages).map(message => String(message.uuid)),
110+
).toEqual(['a1', 'u1'])
111+
})
112+
113+
test('drops orphan tool results when their tool use was removed', () => {
114+
const messages = [
115+
{
116+
type: 'user',
117+
uuid: 'u1',
118+
message: {
119+
role: 'user',
120+
content: [
121+
{ type: 'tool_result', tool_use_id: 'missing', content: 'late' },
122+
],
123+
},
124+
},
125+
] as unknown as Message[]
126+
127+
expect(filterIncompleteToolCalls(messages)).toEqual([])
128+
})
129+
130+
test('keeps user text while dropping orphan tool results', () => {
131+
const messages = [
132+
{
133+
type: 'assistant',
134+
uuid: 'a1',
135+
message: { role: 'assistant', content: 'done' },
136+
},
137+
{
138+
type: 'user',
139+
uuid: 'u1',
140+
message: {
141+
role: 'user',
142+
content: [
143+
{ type: 'text', text: 'keep this' },
144+
{ type: 'tool_result', tool_use_id: 'missing', content: 'late' },
145+
],
146+
},
147+
},
148+
] as unknown as Message[]
149+
150+
const filtered = filterIncompleteToolCalls(messages)
151+
expect(filtered.map(message => String(message.uuid))).toEqual(['a1', 'u1'])
152+
const content = filtered[1]!.message!.content
153+
expect(Array.isArray(content) ? content : []).toEqual([
154+
{ type: 'text', text: 'keep this' },
155+
])
156+
})
157+
158+
test('drops malformed tool blocks without ids', () => {
159+
const messages = [
160+
{
161+
type: 'assistant',
162+
uuid: 'a1',
163+
message: {
164+
role: 'assistant',
165+
content: [{ type: 'tool_use', name: 'Read' }],
166+
},
167+
},
168+
{
169+
type: 'user',
170+
uuid: 'u1',
171+
message: {
172+
role: 'user',
173+
content: [{ type: 'tool_result', content: 'late' }],
174+
},
175+
},
176+
] as unknown as Message[]
177+
178+
expect(filterIncompleteToolCalls(messages)).toEqual([])
179+
})
180+
})
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import type {
2+
AssistantMessage,
3+
Message,
4+
UserMessage,
5+
} from 'src/types/message.js'
6+
7+
/**
8+
* Removes invalid or orphaned tool_use/tool_result blocks while preserving
9+
* completed tool-call pairs. This is intentionally block-level, not
10+
* message-level, so completed parallel tool calls stay paired with results.
11+
*/
12+
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
13+
const toolUseIdsWithResults = new Set<string>()
14+
15+
for (const message of messages) {
16+
if (message?.type === 'user') {
17+
const userMessage = message as UserMessage
18+
const content = userMessage.message.content
19+
if (Array.isArray(content)) {
20+
for (const block of content) {
21+
if (block.type === 'tool_result' && block.tool_use_id) {
22+
toolUseIdsWithResults.add(block.tool_use_id)
23+
}
24+
}
25+
}
26+
}
27+
}
28+
29+
const retainedToolUseIds = new Set<string>()
30+
const withoutOrphanToolUses: Message[] = []
31+
32+
for (const message of messages) {
33+
if (message?.type === 'assistant') {
34+
const assistantMessage = message as AssistantMessage
35+
const content = assistantMessage.message.content
36+
if (Array.isArray(content)) {
37+
let changed = false
38+
const filteredContent = content.filter(block => {
39+
if (block.type !== 'tool_use') return true
40+
if (!block.id) {
41+
changed = true
42+
return false
43+
}
44+
if (toolUseIdsWithResults.has(block.id)) {
45+
retainedToolUseIds.add(block.id)
46+
return true
47+
}
48+
changed = true
49+
return false
50+
})
51+
52+
if (!changed) {
53+
withoutOrphanToolUses.push(message)
54+
continue
55+
}
56+
if (filteredContent.length > 0) {
57+
withoutOrphanToolUses.push({
58+
...assistantMessage,
59+
message: {
60+
...assistantMessage.message,
61+
content: filteredContent,
62+
},
63+
})
64+
}
65+
continue
66+
}
67+
}
68+
withoutOrphanToolUses.push(message)
69+
}
70+
71+
const filteredMessages: Message[] = []
72+
for (const message of withoutOrphanToolUses) {
73+
if (message?.type !== 'user') {
74+
filteredMessages.push(message)
75+
continue
76+
}
77+
const userMessage = message as UserMessage
78+
const content = userMessage.message.content
79+
if (!Array.isArray(content)) {
80+
filteredMessages.push(message)
81+
continue
82+
}
83+
let changed = false
84+
const filteredContent = content.filter(block => {
85+
if (block.type !== 'tool_result') return true
86+
if (!block.tool_use_id) {
87+
changed = true
88+
return false
89+
}
90+
if (retainedToolUseIds.has(block.tool_use_id)) return true
91+
changed = true
92+
return false
93+
})
94+
if (!changed) {
95+
filteredMessages.push(message)
96+
continue
97+
}
98+
if (filteredContent.length > 0) {
99+
filteredMessages.push({
100+
...userMessage,
101+
message: {
102+
...userMessage.message,
103+
content: filteredContent,
104+
},
105+
})
106+
}
107+
}
108+
109+
return filteredMessages
110+
}

packages/builtin-tools/src/tools/AgentTool/runAgent.ts

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,11 @@ import {
8686
import type { ContentReplacementState } from 'src/utils/toolResultStorage.js'
8787
import { createAgentId } from 'src/utils/uuid.js'
8888
import { resolveAgentTools } from './agentToolUtils.js'
89+
import { filterIncompleteToolCalls } from './filterIncompleteToolCalls.js'
8990
import { type AgentDefinition, isBuiltInAgent } from './loadAgentsDir.js'
9091

92+
export { filterIncompleteToolCalls } from './filterIncompleteToolCalls.js'
93+
9194
/**
9295
* Initialize agent-specific MCP servers
9396
* Agents can define their own MCP servers in their frontmatter that are additive
@@ -886,50 +889,6 @@ export async function* runAgent({
886889
}
887890
}
888891

889-
/**
890-
* Filters out assistant messages with incomplete tool calls (tool uses without results).
891-
* This prevents API errors when sending messages with orphaned tool calls.
892-
*/
893-
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
894-
// Build a set of tool use IDs that have results
895-
const toolUseIdsWithResults = new Set<string>()
896-
897-
for (const message of messages) {
898-
if (message?.type === 'user') {
899-
const userMessage = message as UserMessage
900-
const content = userMessage.message.content
901-
if (Array.isArray(content)) {
902-
for (const block of content) {
903-
if (block.type === 'tool_result' && block.tool_use_id) {
904-
toolUseIdsWithResults.add(block.tool_use_id)
905-
}
906-
}
907-
}
908-
}
909-
}
910-
911-
// Filter out assistant messages that contain tool calls without results
912-
return messages.filter(message => {
913-
if (message?.type === 'assistant') {
914-
const assistantMessage = message as AssistantMessage
915-
const content = assistantMessage.message.content
916-
if (Array.isArray(content)) {
917-
// Check if this assistant message has any tool uses without results
918-
const hasIncompleteToolCall = content.some(
919-
block =>
920-
block.type === 'tool_use' &&
921-
block.id &&
922-
!toolUseIdsWithResults.has(block.id),
923-
)
924-
// Exclude messages with incomplete tool calls
925-
return !hasIncompleteToolCall
926-
}
927-
}
928-
// Keep all non-assistant messages and assistant messages without tool calls
929-
return true
930-
})
931-
}
932-
933892
async function getAgentSystemPrompt(
934893
agentDefinition: AgentDefinition,
935894
toolUseContext: Pick<ToolUseContext, 'options'>,

0 commit comments

Comments
 (0)