Commit cd6dbe5

fix(inference): omit content from streamed tool call chunks (#1359)

1 parent: e132233

3 files changed: 74 additions & 1 deletion
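
In short: a provider can stream a delta that carries both a structured tool call and a textual echo of that call in `content` (the test fixture below simulates exactly this). Before this commit the echo was forwarded on the chunk's `delta.content`; after it, tool call chunks omit it. A minimal consumer-side sketch of the effect — `speak` and `runTool` are hypothetical handlers, and the `LLM` construction mirrors the test helper below (import paths are not shown in this diff):

declare function speak(text: string): void; // hypothetical downstream handlers,
declare function runTool(name: string, args: string, callId: string): void; // not part of the diff

const llm = new LLM({
  model: 'openai/gpt-4o-mini',
  apiKey: 'test-key',
  apiSecret: 'test-secret',
  baseURL: 'https://example.livekit.cloud',
});

const stream = llm.chat({ chatCtx: new ChatContext() });
for await (const chunk of stream) {
  // Before this commit, a tool call chunk could also carry the textual echo
  // of the call (e.g. 'saveAnswer({"answer":"yes"})') in delta.content, so a
  // consumer like this would speak/print it as assistant output.
  if (chunk.delta?.content) speak(chunk.delta.content);
  for (const call of chunk.delta?.toolCalls ?? []) {
    runTool(call.name, call.args, call.callId);
  }
}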

.changeset/clean-dancers-wait.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+'@livekit/agents': patch
+---
+
+fix(inference): drop streamed assistant text from tool call chunks

agents/src/inference/llm.test.ts

Lines changed: 69 additions & 0 deletions
@@ -11,6 +11,7 @@ beforeAll(() => {
 });
 
 type CapturedHeaders = Record<string, string>;
+type CompletionChunk = Record<string, unknown>;
 
 /**
  * Build an LLM, stub its OpenAI client's chat.completions.create, start a chat
@@ -61,6 +62,36 @@ async function captureHeaders(opts: {
   return capturedHeaders;
 }
 
+async function collectChatChunks(completionChunks: CompletionChunk[]) {
+  const llm = new LLM({
+    model: 'openai/gpt-4o-mini',
+    apiKey: 'test-key',
+    apiSecret: 'test-secret',
+    baseURL: 'https://example.livekit.cloud',
+  });
+
+  const stub = async () => ({
+    async *[Symbol.asyncIterator]() {
+      for (const chunk of completionChunks) {
+        yield chunk;
+      }
+    },
+  });
+
+  const internal = llm as unknown as {
+    client: { chat: { completions: { create: typeof stub } } };
+  };
+  internal.client.chat.completions.create = stub;
+
+  const stream = llm.chat({ chatCtx: new ChatContext() });
+  const chunks = [];
+  for await (const chunk of stream) {
+    chunks.push(chunk);
+  }
+
+  return chunks;
+}
+
 describe('inference.LLM X-LiveKit-Inference-Priority header', () => {
   // --- no value anywhere ---
 
@@ -105,3 +136,41 @@ describe('inference.LLM X-LiveKit-Inference-Priority header', () => {
     expect(headers['X-LiveKit-Inference-Priority']).toBe('priority');
   });
 });
+
+describe('inference.LLM streamed tool calls', () => {
+  it('does not forward assistant content on tool call chunks', async () => {
+    const chunks = await collectChatChunks([
+      {
+        id: 'chatcmpl_test',
+        choices: [
+          {
+            index: 0,
+            finish_reason: 'tool_calls',
+            delta: {
+              role: 'assistant',
+              content: 'saveAnswer({"answer":"yes"})',
+              tool_calls: [
+                {
+                  index: 0,
+                  id: 'call_123',
+                  type: 'function',
+                  function: {
+                    name: 'saveAnswer',
+                    arguments: '{"answer":"yes"}',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+      },
+    ]);
+
+    expect(chunks).toHaveLength(1);
+    expect(chunks[0]?.delta?.content).toBeUndefined();
+    expect(chunks[0]?.delta?.toolCalls).toHaveLength(1);
+    expect(chunks[0]?.delta?.toolCalls?.[0]?.callId).toBe('call_123');
+    expect(chunks[0]?.delta?.toolCalls?.[0]?.name).toBe('saveAnswer');
+    expect(chunks[0]?.delta?.toolCalls?.[0]?.args).toBe('{"answer":"yes"}');
+  });
+});
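
A note on the stub technique: the OpenAI SDK's `chat.completions.create` resolves to an async-iterable stream, so the test swaps it for an async function returning a plain object whose `Symbol.asyncIterator` yields the canned chunks. The LLM under test then runs its full chunk-parsing path without any network access.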

agents/src/inference/llm.ts

Lines changed: 0 additions & 1 deletion
@@ -618,7 +618,6 @@ export class LLMStream extends llm.LLMStream {
         id,
         delta: {
           role: 'assistant',
-          content: delta.content || undefined,
           extra: deltaExtra,
           toolCalls: [
             llm.FunctionCall.create({
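
For context, the object built around the deleted line now looks roughly like this. This is a reconstructed sketch, not the verbatim source: the enclosing emit and the `toolCall` loop variable are not shown in the hunk above, and the `callId`/`name`/`args` mapping is inferred from the test assertions:

const chunk = {
  id,
  delta: {
    role: 'assistant',
    // `content: delta.content || undefined` was deleted here: when a model
    // streams a tool call it may also echo the call as text, and that echo
    // should not be forwarded as assistant content (see the test above).
    extra: deltaExtra,
    toolCalls: [
      llm.FunctionCall.create({
        callId: toolCall.id,               // field mapping inferred from the
        name: toolCall.function.name,      // test's callId/name/args
        args: toolCall.function.arguments, // assertions; not shown in diff
      }),
    ],
  },
};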
