fix: honor short voice splitter chunks

he-yufeng · he-yufeng · commit e90fed1b9558 · 2026-05-16T04:15:31.000+08:00
diff --git a/src/agents/voice/result.py b/src/agents/voice/result.py
@@ -201,7 +201,7 @@ async def _add_text(self, text: str):
 
         combined_sentences, self._text_buffer = self.tts_settings.text_splitter(self._text_buffer)
 
-        if len(combined_sentences) >= 20:
+        if combined_sentences:
             local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
             self._ordered_tasks.append(local_queue)
             self._tasks.append(
@@ -220,6 +220,10 @@ async def _turn_done(self):
                 )
             )
             self._text_buffer = ""
+        elif self._started_processing_turn:
+            local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
+            await local_queue.put(VoiceStreamEventLifecycle(event="turn_ended"))
+            self._ordered_tasks.append(local_queue)
         self._done_processing = True
         if self._dispatcher_task is None:
             self._dispatcher_task = asyncio.create_task(self._dispatch_audio())
diff --git a/tests/voice/test_pipeline.py b/tests/voice/test_pipeline.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+from collections.abc import AsyncIterator
 
 import numpy as np
 import numpy.typing as npt
@@ -82,6 +83,39 @@ async def run(self, text: str, settings: TTSModelSettings):
     assert audio_chunks == [np.array([1], dtype=np.int16).tobytes()]
 
 
+@pytest.mark.asyncio
+async def test_streamed_audio_result_sends_short_custom_splitter_chunks() -> None:
+    class RecordingTTS(FakeTTS):
+        def __init__(self) -> None:
+            super().__init__()
+            self.texts: list[str] = []
+
+        async def run(self, text: str, settings: TTSModelSettings) -> AsyncIterator[bytes]:
+            del settings
+            self.texts.append(text)
+            yield np.zeros(2, dtype=np.int16).tobytes()
+
+    def split_immediately(text: str) -> tuple[str, str]:
+        return text, ""
+
+    fake_tts = RecordingTTS()
+    result = StreamedAudioResult(
+        fake_tts,
+        TTSModelSettings(buffer_size=1, text_splitter=split_immediately),
+        VoicePipelineConfig(),
+    )
+
+    await result._add_text("ok")
+    await result._turn_done()
+    await result._done()
+
+    events, audio_chunks = await extract_events(result)
+
+    assert fake_tts.texts == ["ok"]
+    assert events == ["turn_started", "audio", "turn_ended", "session_ended"]
+    assert len(audio_chunks) == 1
+
+
 @pytest.mark.asyncio
 async def test_voicepipeline_run_single_turn() -> None:
     # Single turn. Should produce a single audio output, which is the TTS output for "out_1".