fix: #3363 honor short custom voice splitter chunks (#3364)

Aphroq · web-flow · commit 4bd459e403ac · 2026-05-16T10:04:23.000+09:00
diff --git a/src/agents/voice/result.py b/src/agents/voice/result.py
@@ -201,7 +201,7 @@ async def _add_text(self, text: str):
 
         combined_sentences, self._text_buffer = self.tts_settings.text_splitter(self._text_buffer)
 
-        if len(combined_sentences) >= 20:
+        if combined_sentences:
             local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
             self._ordered_tasks.append(local_queue)
             self._tasks.append(
@@ -220,6 +220,10 @@ async def _turn_done(self):
                 )
             )
             self._text_buffer = ""
+        elif self._started_processing_turn:
+            local_queue = asyncio.Queue()
+            self._ordered_tasks.append(local_queue)
+            await local_queue.put(VoiceStreamEventLifecycle(event="turn_ended"))
         self._done_processing = True
         if self._dispatcher_task is None:
             self._dispatcher_task = asyncio.create_task(self._dispatch_audio())
diff --git a/tests/voice/test_pipeline.py b/tests/voice/test_pipeline.py
@@ -82,6 +82,64 @@ async def run(self, text: str, settings: TTSModelSettings):
     assert audio_chunks == [np.array([1], dtype=np.int16).tobytes()]
 
 
+@pytest.mark.asyncio
+async def test_streamed_audio_result_synthesizes_short_custom_splitter_chunk() -> None:
+    texts: list[str] = []
+
+    class RecordingTTS(FakeTTS):
+        async def run(self, text: str, settings: TTSModelSettings):
+            texts.append(text)
+            yield np.zeros(2, dtype=np.int16).tobytes()
+
+    def split_immediately(text: str) -> tuple[str, str]:
+        return text, ""
+
+    result = StreamedAudioResult(
+        RecordingTTS(),
+        TTSModelSettings(buffer_size=1, text_splitter=split_immediately),
+        VoicePipelineConfig(),
+    )
+
+    await result._add_text("ok")
+    await result._turn_done()
+    await result._done()
+
+    events, audio_chunks = await extract_events(result)
+
+    assert texts == ["ok"]
+    assert events == ["turn_started", "audio", "turn_ended", "session_ended"]
+    assert audio_chunks == [np.zeros(2, dtype=np.int16).tobytes()]
+
+
+@pytest.mark.asyncio
+async def test_streamed_audio_result_ignores_empty_custom_splitter_chunk() -> None:
+    texts: list[str] = []
+
+    class RecordingTTS(FakeTTS):
+        async def run(self, text: str, settings: TTSModelSettings):
+            texts.append(text)
+            yield np.zeros(2, dtype=np.int16).tobytes()
+
+    def discard_text(_text: str) -> tuple[str, str]:
+        return "", ""
+
+    result = StreamedAudioResult(
+        RecordingTTS(),
+        TTSModelSettings(buffer_size=1, text_splitter=discard_text),
+        VoicePipelineConfig(),
+    )
+
+    await result._add_text("ok")
+    await result._turn_done()
+    await result._done()
+
+    events, audio_chunks = await extract_events(result)
+
+    assert texts == []
+    assert events == ["turn_started", "turn_ended", "session_ended"]
+    assert audio_chunks == []
+
+
 @pytest.mark.asyncio
 async def test_voicepipeline_run_single_turn() -> None:
     # Single turn. Should produce a single audio output, which is the TTS output for "out_1".