Skip to content

Commit 4bd459e

Browse files
authored
fix: #3363 honor short custom voice splitter chunks (#3364)
1 parent e37b3d2 commit 4bd459e

2 files changed

Lines changed: 63 additions & 1 deletion

File tree

src/agents/voice/result.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -201,7 +201,7 @@ async def _add_text(self, text: str):
201 201

202 202
combined_sentences, self._text_buffer = self.tts_settings.text_splitter(self._text_buffer)
203 203

204-
if len(combined_sentences) >= 20:
204+
if combined_sentences:
205 205
local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
206 206
self._ordered_tasks.append(local_queue)
207 207
self._tasks.append(
@@ -220,6 +220,10 @@ async def _turn_done(self):
220 220
)
221 221
)
222 222
self._text_buffer = ""
223+
elif self._started_processing_turn:
224+
local_queue = asyncio.Queue()
225+
self._ordered_tasks.append(local_queue)
226+
await local_queue.put(VoiceStreamEventLifecycle(event="turn_ended"))
223 227
self._done_processing = True
224 228
if self._dispatcher_task is None:
225 229
self._dispatcher_task = asyncio.create_task(self._dispatch_audio())

tests/voice/test_pipeline.py

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,64 @@ async def run(self, text: str, settings: TTSModelSettings):
82 82
assert audio_chunks == [np.array([1], dtype=np.int16).tobytes()]
83 83

84 84

85+
@pytest.mark.asyncio
86+
async def test_streamed_audio_result_synthesizes_short_custom_splitter_chunk() -> None:
87+
texts: list[str] = []
88+
89+
class RecordingTTS(FakeTTS):
90+
async def run(self, text: str, settings: TTSModelSettings):
91+
texts.append(text)
92+
yield np.zeros(2, dtype=np.int16).tobytes()
93+
94+
def split_immediately(text: str) -> tuple[str, str]:
95+
return text, ""
96+
97+
result = StreamedAudioResult(
98+
RecordingTTS(),
99+
TTSModelSettings(buffer_size=1, text_splitter=split_immediately),
100+
VoicePipelineConfig(),
101+
)
102+
103+
await result._add_text("ok")
104+
await result._turn_done()
105+
await result._done()
106+
107+
events, audio_chunks = await extract_events(result)
108+
109+
assert texts == ["ok"]
110+
assert events == ["turn_started", "audio", "turn_ended", "session_ended"]
111+
assert audio_chunks == [np.zeros(2, dtype=np.int16).tobytes()]
112+
113+
114+
@pytest.mark.asyncio
115+
async def test_streamed_audio_result_ignores_empty_custom_splitter_chunk() -> None:
116+
texts: list[str] = []
117+
118+
class RecordingTTS(FakeTTS):
119+
async def run(self, text: str, settings: TTSModelSettings):
120+
texts.append(text)
121+
yield np.zeros(2, dtype=np.int16).tobytes()
122+
123+
def discard_text(_text: str) -> tuple[str, str]:
124+
return "", ""
125+
126+
result = StreamedAudioResult(
127+
RecordingTTS(),
128+
TTSModelSettings(buffer_size=1, text_splitter=discard_text),
129+
VoicePipelineConfig(),
130+
)
131+
132+
await result._add_text("ok")
133+
await result._turn_done()
134+
await result._done()
135+
136+
events, audio_chunks = await extract_events(result)
137+
138+
assert texts == []
139+
assert events == ["turn_started", "turn_ended", "session_ended"]
140+
assert audio_chunks == []
141+
142+
85 143
@pytest.mark.asyncio
86 144
async def test_voicepipeline_run_single_turn() -> None:
87 145
# Single turn. Should produce a single audio output, which is the TTS output for "out_1".

0 commit comments

Comments
 (0)