Skip to content

Commit f288778

Browse files
committed
Handle "response.output_audio.done" in openai.Realtime to emit end of output
1 parent b2c7383 commit f288778

2 files changed

Lines changed: 9 additions & 9 deletions

File tree

plugins/openai/tests/test_openai_realtime.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
 from vision_agents.core.llm.realtime import (
     RealtimeAgentTranscript,
     RealtimeAudioOutput,
+    RealtimeAudioOutputDone,
     RealtimeUserTranscript,
 )
 from vision_agents.plugins.openai import Realtime
@@ -127,10 +128,12 @@ async def test_simple_response_flow(self, realtime):
             pass
 
         await asyncio.sleep(3.0)
-        audio = [
-            i for i in realtime.output.peek() if isinstance(i, RealtimeAudioOutput)
-        ]
+        items = realtime.output.peek()
+        audio = [i for i in items if isinstance(i, RealtimeAudioOutput)]
+        done = [i for i in items if isinstance(i, RealtimeAudioOutputDone)]
         assert len(audio) > 0
+        assert len(done) >= 1
+        assert any(not d.interrupted for d in done)
 
     async def test_audio_sending_flow(self, realtime, mia_audio_16khz):
         # Wait for connection to be fully established

plugins/openai/vision_agents/plugins/openai/openai_realtime.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -336,12 +336,9 @@ async def _handle_openai_event(self, event: dict) -> None:
         elif et == "output_audio_buffer.stopped":
             # Output audio buffer stopped - acknowledgment of audio playback end
             pass
-        elif et == "response.audio.done":
-            # Audio generation complete for this response item
-            self._emit_audio_output_done_event()
-        elif et == "response.output_audio.done":
-            # Output audio generation complete for this response item
-            pass
+        elif et in ("response.output_audio.done", "response.audio.done"):
+            # Audio generation complete for this response item.
+            self._emit_audio_output_done_event(response_id=event.get("response_id"))
         elif et == "response.content_part.done":
             # Content part complete - contains full transcript
             pass

0 commit comments

Comments
 (0)