Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ def _iter_audio_b64_chunks(path: Path, chunk_bytes: int = 10_240) -> Iterator[st

def _get_speech_recognition_setting(model: str) -> AudioInputTranscriptionOptions:
    """Return the input-audio transcription options to use for ``model``.

    Models whose name starts with ``"gpt-realtime"`` are transcribed with
    ``"whisper-1"``; every other model is transcribed with ``"azure-speech"``.
    The transcription language is always ``"en-US"``.

    :param model: Name of the model the session is configured for.
    :return: Transcription options carrying the selected speech recognition
        model and the ``"en-US"`` language.
    """
    # A single prefix is enough: "gpt-realtime-mini" itself starts with
    # "gpt-realtime", so listing it separately in a tuple was redundant.
    speech_recognition_model = "whisper-1" if model.startswith("gpt-realtime") else "azure-speech"
    return AudioInputTranscriptionOptions(model=speech_recognition_model, language="en-US")

Expand Down Expand Up @@ -180,7 +182,7 @@ def smoke_test(self, **kwargs):
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
Comment thread
xitzhang marked this conversation as resolved.
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
Expand Down Expand Up @@ -234,7 +236,7 @@ async def test_realtime_service(self, test_data_dir: Path, model: str, api_versi
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_with_audio_enhancements(
self,
Expand Down Expand Up @@ -273,9 +275,9 @@ async def test_realtime_service_with_audio_enhancements(
("model", "server_sd_conf"),
[
pytest.param(
"gpt-4o-realtime-preview",
"gpt-realtime",
{"type": "azure_semantic_vad", "speech_duration_assistant_speaking_ms": 800},
id="gpt-4o-realtime",
id="gpt-realtime",
),
pytest.param(
"gpt-4o",
Expand Down Expand Up @@ -319,11 +321,11 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
@pytest.mark.parametrize(
("model", "semantic_vad_params"),
[
pytest.param("gpt-4o-realtime-preview", {}, id="gpt-4o-realtime"),
pytest.param("gpt-realtime", {}, id="gpt-realtime"),
# pytest.param(
# "gpt-4o-realtime-preview",
# "gpt-realtime",
# {"window_size": 4, "distinct_ci_phones": 2, "require_vowel": True, "remove_filler_words": True},
# id="gpt-4o-realtime-remove-filler-words",
# id="gpt-realtime-remove-filler-words",
# ),
pytest.param("gpt-4o", {}, id="cascaded-realtime"),
pytest.param("gpt-4o", {"speech_duration_ms": 200}, id="cascaded-realtime"),
Expand Down Expand Up @@ -371,7 +373,7 @@ async def test_realtime_service_with_filler_word_removal(
api_version: str,
**kwargs,
):
model = "gpt-4o-realtime-preview"
model = "gpt-realtime"
file = test_data_dir / test_audio_file
voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
Expand Down Expand Up @@ -403,7 +405,7 @@ async def test_realtime_service_with_filler_word_removal(
async def test_realtime_service_with_filler_word_removal_multilingual(
self, test_data_dir: Path, test_audio_file: str, api_version: str, **kwargs
):
model = "gpt-4o-realtime-preview"
model = "gpt-realtime"
file = test_data_dir / test_audio_file
server_sd_conf = {
"remove_filler_words": True,
Expand All @@ -430,7 +432,7 @@ async def test_realtime_service_with_filler_word_removal_multilingual(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
audio_file = test_data_dir / "4-1.wav"
Expand Down Expand Up @@ -746,7 +748,7 @@ async def test_realtime_service_live_session_update(
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.skip()
@pytest.mark.parametrize("model", ["gpt-4o", "gpt-4o-realtime"])
@pytest.mark.parametrize("model", ["gpt-4o", "gpt-realtime"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_tool_call_no_audio_overlap(
self,
Expand Down Expand Up @@ -918,7 +920,7 @@ async def test_realtime_service_with_eou(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_with_audio_timestamp_viseme(
self,
Expand Down Expand Up @@ -976,7 +978,7 @@ async def test_realtime_service_with_audio_timestamp_viseme(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_wo_turn_detection(
self,
Expand Down Expand Up @@ -1014,7 +1016,7 @@ async def test_realtime_service_wo_turn_detection(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4.1", "phi4-mm-realtime"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "phi4-mm-realtime"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_with_voice_properties(
self,
Expand Down Expand Up @@ -1050,7 +1052,7 @@ async def test_realtime_service_with_voice_properties(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime"])
@pytest.mark.parametrize("model", ["gpt-realtime"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
file = test_data_dir / "largest_lake.wav"
Expand Down Expand Up @@ -1093,7 +1095,7 @@ async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model:
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime"])
@pytest.mark.parametrize("model", ["gpt-realtime"])
@pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-01-01-preview"])
async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
file = test_data_dir / "largest_lake.wav"
Expand Down Expand Up @@ -1138,25 +1140,25 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model:
"gpt-4o", InputAudioFormat.G711_ALAW, AzureSemanticVad(), id="gpt4o_g711_alaw_azure_semantic_vad"
),
pytest.param(
"gpt-4o-realtime-preview",
"gpt-realtime",
InputAudioFormat.G711_ULAW,
AzureSemanticVad(),
id="gpt4o_realtime_preview_g711_ulaw_azure_semantic_vad",
),
Comment thread
xitzhang marked this conversation as resolved.
pytest.param(
"gpt-4o-realtime-preview",
"gpt-realtime",
InputAudioFormat.G711_ULAW,
ServerVad(),
id="gpt4o_realtime_preview_g711_ulaw_server_vad",
),
pytest.param(
"gpt-4o-realtime-preview",
"gpt-realtime",
InputAudioFormat.G711_ALAW,
AzureSemanticVad(),
id="gpt4o_realtime_preview_g711_alaw_azure_semantic_vad",
),
pytest.param(
"gpt-4o-realtime-preview",
"gpt-realtime",
InputAudioFormat.G711_ALAW,
ServerVad(),
id="gpt4o_realtime_preview_g711_alaw_server_vad",
Expand Down Expand Up @@ -1246,9 +1248,9 @@ async def test_realtime_service_with_input_audio_format(
@pytest.mark.parametrize(
("model", "sampling_rate"),
[
pytest.param("gpt-4o-realtime", 16000, id="gpt4o_realtime_16kHz_no_resample"),
pytest.param("gpt-4o-realtime", 44100, id="gpt4o_realtime_44kHz_no_resample"),
pytest.param("gpt-4o-realtime", 8000, id="gpt4o_realtime_8kHz_no_resample"),
pytest.param("gpt-realtime", 16000, id="gpt4o_realtime_16kHz_no_resample"),
pytest.param("gpt-realtime", 44100, id="gpt4o_realtime_44kHz_no_resample"),
pytest.param("gpt-realtime", 8000, id="gpt4o_realtime_8kHz_no_resample"),
Comment thread
xitzhang marked this conversation as resolved.
Outdated
pytest.param("gpt-4o", 16000, id="gpt4o_16kHz_no_resample"),
pytest.param("gpt-4o", 44100, id="gpt4o_44kHz_no_resample"),
pytest.param("gpt-4.1", 8000, id="gpt4.1_8kHz_no_resample"),
Expand Down Expand Up @@ -1352,7 +1354,7 @@ async def test_output_formats_with_azure_voice(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime"])
@pytest.mark.parametrize("model", ["gpt-realtime"])
@pytest.mark.parametrize(
"audio_output_format",
[
Expand Down Expand Up @@ -1393,7 +1395,7 @@ async def test_output_formats_with_openai_voice(
@pytest.mark.live_test_only
@VoiceLivePreparer()
@pytest.mark.flaky(reruns=3, reruns_delay=2)
@pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
@pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
@pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
async def test_realtime_service_with_echo_cancellation(
self,
Expand Down
Loading