Azure · xitzhang · May 7, 2026 · May 7, 2026
@@ -78,7 +78,9 @@ def _iter_audio_b64_chunks(path: Path, chunk_bytes: int = 10_240) -> Iterator[st
 
 def _get_speech_recognition_setting(model: str) -> AudioInputTranscriptionOptions:
     speech_recognition_model = (
-        "whisper-1" if model.startswith(("gpt-4o-realtime", "gpt-4o-mini-realtime")) else "azure-speech"
+        "whisper-1"
+        if model.startswith("gpt-realtime")  # single prefix also matches "gpt-realtime-mini"
+        else "azure-speech"
     )
     return AudioInputTranscriptionOptions(model=speech_recognition_model, language="en-US")
 
@@ -180,7 +182,7 @@ def smoke_test(self, **kwargs):
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
@@ -234,7 +236,7 @@ async def test_realtime_service(self, test_data_dir: Path, model: str, api_versi
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_with_audio_enhancements(
         self,
@@ -273,9 +275,9 @@ async def test_realtime_service_with_audio_enhancements(
         ("model", "server_sd_conf"),
         [
             pytest.param(
-                "gpt-4o-realtime-preview",
+                "gpt-realtime",
                 {"type": "azure_semantic_vad", "speech_duration_assistant_speaking_ms": 800},
-                id="gpt-4o-realtime",
+                id="gpt-realtime",
             ),
             pytest.param(
                 "gpt-4o",
@@ -319,11 +321,11 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
     @pytest.mark.parametrize(
         ("model", "semantic_vad_params"),
         [
-            pytest.param("gpt-4o-realtime-preview", {}, id="gpt-4o-realtime"),
+            pytest.param("gpt-realtime", {}, id="gpt-realtime"),
             # pytest.param(
-            #     "gpt-4o-realtime-preview",
+            #     "gpt-realtime",
             #     {"window_size": 4, "distinct_ci_phones": 2, "require_vowel": True, "remove_filler_words": True},
-            #     id="gpt-4o-realtime-remove-filler-words",
+            #     id="gpt-realtime-remove-filler-words",
             # ),
             pytest.param("gpt-4o", {}, id="cascaded-realtime"),
             pytest.param("gpt-4o", {"speech_duration_ms": 200}, id="cascaded-realtime"),
@@ -371,7 +373,7 @@ async def test_realtime_service_with_filler_word_removal(
         api_version: str,
         **kwargs,
     ):
-        model = "gpt-4o-realtime-preview"
+        model = "gpt-realtime"
         file = test_data_dir / test_audio_file
         voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
         voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
@@ -403,7 +405,7 @@ async def test_realtime_service_with_filler_word_removal(
     async def test_realtime_service_with_filler_word_removal_multilingual(
         self, test_data_dir: Path, test_audio_file: str, api_version: str, **kwargs
     ):
-        model = "gpt-4o-realtime-preview"
+        model = "gpt-realtime"
         file = test_data_dir / test_audio_file
         server_sd_conf = {
             "remove_filler_words": True,
@@ -430,7 +432,7 @@ async def test_realtime_service_with_filler_word_removal_multilingual(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         audio_file = test_data_dir / "4-1.wav"
@@ -746,7 +748,7 @@ async def test_realtime_service_live_session_update(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.skip()
-    @pytest.mark.parametrize("model", ["gpt-4o", "gpt-4o-realtime"])
+    @pytest.mark.parametrize("model", ["gpt-4o", "gpt-realtime"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_tool_call_no_audio_overlap(
         self,
@@ -918,7 +920,7 @@ async def test_realtime_service_with_eou(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_with_audio_timestamp_viseme(
         self,
@@ -976,7 +978,7 @@ async def test_realtime_service_with_audio_timestamp_viseme(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_wo_turn_detection(
         self,
@@ -1014,7 +1016,7 @@ async def test_realtime_service_wo_turn_detection(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4.1", "phi4-mm-realtime"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "phi4-mm-realtime"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_with_voice_properties(
         self,
@@ -1050,7 +1052,7 @@ async def test_realtime_service_with_voice_properties(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
+    @pytest.mark.parametrize("model", ["gpt-realtime"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         file = test_data_dir / "largest_lake.wav"
@@ -1093,7 +1095,7 @@ async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model:
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
+    @pytest.mark.parametrize("model", ["gpt-realtime"])
     @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-01-01-preview"])
     async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         file = test_data_dir / "largest_lake.wav"
@@ -1138,25 +1140,25 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model:
                 "gpt-4o", InputAudioFormat.G711_ALAW, AzureSemanticVad(), id="gpt4o_g711_alaw_azure_semantic_vad"
             ),
             pytest.param(
-                "gpt-4o-realtime-preview",
+                "gpt-realtime",
                 InputAudioFormat.G711_ULAW,
                 AzureSemanticVad(),
-                id="gpt4o_realtime_preview_g711_ulaw_azure_semantic_vad",
+                id="gpt_realtime_g711_ulaw_azure_semantic_vad",
             ),
             pytest.param(
-                "gpt-4o-realtime-preview",
+                "gpt-realtime",
                 InputAudioFormat.G711_ULAW,
                 ServerVad(),
-                id="gpt4o_realtime_preview_g711_ulaw_server_vad",
+                id="gpt_realtime_g711_ulaw_server_vad",
             ),
             pytest.param(
-                "gpt-4o-realtime-preview",
+                "gpt-realtime",
                 InputAudioFormat.G711_ALAW,
                 AzureSemanticVad(),
-                id="gpt4o_realtime_preview_g711_alaw_azure_semantic_vad",
+                id="gpt_realtime_g711_alaw_azure_semantic_vad",
             ),
             pytest.param(
-                "gpt-4o-realtime-preview",
+                "gpt-realtime",
                 InputAudioFormat.G711_ALAW,
                 ServerVad(),
-                id="gpt4o_realtime_preview_g711_alaw_server_vad",
+                id="gpt_realtime_g711_alaw_server_vad",
@@ -1246,9 +1248,9 @@ async def test_realtime_service_with_input_audio_format(
     @pytest.mark.parametrize(
         ("model", "sampling_rate"),
         [
-            pytest.param("gpt-4o-realtime", 16000, id="gpt4o_realtime_16kHz_no_resample"),
-            pytest.param("gpt-4o-realtime", 44100, id="gpt4o_realtime_44kHz_no_resample"),
-            pytest.param("gpt-4o-realtime", 8000, id="gpt4o_realtime_8kHz_no_resample"),
+            pytest.param("gpt-realtime", 16000, id="gpt_realtime_16kHz_no_resample"),
+            pytest.param("gpt-realtime", 44100, id="gpt_realtime_44kHz_no_resample"),
+            pytest.param("gpt-realtime", 8000, id="gpt_realtime_8kHz_no_resample"),
             pytest.param("gpt-4o", 16000, id="gpt4o_16kHz_no_resample"),
             pytest.param("gpt-4o", 44100, id="gpt4o_44kHz_no_resample"),
             pytest.param("gpt-4.1", 8000, id="gpt4.1_8kHz_no_resample"),
@@ -1352,7 +1354,7 @@ async def test_output_formats_with_azure_voice(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
+    @pytest.mark.parametrize("model", ["gpt-realtime"])
     @pytest.mark.parametrize(
         "audio_output_format",
         [
@@ -1393,7 +1395,7 @@ async def test_output_formats_with_openai_voice(
     @pytest.mark.live_test_only
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
-    @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
+    @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
     @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
     async def test_realtime_service_with_echo_cancellation(
         self,