Align vendor structs with the expected `params` key names

digitallysavvy · digitallysavvy · commit 87585c5ce6a4 · 2026-06-05T11:21:31.000-04:00
diff --git a/src/agora_agent/agentkit/presets.py b/src/agora_agent/agentkit/presets.py
@@ -187,6 +187,8 @@ def strip_inferred_preset_fields(properties: typing.Dict[str, typing.Any], infer
             params["url"] = None
         tts = {k: v for k, v in {**tts, "params": _omit_none(params)}.items() if v is not None}
         tts.pop("_minimax_preset_model", None)
+    if tts and "_minimax_preset_model" in tts:
+        tts = {k: v for k, v in tts.items() if k != "_minimax_preset_model"}
 
     return {**properties, "asr": asr, "llm": llm, "tts": tts}
 
diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py
@@ -382,12 +382,7 @@ def to_config(self) -> Dict[str, Any]:
                 f"{self.options.project_id}/locations/{self.options.location}/"
                 f"publishers/google/models/{self.options.model}:streamGenerateContent?alt=sse"
             )
-        config = Gemini(**options).to_config()
-        params = dict(config.get("params") or {})
-        params["project_id"] = self.options.project_id
-        params["location"] = self.options.location
-        config["params"] = params
-        return config
+        return Gemini(**options).to_config()
 
 
 class AmazonBedrockOptions(BaseModel):
diff --git a/src/agora_agent/agentkit/vendors/tts.py b/src/agora_agent/agentkit/vendors/tts.py
@@ -214,13 +214,13 @@ def sample_rate(self) -> Optional[int]:
     def to_config(self) -> Dict[str, Any]:
         params: Dict[str, Any] = {
             "credentials": self.options.key,
-            "voice_selection_params": {"name": self.options.voice_name},
+            "VoiceSelectionParams": {"name": self.options.voice_name},
         }
 
         if self.options.language_code is not None:
-            params["voice_selection_params"]["language_code"] = self.options.language_code
+            params["VoiceSelectionParams"]["language_code"] = self.options.language_code
         if self.options.sample_rate_hertz is not None:
-            params["audio_config"] = {"sample_rate_hertz": self.options.sample_rate_hertz}
+            params["AudioConfig"] = {"sample_rate_hertz": self.options.sample_rate_hertz}
 
         result: Dict[str, Any] = {"vendor": "google", "params": params}
         if self.options.skip_patterns is not None:
@@ -359,7 +359,7 @@ def to_config(self) -> Dict[str, Any]:
         params: Dict[str, Any] = {
             "api_key": self.options.key,
             "speaker": self.options.speaker,
-            "model_id": self.options.model_id,
+            "modelId": self.options.model_id,
         }
         if self.options.base_url is not None:
             params["base_url"] = self.options.base_url
diff --git a/tests/custom/test_request_body.py b/tests/custom/test_request_body.py
@@ -464,6 +464,49 @@ def test_7c_pipeline_id_empty_properties_no_vendors() -> None:
     assert "tts" not in properties
 
 
+def test_7d_pipeline_id_with_byok_tts_only() -> None:
+    """7d: pipeline_id present, TTS-only BYOK override — ASR and LLM absent from properties."""
+    agent = Agent(name="support", pipeline_id="studio-pipeline").with_tts(
+        ElevenLabsTTS(
+            key="el-key",
+            model_id="eleven_flash_v2_5",
+            voice_id="some-voice",
+            base_url="wss://api.elevenlabs.io/v1",
+        )
+    )
+
+    call = start_session(agent)
+    assert call["pipeline_id"] == "studio-pipeline"
+    properties = dump(call["properties"])
+    assert "asr" not in properties
+    assert "llm" not in properties
+    assert properties["tts"]["vendor"] == "elevenlabs"
+    assert properties["tts"]["params"]["key"] == "el-key"
+
+
+def test_7e_pipeline_id_with_byok_asr_and_tts() -> None:
+    """7e: pipeline_id present, ASR+TTS BYOK overrides — LLM absent from properties."""
+    agent = (
+        Agent(name="support", pipeline_id="studio-pipeline")
+        .with_stt(DeepgramSTT(api_key="dg-key", language="en"))
+        .with_tts(
+            ElevenLabsTTS(
+                key="el-key",
+                model_id="eleven_flash_v2_5",
+                voice_id="some-voice",
+                base_url="wss://api.elevenlabs.io/v1",
+            )
+        )
+    )
+
+    call = start_session(agent)
+    assert call["pipeline_id"] == "studio-pipeline"
+    properties = dump(call["properties"])
+    assert "llm" not in properties
+    assert properties["asr"]["vendor"] == "deepgram"
+    assert properties["tts"]["vendor"] == "elevenlabs"
+
+
 # ===========================================================================
 # Scenario 8 — MLLM mode
 # ===========================================================================
@@ -872,10 +915,11 @@ def test_byok_sarvam_tts_params() -> None:
 
 
 def test_byok_murf_tts_params() -> None:
-    agent = Agent(name="t").with_tts(MurfTTS(key="murf-key"))
+    agent = Agent(name="t").with_tts(MurfTTS(key="murf-key", voice_id="Ariana"))
     props = build_properties(agent, allow_missing={"asr", "llm"})
     assert props["tts"]["vendor"] == "murf"
     assert props["tts"]["params"]["api_key"] == "murf-key"
+    assert props["tts"]["params"]["voiceId"] == "Ariana"
 
 
 # ---------------------------------------------------------------------------
@@ -965,3 +1009,15 @@ def test_preset_minimax_speech_2_8_turbo_inferred() -> None:
 def test_preset_minimax_speech_2_6_turbo_inferred() -> None:
     preset, properties = resolve_session_presets(None, {"tts": MiniMaxTTS(model="speech-2.6-turbo", voice_id="voice").to_config()})
     assert preset == "minimax_speech_2_6_turbo"
+
+
+def test_explicit_minimax_preset_strips_internal_hint() -> None:
+    """Explicit MiniMax TTS preset must not leak _minimax_preset_model to the wire."""
+    # When the caller supplies the preset explicitly, inference is skipped but the
+    # internal _minimax_preset_model hint set by MiniMaxTTS.to_config() must still
+    # be removed before the POST body is sent.
+    tts_config = MiniMaxTTS(model="speech-2.8-turbo", voice_id="voice").to_config()
+    assert "_minimax_preset_model" in tts_config  # confirm the hint is set pre-strip
+
+    _, properties = resolve_session_presets("minimax_speech_2_8_turbo", {"tts": tts_config})
+    assert "_minimax_preset_model" not in properties["tts"]
diff --git a/tests/custom/test_tts_vendors.py b/tests/custom/test_tts_vendors.py
@@ -1,6 +1,7 @@
 import pytest
 
 from agora_agent import AmazonTTS, CartesiaTTS, DeepgramTTS, ElevenLabsTTS, FishAudioTTS, GoogleTTS, HumeAITTS, MiniMaxTTS, MurfTTS, OpenAITTS, RimeTTS, SarvamTTS
+from agora_agent.agents.types.start_agents_request_properties import StartAgentsRequestProperties
 
 
 def test_tts_vendor_params_match_generated_core_shapes() -> None:
@@ -116,3 +117,43 @@ def test_tts_managed_mode_validation_matches_core_shapes() -> None:
 
     with pytest.raises(Exception, match="MiniMaxTTS requires key unless using a supported Agora-managed model"):
         MiniMaxTTS(model="unsupported-model")
+
+
+def test_tts_wire_serialization_applies_fern_aliases() -> None:
+    """Verify alias-sensitive TTS params reach the wire with the correct Fern aliases.
+
+    GoogleTTS, RimeTTS and MurfTTS to_config() emit the wire aliases directly for
+    these keys.  The real POST body goes through StartAgentsRequestProperties →
+    .dict(by_alias=True) → convert_and_respect_annotation_metadata(direction='write'),
+    which is what jsonable_encoder calls in the live HTTP client.  These tests
+    exercise that full chain so a Fern alias regression would be caught.
+    """
+    _BASE = dict(channel="ch", token="tok", agent_rtc_uid="1", remote_rtc_uids=["100"])
+
+    # Google TTS: VoiceSelectionParams and AudioConfig must survive as PascalCase aliases
+    google_config = GoogleTTS(
+        key="{}", voice_name="en-US-JennyNeural", language_code="en-US", sample_rate_hertz=24000
+    ).to_config()
+    assert "VoiceSelectionParams" in google_config["params"]  # pre-condition: to_config emits the alias directly
+    google_wire = StartAgentsRequestProperties(**_BASE, tts=google_config).dict(by_alias=True)
+    google_params = google_wire["tts"]["params"]
+    assert "VoiceSelectionParams" in google_params, f"wire missing VoiceSelectionParams, got: {list(google_params)}"
+    assert "voice_selection_params" not in google_params
+    assert "AudioConfig" in google_params
+    assert "audio_config" not in google_params
+
+    # Rime TTS: modelId (emitted by to_config as alias) must survive through wire serialization
+    rime_config = RimeTTS(key="rime-key", speaker="speaker", model_id="mist").to_config()
+    assert "modelId" in rime_config["params"]  # pre-condition: to_config emits the alias directly
+    rime_wire = StartAgentsRequestProperties(**_BASE, tts=rime_config).dict(by_alias=True)
+    rime_params = rime_wire["tts"]["params"]
+    assert "modelId" in rime_params, f"wire missing modelId, got: {list(rime_params)}"
+    assert "model_id" not in rime_params
+
+    # Murf TTS: voiceId (emitted by to_config as alias) must survive through wire serialization
+    murf_config = MurfTTS(key="murf-key", voice_id="Ariana").to_config()
+    assert "voiceId" in murf_config["params"]  # to_config currently emits alias directly
+    murf_wire = StartAgentsRequestProperties(**_BASE, tts=murf_config).dict(by_alias=True)
+    murf_params = murf_wire["tts"]["params"]
+    assert "voiceId" in murf_params, f"wire missing voiceId, got: {list(murf_params)}"
+    assert murf_params["voiceId"] == "Ariana"