From 743e60a0018afdc4dbea170a2f2b0b651a4945ce Mon Sep 17 00:00:00 2001 From: Ryan Neal Date: Mon, 11 May 2026 21:11:52 +0100 Subject: [PATCH 1/5] Fix MCP/REST generations incorrectly triggering autoplay (#635) Agent-initiated generations (via MCP tool or POST /speak) were stored in the database with source="manual" because generate_speech() ignored the caller's origin. The frontend SSE handler checks gen.source to skip autoplay for agent sources, but it received "manual" and played anyway. Pass source="mcp" / source="rest" through GenerationRequest so the DB row carries the correct origin and the frontend's AGENT_SOURCES guard works reliably. Co-Authored-By: Claude Opus 4.6 --- backend/mcp_server/tools.py | 1 + backend/models.py | 3 +++ backend/routes/generations.py | 5 +++-- backend/routes/speak.py | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/backend/mcp_server/tools.py b/backend/mcp_server/tools.py index fcf3b6a2..636210eb 100644 --- a/backend/mcp_server/tools.py +++ b/backend/mcp_server/tools.py @@ -240,6 +240,7 @@ async def _speak( language=language or "en", engine=engine, personality=personality, + source="mcp", ) generation = await generate_speech(req, db) return _speak_response(generation, profile_name, source="mcp") diff --git a/backend/models.py b/backend/models.py index 06f321ac..e5707f64 100644 --- a/backend/models.py +++ b/backend/models.py @@ -100,6 +100,9 @@ class GenerationRequest(BaseModel): effects_chain: Optional[List["EffectConfig"]] = Field( None, description="Effects chain to apply after generation (overrides profile default)" ) + source: Optional[str] = Field( + None, description="Origin of the request (e.g. 'mcp', 'rest'). Internal use — not exposed to public API docs." + ) class GenerationResponse(BaseModel): diff --git a/backend/routes/generations.py b/backend/routes/generations.py index 215c96cb..2caf041e 100644 --- a/backend/routes/generations.py +++ b/backend/routes/generations.py @@ -77,7 +77,7 @@ async def generate_speech( model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None text = data.text - source = "manual" + source = data.source or "manual" if data.personality and getattr(profile, "personality", None): try: llm_result = await personality.rewrite_as_profile(profile.personality, data.text) @@ -86,7 +86,8 @@ async def generate_speech( text = llm_result.text.strip() if not text: raise HTTPException(status_code=500, detail="LLM produced empty output; nothing to speak.") - source = "personality_speak" + if not data.source: + source = "personality_speak" generation = await history.create_generation( profile_id=data.profile_id, diff --git a/backend/routes/speak.py b/backend/routes/speak.py index 0c81846c..293951b3 100644 --- a/backend/routes/speak.py +++ b/backend/routes/speak.py @@ -78,6 +78,7 @@ async def speak( language=data.language or "en", engine=engine, personality=bool(personality_flag), + source="rest", ), db, ) From 8f4131753a8056d4aa274c0a110de63ad41c0fa7 Mon Sep 17 00:00:00 2001 From: Ryan Neal Date: Mon, 11 May 2026 21:53:50 +0100 Subject: [PATCH 2/5] Constrain GenerationRequest.source to a typed Literal Replaces free-form Optional[str] with a Literal type so Pydantic rejects unknown source values at validation time. Co-Authored-By: Claude Opus 4.6 --- backend/models.py | 8 +++++--- backend/services/history.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/models.py b/backend/models.py index e5707f64..f21a4858 100644 --- a/backend/models.py +++ b/backend/models.py @@ -3,7 +3,7 @@ """ from pydantic import BaseModel, Field -from typing import Optional, List +from typing import Literal, Optional, List from datetime import datetime from .utils.capture_chords import ( @@ -11,6 +11,8 @@ default_toggle_to_talk_chord, ) +GenerationSource = Literal["mcp", "rest", "manual", "import", "personality_speak"] + class VoiceProfileCreate(BaseModel): """Request model for creating a voice profile.""" @@ -100,8 +102,8 @@ class GenerationRequest(BaseModel): effects_chain: Optional[List["EffectConfig"]] = Field( None, description="Effects chain to apply after generation (overrides profile default)" ) - source: Optional[str] = Field( - None, description="Origin of the request (e.g. 'mcp', 'rest'). Internal use — not exposed to public API docs." + source: Optional[GenerationSource] = Field( + None, description="Origin of the request. Internal use — not exposed to public API docs." ) diff --git a/backend/services/history.py b/backend/services/history.py index 3062f7d6..9daca8cd 100644 --- a/backend/services/history.py +++ b/backend/services/history.py @@ -10,7 +10,7 @@ from sqlalchemy.orm import Session from sqlalchemy import or_ -from ..models import GenerationRequest, GenerationResponse, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig +from ..models import GenerationRequest, GenerationResponse, GenerationSource, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig from ..database import Generation as DBGeneration, GenerationVersion as DBGenerationVersion, VoiceProfile as DBVoiceProfile from .. import config @@ -65,7 +65,7 @@ async def create_generation( status: str = "completed", engine: Optional[str] = "qwen", model_size: Optional[str] = None, - source: str = "manual", + source: GenerationSource = "manual", ) -> GenerationResponse: """ Create a new generation history entry. From bec0cc2412e76ff0bdd6f6b4301ee183716cc341 Mon Sep 17 00:00:00 2001 From: Ryan Neal Date: Mon, 11 May 2026 21:58:04 +0100 Subject: [PATCH 3/5] Replace typing.List with builtin list (PEP 585) Co-Authored-By: Claude Opus 4.6 --- backend/models.py | 68 +++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/backend/models.py b/backend/models.py index f21a4858..133c343d 100644 --- a/backend/models.py +++ b/backend/models.py @@ -3,7 +3,7 @@ """ from pydantic import BaseModel, Field -from typing import Literal, Optional, List +from typing import Literal, Optional from datetime import datetime from .utils.capture_chords import ( @@ -38,7 +38,7 @@ class VoiceProfileResponse(BaseModel): description: Optional[str] language: str avatar_path: Optional[str] = None - effects_chain: Optional[List["EffectConfig"]] = None + effects_chain: Optional[list["EffectConfig"]] = None voice_type: str = "cloned" preset_engine: Optional[str] = None preset_voice_id: Optional[str] = None @@ -99,7 +99,7 @@ class GenerationRequest(BaseModel): default=50, ge=0, le=500, description="Crossfade duration in ms between chunks (0 for hard cut)" ) normalize: bool = Field(default=True, description="Normalize output audio volume") - effects_chain: Optional[List["EffectConfig"]] = Field( + effects_chain: Optional[list["EffectConfig"]] = Field( None, description="Effects chain to apply after generation (overrides profile default)" ) source: Optional[GenerationSource] = Field( @@ -125,7 +125,7 @@ class GenerationResponse(BaseModel): is_favorited: bool = False source: str = "manual" created_at: datetime - versions: Optional[List["GenerationVersionResponse"]] = None + versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None class Config: @@ -159,7 +159,7 @@ class HistoryResponse(BaseModel): error: Optional[str] = None is_favorited: bool = False created_at: datetime - versions: Optional[List["GenerationVersionResponse"]] = None + versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None class Config: @@ -169,7 +169,7 @@ class Config: class HistoryListResponse(BaseModel): """Response model for history list.""" - items: List[HistoryResponse] + items: list[HistoryResponse] total: int @@ -217,7 +217,7 @@ class Config: class CaptureListResponse(BaseModel): """Response model for paginated capture list.""" - items: List[CaptureResponse] + items: list[CaptureResponse] total: int @@ -263,10 +263,10 @@ class CaptureSettingsResponse(BaseModel): allow_auto_paste: bool = True default_playback_voice_id: Optional[str] = None hotkey_enabled: bool = False - chord_push_to_talk_keys: List[str] = Field( + chord_push_to_talk_keys: list[str] = Field( default_factory=default_push_to_talk_chord ) - chord_toggle_to_talk_keys: List[str] = Field( + chord_toggle_to_talk_keys: list[str] = Field( default_factory=default_toggle_to_talk_chord ) @@ -287,8 +287,8 @@ class CaptureSettingsUpdate(BaseModel): allow_auto_paste: Optional[bool] = None default_playback_voice_id: Optional[str] = None hotkey_enabled: Optional[bool] = None - chord_push_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6) - chord_toggle_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6) + chord_push_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6) + chord_toggle_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6) class GenerationSettingsResponse(BaseModel): @@ -347,7 +347,7 @@ class MCPClientBindingUpsert(BaseModel): class MCPClientBindingListResponse(BaseModel): - items: List[MCPClientBindingResponse] + items: list[MCPClientBindingResponse] class SpeakRequest(BaseModel): @@ -384,7 +384,7 @@ class LLMGenerateRequest(BaseModel): # Used by the refinement service to pin tricky rules (imperatives # staying imperatives, technical-term punctuation) that small models # lose when the examples live inline in the system prompt. - examples: Optional[List[List[str]]] = Field(default=None, max_length=8) + examples: Optional[list[list[str]]] = Field(default=None, max_length=8) class LLMGenerateResponse(BaseModel): @@ -466,7 +466,7 @@ class FilesystemHealthResponse(BaseModel): healthy: bool disk_free_mb: Optional[float] = None disk_total_mb: Optional[float] = None - directories: List[DirectoryCheck] + directories: list[DirectoryCheck] class ModelStatus(BaseModel): @@ -484,7 +484,7 @@ class ModelStatus(BaseModel): class ModelStatusListResponse(BaseModel): """Response model for model status list.""" - models: List[ModelStatus] + models: list[ModelStatus] class ModelDownloadRequest(BaseModel): @@ -524,22 +524,22 @@ class ActiveGenerationTask(BaseModel): class ActiveTasksResponse(BaseModel): """Response model for active tasks.""" - downloads: List[ActiveDownloadTask] - generations: List[ActiveGenerationTask] + downloads: list[ActiveDownloadTask] + generations: list[ActiveGenerationTask] class AudioChannelCreate(BaseModel): """Request model for creating an audio channel.""" name: str = Field(..., min_length=1, max_length=100) - device_ids: List[str] = Field(default_factory=list) + device_ids: list[str] = Field(default_factory=list) class AudioChannelUpdate(BaseModel): """Request model for updating an audio channel.""" name: Optional[str] = Field(None, min_length=1, max_length=100) - device_ids: Optional[List[str]] = None + device_ids: Optional[list[str]] = None class AudioChannelResponse(BaseModel): @@ -548,7 +548,7 @@ class AudioChannelResponse(BaseModel): id: str name: str is_default: bool - device_ids: List[str] + device_ids: list[str] created_at: datetime class Config: @@ -558,13 +558,13 @@ class Config: class ChannelVoiceAssignment(BaseModel): """Request model for assigning voices to a channel.""" - profile_ids: List[str] + profile_ids: list[str] class ProfileChannelAssignment(BaseModel): """Request model for assigning channels to a profile.""" - channel_ids: List[str] + channel_ids: list[str] class StoryCreate(BaseModel): @@ -613,7 +613,7 @@ class StoryItemDetail(BaseModel): volume: float = 1.0 generation_created_at: datetime # Versions available for this generation - versions: Optional[List["GenerationVersionResponse"]] = None + versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None class Config: @@ -628,7 +628,7 @@ class StoryDetailResponse(BaseModel): description: Optional[str] created_at: datetime updated_at: datetime - items: List[StoryItemDetail] = [] + items: list[StoryItemDetail] = [] class Config: from_attributes = True @@ -652,13 +652,13 @@ class StoryItemUpdateTime(BaseModel): class StoryItemBatchUpdate(BaseModel): """Request model for batch updating story item timecodes.""" - updates: List[StoryItemUpdateTime] + updates: list[StoryItemUpdateTime] class StoryItemReorder(BaseModel): """Request model for reordering story items.""" - generation_ids: List[str] = Field(..., min_length=1) + generation_ids: list[str] = Field(..., min_length=1) class StoryItemMove(BaseModel): @@ -709,7 +709,7 @@ class EffectConfig(BaseModel): class EffectsChain(BaseModel): """An ordered list of effects to apply.""" - effects: List[EffectConfig] = Field(default_factory=list) + effects: list[EffectConfig] = Field(default_factory=list) class EffectPresetCreate(BaseModel): @@ -717,7 +717,7 @@ class EffectPresetCreate(BaseModel): name: str = Field(..., min_length=1, max_length=100) description: Optional[str] = Field(None, max_length=500) - effects_chain: List[EffectConfig] + effects_chain: list[EffectConfig] class EffectPresetUpdate(BaseModel): @@ -725,7 +725,7 @@ class EffectPresetUpdate(BaseModel): name: Optional[str] = Field(None, min_length=1, max_length=100) description: Optional[str] = None - effects_chain: Optional[List[EffectConfig]] = None + effects_chain: Optional[list[EffectConfig]] = None class EffectPresetResponse(BaseModel): @@ -734,7 +734,7 @@ class EffectPresetResponse(BaseModel): id: str name: str description: Optional[str] = None - effects_chain: List[EffectConfig] + effects_chain: list[EffectConfig] is_builtin: bool = False created_at: datetime @@ -749,7 +749,7 @@ class GenerationVersionResponse(BaseModel): generation_id: str label: str audio_path: str - effects_chain: Optional[List[EffectConfig]] = None + effects_chain: Optional[list[EffectConfig]] = None source_version_id: Optional[str] = None is_default: bool created_at: datetime @@ -761,7 +761,7 @@ class Config: class ApplyEffectsRequest(BaseModel): """Request to apply effects to an existing generation.""" - effects_chain: List[EffectConfig] + effects_chain: list[EffectConfig] source_version_id: Optional[str] = Field( None, description="Version to use as source audio (defaults to clean/original)" ) @@ -772,7 +772,7 @@ class ApplyEffectsRequest(BaseModel): class ProfileEffectsUpdate(BaseModel): """Request to update the default effects chain on a profile.""" - effects_chain: Optional[List[EffectConfig]] = Field(None, description="Effects chain (null to remove)") + effects_chain: Optional[list[EffectConfig]] = Field(None, description="Effects chain (null to remove)") class AvailableEffectParam(BaseModel): @@ -797,4 +797,4 @@ class AvailableEffect(BaseModel): class AvailableEffectsResponse(BaseModel): """Response listing all available effect types.""" - effects: List[AvailableEffect] + effects: list[AvailableEffect] From b856f753429e5d2db4189e6de41583859dc07f5e Mon Sep 17 00:00:00 2001 From: Ryan Neal Date: Tue, 12 May 2026 08:37:34 +0100 Subject: [PATCH 4/5] Use GenerationSource type on GenerationResponse for consistency Co-Authored-By: Claude Opus 4.6 --- backend/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/models.py b/backend/models.py index 133c343d..ca81c4ba 100644 --- a/backend/models.py +++ b/backend/models.py @@ -123,7 +123,7 @@ class GenerationResponse(BaseModel): status: str = "completed" error: Optional[str] = None is_favorited: bool = False - source: str = "manual" + source: GenerationSource = "manual" created_at: datetime versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None From cb2b913b00ce0ce269ff695d036e29f55d9bc6bb Mon Sep 17 00:00:00 2001 From: Ryan Neal Date: Tue, 12 May 2026 08:52:34 +0100 Subject: [PATCH 5/5] Remove client-writable source field from GenerationRequest The source field was exposed on the public API model, allowing clients to spoof request provenance. Now source is only set server-side via a function parameter on generate_speech(). Co-Authored-By: Claude Opus 4.6 --- backend/mcp_server/tools.py | 3 +-- backend/models.py | 3 --- backend/routes/generations.py | 18 +++++++++++++----- backend/routes/speak.py | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/backend/mcp_server/tools.py b/backend/mcp_server/tools.py index 636210eb..be91f799 100644 --- a/backend/mcp_server/tools.py +++ b/backend/mcp_server/tools.py @@ -240,9 +240,8 @@ async def _speak( language=language or "en", engine=engine, personality=personality, - source="mcp", ) - generation = await generate_speech(req, db) + generation = await generate_speech(req, db, source="mcp") return _speak_response(generation, profile_name, source="mcp") diff --git a/backend/models.py b/backend/models.py index ca81c4ba..38ae9d7d 100644 --- a/backend/models.py +++ b/backend/models.py @@ -102,9 +102,6 @@ class GenerationRequest(BaseModel): effects_chain: Optional[list["EffectConfig"]] = Field( None, description="Effects chain to apply after generation (overrides profile default)" ) - source: Optional[GenerationSource] = Field( - None, description="Origin of the request. Internal use — not exposed to public API docs." - ) class GenerationResponse(BaseModel): diff --git a/backend/routes/generations.py b/backend/routes/generations.py index 2caf041e..7e9842da 100644 --- a/backend/routes/generations.py +++ b/backend/routes/generations.py @@ -54,11 +54,19 @@ def _resolve_generation_engine(data: models.GenerationRequest, profile) -> str: @router.post("/generate", response_model=models.GenerationResponse) -async def generate_speech( +async def generate_speech_endpoint( data: models.GenerationRequest, db: Session = Depends(get_db), ): """Generate speech from text using a voice profile.""" + return await generate_speech(data, db) + + +async def generate_speech( + data: models.GenerationRequest, + db: Session, + source: "models.GenerationSource | None" = None, +): task_manager = get_task_manager() generation_id = str(uuid.uuid4()) @@ -77,7 +85,7 @@ async def generate_speech( model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None text = data.text - source = data.source or "manual" + resolved_source: models.GenerationSource = source or "manual" if data.personality and getattr(profile, "personality", None): try: llm_result = await personality.rewrite_as_profile(profile.personality, data.text) @@ -86,8 +94,8 @@ async def generate_speech( text = llm_result.text.strip() if not text: raise HTTPException(status_code=500, detail="LLM produced empty output; nothing to speak.") - if not data.source: - source = "personality_speak" + if not source: + resolved_source = "personality_speak" generation = await history.create_generation( profile_id=data.profile_id, @@ -102,7 +110,7 @@ async def generate_speech( status="generating", engine=engine, model_size=model_size if engine_has_model_sizes(engine) else None, - source=source, + source=resolved_source, ) task_manager.start_generation( diff --git a/backend/routes/speak.py b/backend/routes/speak.py index 293951b3..dbe296dc 100644 --- a/backend/routes/speak.py +++ b/backend/routes/speak.py @@ -78,9 +78,9 @@ async def speak( language=data.language or "en", engine=engine, personality=bool(personality_flag), - source="rest", ), db, + source="rest", ) mcp_events.publish(