From 743e60a0018afdc4dbea170a2f2b0b651a4945ce Mon Sep 17 00:00:00 2001
From: Ryan Neal <ryan.neal@depop.com>
Date: Mon, 11 May 2026 21:11:52 +0100
Subject: [PATCH 1/5] Fix MCP/REST generations incorrectly triggering autoplay
 (#635)

Agent-initiated generations (via MCP tool or POST /speak) were stored in
the database with source="manual" because generate_speech() ignored the
caller's origin. The frontend SSE handler checks gen.source to skip
autoplay for agent sources, but it received "manual" and played anyway.

Pass source="mcp" / source="rest" through GenerationRequest so the DB
row carries the correct origin and the frontend's AGENT_SOURCES guard
works reliably.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/mcp_server/tools.py   | 1 +
 backend/models.py             | 3 +++
 backend/routes/generations.py | 5 +++--
 backend/routes/speak.py       | 1 +
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/backend/mcp_server/tools.py b/backend/mcp_server/tools.py
index fcf3b6a2..636210eb 100644
--- a/backend/mcp_server/tools.py
+++ b/backend/mcp_server/tools.py
@@ -240,6 +240,7 @@ async def _speak(
         language=language or "en",
         engine=engine,
         personality=personality,
+        source="mcp",
     )
     generation = await generate_speech(req, db)
     return _speak_response(generation, profile_name, source="mcp")
diff --git a/backend/models.py b/backend/models.py
index 06f321ac..e5707f64 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -100,6 +100,9 @@ class GenerationRequest(BaseModel):
     effects_chain: Optional[List["EffectConfig"]] = Field(
         None, description="Effects chain to apply after generation (overrides profile default)"
     )
+    source: Optional[str] = Field(
+        None, description="Origin of the request (e.g. 'mcp', 'rest'). Internal use — not exposed to public API docs."
+    )
 
 
 class GenerationResponse(BaseModel):
diff --git a/backend/routes/generations.py b/backend/routes/generations.py
index 215c96cb..2caf041e 100644
--- a/backend/routes/generations.py
+++ b/backend/routes/generations.py
@@ -77,7 +77,7 @@ async def generate_speech(
     model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None
 
     text = data.text
-    source = "manual"
+    source = data.source or "manual"
     if data.personality and getattr(profile, "personality", None):
         try:
             llm_result = await personality.rewrite_as_profile(profile.personality, data.text)
@@ -86,7 +86,8 @@ async def generate_speech(
         text = llm_result.text.strip()
         if not text:
             raise HTTPException(status_code=500, detail="LLM produced empty output; nothing to speak.")
-        source = "personality_speak"
+        if not data.source:
+            source = "personality_speak"
 
     generation = await history.create_generation(
         profile_id=data.profile_id,
diff --git a/backend/routes/speak.py b/backend/routes/speak.py
index 0c81846c..293951b3 100644
--- a/backend/routes/speak.py
+++ b/backend/routes/speak.py
@@ -78,6 +78,7 @@ async def speak(
             language=data.language or "en",
             engine=engine,
             personality=bool(personality_flag),
+            source="rest",
         ),
         db,
     )

From 8f4131753a8056d4aa274c0a110de63ad41c0fa7 Mon Sep 17 00:00:00 2001
From: Ryan Neal <ryan.neal@depop.com>
Date: Mon, 11 May 2026 21:53:50 +0100
Subject: [PATCH 2/5] Constrain GenerationRequest.source to a typed Literal

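Replaces free-form Optional[str] with a Literal type so Pydantic
rejects unknown source values at validation time. The same
GenerationSource alias is also used to annotate the source parameter
of history.create_generation().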

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/models.py           | 8 +++++---
 backend/services/history.py | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/backend/models.py b/backend/models.py
index e5707f64..f21a4858 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -3,7 +3,7 @@
 """
 
 from pydantic import BaseModel, Field
-from typing import Optional, List
+from typing import Literal, Optional, List
 from datetime import datetime
 
 from .utils.capture_chords import (
@@ -11,6 +11,8 @@
     default_toggle_to_talk_chord,
 )
 
+GenerationSource = Literal["mcp", "rest", "manual", "import", "personality_speak"]
+
 
 class VoiceProfileCreate(BaseModel):
     """Request model for creating a voice profile."""
@@ -100,8 +102,8 @@ class GenerationRequest(BaseModel):
     effects_chain: Optional[List["EffectConfig"]] = Field(
         None, description="Effects chain to apply after generation (overrides profile default)"
     )
-    source: Optional[str] = Field(
-        None, description="Origin of the request (e.g. 'mcp', 'rest'). Internal use — not exposed to public API docs."
+    source: Optional[GenerationSource] = Field(
+        None, description="Origin of the request. Internal use — not exposed to public API docs."
     )
 
 
diff --git a/backend/services/history.py b/backend/services/history.py
index 3062f7d6..9daca8cd 100644
--- a/backend/services/history.py
+++ b/backend/services/history.py
@@ -10,7 +10,7 @@
 from sqlalchemy.orm import Session
 from sqlalchemy import or_
 
-from ..models import GenerationRequest, GenerationResponse, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig
+from ..models import GenerationRequest, GenerationResponse, GenerationSource, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig
 from ..database import Generation as DBGeneration, GenerationVersion as DBGenerationVersion, VoiceProfile as DBVoiceProfile
 from .. import config
 
@@ -65,7 +65,7 @@ async def create_generation(
     status: str = "completed",
     engine: Optional[str] = "qwen",
     model_size: Optional[str] = None,
-    source: str = "manual",
+    source: GenerationSource = "manual",
 ) -> GenerationResponse:
     """
     Create a new generation history entry.

From bec0cc2412e76ff0bdd6f6b4301ee183716cc341 Mon Sep 17 00:00:00 2001
From: Ryan Neal <ryan.neal@depop.com>
Date: Mon, 11 May 2026 21:58:04 +0100
Subject: [PATCH 3/5] Replace typing.List with builtin list (PEP 585)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/models.py | 68 +++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/backend/models.py b/backend/models.py
index f21a4858..133c343d 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -3,7 +3,7 @@
 """
 
 from pydantic import BaseModel, Field
-from typing import Literal, Optional, List
+from typing import Literal, Optional
 from datetime import datetime
 
 from .utils.capture_chords import (
@@ -38,7 +38,7 @@ class VoiceProfileResponse(BaseModel):
     description: Optional[str]
     language: str
     avatar_path: Optional[str] = None
-    effects_chain: Optional[List["EffectConfig"]] = None
+    effects_chain: Optional[list["EffectConfig"]] = None
     voice_type: str = "cloned"
     preset_engine: Optional[str] = None
     preset_voice_id: Optional[str] = None
@@ -99,7 +99,7 @@ class GenerationRequest(BaseModel):
         default=50, ge=0, le=500, description="Crossfade duration in ms between chunks (0 for hard cut)"
     )
     normalize: bool = Field(default=True, description="Normalize output audio volume")
-    effects_chain: Optional[List["EffectConfig"]] = Field(
+    effects_chain: Optional[list["EffectConfig"]] = Field(
         None, description="Effects chain to apply after generation (overrides profile default)"
     )
     source: Optional[GenerationSource] = Field(
@@ -125,7 +125,7 @@ class GenerationResponse(BaseModel):
     is_favorited: bool = False
     source: str = "manual"
     created_at: datetime
-    versions: Optional[List["GenerationVersionResponse"]] = None
+    versions: Optional[list["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
 
     class Config:
@@ -159,7 +159,7 @@ class HistoryResponse(BaseModel):
     error: Optional[str] = None
     is_favorited: bool = False
     created_at: datetime
-    versions: Optional[List["GenerationVersionResponse"]] = None
+    versions: Optional[list["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
 
     class Config:
@@ -169,7 +169,7 @@ class Config:
 class HistoryListResponse(BaseModel):
     """Response model for history list."""
 
-    items: List[HistoryResponse]
+    items: list[HistoryResponse]
     total: int
 
 
@@ -217,7 +217,7 @@ class Config:
 class CaptureListResponse(BaseModel):
     """Response model for paginated capture list."""
 
-    items: List[CaptureResponse]
+    items: list[CaptureResponse]
     total: int
 
 
@@ -263,10 +263,10 @@ class CaptureSettingsResponse(BaseModel):
     allow_auto_paste: bool = True
     default_playback_voice_id: Optional[str] = None
     hotkey_enabled: bool = False
-    chord_push_to_talk_keys: List[str] = Field(
+    chord_push_to_talk_keys: list[str] = Field(
         default_factory=default_push_to_talk_chord
     )
-    chord_toggle_to_talk_keys: List[str] = Field(
+    chord_toggle_to_talk_keys: list[str] = Field(
         default_factory=default_toggle_to_talk_chord
     )
 
@@ -287,8 +287,8 @@ class CaptureSettingsUpdate(BaseModel):
     allow_auto_paste: Optional[bool] = None
     default_playback_voice_id: Optional[str] = None
     hotkey_enabled: Optional[bool] = None
-    chord_push_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6)
-    chord_toggle_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6)
+    chord_push_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6)
+    chord_toggle_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6)
 
 
 class GenerationSettingsResponse(BaseModel):
@@ -347,7 +347,7 @@ class MCPClientBindingUpsert(BaseModel):
 
 
 class MCPClientBindingListResponse(BaseModel):
-    items: List[MCPClientBindingResponse]
+    items: list[MCPClientBindingResponse]
 
 
 class SpeakRequest(BaseModel):
@@ -384,7 +384,7 @@ class LLMGenerateRequest(BaseModel):
     # Used by the refinement service to pin tricky rules (imperatives
     # staying imperatives, technical-term punctuation) that small models
     # lose when the examples live inline in the system prompt.
-    examples: Optional[List[List[str]]] = Field(default=None, max_length=8)
+    examples: Optional[list[list[str]]] = Field(default=None, max_length=8)
 
 
 class LLMGenerateResponse(BaseModel):
@@ -466,7 +466,7 @@ class FilesystemHealthResponse(BaseModel):
     healthy: bool
     disk_free_mb: Optional[float] = None
     disk_total_mb: Optional[float] = None
-    directories: List[DirectoryCheck]
+    directories: list[DirectoryCheck]
 
 
 class ModelStatus(BaseModel):
@@ -484,7 +484,7 @@ class ModelStatus(BaseModel):
 class ModelStatusListResponse(BaseModel):
     """Response model for model status list."""
 
-    models: List[ModelStatus]
+    models: list[ModelStatus]
 
 
 class ModelDownloadRequest(BaseModel):
@@ -524,22 +524,22 @@ class ActiveGenerationTask(BaseModel):
 class ActiveTasksResponse(BaseModel):
     """Response model for active tasks."""
 
-    downloads: List[ActiveDownloadTask]
-    generations: List[ActiveGenerationTask]
+    downloads: list[ActiveDownloadTask]
+    generations: list[ActiveGenerationTask]
 
 
 class AudioChannelCreate(BaseModel):
     """Request model for creating an audio channel."""
 
     name: str = Field(..., min_length=1, max_length=100)
-    device_ids: List[str] = Field(default_factory=list)
+    device_ids: list[str] = Field(default_factory=list)
 
 
 class AudioChannelUpdate(BaseModel):
     """Request model for updating an audio channel."""
 
     name: Optional[str] = Field(None, min_length=1, max_length=100)
-    device_ids: Optional[List[str]] = None
+    device_ids: Optional[list[str]] = None
 
 
 class AudioChannelResponse(BaseModel):
@@ -548,7 +548,7 @@ class AudioChannelResponse(BaseModel):
     id: str
     name: str
     is_default: bool
-    device_ids: List[str]
+    device_ids: list[str]
     created_at: datetime
 
     class Config:
@@ -558,13 +558,13 @@ class Config:
 class ChannelVoiceAssignment(BaseModel):
     """Request model for assigning voices to a channel."""
 
-    profile_ids: List[str]
+    profile_ids: list[str]
 
 
 class ProfileChannelAssignment(BaseModel):
     """Request model for assigning channels to a profile."""
 
-    channel_ids: List[str]
+    channel_ids: list[str]
 
 
 class StoryCreate(BaseModel):
@@ -613,7 +613,7 @@ class StoryItemDetail(BaseModel):
     volume: float = 1.0
     generation_created_at: datetime
     # Versions available for this generation
-    versions: Optional[List["GenerationVersionResponse"]] = None
+    versions: Optional[list["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
 
     class Config:
@@ -628,7 +628,7 @@ class StoryDetailResponse(BaseModel):
     description: Optional[str]
     created_at: datetime
     updated_at: datetime
-    items: List[StoryItemDetail] = []
+    items: list[StoryItemDetail] = []
 
     class Config:
         from_attributes = True
@@ -652,13 +652,13 @@ class StoryItemUpdateTime(BaseModel):
 class StoryItemBatchUpdate(BaseModel):
     """Request model for batch updating story item timecodes."""
 
-    updates: List[StoryItemUpdateTime]
+    updates: list[StoryItemUpdateTime]
 
 
 class StoryItemReorder(BaseModel):
     """Request model for reordering story items."""
 
-    generation_ids: List[str] = Field(..., min_length=1)
+    generation_ids: list[str] = Field(..., min_length=1)
 
 
 class StoryItemMove(BaseModel):
@@ -709,7 +709,7 @@ class EffectConfig(BaseModel):
 class EffectsChain(BaseModel):
     """An ordered list of effects to apply."""
 
-    effects: List[EffectConfig] = Field(default_factory=list)
+    effects: list[EffectConfig] = Field(default_factory=list)
 
 
 class EffectPresetCreate(BaseModel):
@@ -717,7 +717,7 @@ class EffectPresetCreate(BaseModel):
 
     name: str = Field(..., min_length=1, max_length=100)
     description: Optional[str] = Field(None, max_length=500)
-    effects_chain: List[EffectConfig]
+    effects_chain: list[EffectConfig]
 
 
 class EffectPresetUpdate(BaseModel):
@@ -725,7 +725,7 @@ class EffectPresetUpdate(BaseModel):
 
     name: Optional[str] = Field(None, min_length=1, max_length=100)
     description: Optional[str] = None
-    effects_chain: Optional[List[EffectConfig]] = None
+    effects_chain: Optional[list[EffectConfig]] = None
 
 
 class EffectPresetResponse(BaseModel):
@@ -734,7 +734,7 @@ class EffectPresetResponse(BaseModel):
     id: str
     name: str
     description: Optional[str] = None
-    effects_chain: List[EffectConfig]
+    effects_chain: list[EffectConfig]
     is_builtin: bool = False
     created_at: datetime
 
@@ -749,7 +749,7 @@ class GenerationVersionResponse(BaseModel):
     generation_id: str
     label: str
     audio_path: str
-    effects_chain: Optional[List[EffectConfig]] = None
+    effects_chain: Optional[list[EffectConfig]] = None
     source_version_id: Optional[str] = None
     is_default: bool
     created_at: datetime
@@ -761,7 +761,7 @@ class Config:
 class ApplyEffectsRequest(BaseModel):
     """Request to apply effects to an existing generation."""
 
-    effects_chain: List[EffectConfig]
+    effects_chain: list[EffectConfig]
     source_version_id: Optional[str] = Field(
         None, description="Version to use as source audio (defaults to clean/original)"
     )
@@ -772,7 +772,7 @@ class ApplyEffectsRequest(BaseModel):
 class ProfileEffectsUpdate(BaseModel):
     """Request to update the default effects chain on a profile."""
 
-    effects_chain: Optional[List[EffectConfig]] = Field(None, description="Effects chain (null to remove)")
+    effects_chain: Optional[list[EffectConfig]] = Field(None, description="Effects chain (null to remove)")
 
 
 class AvailableEffectParam(BaseModel):
@@ -797,4 +797,4 @@ class AvailableEffect(BaseModel):
 class AvailableEffectsResponse(BaseModel):
     """Response listing all available effect types."""
 
-    effects: List[AvailableEffect]
+    effects: list[AvailableEffect]

From b856f753429e5d2db4189e6de41583859dc07f5e Mon Sep 17 00:00:00 2001
From: Ryan Neal <ryan.neal@depop.com>
Date: Tue, 12 May 2026 08:37:34 +0100
Subject: [PATCH 4/5] Use GenerationSource type on GenerationResponse for
 consistency

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/models.py b/backend/models.py
index 133c343d..ca81c4ba 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -123,7 +123,7 @@ class GenerationResponse(BaseModel):
     status: str = "completed"
     error: Optional[str] = None
     is_favorited: bool = False
-    source: str = "manual"
+    source: GenerationSource = "manual"
     created_at: datetime
     versions: Optional[list["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None

From cb2b913b00ce0ce269ff695d036e29f55d9bc6bb Mon Sep 17 00:00:00 2001
From: Ryan Neal <ryan.neal@depop.com>
Date: Tue, 12 May 2026 08:52:34 +0100
Subject: [PATCH 5/5] Remove client-writable source field from
 GenerationRequest

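The source field was exposed on the public API model, allowing clients
to spoof request provenance. Now source is only set server-side via a
function parameter on generate_speech().

To make that possible the /generate route handler is renamed to
generate_speech_endpoint() and becomes a thin wrapper around
generate_speech(), which is now a plain internal helper. The wrapper
never passes a source, so generations created through it fall back to
"manual" (or "personality_speak" when a personality rewrite is applied).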

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/mcp_server/tools.py   |  3 +--
 backend/models.py             |  3 ---
 backend/routes/generations.py | 18 +++++++++++++-----
 backend/routes/speak.py       |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/backend/mcp_server/tools.py b/backend/mcp_server/tools.py
index 636210eb..be91f799 100644
--- a/backend/mcp_server/tools.py
+++ b/backend/mcp_server/tools.py
@@ -240,9 +240,8 @@ async def _speak(
         language=language or "en",
         engine=engine,
         personality=personality,
-        source="mcp",
     )
-    generation = await generate_speech(req, db)
+    generation = await generate_speech(req, db, source="mcp")
     return _speak_response(generation, profile_name, source="mcp")
 
 
diff --git a/backend/models.py b/backend/models.py
index ca81c4ba..38ae9d7d 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -102,9 +102,6 @@ class GenerationRequest(BaseModel):
     effects_chain: Optional[list["EffectConfig"]] = Field(
         None, description="Effects chain to apply after generation (overrides profile default)"
     )
-    source: Optional[GenerationSource] = Field(
-        None, description="Origin of the request. Internal use — not exposed to public API docs."
-    )
 
 
 class GenerationResponse(BaseModel):
diff --git a/backend/routes/generations.py b/backend/routes/generations.py
index 2caf041e..7e9842da 100644
--- a/backend/routes/generations.py
+++ b/backend/routes/generations.py
@@ -54,11 +54,19 @@ def _resolve_generation_engine(data: models.GenerationRequest, profile) -> str:
 
 
 @router.post("/generate", response_model=models.GenerationResponse)
-async def generate_speech(
+async def generate_speech_endpoint(
     data: models.GenerationRequest,
     db: Session = Depends(get_db),
 ):
     """Generate speech from text using a voice profile."""
+    return await generate_speech(data, db)
+
+
+async def generate_speech(
+    data: models.GenerationRequest,
+    db: Session,
+    source: "models.GenerationSource | None" = None,
+):
     task_manager = get_task_manager()
     generation_id = str(uuid.uuid4())
 
@@ -77,7 +85,7 @@ async def generate_speech(
     model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None
 
     text = data.text
-    source = data.source or "manual"
+    resolved_source: models.GenerationSource = source or "manual"
     if data.personality and getattr(profile, "personality", None):
         try:
             llm_result = await personality.rewrite_as_profile(profile.personality, data.text)
@@ -86,8 +94,8 @@ async def generate_speech(
         text = llm_result.text.strip()
         if not text:
             raise HTTPException(status_code=500, detail="LLM produced empty output; nothing to speak.")
-        if not data.source:
-            source = "personality_speak"
+        if not source:
+            resolved_source = "personality_speak"
 
     generation = await history.create_generation(
         profile_id=data.profile_id,
@@ -102,7 +110,7 @@ async def generate_speech(
         status="generating",
         engine=engine,
         model_size=model_size if engine_has_model_sizes(engine) else None,
-        source=source,
+        source=resolved_source,
     )
 
     task_manager.start_generation(
diff --git a/backend/routes/speak.py b/backend/routes/speak.py
index 293951b3..dbe296dc 100644
--- a/backend/routes/speak.py
+++ b/backend/routes/speak.py
@@ -78,9 +78,9 @@ async def speak(
             language=data.language or "en",
             engine=engine,
             personality=bool(personality_flag),
-            source="rest",
         ),
         db,
+        source="rest",
     )
 
     mcp_events.publish(