diff --git a/backend/mcp_server/tools.py b/backend/mcp_server/tools.py index fcf3b6a2..be91f799 100644 --- a/backend/mcp_server/tools.py +++ b/backend/mcp_server/tools.py @@ -241,7 +241,7 @@ async def _speak( engine=engine, personality=personality, ) - generation = await generate_speech(req, db) + generation = await generate_speech(req, db, source="mcp") return _speak_response(generation, profile_name, source="mcp") diff --git a/backend/models.py b/backend/models.py index 06f321ac..38ae9d7d 100644 --- a/backend/models.py +++ b/backend/models.py @@ -3,7 +3,7 @@ """ from pydantic import BaseModel, Field -from typing import Optional, List +from typing import Literal, Optional from datetime import datetime from .utils.capture_chords import ( @@ -11,6 +11,8 @@ default_toggle_to_talk_chord, ) +GenerationSource = Literal["mcp", "rest", "manual", "import", "personality_speak"] + class VoiceProfileCreate(BaseModel): """Request model for creating a voice profile.""" @@ -36,7 +38,7 @@ class VoiceProfileResponse(BaseModel): description: Optional[str] language: str avatar_path: Optional[str] = None - effects_chain: Optional[List["EffectConfig"]] = None + effects_chain: Optional[list["EffectConfig"]] = None voice_type: str = "cloned" preset_engine: Optional[str] = None preset_voice_id: Optional[str] = None @@ -97,7 +99,7 @@ class GenerationRequest(BaseModel): default=50, ge=0, le=500, description="Crossfade duration in ms between chunks (0 for hard cut)" ) normalize: bool = Field(default=True, description="Normalize output audio volume") - effects_chain: Optional[List["EffectConfig"]] = Field( + effects_chain: Optional[list["EffectConfig"]] = Field( None, description="Effects chain to apply after generation (overrides profile default)" ) @@ -118,9 +120,9 @@ class GenerationResponse(BaseModel): status: str = "completed" error: Optional[str] = None is_favorited: bool = False - source: str = "manual" + source: GenerationSource = "manual" created_at: datetime - versions: Optional[List["GenerationVersionResponse"]] = None + versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None class Config: @@ -154,7 +156,7 @@ class HistoryResponse(BaseModel): error: Optional[str] = None is_favorited: bool = False created_at: datetime - versions: Optional[List["GenerationVersionResponse"]] = None + versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None class Config: @@ -164,7 +166,7 @@ class Config: class HistoryListResponse(BaseModel): """Response model for history list.""" - items: List[HistoryResponse] + items: list[HistoryResponse] total: int @@ -212,7 +214,7 @@ class Config: class CaptureListResponse(BaseModel): """Response model for paginated capture list.""" - items: List[CaptureResponse] + items: list[CaptureResponse] total: int @@ -258,10 +260,10 @@ class CaptureSettingsResponse(BaseModel): allow_auto_paste: bool = True default_playback_voice_id: Optional[str] = None hotkey_enabled: bool = False - chord_push_to_talk_keys: List[str] = Field( + chord_push_to_talk_keys: list[str] = Field( default_factory=default_push_to_talk_chord ) - chord_toggle_to_talk_keys: List[str] = Field( + chord_toggle_to_talk_keys: list[str] = Field( default_factory=default_toggle_to_talk_chord ) @@ -282,8 +284,8 @@ class CaptureSettingsUpdate(BaseModel): allow_auto_paste: Optional[bool] = None default_playback_voice_id: Optional[str] = None hotkey_enabled: Optional[bool] = None - chord_push_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6) - chord_toggle_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6) + chord_push_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6) + chord_toggle_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6) class GenerationSettingsResponse(BaseModel): @@ -342,7 +344,7 @@ class MCPClientBindingUpsert(BaseModel): class MCPClientBindingListResponse(BaseModel): - items: List[MCPClientBindingResponse] + items: list[MCPClientBindingResponse] class SpeakRequest(BaseModel): @@ -379,7 +381,7 @@ class LLMGenerateRequest(BaseModel): # Used by the refinement service to pin tricky rules (imperatives # staying imperatives, technical-term punctuation) that small models # lose when the examples live inline in the system prompt. - examples: Optional[List[List[str]]] = Field(default=None, max_length=8) + examples: Optional[list[list[str]]] = Field(default=None, max_length=8) class LLMGenerateResponse(BaseModel): @@ -461,7 +463,7 @@ class FilesystemHealthResponse(BaseModel): healthy: bool disk_free_mb: Optional[float] = None disk_total_mb: Optional[float] = None - directories: List[DirectoryCheck] + directories: list[DirectoryCheck] class ModelStatus(BaseModel): @@ -479,7 +481,7 @@ class ModelStatus(BaseModel): class ModelStatusListResponse(BaseModel): """Response model for model status list.""" - models: List[ModelStatus] + models: list[ModelStatus] class ModelDownloadRequest(BaseModel): @@ -519,22 +521,22 @@ class ActiveGenerationTask(BaseModel): class ActiveTasksResponse(BaseModel): """Response model for active tasks.""" - downloads: List[ActiveDownloadTask] - generations: List[ActiveGenerationTask] + downloads: list[ActiveDownloadTask] + generations: list[ActiveGenerationTask] class AudioChannelCreate(BaseModel): """Request model for creating an audio channel.""" name: str = Field(..., min_length=1, max_length=100) - device_ids: List[str] = Field(default_factory=list) + device_ids: list[str] = Field(default_factory=list) class AudioChannelUpdate(BaseModel): """Request model for updating an audio channel.""" name: Optional[str] = Field(None, min_length=1, max_length=100) - device_ids: Optional[List[str]] = None + device_ids: Optional[list[str]] = None class AudioChannelResponse(BaseModel): @@ -543,7 +545,7 @@ class AudioChannelResponse(BaseModel): id: str name: str is_default: bool - device_ids: List[str] + device_ids: list[str] created_at: datetime class Config: @@ -553,13 +555,13 @@ class Config: class ChannelVoiceAssignment(BaseModel): """Request model for assigning voices to a channel.""" - profile_ids: List[str] + profile_ids: list[str] class ProfileChannelAssignment(BaseModel): """Request model for assigning channels to a profile.""" - channel_ids: List[str] + channel_ids: list[str] class StoryCreate(BaseModel): @@ -608,7 +610,7 @@ class StoryItemDetail(BaseModel): volume: float = 1.0 generation_created_at: datetime # Versions available for this generation - versions: Optional[List["GenerationVersionResponse"]] = None + versions: Optional[list["GenerationVersionResponse"]] = None active_version_id: Optional[str] = None class Config: @@ -623,7 +625,7 @@ class StoryDetailResponse(BaseModel): description: Optional[str] created_at: datetime updated_at: datetime - items: List[StoryItemDetail] = [] + items: list[StoryItemDetail] = [] class Config: from_attributes = True @@ -647,13 +649,13 @@ class StoryItemUpdateTime(BaseModel): class StoryItemBatchUpdate(BaseModel): """Request model for batch updating story item timecodes.""" - updates: List[StoryItemUpdateTime] + updates: list[StoryItemUpdateTime] class StoryItemReorder(BaseModel): """Request model for reordering story items.""" - generation_ids: List[str] = Field(..., min_length=1) + generation_ids: list[str] = Field(..., min_length=1) class StoryItemMove(BaseModel): @@ -704,7 +706,7 @@ class EffectConfig(BaseModel): class EffectsChain(BaseModel): """An ordered list of effects to apply.""" - effects: List[EffectConfig] = Field(default_factory=list) + effects: list[EffectConfig] = Field(default_factory=list) class EffectPresetCreate(BaseModel): @@ -712,7 +714,7 @@ class EffectPresetCreate(BaseModel): name: str = Field(..., min_length=1, max_length=100) description: Optional[str] = Field(None, max_length=500) - effects_chain: List[EffectConfig] + effects_chain: list[EffectConfig] class EffectPresetUpdate(BaseModel): @@ -720,7 +722,7 @@ class EffectPresetUpdate(BaseModel): name: Optional[str] = Field(None, min_length=1, max_length=100) description: Optional[str] = None - effects_chain: Optional[List[EffectConfig]] = None + effects_chain: Optional[list[EffectConfig]] = None class EffectPresetResponse(BaseModel): @@ -729,7 +731,7 @@ class EffectPresetResponse(BaseModel): id: str name: str description: Optional[str] = None - effects_chain: List[EffectConfig] + effects_chain: list[EffectConfig] is_builtin: bool = False created_at: datetime @@ -744,7 +746,7 @@ class GenerationVersionResponse(BaseModel): generation_id: str label: str audio_path: str - effects_chain: Optional[List[EffectConfig]] = None + effects_chain: Optional[list[EffectConfig]] = None source_version_id: Optional[str] = None is_default: bool created_at: datetime @@ -756,7 +758,7 @@ class Config: class ApplyEffectsRequest(BaseModel): """Request to apply effects to an existing generation.""" - effects_chain: List[EffectConfig] + effects_chain: list[EffectConfig] source_version_id: Optional[str] = Field( None, description="Version to use as source audio (defaults to clean/original)" ) @@ -767,7 +769,7 @@ class ApplyEffectsRequest(BaseModel): class ProfileEffectsUpdate(BaseModel): """Request to update the default effects chain on a profile.""" - effects_chain: Optional[List[EffectConfig]] = Field(None, description="Effects chain (null to remove)") + effects_chain: Optional[list[EffectConfig]] = Field(None, description="Effects chain (null to remove)") class AvailableEffectParam(BaseModel): @@ -792,4 +794,4 @@ class AvailableEffect(BaseModel): class AvailableEffectsResponse(BaseModel): """Response listing all available effect types.""" - effects: List[AvailableEffect] + effects: list[AvailableEffect] diff --git a/backend/routes/generations.py b/backend/routes/generations.py index 215c96cb..7e9842da 100644 --- a/backend/routes/generations.py +++ b/backend/routes/generations.py @@ -54,11 +54,19 @@ def _resolve_generation_engine(data: models.GenerationRequest, profile) -> str: @router.post("/generate", response_model=models.GenerationResponse) -async def generate_speech( +async def generate_speech_endpoint( data: models.GenerationRequest, db: Session = Depends(get_db), ): """Generate speech from text using a voice profile.""" + return await generate_speech(data, db) + + +async def generate_speech( + data: models.GenerationRequest, + db: Session, + source: "models.GenerationSource | None" = None, +): task_manager = get_task_manager() generation_id = str(uuid.uuid4()) @@ -77,7 +85,7 @@ async def generate_speech( model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None text = data.text - source = "manual" + resolved_source: models.GenerationSource = source or "manual" if data.personality and getattr(profile, "personality", None): try: llm_result = await personality.rewrite_as_profile(profile.personality, data.text) @@ -86,7 +94,8 @@ async def generate_speech( text = llm_result.text.strip() if not text: raise HTTPException(status_code=500, detail="LLM produced empty output; nothing to speak.") - source = "personality_speak" + if not source: + resolved_source = "personality_speak" generation = await history.create_generation( profile_id=data.profile_id, @@ -101,7 +110,7 @@ async def generate_speech( status="generating", engine=engine, model_size=model_size if engine_has_model_sizes(engine) else None, - source=source, + source=resolved_source, ) task_manager.start_generation( diff --git a/backend/routes/speak.py b/backend/routes/speak.py index 0c81846c..dbe296dc 100644 --- a/backend/routes/speak.py +++ b/backend/routes/speak.py @@ -80,6 +80,7 @@ async def speak( personality=bool(personality_flag), ), db, + source="rest", ) mcp_events.publish( diff --git a/backend/services/history.py b/backend/services/history.py index 3062f7d6..9daca8cd 100644 --- a/backend/services/history.py +++ b/backend/services/history.py @@ -10,7 +10,7 @@ from sqlalchemy.orm import Session from sqlalchemy import or_ -from ..models import GenerationRequest, GenerationResponse, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig +from ..models import GenerationRequest, GenerationResponse, GenerationSource, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig from ..database import Generation as DBGeneration, GenerationVersion as DBGenerationVersion, VoiceProfile as DBVoiceProfile from .. import config @@ -65,7 +65,7 @@ async def create_generation( status: str = "completed", engine: Optional[str] = "qwen", model_size: Optional[str] = None, - source: str = "manual", + source: GenerationSource = "manual", ) -> GenerationResponse: """ Create a new generation history entry.