Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/mcp_server/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ async def _speak(
engine=engine,
personality=personality,
)
generation = await generate_speech(req, db)
generation = await generate_speech(req, db, source="mcp")
return _speak_response(generation, profile_name, source="mcp")


Expand Down
72 changes: 37 additions & 35 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
"""

from pydantic import BaseModel, Field
from typing import Optional, List
from typing import Literal, Optional
from datetime import datetime

from .utils.capture_chords import (
default_push_to_talk_chord,
default_toggle_to_talk_chord,
)

GenerationSource = Literal["mcp", "rest", "manual", "import", "personality_speak"]


class VoiceProfileCreate(BaseModel):
"""Request model for creating a voice profile."""
Expand All @@ -36,7 +38,7 @@ class VoiceProfileResponse(BaseModel):
description: Optional[str]
language: str
avatar_path: Optional[str] = None
effects_chain: Optional[List["EffectConfig"]] = None
effects_chain: Optional[list["EffectConfig"]] = None
voice_type: str = "cloned"
preset_engine: Optional[str] = None
preset_voice_id: Optional[str] = None
Expand Down Expand Up @@ -97,7 +99,7 @@ class GenerationRequest(BaseModel):
default=50, ge=0, le=500, description="Crossfade duration in ms between chunks (0 for hard cut)"
)
normalize: bool = Field(default=True, description="Normalize output audio volume")
effects_chain: Optional[List["EffectConfig"]] = Field(
effects_chain: Optional[list["EffectConfig"]] = Field(
None, description="Effects chain to apply after generation (overrides profile default)"
)

Expand All @@ -118,9 +120,9 @@ class GenerationResponse(BaseModel):
status: str = "completed"
error: Optional[str] = None
is_favorited: bool = False
source: str = "manual"
source: GenerationSource = "manual"
created_at: datetime
versions: Optional[List["GenerationVersionResponse"]] = None
versions: Optional[list["GenerationVersionResponse"]] = None
active_version_id: Optional[str] = None

class Config:
Expand Down Expand Up @@ -154,7 +156,7 @@ class HistoryResponse(BaseModel):
error: Optional[str] = None
is_favorited: bool = False
created_at: datetime
versions: Optional[List["GenerationVersionResponse"]] = None
versions: Optional[list["GenerationVersionResponse"]] = None
active_version_id: Optional[str] = None

class Config:
Expand All @@ -164,7 +166,7 @@ class Config:
class HistoryListResponse(BaseModel):
"""Response model for history list."""

items: List[HistoryResponse]
items: list[HistoryResponse]
total: int


Expand Down Expand Up @@ -212,7 +214,7 @@ class Config:
class CaptureListResponse(BaseModel):
"""Response model for paginated capture list."""

items: List[CaptureResponse]
items: list[CaptureResponse]
total: int


Expand Down Expand Up @@ -258,10 +260,10 @@ class CaptureSettingsResponse(BaseModel):
allow_auto_paste: bool = True
default_playback_voice_id: Optional[str] = None
hotkey_enabled: bool = False
chord_push_to_talk_keys: List[str] = Field(
chord_push_to_talk_keys: list[str] = Field(
default_factory=default_push_to_talk_chord
)
chord_toggle_to_talk_keys: List[str] = Field(
chord_toggle_to_talk_keys: list[str] = Field(
default_factory=default_toggle_to_talk_chord
)

Expand All @@ -282,8 +284,8 @@ class CaptureSettingsUpdate(BaseModel):
allow_auto_paste: Optional[bool] = None
default_playback_voice_id: Optional[str] = None
hotkey_enabled: Optional[bool] = None
chord_push_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6)
chord_toggle_to_talk_keys: Optional[List[str]] = Field(default=None, min_length=1, max_length=6)
chord_push_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6)
chord_toggle_to_talk_keys: Optional[list[str]] = Field(default=None, min_length=1, max_length=6)


class GenerationSettingsResponse(BaseModel):
Expand Down Expand Up @@ -342,7 +344,7 @@ class MCPClientBindingUpsert(BaseModel):


class MCPClientBindingListResponse(BaseModel):
items: List[MCPClientBindingResponse]
items: list[MCPClientBindingResponse]


class SpeakRequest(BaseModel):
Expand Down Expand Up @@ -379,7 +381,7 @@ class LLMGenerateRequest(BaseModel):
# Used by the refinement service to pin tricky rules (imperatives
# staying imperatives, technical-term punctuation) that small models
# lose when the examples live inline in the system prompt.
examples: Optional[List[List[str]]] = Field(default=None, max_length=8)
examples: Optional[list[list[str]]] = Field(default=None, max_length=8)


class LLMGenerateResponse(BaseModel):
Expand Down Expand Up @@ -461,7 +463,7 @@ class FilesystemHealthResponse(BaseModel):
healthy: bool
disk_free_mb: Optional[float] = None
disk_total_mb: Optional[float] = None
directories: List[DirectoryCheck]
directories: list[DirectoryCheck]


class ModelStatus(BaseModel):
Expand All @@ -479,7 +481,7 @@ class ModelStatus(BaseModel):
class ModelStatusListResponse(BaseModel):
"""Response model for model status list."""

models: List[ModelStatus]
models: list[ModelStatus]


class ModelDownloadRequest(BaseModel):
Expand Down Expand Up @@ -519,22 +521,22 @@ class ActiveGenerationTask(BaseModel):
class ActiveTasksResponse(BaseModel):
"""Response model for active tasks."""

downloads: List[ActiveDownloadTask]
generations: List[ActiveGenerationTask]
downloads: list[ActiveDownloadTask]
generations: list[ActiveGenerationTask]


class AudioChannelCreate(BaseModel):
"""Request model for creating an audio channel."""

name: str = Field(..., min_length=1, max_length=100)
device_ids: List[str] = Field(default_factory=list)
device_ids: list[str] = Field(default_factory=list)


class AudioChannelUpdate(BaseModel):
"""Request model for updating an audio channel."""

name: Optional[str] = Field(None, min_length=1, max_length=100)
device_ids: Optional[List[str]] = None
device_ids: Optional[list[str]] = None


class AudioChannelResponse(BaseModel):
Expand All @@ -543,7 +545,7 @@ class AudioChannelResponse(BaseModel):
id: str
name: str
is_default: bool
device_ids: List[str]
device_ids: list[str]
created_at: datetime

class Config:
Expand All @@ -553,13 +555,13 @@ class Config:
class ChannelVoiceAssignment(BaseModel):
"""Request model for assigning voices to a channel."""

profile_ids: List[str]
profile_ids: list[str]


class ProfileChannelAssignment(BaseModel):
"""Request model for assigning channels to a profile."""

channel_ids: List[str]
channel_ids: list[str]


class StoryCreate(BaseModel):
Expand Down Expand Up @@ -608,7 +610,7 @@ class StoryItemDetail(BaseModel):
volume: float = 1.0
generation_created_at: datetime
# Versions available for this generation
versions: Optional[List["GenerationVersionResponse"]] = None
versions: Optional[list["GenerationVersionResponse"]] = None
active_version_id: Optional[str] = None

class Config:
Expand All @@ -623,7 +625,7 @@ class StoryDetailResponse(BaseModel):
description: Optional[str]
created_at: datetime
updated_at: datetime
items: List[StoryItemDetail] = []
items: list[StoryItemDetail] = []

class Config:
from_attributes = True
Expand All @@ -647,13 +649,13 @@ class StoryItemUpdateTime(BaseModel):
class StoryItemBatchUpdate(BaseModel):
"""Request model for batch updating story item timecodes."""

updates: List[StoryItemUpdateTime]
updates: list[StoryItemUpdateTime]


class StoryItemReorder(BaseModel):
"""Request model for reordering story items."""

generation_ids: List[str] = Field(..., min_length=1)
generation_ids: list[str] = Field(..., min_length=1)


class StoryItemMove(BaseModel):
Expand Down Expand Up @@ -704,23 +706,23 @@ class EffectConfig(BaseModel):
class EffectsChain(BaseModel):
"""An ordered list of effects to apply."""

effects: List[EffectConfig] = Field(default_factory=list)
effects: list[EffectConfig] = Field(default_factory=list)


class EffectPresetCreate(BaseModel):
"""Request model for creating an effect preset."""

name: str = Field(..., min_length=1, max_length=100)
description: Optional[str] = Field(None, max_length=500)
effects_chain: List[EffectConfig]
effects_chain: list[EffectConfig]


class EffectPresetUpdate(BaseModel):
"""Request model for updating an effect preset."""

name: Optional[str] = Field(None, min_length=1, max_length=100)
description: Optional[str] = None
effects_chain: Optional[List[EffectConfig]] = None
effects_chain: Optional[list[EffectConfig]] = None


class EffectPresetResponse(BaseModel):
Expand All @@ -729,7 +731,7 @@ class EffectPresetResponse(BaseModel):
id: str
name: str
description: Optional[str] = None
effects_chain: List[EffectConfig]
effects_chain: list[EffectConfig]
is_builtin: bool = False
created_at: datetime

Expand All @@ -744,7 +746,7 @@ class GenerationVersionResponse(BaseModel):
generation_id: str
label: str
audio_path: str
effects_chain: Optional[List[EffectConfig]] = None
effects_chain: Optional[list[EffectConfig]] = None
source_version_id: Optional[str] = None
is_default: bool
created_at: datetime
Expand All @@ -756,7 +758,7 @@ class Config:
class ApplyEffectsRequest(BaseModel):
"""Request to apply effects to an existing generation."""

effects_chain: List[EffectConfig]
effects_chain: list[EffectConfig]
source_version_id: Optional[str] = Field(
None, description="Version to use as source audio (defaults to clean/original)"
)
Expand All @@ -767,7 +769,7 @@ class ApplyEffectsRequest(BaseModel):
class ProfileEffectsUpdate(BaseModel):
"""Request to update the default effects chain on a profile."""

effects_chain: Optional[List[EffectConfig]] = Field(None, description="Effects chain (null to remove)")
effects_chain: Optional[list[EffectConfig]] = Field(None, description="Effects chain (null to remove)")


class AvailableEffectParam(BaseModel):
Expand All @@ -792,4 +794,4 @@ class AvailableEffect(BaseModel):
class AvailableEffectsResponse(BaseModel):
"""Response listing all available effect types."""

effects: List[AvailableEffect]
effects: list[AvailableEffect]
17 changes: 13 additions & 4 deletions backend/routes/generations.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,19 @@ def _resolve_generation_engine(data: models.GenerationRequest, profile) -> str:


@router.post("/generate", response_model=models.GenerationResponse)
async def generate_speech(
async def generate_speech_endpoint(
data: models.GenerationRequest,
db: Session = Depends(get_db),
):
"""Generate speech from text using a voice profile."""
return await generate_speech(data, db)


async def generate_speech(
data: models.GenerationRequest,
db: Session,
source: "models.GenerationSource | None" = None,
):
task_manager = get_task_manager()
generation_id = str(uuid.uuid4())

Expand All @@ -77,7 +85,7 @@ async def generate_speech(
model_size = (data.model_size or "1.7B") if engine_has_model_sizes(engine) else None

text = data.text
source = "manual"
resolved_source: models.GenerationSource = source or "manual"
if data.personality and getattr(profile, "personality", None):
try:
llm_result = await personality.rewrite_as_profile(profile.personality, data.text)
Expand All @@ -86,7 +94,8 @@ async def generate_speech(
text = llm_result.text.strip()
if not text:
raise HTTPException(status_code=500, detail="LLM produced empty output; nothing to speak.")
source = "personality_speak"
if not source:
resolved_source = "personality_speak"

generation = await history.create_generation(
profile_id=data.profile_id,
Expand All @@ -101,7 +110,7 @@ async def generate_speech(
status="generating",
engine=engine,
model_size=model_size if engine_has_model_sizes(engine) else None,
source=source,
source=resolved_source,
)

task_manager.start_generation(
Expand Down
1 change: 1 addition & 0 deletions backend/routes/speak.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ async def speak(
personality=bool(personality_flag),
),
db,
source="rest",
)

mcp_events.publish(
Expand Down
4 changes: 2 additions & 2 deletions backend/services/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sqlalchemy.orm import Session
from sqlalchemy import or_

from ..models import GenerationRequest, GenerationResponse, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig
from ..models import GenerationRequest, GenerationResponse, GenerationSource, HistoryQuery, HistoryResponse, HistoryListResponse, GenerationVersionResponse, EffectConfig
from ..database import Generation as DBGeneration, GenerationVersion as DBGenerationVersion, VoiceProfile as DBVoiceProfile
from .. import config

Expand Down Expand Up @@ -65,7 +65,7 @@ async def create_generation(
status: str = "completed",
engine: Optional[str] = "qwen",
model_size: Optional[str] = None,
source: str = "manual",
source: GenerationSource = "manual",
) -> GenerationResponse:
"""
Create a new generation history entry.
Expand Down