Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions mlx_audio/ui/app/text-to-speech/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { useState, useRef } from "react"
import { ChevronDown, Download, ThumbsUp, ThumbsDown, Play, Pause, RefreshCw } from "lucide-react"
import { LayoutWrapper } from "@/components/layout-wrapper"
import { VoiceSelection } from "@/components/voice-selection"
import { getVoiceDisplayName } from "@/components/voice-library"

// Custom range input component with colored progress
function RangeInput({
Expand Down Expand Up @@ -62,7 +63,7 @@ export default function SpeechSynthesis() {
const [quantization, setQuantization] = useState("6bit")
const [language, setLanguage] = useState("English-detected")
const [liked, setLiked] = useState<boolean | null>(null)
const [selectedVoice, setSelectedVoice] = useState("conversational_a")
const [selectedVoice, setSelectedVoice] = useState("af_heart")

const audioRef = useRef<HTMLAudioElement | null>(null)

Expand Down Expand Up @@ -131,10 +132,10 @@ export default function SpeechSynthesis() {
if (!audioRef.current) return
setIsGenerating(true)

const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || 'http://localhost';
const API_PORT = process.env.NEXT_PUBLIC_API_PORT || '8000';
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || "http://localhost"
const API_PORT = process.env.NEXT_PUBLIC_API_PORT || "8000"

const voice = (model.includes("marvis") ? "conversational_a" : "af_heart");
const voice = isMarvisModel(model) ? "conversational_a" : selectedVoice;

try {
const response = await fetch(`${API_BASE_URL}:${API_PORT}/v1/audio/speech`, {
Expand Down Expand Up @@ -333,7 +334,12 @@ export default function SpeechSynthesis() {
</div>
</div>


{!isMarvisModel(baseModel) && !baseModel.includes("Spark") && (
<VoiceSelection
onVoiceChange={handleVoiceChange}
initialVoice={selectedVoice}
/>
)}

<div className="mb-6">
<div className="mb-2 flex items-center justify-between">
Expand Down Expand Up @@ -479,7 +485,7 @@ export default function SpeechSynthesis() {
<div className="flex flex-col justify-between h-full flex-1 px-4 py-2">
<div className="flex items-center justify-between w-full">
<div className="text-sm">
{selectedVoice}: {text.length > 20 ? text.substring(0, 20) + "..." : text}
{getVoiceDisplayName(selectedVoice)}: {text.length > 20 ? text.substring(0, 20) + "..." : text}
</div>
<div className="flex items-center space-x-2">
<div className="text-xs text-gray-500 dark:text-gray-400 mr-2">How did this sound?</div>
Expand Down
192 changes: 72 additions & 120 deletions mlx_audio/ui/components/voice-library.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
"use client"

import type React from "react"

import { useState, useEffect } from "react"
import { Bookmark, ChevronDown, Play } from "lucide-react"
import { useState, useEffect, type MouseEvent } from "react"
import { Play } from "lucide-react"

type Voice = {
id: string
Expand All @@ -13,113 +11,120 @@ type Voice = {
age: string
accent: string
region: string
isSelected?: boolean
tags?: string[]
}

const voices: Voice[] = [
{
id: "trustworthy-man",
name: "Trustworthy Man",
id: "af_heart",
name: "Heart",
language: "English",
gender: "Male",
gender: "Female",
age: "Adult",
accent: "Resonate",
region: "EN-US (General)",
isSelected: true,
accent: "Warm",
region: "EN-US (American)",
},
{
id: "expressive-narrator",
name: "Expressive Narrator",
id: "af_bella",
name: "Bella",
language: "English",
gender: "Male",
gender: "Female",
age: "Adult",
accent: "Audiobook",
region: "EN-British",
accent: "Bright",
region: "EN-US (American)",
},
{
id: "radiant-girl",
name: "Radiant Girl",
id: "af_nova",
name: "Nova",
language: "English",
gender: "Female",
age: "Young Adult",
accent: "Lively",
region: "EN-US (General)",
},
{
id: "magnetic-voiced-male",
name: "Magnetic-voiced Male",
language: "English",
gender: "Male",
age: "Adult",
accent: "Ad",
region: "EN-US (General)",
accent: "Clear",
region: "EN-US (American)",
},
{
id: "compelling-lady",
name: "Compelling Lady",
id: "af_sky",
name: "Sky",
language: "English",
gender: "Female",
age: "Adult",
accent: "Broadcast",
region: "EN-British",
age: "Young Adult",
accent: "Lively",
region: "EN-US (American)",
},
{
id: "aussie-bloke",
name: "Aussie Bloke",
id: "am_adam",
name: "Adam",
language: "English",
gender: "Male",
age: "Adult",
accent: "Bright",
region: "EN-Australian",
accent: "Deep",
region: "EN-US (American)",
},
{
id: "captivating-female",
name: "Captivating Female",
id: "am_echo",
name: "Echo",
language: "English",
gender: "Female",
gender: "Male",
age: "Adult",
accent: "News Report",
region: "EN-US (General)",
accent: "Resonant",
region: "EN-US (American)",
},
{
id: "upbeat-woman",
name: "Upbeat Woman",
id: "bf_alice",
name: "Alice",
language: "English",
gender: "Female",
age: "Adult",
accent: "Bright",
region: "EN-US (General)",
accent: "Refined",
region: "EN-British",
},
{
id: "calm-woman",
name: "Calm Woman",
id: "bf_emma",
name: "Emma",
language: "English",
gender: "Female",
age: "Adult",
accent: "Audiobook",
region: "EN-US (General)",
accent: "Clear",
region: "EN-British",
},
{
id: "upset-girl",
name: "Upset Girl",
id: "bm_daniel",
name: "Daniel",
language: "English",
gender: "Female",
age: "Young Adult",
accent: "Sad",
gender: "Male",
age: "Adult",
accent: "Deep",
region: "EN-British",
},
{
id: "gentle-voiced-man",
name: "Gentle-voiced Man",
id: "bm_george",
name: "George",
language: "English",
gender: "Male",
age: "Adult",
accent: "Resonate",
region: "EN-US (General)",
accent: "Warm",
region: "EN-British",
},
]

/**
 * Resolves a voice ID to the human-readable name shown in the UI.
 *
 * @param voiceId - Identifier to look up in the `voices` catalogue (e.g. "af_heart").
 * @returns The matching entry's `name`; if no entry has that ID, or the first
 *   matching entry has an empty name, the ID itself is returned unchanged.
 */
export function getVoiceDisplayName(voiceId: string): string {
  for (const candidate of voices) {
    if (candidate.id === voiceId) {
      // Only the first match is considered, mirroring a `find` lookup.
      return candidate.name || voiceId
    }
  }
  return voiceId
}

/**
 * Gradient colour stops per voice, keyed by voice ID.
 *
 * Each value is a pair of `from-*` / `to-*` utility class names (these look
 * like Tailwind gradient colour-stop classes; confirm against the project's
 * CSS setup). The value carries only the colour stops: the gradient direction
 * class is not included and must be supplied by the consumer. IDs absent from
 * this map have no entry, so callers should provide their own fallback.
 */
export const VOICE_GRADIENT_COLORS: Record<string, string> = {
// "af_*" / "am_*" voice IDs
af_heart: "from-pink-400 to-rose-500",
af_bella: "from-purple-400 to-pink-500",
af_nova: "from-sky-400 to-blue-500",
af_sky: "from-cyan-400 to-sky-500",
am_adam: "from-blue-400 to-indigo-600",
am_echo: "from-indigo-400 to-purple-500",
// "bf_*" / "bm_*" voice IDs
bf_alice: "from-rose-400 to-pink-500",
bf_emma: "from-amber-400 to-orange-500",
bm_daniel: "from-slate-400 to-gray-600",
bm_george: "from-teal-400 to-emerald-500",
}

interface VoiceLibraryProps {
onClose?: () => void
onSelectVoice?: (voice: string) => void
Expand All @@ -135,85 +140,41 @@ export function VoiceLibrary({
}: VoiceLibraryProps) {
const [activeTab, setActiveTab] = useState<"library" | "my-voices">("library")
const [selectedVoice, setSelectedVoice] = useState(
initialSelectedVoice
? voices.find((v) => v.name === initialSelectedVoice)?.id || "trustworthy-man"
: "trustworthy-man",
initialSelectedVoice || "af_heart",
)
const [language, setLanguage] = useState("")
const [accent, setAccent] = useState("")
const [gender, setGender] = useState("")
const [age, setAge] = useState("")
const [bookmarkedVoices, setBookmarkedVoices] = useState<string[]>([])
const [isCloneModalOpen, setIsCloneModalOpen] = useState(false)

useEffect(() => {
if (initialSelectedVoice) {
const voiceId = voices.find((v) => v.name === initialSelectedVoice)?.id
if (voiceId) {
setSelectedVoice(voiceId)
}
setSelectedVoice(initialSelectedVoice)
}
}, [initialSelectedVoice])

const getGradientForVoice = (voiceId: string) => {
// Map of voice IDs to gradient classes
const gradientMap: Record<string, string> = {
"trustworthy-man": "bg-gradient-to-br from-blue-400 to-indigo-600",
"expressive-narrator": "bg-gradient-to-br from-purple-400 to-indigo-500",
"radiant-girl": "bg-gradient-to-br from-pink-400 to-orange-300",
"magnetic-voiced-male": "bg-gradient-to-br from-sky-400 to-blue-600",
"compelling-lady": "bg-gradient-to-br from-rose-400 to-red-500",
"aussie-bloke": "bg-gradient-to-br from-amber-400 to-orange-500",
"captivating-female": "bg-gradient-to-br from-teal-400 to-emerald-500",
"upbeat-woman": "bg-gradient-to-br from-green-400 to-emerald-500",
"calm-woman": "bg-gradient-to-br from-indigo-400 to-purple-500",
"upset-girl": "bg-gradient-to-br from-rose-300 to-pink-500",
"gentle-voiced-man": "bg-gradient-to-br from-cyan-400 to-blue-500",
}

// Return the gradient class or a default gradient if not found
return gradientMap[voiceId] || "bg-gradient-to-br from-gray-400 to-gray-600"
}
// Builds the complete gradient class string for a voice ID. IDs that have no
// entry in VOICE_GRADIENT_COLORS fall back to a neutral gray gradient.
const getGradientForVoice = (voiceId: string) => {
  const colorStops = VOICE_GRADIENT_COLORS[voiceId] || "from-gray-400 to-gray-600"
  return "bg-gradient-to-br " + colorStops
}

const handleSelectVoice = (voiceId: string) => {
setSelectedVoice(voiceId)
// Get the voice name from the voices array
const selectedVoiceName = voices.find((v) => v.id === voiceId)?.name || "Trustworthy Man"

// Call the onSelectVoice callback if provided
if (onSelectVoice) {
onSelectVoice(selectedVoiceName)
onSelectVoice(voiceId)
}

// In a real app, this would update the selected voice in the parent component
if (onClose) {
setTimeout(() => {
onClose()
}, 300)
}
}

const handleBookmark = (e: React.MouseEvent, voiceId: string) => {
e.stopPropagation()
setBookmarkedVoices((prev) => (prev.includes(voiceId) ? prev.filter((id) => id !== voiceId) : [...prev, voiceId]))
}

const handleUseVoice = (e: React.MouseEvent, voiceId: string) => {
const handleUseVoice = (e: MouseEvent, voiceId: string) => {
e.stopPropagation()
// Set the selected voice
setSelectedVoice(voiceId)

// Get the voice name from the voices array
const selectedVoiceName = voices.find((v) => v.id === voiceId)?.name || "Trustworthy Man"

// Call the onSelectVoice callback if provided
if (onSelectVoice) {
onSelectVoice(selectedVoiceName)
onSelectVoice(voiceId)
}

// Provide visual feedback
const voiceName = voices.find((v) => v.id === voiceId)?.name
console.log(`Voice selected: ${voiceName}`)
}

const handleCreateVoice = () => {
Expand All @@ -223,11 +184,8 @@ export function VoiceLibrary({
return (
<div
className="flex flex-col h-full"
onClick={() => console.log("Current selected voice:", selectedVoice)}
style={{ display: "grid", gridTemplateRows: "auto 1fr", height: "100%" }}
>


<div className="overflow-y-auto">
<div className="space-y-2">
{activeTab === "library" ? (
Expand Down Expand Up @@ -275,12 +233,6 @@ export function VoiceLibrary({
Use
</button>
)}
<button
className={`${bookmarkedVoices.includes(voice.id) ? "text-yellow-500" : "text-gray-400"} hover:text-yellow-500`}
onClick={(e) => handleBookmark(e, voice.id)}
>
<Bookmark className="h-4 w-4" />
</button>
</div>
</div>
))
Expand Down
Loading