Skip to content

Commit 09fac92

Browse files
committed
review of kokoro over
1 parent 6e306fd commit 09fac92

1 file changed

Lines changed: 81 additions & 0 deletions

File tree

kokoro_review/app.py

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
# /// script
2+
# requires-python = ">=3.12"
3+
# dependencies = [
4+
# "gradio>=5.13.1",
5+
# "kokoro-onnx>=0.3.8",
6+
# "onnxruntime>=1.20.1",
7+
# "colorlog>=6.9.0",
8+
# "espeakng-loader>=0.2.4",
9+
# "phonemizer-fork>=3.3.2",
10+
# "numpy>=2.0.2",
11+
# ]
12+
#
13+
# ///
14+
15+
"""
16+
wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx
17+
wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin
18+
uv run kokoro_review/app.py
19+
"""
20+
21+
import gradio as gr
22+
import numpy as np
23+
24+
from kokoro_onnx import Kokoro
25+
from kokoro_onnx.tokenizer import Tokenizer
26+
27+
# Shared module-level objects: each is built once at import time and reused
# by every call to create().
tokenizer = Tokenizer()

# Both paths are relative, so the model and voice files are looked up from
# the current working directory (presumably the files fetched by the wget
# commands in the module docstring).
kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")


# Language codes offered by the "Language" dropdown in the UI.
SUPPORTED_LANGUAGES = ["en-us"]
32+
33+
34+
def create(text: str, voice: str, language: str, blend_voice_name: str = None):
35+
phonemes = tokenizer.phonemize(text, lang=language)
36+
37+
# Blending
38+
if blend_voice_name:
39+
first_voice = kokoro.get_voice_style(voice)
40+
second_voice = kokoro.get_voice_style(blend_voice_name)
41+
voice = np.add(first_voice * (50 / 100), second_voice * (50 / 100))
42+
samples, sample_rate = kokoro.create(
43+
phonemes, voice=voice, speed=1.0, is_phonemes=True
44+
)
45+
return [(sample_rate, samples), phonemes]
46+
47+
48+
def create_app() -> "gr.Blocks":
    """Build the Gradio interface for the TTS demo.

    The layout holds a text area, language and voice dropdowns, an optional
    blend-voice dropdown, a "Create" button, and the phoneme and audio
    outputs. Clicking the button calls ``create`` with the four inputs.

    Returns:
        The assembled ``gr.Blocks`` object; launching it is left to the caller.
    """
    # Query and sort the voice names once; both dropdowns share the result.
    voices = sorted(kokoro.get_voices())

    with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto")])) as ui:
        text_input = gr.TextArea(
            label="Input Text",
            rtl=False,
            value="Kokoro TTS. Turning words into emotion, one voice at a time!",
        )
        language_input = gr.Dropdown(
            label="Language",
            value="en-us",
            choices=SUPPORTED_LANGUAGES,
        )
        voice_input = gr.Dropdown(label="Voice", value="af_sky", choices=voices)
        blend_voice_input = gr.Dropdown(
            label="Blend Voice (Optional)",
            value=None,
            # The trailing None entry lets the user clear the blend voice.
            choices=[*voices, None],
        )
        submit_button = gr.Button("Create")
        phonemes_output = gr.Textbox(label="Phonemes")
        audio_output = gr.Audio()
        # Input order must match create()'s positional parameters, and the
        # output order must match the list it returns.
        submit_button.click(
            fn=create,
            inputs=[text_input, voice_input, language_input, blend_voice_input],
            outputs=[audio_output, phonemes_output],
        )
    return ui
77+
78+
79+
# Build the interface and start the Gradio server as soon as the script runs.
ui = create_app()
ui.launch(debug=True)
81+

0 commit comments

Comments
 (0)