|
| 1 | +# /// script |
| 2 | +# requires-python = ">=3.12" |
| 3 | +# dependencies = [ |
| 4 | +# "gradio>=5.13.1", |
| 5 | +# "kokoro-onnx>=0.3.8", |
| 6 | +# "onnxruntime>=1.20.1", |
| 7 | +# "colorlog>=6.9.0", |
| 8 | +# "espeakng-loader>=0.2.4", |
| 9 | +# "phonemizer-fork>=3.3.2", |
| 10 | +# "numpy>=2.0.2", |
| 11 | +# ] |
| 12 | +# |
| 13 | +# /// |
| 14 | + |
"""
Gradio demo for Kokoro TTS (kokoro-onnx).

Usage: download the model and voices files into the working directory, then run the app:

wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx
wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin
uv run examples/app.py
"""
| 20 | + |
| 21 | +import gradio as gr |
| 22 | +import numpy as np |
| 23 | + |
| 24 | +from kokoro_onnx import Kokoro |
| 25 | +from kokoro_onnx.tokenizer import Tokenizer |
| 26 | + |
# Language codes offered in the UI's "Language" dropdown.
SUPPORTED_LANGUAGES = ["en-us"]

# Shared, module-level instances reused by every request handled by `create`.
# The model and voices files are looked up relative to the working directory.
tokenizer = Tokenizer()
kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
| 32 | + |
| 33 | + |
def create(
    text: str,
    voice: str,
    language: str,
    blend_voice_name: str | None = None,
    blend_ratio: float = 0.5,
    speed: float = 1.0,
):
    """Synthesize speech for *text* and return the audio together with its phonemes.

    Args:
        text: Text to speak.
        voice: Name of the primary voice.
        language: Language code handed to the phonemizer (e.g. ``"en-us"``).
        blend_voice_name: Optional name of a second voice. When falsy
            (``None`` or ``""``) no blending happens and ``voice`` is passed
            to Kokoro by name.
        blend_ratio: Weight of the second voice in the blend; the primary
            voice gets ``1 - blend_ratio``. The default ``0.5`` is an even
            50/50 mix.
        speed: Speed value forwarded to ``kokoro.create``.

    Returns:
        A two-item list ``[(sample_rate, samples), phonemes]``, in the order
        the UI wires its outputs (audio first, then the phoneme text).
    """
    phonemes = tokenizer.phonemize(text, lang=language)

    if blend_voice_name:
        # Replace the voice name with a weighted sum of the two voice styles.
        # NOTE(review): assumes get_voice_style returns numpy arrays of equal
        # shape - confirm against kokoro_onnx.
        first_style = kokoro.get_voice_style(voice)
        second_style = kokoro.get_voice_style(blend_voice_name)
        voice = np.add(
            first_style * (1.0 - blend_ratio),
            second_style * blend_ratio,
        )

    # The text is already phonemized above, so tell Kokoro not to do it again.
    samples, sample_rate = kokoro.create(
        phonemes, voice=voice, speed=speed, is_phonemes=True
    )
    return [(sample_rate, samples), phonemes]
| 46 | + |
| 47 | + |
def create_app():
    """Build the Gradio Blocks UI for the demo and return it (not launched).

    The layout is: text area, language dropdown, voice dropdown, optional
    blend-voice dropdown, a "Create" button, then the phoneme text box and
    the audio player. Clicking the button calls `create`.
    """
    theme = gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto")])

    with gr.Blocks(theme=theme) as ui:
        text_input = gr.TextArea(
            value="Kokoro TTS. Turning words into emotion, one voice at a time!",
            label="Input Text",
            rtl=False,
        )
        language_input = gr.Dropdown(
            choices=SUPPORTED_LANGUAGES,
            value="en-us",
            label="Language",
        )

        voice_choices = sorted(kokoro.get_voices())
        voice_input = gr.Dropdown(
            choices=voice_choices,
            value="af_sky",
            label="Voice",
        )

        # The trailing None entry lets the user clear the blend selection.
        blend_choices = [*sorted(kokoro.get_voices()), None]
        blend_voice_input = gr.Dropdown(
            choices=blend_choices,
            value=None,
            label="Blend Voice (Optional)",
        )

        submit_button = gr.Button("Create")
        phonemes_output = gr.Textbox(label="Phonemes")
        audio_output = gr.Audio()

        # Input order follows `create`'s positional parameters; output order
        # follows its return value (audio first, then phonemes).
        handler_inputs = [text_input, voice_input, language_input, blend_voice_input]
        handler_outputs = [audio_output, phonemes_output]
        submit_button.click(fn=create, inputs=handler_inputs, outputs=handler_outputs)

    return ui
| 77 | + |
| 78 | + |
# Build the UI at module level so `ui` stays available as a module attribute,
# but only start the server when this file is executed as a script
# (e.g. `uv run examples/app.py`), not when it is merely imported.
ui = create_app()

if __name__ == "__main__":
    ui.launch(debug=True)
| 81 | + |
0 commit comments