Skip to content

Commit d8839c1

Browse files
committed
PyThaiTTS v0.1.1
- Add KhanomTan TTS v1.1 #2
1 parent e1e86ab commit d8839c1

4 files changed

Lines changed: 91 additions & 38 deletions

File tree

notebook/run.ipynb

Lines changed: 61 additions & 20 deletions
Large diffs are not rendered by default.

pythaitts/__init__.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,43 +2,45 @@
22
"""
33
PyThaiTTS
44
"""
5-
__version__ = "0.1.0"
5+
__version__ = "0.1.1"
66

77

88
class TTS:
9-
def __init__(self, pretrained="khanomtan", mode="last_checkpoint") -> None:
9+
def __init__(self, pretrained="khanomtan", mode="last_checkpoint", version="1.0") -> None:
1010
"""
1111
:param str pretrained: TTS pretrained (khanomtan)
1212
:param str mode: pretrained mode
13+
:param str version: model version, "1.0" or "1.1" (default is "1.0")
1314
1415
**Options for mode**
1516
* *last_checkpoint* (default) - last checkpoint of model
1617
* *best_model* - Best model (best loss)
1718
1819
You can see more about khanomtan tts at `https://github.com/wannaphong/KhanomTan-TTS-v1.0 <https://github.com/wannaphong/KhanomTan-TTS-v1.0>`_
20+
and `https://github.com/wannaphong/KhanomTan-TTS-v1.1 <https://github.com/wannaphong/KhanomTan-TTS-v1.1>`_
1921
"""
2022
self.pretrained = pretrained
2123
self.mode = mode
22-
self.load_pretrained()
24+
self.load_pretrained(version=version)
2325

24-
def load_pretrained(self):
26+
def load_pretrained(self,version):
2527
"""
2628
Load the pretrained model
2729
"""
2830
if self.pretrained == "khanomtan":
2931
from pythaitts.pretrained import KhanomTan
30-
self.model = KhanomTan(mode=self.mode)
32+
self.model = KhanomTan(mode=self.mode, version=version)
3133
else:
3234
raise NotImplemented(
3335
"PyThaiTTS doesn't support %s pretrained." % self.pretrained
3436
)
3537

36-
def tts(self, text: str, speaker_idx: str = "Tsynctwo", language_idx: str = "th-th", return_type: str = "file", filename: str = None):
38+
def tts(self, text: str, speaker_idx: str = "Linda", language_idx: str = "th-th", return_type: str = "file", filename: str = None):
3739
"""
3840
speech synthesis
3941
4042
:param str text: text
41-
:param str speaker_idx: speaker (default is Tsynctwo)
43+
:param str speaker_idx: speaker (default is Linda)
4244
:param str language_idx: language (default is th-th)
4345
:param str return_type: return type (default is file)
4446
:param str filename: path filename for save wav file if return_type is file.

pythaitts/pretrained/khanomtan_tts.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,34 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
KhanomTan TTS model V1.0
3+
KhanomTan TTS model
44
55
KhanomTan TTS (ขนมตาล) is an open-source Thai text-to-speech model that supports multilingual speakers.
66
It supports Thai, English, and others.
77
8-
Homepage: `https://github.com/wannaphong/KhanomTan-TTS-v1.0 <https://github.com/wannaphong/KhanomTan-TTS-v1.0>`_
8+
KhanomTan TTS v1.0: `https://github.com/wannaphong/KhanomTan-TTS-v1.0 <https://github.com/wannaphong/KhanomTan-TTS-v1.0>`_
9+
KhanomTan TTS v1.1: `https://github.com/wannaphong/KhanomTan-TTS-v1.1 <https://github.com/wannaphong/KhanomTan-TTS-v1.1>`_
910
"""
1011
import tempfile
1112
from TTS.utils.synthesizer import Synthesizer
1213
from huggingface_hub import hf_hub_download
1314

1415

1516
class KhanomTan:
16-
def __init__(self, mode) -> None:
17-
self.config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="config.json",force_filename="config.json")
18-
self.speakers_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="speakers.pth",force_filename="speakers.pth")
19-
self.languages_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="language_ids.json",force_filename="language_ids.json")
20-
self.speaker_encoder_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="model_se.pth",force_filename="model_se.pth")
21-
self.speaker_encoder_config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="config_se.json",force_filename="config_se.json")
17+
def __init__(self, mode, version="1.0") -> None:
18+
self.version = version
19+
if self.version not in ["1.0","1.1"]:
20+
raise NotImplementedError("KhanomTan don't have v{0}.".format(self.version))
21+
if self.version == "1.0":
22+
self.best_model_path_name = "best_model.pth"
23+
self.last_checkpoint_model_path_name = "checkpoint_440000.pth"
24+
else:
25+
self.best_model_path_name = "best_model.pth"
26+
self.last_checkpoint_model_path_name = "checkpoint_440000.pth"
27+
self.config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="config.json",force_filename="config-v{0}.json".format(self.version))
28+
self.speakers_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="speakers.pth",force_filename="speakers-v{0}.pth".format(self.version))
29+
self.languages_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="language_ids.json",force_filename="language_ids-v{0}.json".format(self.version))
30+
self.speaker_encoder_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="model_se.pth",force_filename="model_se.pth")
31+
self.speaker_encoder_config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="config_se.json",force_filename="config_se.json")
2232
self.synthesizer = None
2333
with open(self.config_path,"r") as f:
2434
_temp = f.read()
@@ -35,7 +45,7 @@ def load_synthesizer(self, mode):
3545
mode: The model mode (best_model or last_checkpoint)
3646
"""
3747
if mode=="best_model":
38-
self.best_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="best_model.pth",force_filename="best_model.pth")
48+
self.best_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename=self.best_model_path_name,force_filename="best_model-v{0}.pth".format(self.version))
3949
self.synthesizer = Synthesizer(
4050
self.best_model_path,
4151
self.config_path,
@@ -48,7 +58,7 @@ def load_synthesizer(self, mode):
4858
False
4959
)
5060
else:
51-
self.last_checkpoint_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="checkpoint_440000.pth",force_filename="checkpoint_440000.pth")
61+
self.last_checkpoint_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename=self.last_checkpoint_model_path_name,force_filename="last_checkpoint-v{0}.pth".format(self.version))
5262
self.synthesizer = Synthesizer(
5363
self.last_checkpoint_model_path,
5464
self.config_path,

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
setup(
1111
name="PyThaiTTS",
12-
version="0.1.0",
12+
version="0.1.1",
1313
description="Open Source Thai Text-to-speech library in Python",
1414
long_description=readme,
1515
long_description_content_type="text/markdown",

0 commit comments

Comments
 (0)