PyThaiTTS v0.1.1

wannaphong · wannaphong · commit d8839c1e9835 · 2022-08-27T23:46:08.000+07:00
- Add KhanomTan TTS v1.1 #2
diff --git a/notebook/run.ipynb b/notebook/run.ipynb
diff --git a/pythaitts/__init__.py b/pythaitts/__init__.py
@@ -2,43 +2,45 @@
 """
 PyThaiTTS
 """
-__version__ = "0.1.0"
+__version__ = "0.1.1"
 
 
 class TTS:
-    def __init__(self, pretrained="khanomtan", mode="last_checkpoint") -> None:
+    def __init__(self, pretrained="khanomtan", mode="last_checkpoint", version="1.0") -> None:
         """
         :param str pretrained: TTS pretrained (khanomtan)
         :param str mode: pretrained mode
+        :param str version: model version, "1.0" (default) or "1.1"
 
         **Options for mode**
             * *last_checkpoint* (default) - last checkpoint of model
             * *best_model* - Best model (best loss)
         
         You can see more about khanomtan tts at `https://github.com/wannaphong/KhanomTan-TTS-v1.0 <https://github.com/wannaphong/KhanomTan-TTS-v1.0>`_
+        and `https://github.com/wannaphong/KhanomTan-TTS-v1.1 <https://github.com/wannaphong/KhanomTan-TTS-v1.1>`_
         """
         self.pretrained = pretrained
         self.mode = mode
-        self.load_pretrained()
+        self.load_pretrained(version=version)
 
-    def load_pretrained(self):
+    def load_pretrained(self, version):
         """
-        Load pretrined
+        Load the model selected by self.pretrained; raise NotImplementedError if unsupported.
         """
         if self.pretrained == "khanomtan":
             from pythaitts.pretrained import KhanomTan
-            self.model = KhanomTan(mode=self.mode)
+            self.model = KhanomTan(mode=self.mode, version=version)
         else:
-            raise NotImplemented(
+            raise NotImplementedError(
                 "PyThaiTTS doesn't support %s pretrained." % self.pretrained
             )
 
-    def tts(self, text: str, speaker_idx: str = "Tsynctwo", language_idx: str = "th-th", return_type: str = "file", filename: str = None):
+    def tts(self, text: str, speaker_idx: str = "Linda", language_idx: str = "th-th", return_type: str = "file", filename: str = None):
         """
         speech synthesis
 
         :param str text: text
-        :param str speaker_idx: speaker (default is Tsynctwo)
+        :param str speaker_idx: speaker (default is Linda)
         :param str language_idx: language (default is th-th)
         :param str return_type: return type (default is file)
         :param str filename: path filename for save wav file if return_type is file.
diff --git a/pythaitts/pretrained/khanomtan_tts.py b/pythaitts/pretrained/khanomtan_tts.py
@@ -1,24 +1,34 @@
 # -*- coding: utf-8 -*-
 """
-KhanomTan TTS model V1.0
+KhanomTan TTS model
 
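-KhanomTan TTS (ขนมตาล) is a open-source Thai text-to-speech model that supports multilingual speakers.
+KhanomTan TTS (ขนมตาล) is an open-source Thai text-to-speech model that supports multilingual speakers.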
 It supports Thai, English, and others.
 
-Homepage: `https://github.com/wannaphong/KhanomTan-TTS-v1.0 <https://github.com/wannaphong/KhanomTan-TTS-v1.0>`_
+KhanomTan TTS v1.0: `https://github.com/wannaphong/KhanomTan-TTS-v1.0 <https://github.com/wannaphong/KhanomTan-TTS-v1.0>`_
+KhanomTan TTS v1.1: `https://github.com/wannaphong/KhanomTan-TTS-v1.1 <https://github.com/wannaphong/KhanomTan-TTS-v1.1>`_
 """
 import tempfile
 from TTS.utils.synthesizer import Synthesizer
 from huggingface_hub import hf_hub_download
 
 
 class KhanomTan:
-    def __init__(self, mode) -> None:
-        self.config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="config.json",force_filename="config.json")
-        self.speakers_path =  hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="speakers.pth",force_filename="speakers.pth")
-        self.languages_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="language_ids.json",force_filename="language_ids.json")
-        self.speaker_encoder_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="model_se.pth",force_filename="model_se.pth")
-        self.speaker_encoder_config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="config_se.json",force_filename="config_se.json")
+    def __init__(self, mode, version="1.0") -> None:
+        self.version = version
+        if self.version not in ["1.0","1.1"]:
+            raise NotImplementedError("KhanomTan don't have v{0}.".format(self.version))
+        if self.version == "1.0":
+            self.best_model_path_name = "best_model.pth"
+            self.last_checkpoint_model_path_name = "checkpoint_440000.pth"
+        else:
+            self.best_model_path_name = "best_model.pth"
+            self.last_checkpoint_model_path_name = "checkpoint_440000.pth"
+        self.config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="config.json",force_filename="config-v{0}.json".format(self.version))
+        self.speakers_path =  hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="speakers.pth",force_filename="speakers-v{0}.pth".format(self.version))
+        self.languages_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="language_ids.json",force_filename="language_ids-v{0}.json".format(self.version))
+        self.speaker_encoder_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="model_se.pth",force_filename="model_se.pth")
+        self.speaker_encoder_config_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename="config_se.json",force_filename="config_se.json")
         self.synthesizer = None
         with open(self.config_path,"r") as f:
             _temp = f.read()
@@ -35,7 +45,7 @@ def load_synthesizer(self, mode):
-        mode: The model mode (best_mode or last_checkpoint)
+        mode: The model mode (best_model or last_checkpoint)
         """
         if mode=="best_model":
-            self.best_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="best_model.pth",force_filename="best_model.pth")
+            self.best_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename=self.best_model_path_name,force_filename="best_model-v{0}.pth".format(self.version))
             self.synthesizer = Synthesizer(
                 self.best_model_path,
                 self.config_path,
@@ -48,7 +58,7 @@ def load_synthesizer(self, mode):
                 False
             )
         else:
-            self.last_checkpoint_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v1.0",filename="checkpoint_440000.pth",force_filename="checkpoint_440000.pth")
+            self.last_checkpoint_model_path = hf_hub_download(repo_id="wannaphong/khanomtan-tts-v{0}".format(self.version),filename=self.last_checkpoint_model_path_name,force_filename="last_checkpoint-v{0}.pth".format(self.version))
             self.synthesizer = Synthesizer(
                 self.last_checkpoint_model_path,
                 self.config_path,
diff --git a/setup.py b/setup.py
@@ -9,7 +9,7 @@
 
 setup(
     name="PyThaiTTS",
-    version="0.1.0",
+    version="0.1.1",
     description="Open Source Thai Text-to-speech library in Python",
     long_description=readme,
     long_description_content_type="text/markdown",