Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions asset/zero_shot_prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
希望你以后能够做的比我还好呦。
25 changes: 25 additions & 0 deletions tools/sft_en_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
sys.path.append('third_party/Matcha-TTS')
import torch
import torchaudio
from cosyvoice.cli.cosyvoice import CosyVoice


# Smoke test: synthesize one English sentence with each listed SFT speaker and
# write the result to sft_en_<spk_id>.wav in the current working directory.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT', load_jit=False, load_trt=False, fp16=False)

# Speaker ids are passed to the model verbatim ("English female" / "English male").
en_spk_ids = ['英文女', '英文男']

# The original sentence read "optionsand" (missing space), which was fed to the
# synthesizer as a single bogus word.
tts_text = 'Maintaining your ability to learn translates into increased marketability, improved career options and higher salaries.'

for spk_id in en_spk_ids:
    # inference_sft is iterated; every yielded item carries a 'tts_speech' entry.
    chunks = [out['tts_speech'] for out in cosyvoice.inference_sft(tts_text, spk_id, stream=False)]
    if not chunks:
        # Nothing was produced for this speaker; torch.cat would fail on an empty list.
        continue
    # Join all pieces along dim 1 — assumes tensors are (channels, samples); TODO confirm.
    speech = torch.cat(chunks, dim=1)
    filename = f'sft_en_{spk_id}.wav'
    torchaudio.save(filename, speech, cosyvoice.sample_rate)
    print('saved', filename)
13 changes: 13 additions & 0 deletions tools/sft_speak_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
sys.path.append('third_party/Matcha-TTS')
from cosyvoice.cli.cosyvoice import CosyVoice


# Load the SFT model and print the speaker ids it reports as available.
# This script only lists the ids; it does not synthesize any audio.
model_dir = 'pretrained_models/CosyVoice-300M-SFT'
cosyvoice = CosyVoice(model_dir, load_jit=False, load_trt=False, fp16=False)

spk_ids = cosyvoice.list_available_spks()
print('available spk_id:', spk_ids)
26 changes: 26 additions & 0 deletions tools/sft_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
sys.path.append('third_party/Matcha-TTS')
import torch
import torchaudio
from cosyvoice.cli.cosyvoice import CosyVoice


# Smoke test: synthesize one Chinese sentence with each listed SFT speaker and
# write the result to sft_<spk_id>.wav in the current working directory.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT', load_jit=False, load_trt=False, fp16=False)

tts_text = '收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。'

# Speaker ids are passed to the model verbatim
# ("Chinese female" / "Chinese male" / "Cantonese female").
spk_ids = ['中文女', '中文男', '粤语女']

for spk_id in spk_ids:
    # Collect the 'tts_speech' entry of every item yielded by inference_sft.
    chunks = [out['tts_speech'] for out in cosyvoice.inference_sft(tts_text, spk_id, stream=False)]
    if not chunks:
        # No audio came back for this speaker; move on to the next one.
        continue
    filename = f'sft_{spk_id}.wav'
    # Concatenate along dim 1 before saving — assumes (channels, samples); TODO confirm.
    speech = torch.cat(chunks, dim=1)
    torchaudio.save(filename, speech, cosyvoice.sample_rate)
    print('saved', filename)
29 changes: 29 additions & 0 deletions tools/zero_short_speak_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
from pathlib import Path
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
sys.path.append('third_party/Matcha-TTS')
import torchaudio
from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav


# Register a zero-shot speaker from a prompt recording plus its transcript,
# synthesize one sentence using only the registered speaker id, then persist
# the speaker info.
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, load_vllm=False, fp16=False)

# NOTE if you want to reproduce the results on https://funaudiollm.github.io/cosyvoice2, please add text_frontend=False during inference
# zero_shot usage
voice_file = 'asset/zero_shot_prompt.wav'
prompt_speech_16k = load_wav(voice_file, 16000)

# The transcript is expected next to the recording: same name, ".txt" suffix.
prompt_text_file = Path(voice_file).with_suffix(".txt")
if not prompt_text_file.exists():
    # Previously a missing transcript left prompt_text unbound and surfaced
    # later as an unrelated NameError; fail here with the actual cause.
    raise FileNotFoundError(f'prompt text file not found: {prompt_text_file}')
prompt_text = prompt_text_file.read_text(encoding='utf-8').strip()

# save zero_shot spk as a voice_id for future usage
# The call is kept out of an `assert` so it still runs under `python -O`,
# where assert statements (and any call inside them) are removed.
if cosyvoice.add_zero_shot_spk(prompt_text, prompt_speech_16k, 'my_zero_shot_spk') is not True:
    raise RuntimeError('failed to register zero-shot speaker "my_zero_shot_spk"')

# Prompt text and prompt speech are passed as empty strings here; the
# registered zero_shot_spk_id is supplied instead.
for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '', '', zero_shot_spk_id='my_zero_shot_spk', stream=False)):
    torchaudio.save('speaker_id_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
# continue to use "my_zero_shot_spk" instead of "prompt_speech_16k"

# this step is optional unless you want to persist "my_zero_shot_spk" into "model_dir/spk2info.pt" for a new session
cosyvoice.save_spkinfo()
24 changes: 24 additions & 0 deletions tools/zero_short_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os
from pathlib import Path
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
sys.path.append('third_party/Matcha-TTS')
import torchaudio
from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav


# Zero-shot smoke test: synthesize one sentence with a prompt recording and its
# transcript, writing each yielded result to zero_shot_<index>.wav.
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, load_vllm=False, fp16=True)

# NOTE if you want to reproduce the results on https://funaudiollm.github.io/cosyvoice2, please add text_frontend=False during inference
# zero_shot usage
voice_file = 'asset/zero_shot_prompt.wav'
prompt_speech_16k = load_wav(voice_file, 16000)

# The transcript is expected next to the recording: same name, ".txt" suffix.
prompt_text_file = Path(voice_file).with_suffix(".txt")
if not prompt_text_file.exists():
    # Previously a missing transcript left prompt_text unbound and surfaced
    # as a NameError at the inference call; fail here with the actual cause.
    raise FileNotFoundError(f'prompt text file not found: {prompt_text_file}')
prompt_text = prompt_text_file.read_text(encoding='utf-8').strip()

for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', prompt_text, prompt_speech_16k, stream=False)):
    filename = 'zero_shot_{}.wav'.format(i)
    torchaudio.save(filename, j['tts_speech'], cosyvoice.sample_rate)