easy test_zero_shot.py

WANDY666 · WANDY666 · commit 01d26c2b73b1 · 2025-12-26T06:26:15.000Z
diff --git a/test/test_zero_shot.py b/test/test_zero_shot.py
@@ -14,13 +14,21 @@
     "--cosyvoice_version", type=int, choices=[2, 3], default=3, help="CosyVoice version: 2 or 3 (default: 3)"
 )
 parser.add_argument("--stream", action="store_true", default=False, help="是否使用流式推理 (default: True)")
-parser.add_argument("--num", type=int, default=5, help="测试数量 (default: 5)")
+parser.add_argument("--num", type=int, default=1, help="测试数量 (default: 1)")
 args = parser.parse_args()
 
 url = f"http://0.0.0.0:{args.port}/inference_zero_shot"
-num = args.num
+
 # 准备要发送的文本和音频文件
 path = "../cosyvoice/asset/zero_shot_prompt.wav"
+your_text = "收到好友从远方寄来的生日礼物，那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐，笑容如花儿般绽放。"
+# 根据 cosyvoice_version 设置 prompt_text
+if args.cosyvoice_version == 3:
+    prompt_text = "You are a helpful assistant.<|endofprompt|>希望你以后能够做的比我还好呦。"
+else:
+    prompt_text = "希望你以后能够做的比我还好呦。"
+
+num = args.num
 stream = args.stream  # 是否使用流式推理
 with open("test_texts.json", "r") as f:
     all_inputs = json.load(f)
@@ -31,16 +39,9 @@
 def get_file(index):
     files = {"prompt_wav": ("sample.wav", open(path, "rb"), "audio/wav")}
     # inputs = random.choice(all_inputs)
-    inputs = all_inputs[0]
     # inputs = all_inputs[2]
 
-    # 根据 cosyvoice_version 设置 prompt_text
-    if args.cosyvoice_version == 3:
-        prompt_text = "You are a helpful assistant.<|endofprompt|>希望你以后能够做的比我还好呦。"
-    else:
-        prompt_text = "希望你以后能够做的比我还好呦。"
-
-    data = {"tts_text": inputs, "prompt_text": prompt_text, "stream": stream}
+    data = {"tts_text": your_text, "prompt_text": prompt_text, "stream": stream}
     start_time = time.time()
 
     response = requests.post(url, files=files, data=data, stream=True)
@@ -74,7 +75,7 @@ def get_file(index):
         output_wav = f"./outs/output{'_stream' if stream else ''}_{index}.wav"
         sf.write(output_wav, audio_np, samplerate=sample_rate, subtype="PCM_16")
         print(
-            f"{inputs} saved as {output_wav}, time cost: {cost_time:.2f} s"
+            f"{your_text} saved as {output_wav}, time cost: {cost_time:.2f} s"
             + f", rtf: {cost_time / speech_len}, ttft: {ttft:.2f} s"
         )
     else: