-
Notifications
You must be signed in to change notification settings - Fork 100
Expand file tree
/
Copy pathconstant.py
More file actions
89 lines (71 loc) · 2.94 KB
/
constant.py
File metadata and controls
89 lines (71 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Copyright (c) 2025, Alibaba Cloud and its affiliates;
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Constants configuration file.
Stores the prompts, templates, and other configuration constants used during inference.
"""
# ============= Prompts =============

# S2T (speech-to-text) mode prompt: asks for text tokens only.
DEFAULT_S2T_PROMPT = "You are asked to generate text tokens."

# S2M (speech to joint speech-text response) mode prompt: asks for text and
# speech tokens at the same time.
DEFAULT_S2M_PROMPT = (
    "You are asked to generate both text and speech tokens at the same time."
)

# Spoken-conversation S2M prompt: the default S2M prompt, one space, then a
# Chinese persona description (named Xiaoyun, a gentle and friendly girl from
# Hangzhou; natural, concise replies of one to three sentences; follows user
# instructions and empathises; can answer in Sichuanese and Cantonese).
# The adjacent literals below are joined by the compiler into one string.
SPOKEN_S2M_PROMPT = " ".join(
    (
        DEFAULT_S2M_PROMPT,
        "你的名字是小云。"
        "你是一位来自杭州的温柔友善的女孩,声音甜美,举止亲切。"
        "你的回复语气自然友好,力求沟通简洁明了。"
        "你的回复简短,通常只有一到三句话,避免使用正式的称谓和重复的短语。"
        "你能用恰当的声音回复,遵循用户的指示,并能共情他们的情绪。"
        "你能用恰当的方言回复,会说四川话和粤语。",
    )
)

# Function-calling prompt: the S2T prompt followed by tool-usage instructions,
# one instruction per line, with no trailing newline.
# NOTE: "{tools_definition}" is a placeholder for the tool signatures, but the
# text also contains literal JSON braces, so str.format() would raise on it;
# fill the placeholder with str.replace() instead.
FUNCTION_CALLING_PROMPT = "\n".join(
    (
        DEFAULT_S2T_PROMPT,
        "# Tools",
        "You may call one or more functions to assist with the user query.",
        "You are provided with function signatures within <tools></tools> XML tags:",
        "<tools>",
        "{tools_definition}",
        "</tools>",
        "For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:",
        "<tool_call>",
        '{"name": <function-name>, "arguments": <args-json-object>}',
        "</tool_call>",
    )
)
# ============= Templates =============

# Audio padding token.
AUDIO_PAD_TOKEN = "<|audio_pad|>"

# Same text as the marker that opens AUDIO_TEMPLATE.
AUDIO_BOS_TOKEN = "<|audio_bos|>"

# Audio input template: the <|AUDIO|> placeholder enclosed by the
# <|audio_bos|> and <|audio_eos|> markers. The adjacent literals are joined
# by the compiler into one string.
AUDIO_TEMPLATE = (
    "<|audio_bos|>"
    "<|AUDIO|>"
    "<|audio_eos|>"
)
# ============= Model Config =============

# Token frame rate, in frames per second.
TOKEN_FPS = 25

# Default boolean switches for speech generation.
# NOTE(review): the meaning of each flag is defined by the consumer of this
# dict, which is not visible in this file — confirm there before changing.
DEFAULT_SP_GEN_KWARGS = dict(
    text_greedy=False,
    only_crq_sampling=True,
    disable_speech=False,
    force_text_abos=True,
)

# Default sampling settings for S2M generation.
# NOTE(review): the key names look like Hugging Face `generate()` arguments
# and the two token ids presumably belong to the model's tokenizer — verify
# against the caller and the tokenizer in use.
DEFAULT_S2M_GEN_KWARGS = dict(
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    top_k=0,
    num_beams=1,
    max_new_tokens=512,
    repetition_penalty=1.2,
    length_penalty=1.0,
    eos_token_id=151645,
    pad_token_id=151643,
)

# Only the most recent 8 conversation rounds are kept, i.e. 16 messages
# (8 from the user and 8 from the assistant).
MAX_HISTORY_TURNS = 8
# ============= TTS Config =============

TOKEN_HOP_LEN = 15
PRE_LOOKAHEAD_LEN = 3

# Both limits are multiples of TOKEN_FPS (defined in the Model Config
# section), i.e. 8 and 20 seconds' worth of tokens at that frame rate.
MAX_TTS_TOKENS = 8 * TOKEN_FPS
MAX_TTS_HISTORY = 20 * TOKEN_FPS

# NOTE(review): this name is lower-case, unlike the other constants in this
# file; it is left as-is because renaming would change the module's interface.
# The path is relative — presumably resolved from the working directory;
# confirm against the loader.
tts_model_config = dict(
    spk_emb_path="utils/new_spk2info.pt",
)