Skip to content

Commit 8f8ed44

Browse files
committed
merge main
1 parent 92bf83a commit 8f8ed44

2 files changed

Lines changed: 6 additions & 4 deletions

File tree

lightllm/models/neo_chat_moe/layer_infer/transformer_layer_infer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,13 @@ def _get_qkv_mergekv(
108108
):
109109
input = input.view(-1, self.embed_dim_)
110110

111-
q = layer_weight.q_proj.mm(input) # [T, Hq*D]
111+
qkv = layer_weight.qkv_proj.mm(input)
112+
q, cache_kv = qkv.split(
113+
[self.tp_q_head_num_ * self.head_dim_, (self.tp_k_head_num_ + self.tp_v_head_num_) * self.head_dim_], dim=-1
114+
)
112115
q_hw = layer_weight.q_hw_proj.mm(input)
113116
k_hw = layer_weight.k_hw_proj.mm(input)
114117

115-
cache_kv = layer_weight.kv_proj.mm(input) # [T, (Hk+Hv)*D]
116-
117118
layer_weight.q_norm_weight_(q, eps=self.eps_)
118119
layer_weight.q_norm_hw_weight_(q_hw, eps=self.eps_)
119120
layer_weight.k_norm_hw_weight_(k_hw, eps=self.eps_)

lightllm/server/api_start.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ def _set_envs_and_config(args: StartArgs):
9191
def _launch_subprocesses(args: StartArgs):
9292

9393
_set_envs_and_config(args)
94-
set_unique_server_name(args)
9594

9695
if not args.disable_shm_warning:
9796
check_recommended_shm_size(args)
@@ -291,6 +290,8 @@ def _launch_subprocesses(args: StartArgs):
291290
args.pd_p_allowed_port_min = 20000
292291
args.pd_p_allowed_port_max = 30000
293292

293+
set_unique_server_name(args)
294+
294295
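# p d 分离模式下,decode节点的调度间隙是0 (in PD-disaggregated mode, i.e. prefill/decode separation, the decode node's scheduling wait interval is 0)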
295296
if args.run_mode == "decode":
296297
args.router_max_wait_tokens = 0

0 commit comments

Comments
 (0)