From 5cf4206caca073c03fbc0a1ba9d746d7dc265ea3 Mon Sep 17 00:00:00 2001 From: chang-wenbin Date: Thu, 28 May 2026 23:30:24 +0800 Subject: [PATCH 1/2] support fleet-gqa-latent --- fastdeploy/worker/input_batch.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fastdeploy/worker/input_batch.py b/fastdeploy/worker/input_batch.py index aa34784b2a5..b1f657855e1 100644 --- a/fastdeploy/worker/input_batch.py +++ b/fastdeploy/worker/input_batch.py @@ -108,6 +108,10 @@ def __init__(self, fd_config: FDConfig) -> None: else: self.max_chunk_tokens = self.fd_config.get_max_chunk_tokens(self.model_config.mm_max_tokens_per_item) + # NOTE (changwenbin):Supports neox_rotary_style. + rotary_percent = getattr(self.model_config, "rotary_percent", 1) + self.rotary_dim = int(rotary_percent * self.model_config.head_dim) + def init_share_inputs(self): max_num_seqs = self.scheduler_config.max_num_seqs @@ -237,7 +241,7 @@ def init_share_inputs(self): # Initialize rotary position embedding if not self.enable_mm: self.rope_emb = get_rope( - rotary_dim=self.model_config.head_dim, + rotary_dim=self.rotary_dim, position_ids=paddle.arange(self.model_config.max_model_len).reshape((1, -1)), base=self.model_config.rope_theta, model_config=self.model_config, @@ -717,7 +721,7 @@ def reset_share_inputs(self): else: # Reset non-multimodal rope_emb self.rope_emb = get_rope( - rotary_dim=self.model_config.head_dim, + rotary_dim=self.rotary_dim, position_ids=paddle.arange(self.model_config.max_model_len).reshape((1, -1)), base=self.model_config.rope_theta, model_config=self.model_config, @@ -761,6 +765,10 @@ def __init__(self, fd_config: FDConfig, target_model_input_batch: InputBatch) -> self.speculative_config: SpeculativeConfig = fd_config.speculative_config self.enable_pd_reorder: bool = False + # NOTE (changwenbin):Supports neox_rotary_style. + rotary_percent = getattr(self.model_config, "rotary_percent", 1) + self.rotary_dim = int(rotary_percent * self.model_config.head_dim) + def init_share_inputs(self): # share with targe model self.enable_pd_reorder = getattr(self.target_model_input_batch, "enable_pd_reorder", False) @@ -817,7 +825,7 @@ def init_share_inputs(self): tmp_position_ids = paddle.arange(self.model_config.max_model_len).reshape((1, -1)) self.rope_emb = get_rope( - rotary_dim=self.model_config.head_dim, + rotary_dim=self.rotary_dim, position_ids=tmp_position_ids, base=self.model_config.rope_theta, model_config=self.model_config, @@ -1041,7 +1049,7 @@ def reset_model_inputs(self) -> None: # Reset rope embedding by recreating with default position_ids tmp_position_ids = paddle.arange(self.model_config.max_model_len).reshape((1, -1)) self.rope_emb = get_rope( - rotary_dim=self.model_config.head_dim, + rotary_dim=self.rotary_dim, position_ids=tmp_position_ids, base=self.model_config.rope_theta, model_config=self.model_config, From a142eaf4ef4579d0cb9af70513db03dd6256bd15 Mon Sep 17 00:00:00 2001 From: chang-wenbin Date: Fri, 29 May 2026 11:56:30 +0800 Subject: [PATCH 2/2] update --- fastdeploy/worker/input_batch.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fastdeploy/worker/input_batch.py b/fastdeploy/worker/input_batch.py index b1f657855e1..2ab3bb6823f 100644 --- a/fastdeploy/worker/input_batch.py +++ b/fastdeploy/worker/input_batch.py @@ -108,10 +108,6 @@ def __init__(self, fd_config: FDConfig) -> None: else: self.max_chunk_tokens = self.fd_config.get_max_chunk_tokens(self.model_config.mm_max_tokens_per_item) - # NOTE (changwenbin):Supports neox_rotary_style. - rotary_percent = getattr(self.model_config, "rotary_percent", 1) - self.rotary_dim = int(rotary_percent * self.model_config.head_dim) - def init_share_inputs(self): max_num_seqs = self.scheduler_config.max_num_seqs @@ -240,8 +236,10 @@ def init_share_inputs(self): # Initialize rotary position embedding if not self.enable_mm: + rotary_percent = getattr(self.model_config, "rotary_percent", 1.0) + self.rotary_dim = int(self.model_config.head_dim * rotary_percent) self.rope_emb = get_rope( - rotary_dim=self.rotary_dim, + rotary_dim=self.rotary_dim if rotary_percent < 1.0 else self.model_config.head_dim, position_ids=paddle.arange(self.model_config.max_model_len).reshape((1, -1)), base=self.model_config.rope_theta, model_config=self.model_config, @@ -720,8 +718,10 @@ def reset_share_inputs(self): fill_paddle_tensor(self, "attn_mask_offsets_full", -1) else: # Reset non-multimodal rope_emb + rotary_percent = getattr(self.model_config, "rotary_percent", 1.0) + self.rotary_dim = int(self.model_config.head_dim * rotary_percent) self.rope_emb = get_rope( - rotary_dim=self.rotary_dim, + rotary_dim=self.rotary_dim if rotary_percent < 1.0 else self.model_config.head_dim, position_ids=paddle.arange(self.model_config.max_model_len).reshape((1, -1)), base=self.model_config.rope_theta, model_config=self.model_config, @@ -765,10 +765,6 @@ def __init__(self, fd_config: FDConfig, target_model_input_batch: InputBatch) -> self.speculative_config: SpeculativeConfig = fd_config.speculative_config self.enable_pd_reorder: bool = False - # NOTE (changwenbin):Supports neox_rotary_style. - rotary_percent = getattr(self.model_config, "rotary_percent", 1) - self.rotary_dim = int(rotary_percent * self.model_config.head_dim) - def init_share_inputs(self): # share with targe model self.enable_pd_reorder = getattr(self.target_model_input_batch, "enable_pd_reorder", False) @@ -824,8 +820,10 @@ def init_share_inputs(self): tmp_position_ids = paddle.arange(self.model_config.max_model_len).reshape((1, -1)) + rotary_percent = getattr(self.model_config, "rotary_percent", 1.0) + self.rotary_dim = int(self.model_config.head_dim * rotary_percent) self.rope_emb = get_rope( - rotary_dim=self.rotary_dim, + rotary_dim=self.rotary_dim if rotary_percent < 1.0 else self.model_config.head_dim, position_ids=tmp_position_ids, base=self.model_config.rope_theta, model_config=self.model_config, @@ -1048,8 +1046,10 @@ def reset_model_inputs(self) -> None: # Reset rope embedding by recreating with default position_ids tmp_position_ids = paddle.arange(self.model_config.max_model_len).reshape((1, -1)) + rotary_percent = getattr(self.model_config, "rotary_percent", 1.0) + self.rotary_dim = int(self.model_config.head_dim * rotary_percent) self.rope_emb = get_rope( - rotary_dim=self.rotary_dim, + rotary_dim=self.rotary_dim if rotary_percent < 1.0 else self.model_config.head_dim, position_ids=tmp_position_ids, base=self.model_config.rope_theta, model_config=self.model_config,