@@ -1980,6 +1980,7 @@ def expand_bsz_map(real_bsz_to_captured_size):
19801980 int (envs .ENABLE_V1_KVCACHE_SCHEDULER ) == 0
19811981 and self .model_config is not None
19821982 and self .model_config .enable_mm
1983+ and self .deploy_modality != DeployModality .TEXT
19831984 ):
19841985 self .max_prefill_batch = 1 # TODO:当前V0多模prefill阶段只支持并行度为1,待优化
19851986 else :
@@ -2019,6 +2020,20 @@ def expand_bsz_map(real_bsz_to_captured_size):
20192020 self .check ()
20202021 # self.print() # NOTE: it's better to explicitly call .print() when FDConfig is initialized
20212022
@property
def enable_mm_runtime(self) -> bool:
    """Whether the multimodal execution path is active for this deployment.

    Requires a model config that declares multimodal support AND a deploy
    modality other than plain text; a text-only deployment of a
    multimodal-capable model runs the text path.
    """
    if self.model_config is None:
        return False
    if not self.model_config.enable_mm:
        return False
    return self.deploy_modality != DeployModality.TEXT
2030+
@property
def enable_rope_3d_runtime(self) -> bool:
    """Whether the active multimodal runtime should use 3D RoPE.

    Only meaningful when the multimodal runtime itself is enabled.
    Accepts either spelling of the config flag ("rope_3d" or
    "use_3d_rope"); both default to False when absent from the config.
    """
    if not self.enable_mm_runtime:
        return False
    cfg = self.model_config
    # Either flag name turns on the 3D RoPE path.
    return getattr(cfg, "rope_3d", False) or getattr(cfg, "use_3d_rope", False)
2036+
20222037 def _disable_sequence_parallel_moe_if_needed (self , mode_name ):
20232038 if self .parallel_config .use_sequence_parallel_moe and self .graph_opt_config .use_cudagraph :
20242039 self .parallel_config .use_sequence_parallel_moe = False
@@ -2057,9 +2072,21 @@ def postprocess(self):
20572072 if self .long_prefill_token_threshold == 0 :
20582073 self .long_prefill_token_threshold = int (self .model_config .max_model_len * 0.04 )
20592074
2075+ if (
2076+ self .model_config is not None
2077+ and self .model_config .enable_mm
2078+ and self .deploy_modality == DeployModality .TEXT
2079+ ):
2080+ if getattr (self .model_config , "rope_3d" , False ) or getattr (self .model_config , "use_3d_rope" , False ):
2081+ logger .info (
2082+ "Deploy modality is text; forcing the multimodal-capable model onto the 1D RoPE runtime path."
2083+ )
2084+ setattr (self .model_config , "rope_3d" , False )
2085+ setattr (self .model_config , "use_3d_rope" , False )
2086+
20602087 self .cache_config .max_block_num_per_seq = int (self .model_config .max_model_len // self .cache_config .block_size )
20612088 self .cache_config .postprocess (self .get_max_chunk_tokens (), self .scheduler_config .max_num_seqs )
2062- if self .model_config is not None and self .model_config . enable_mm and not envs .ENABLE_V1_KVCACHE_SCHEDULER :
2089+ if self .model_config is not None and self .enable_mm_runtime and not envs .ENABLE_V1_KVCACHE_SCHEDULER :
20632090 self .cache_config .enable_prefix_caching = False
20642091 if (
20652092 self .structured_outputs_config is not None
@@ -2085,7 +2112,7 @@ def postprocess(self):
20852112 f"Guided decoding backend '{ self .structured_outputs_config .guided_decoding_backend } ' is not implemented. [auto, xgrammar, guidance, off]"
20862113 )
20872114
2088- if self .model_config . enable_mm :
2115+ if self .enable_mm_runtime :
20892116 if self .cache_config .max_encoder_cache is None or self .cache_config .max_encoder_cache < 0 :
20902117 self .cache_config .max_encoder_cache = self .scheduler_config .max_num_batched_tokens
20912118 elif self .cache_config .max_encoder_cache != 0 :
@@ -2392,7 +2419,7 @@ def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
23922419 num_tokens = self .scheduler_config .max_num_seqs
23932420 else :
23942421 num_tokens = self .scheduler_config .max_num_batched_tokens
2395- if mm_max_tokens_per_item is not None and self . deploy_modality != DeployModality . TEXT :
2422+ if self . enable_mm_runtime and mm_max_tokens_per_item is not None :
23962423 max_mm_tokens = max (
23972424 mm_max_tokens_per_item .get ("image" , 0 ),
23982425 mm_max_tokens_per_item .get ("video" , 0 ),
0 commit comments