@@ -1992,6 +1992,7 @@ def __init__(
19921992 int (envs .ENABLE_V1_KVCACHE_SCHEDULER ) == 0
19931993 and self .model_config is not None
19941994 and self .model_config .enable_mm
1995+ and self .deploy_modality != DeployModality .TEXT
19951996 ):
19961997 self .max_prefill_batch = 1 # TODO:当前V0多模prefill阶段只支持并行度为1,待优化
19971998 else :
@@ -2031,6 +2032,20 @@ def __init__(
20312032 self .check ()
20322033 # self.print() # NOTE: it's better to explicitly call .print() when FDConfig is initialized
20332034
@property
def enable_mm_runtime(self) -> bool:
    """Whether the multimodal runtime path is active for this deployment.

    True only when a model config is present, the model itself is
    multimodal-capable (``enable_mm``), and the configured deployment
    modality is not text-only — a multimodal model deployed as TEXT
    runs on the plain-text path.
    """
    cfg = self.model_config
    # Guard clauses mirror the original short-circuit: bail out as soon
    # as any precondition fails.
    if cfg is None:
        return False
    if not cfg.enable_mm:
        return False
    return self.deploy_modality != DeployModality.TEXT
2042+
@property
def enable_rope_3d_runtime(self) -> bool:
    """Whether the 3D RoPE runtime path should be used.

    Requires the multimodal runtime to be enabled AND the model config
    to flag 3D rotary embeddings under either attribute spelling
    (``rope_3d`` or ``use_3d_rope``) — presumably two naming variants
    across model configs; confirmed only by the getattr fallbacks here.
    """
    # Early return keeps the original `and` short-circuit: a disabled
    # multimodal runtime yields False without touching model_config.
    if not self.enable_mm_runtime:
        return False
    # `or` of the two flag spellings, exactly as the original expression.
    return getattr(self.model_config, "rope_3d", False) or getattr(
        self.model_config, "use_3d_rope", False
    )
20342049 def _disable_sequence_parallel_moe_if_needed (self , mode_name ):
20352050 if self .parallel_config .use_sequence_parallel_moe and self .graph_opt_config .use_cudagraph :
20362051 self .parallel_config .use_sequence_parallel_moe = False
@@ -2069,9 +2084,21 @@ def postprocess(self):
20692084 if self .long_prefill_token_threshold == 0 :
20702085 self .long_prefill_token_threshold = int (self .model_config .max_model_len * 0.04 )
20712086
2087+ if (
2088+ self .model_config is not None
2089+ and self .model_config .enable_mm
2090+ and self .deploy_modality == DeployModality .TEXT
2091+ ):
2092+ if getattr (self .model_config , "rope_3d" , False ) or getattr (self .model_config , "use_3d_rope" , False ):
2093+ logger .info (
2094+ "Deploy modality is text; forcing the multimodal-capable model onto the 2D RoPE runtime path."
2095+ )
2096+ setattr (self .model_config , "rope_3d" , False )
2097+ setattr (self .model_config , "use_3d_rope" , False )
2098+
20722099 self .cache_config .max_block_num_per_seq = int (self .model_config .max_model_len // self .cache_config .block_size )
20732100 self .cache_config .postprocess (self .get_max_chunk_tokens (), self .scheduler_config .max_num_seqs )
2074- if self .model_config is not None and self .model_config . enable_mm and not envs .ENABLE_V1_KVCACHE_SCHEDULER :
2101+ if self .model_config is not None and self .enable_mm_runtime and not envs .ENABLE_V1_KVCACHE_SCHEDULER :
20752102 self .cache_config .enable_prefix_caching = False
20762103 if (
20772104 self .structured_outputs_config is not None
@@ -2097,7 +2124,7 @@ def postprocess(self):
20972124 f"Guided decoding backend '{ self .structured_outputs_config .guided_decoding_backend } ' is not implemented. [auto, xgrammar, guidance, off]"
20982125 )
20992126
2100- if self .model_config . enable_mm :
2127+ if self .enable_mm_runtime :
21012128 if self .cache_config .max_encoder_cache is None or self .cache_config .max_encoder_cache < 0 :
21022129 self .cache_config .max_encoder_cache = self .scheduler_config .max_num_batched_tokens
21032130 elif self .cache_config .max_encoder_cache != 0 :
@@ -2404,7 +2431,7 @@ def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
24042431 num_tokens = self .scheduler_config .max_num_seqs
24052432 else :
24062433 num_tokens = self .scheduler_config .max_num_batched_tokens
2407- if mm_max_tokens_per_item is not None and self . deploy_modality != DeployModality . TEXT :
2434+ if self . enable_mm_runtime and mm_max_tokens_per_item is not None :
24082435 max_mm_tokens = max (
24092436 mm_max_tokens_per_item .get ("image" , 0 ),
24102437 mm_max_tokens_per_item .get ("video" , 0 ),
0 commit comments