Commit df9b428

Authored by CUHKSZzxy, RunningLeon, and lvhan028

Support InternS2 Preview (InternLM#4575)

* support interns2preview
* support time series
* fix time series
* fix visual
* fix: address InternS2 preview review comments
* fix: align InternS1 Pro time-series handling
* fix: restore InternS1 Pro processor dtype contract
* fix: require dtype for Qwen3 VL input processor

Co-authored-by: RunningLeon <mnsheng@yeah.net>
Co-authored-by: 吕晗 <lvhan@pjlab.org.cn>

1 parent 0bf8a07, commit df9b428

14 files changed, with 227 additions and 33 deletions

lmdeploy/archs.py

Lines changed: 7 additions & 2 deletions
@@ -114,15 +114,20 @@ def check_vl_llm(backend: str, config: dict) -> bool:
         'Qwen3_5MoeForConditionalGeneration', 'MllamaForConditionalGeneration', 'MolmoForCausalLM',
         'Gemma3ForConditionalGeneration', 'Llama4ForConditionalGeneration', 'InternVLForConditionalGeneration',
         'InternS1ForConditionalGeneration', 'InternS1ProForConditionalGeneration',
-        'InternS1_1_ForConditionalGeneration', 'Glm4vForConditionalGeneration'
+        'InternS1_1_ForConditionalGeneration', 'Glm4vForConditionalGeneration',
+        'InternS2PreviewForConditionalGeneration', 'InternS2PreviewForCausalLM',
     ])
+    turbomind_unsupported_archs = ['Qwen3_5ForConditionalGeneration',
+                                   'Qwen3_5MoeForConditionalGeneration',
+                                   'InternS2PreviewForConditionalGeneration',
+                                   'InternS2PreviewForCausalLM']
     if arch == 'QWenLMHeadModel' and 'visual' in config:
         return True
     elif arch == 'MultiModalityCausalLM' and 'language_config' in config:
         return True
     elif arch in ['ChatGLMModel', 'ChatGLMForConditionalGeneration'] and 'vision_config' in config:
         return True
-    elif arch in ['Qwen3_5ForConditionalGeneration', 'Qwen3_5MoeForConditionalGeneration'] and backend == 'turbomind':
+    elif arch in turbomind_unsupported_archs and backend == 'turbomind':
         return False
     elif arch in supported_archs:
         return True
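For readers skimming the diff, a minimal standalone sketch of the gating performed by the new turbomind_unsupported_archs list; the helper below is illustrative only, not lmdeploy's actual check_vl_llm body:

# Minimal sketch of the backend gate added above; `is_vl_arch_supported` is a
# hypothetical helper, not part of lmdeploy's public API.
TURBOMIND_UNSUPPORTED_ARCHS = {
    'Qwen3_5ForConditionalGeneration',
    'Qwen3_5MoeForConditionalGeneration',
    'InternS2PreviewForConditionalGeneration',
    'InternS2PreviewForCausalLM',
}


def is_vl_arch_supported(backend: str, arch: str) -> bool:
    """Return False when a VL arch must fall back to the pytorch backend."""
    if arch in TURBOMIND_UNSUPPORTED_ARCHS and backend == 'turbomind':
        return False
    return True


assert is_vl_arch_supported('pytorch', 'InternS2PreviewForCausalLM')
assert not is_vl_arch_supported('turbomind', 'InternS2PreviewForCausalLM')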

lmdeploy/pytorch/config.py

Lines changed: 2 additions & 0 deletions
@@ -567,6 +567,7 @@ def from_config(
         target_model: str = None,
         dtype: str = 'auto',
         trust_remote_code: bool = False,
+        hf_overrides: dict[str, Any] = None,
     ):
         model = model or target_model
         model_config = ModelConfig.from_pretrained(model,
@@ -575,6 +576,7 @@ def from_config(
                                                    is_draft_model=True,
                                                    spec_method=method,
                                                    block_size=target_cache_cfg.block_size,
+                                                   hf_overrides=hf_overrides,
                                                    )
         cache_config = None
         # include medusa

lmdeploy/pytorch/configurations/qwen3_5.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ class Qwen3_5ModelConfigBuilder(AutoModelConfigBuilder):
     @classmethod
     def condition(cls, hf_config):
         """config."""
-        return hf_config.model_type in ['qwen3_5', 'qwen3_5_moe']
+        return hf_config.model_type in ['qwen3_5', 'qwen3_5_moe', 'intern_s2_preview']

     @classmethod
     def build(cls,

lmdeploy/pytorch/engine/config_builder.py

Lines changed: 1 addition & 0 deletions
@@ -115,5 +115,6 @@ def build_specdecode_config(target_model, speculative_config: SpeculativeConfig,
         target_cache_cfg=cache_config,
         dtype=engine_config.dtype,
         trust_remote_code=trust_remote_code,
+        hf_overrides=engine_config.hf_overrides,
     )
     return specdecode_config
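The net effect of the two hf_overrides hunks above is that overrides supplied on the engine config now also reach the speculative (draft) model config. A hedged usage sketch, assuming the pytorch backend config exposes an hf_overrides field (as engine_config does in this hunk); the model path and override values are placeholders:

# Hedged usage sketch: set hf_overrides once on the backend config; with this
# commit it is forwarded to the draft-model config as well. The override keys
# and model path below are illustrative, not a recommendation.
from lmdeploy import PytorchEngineConfig, pipeline

backend_config = PytorchEngineConfig(hf_overrides={'rope_scaling': {'rope_type': 'yarn', 'factor': 2.0}})
pipe = pipeline('path/to/model', backend_config=backend_config)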

lmdeploy/pytorch/messages.py

Lines changed: 5 additions & 0 deletions
@@ -12,6 +12,7 @@
 from lmdeploy.pytorch.disagg.conn.protocol import MigrationRequest
 from lmdeploy.pytorch.multimodal.data_type import MultiModalInputs
 from lmdeploy.utils import get_logger
+from lmdeploy.vl.constants import Modality

 from .block import LogicalTokenBlocks

@@ -872,6 +873,10 @@ def _update_mrope_pos_ids(self):
         modal_datas = list(multimodals.values())[0]
         mm_offset = next_pos
         for modal_data in modal_datas:
+            # InternS2Preview uses mrope for image / video, except time series
+            if modal_data.modality == Modality.TIME_SERIES:
+                continue
+
             mm_start = modal_data.start + mm_offset

             # tokens
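A simplified, self-contained illustration of the skip added above: only image/video segments shift the mrope offset, while TIME_SERIES segments are left to plain positions. The Modality and ModalData stand-ins below are toy versions of the real lmdeploy types:

# Toy sketch of the loop above; only the skip logic is reproduced.
from dataclasses import dataclass
from enum import Enum


class Modality(Enum):  # stand-in for lmdeploy.vl.constants.Modality
    IMAGE = 'image'
    TIME_SERIES = 'time_series'


@dataclass
class ModalData:  # simplified stand-in for the real multimodal record
    modality: Modality
    start: int


def mm_starts(modal_datas, mm_offset):
    starts = []
    for modal_data in modal_datas:
        if modal_data.modality == Modality.TIME_SERIES:
            continue  # time series does not take part in mrope position ids
        starts.append(modal_data.start + mm_offset)
    return starts


print(mm_starts([ModalData(Modality.IMAGE, 4), ModalData(Modality.TIME_SERIES, 20)], mm_offset=2))
# -> [6]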

lmdeploy/pytorch/models/interns1_pro.py

Lines changed: 6 additions & 2 deletions
@@ -123,6 +123,8 @@ def forward(
             multimodal_mask = multimodal_mask.unsqueeze(-1).expand_as(inputs_embeds)
             inputs_embeds = inputs_embeds.masked_scatter(multimodal_mask, image_embeds)
         elif ts_values is not None:
+            if not hasattr(self, 'time_series'):
+                raise RuntimeError('Time-series inputs require a time_series module.')
             ts_embeds = self.time_series(ts_values, ts_lens, ts_sr)  # [B, T, C]
             inputs_embeds = inputs_embeds.masked_scatter(multimodal_mask[..., None], ts_embeds)

@@ -182,8 +184,8 @@ def prepare_inputs_for_generation(

             if modality == Modality.TIME_SERIES:
                 ts_values = torch.cat([inp.data for inp in mm_inputs])
-                ts_lens = mm_inputs[0].meta['ts_lens']
-                ts_sr = mm_inputs[0].meta['ts_sr']
+                ts_lens = torch.cat([inp.meta['ts_lens'] for inp in mm_inputs])
+                ts_sr = torch.cat([inp.meta['ts_sr'] for inp in mm_inputs])
             else:
                 pixel_values = torch.cat([inp.data for inp in mm_inputs])
                 grid_thw = torch.stack([data.meta['grid_thw'] for data in mm_inputs]).cpu()
@@ -346,6 +348,8 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
             elif name in buffers_dict:
                 param = buffers_dict[name]
                 load_weight(param, loaded_weight)
+            else:
+                raise KeyError(f'Unexpected weight name: {name}')

     def get_input_processor(self) -> BaseModelInputProcessor:
         """Get input processor."""

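A toy example of the masked_scatter pattern used in the forward hunk above: time-series embeddings fill exactly the token positions flagged by the multimodal mask. Shapes here are illustrative:

# Toy illustration of the masked_scatter pattern: positions flagged True in
# the mask are overwritten, in order, with the time-series embeddings.
import torch

hidden = 4
inputs_embeds = torch.zeros(1, 6, hidden)                                   # [B, S, C]
multimodal_mask = torch.tensor([[False, True, True, True, False, False]])   # [B, S]
ts_embeds = torch.ones(1, 3, hidden)                                        # [B, T, C], T == mask.sum()

out = inputs_embeds.masked_scatter(multimodal_mask[..., None], ts_embeds)
assert out[0, 1:4].eq(1).all() and out[0, 0].eq(0).all()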
lmdeploy/pytorch/models/interns1_pro_time_series.py

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:

         self.embed_dim = config.d_model
         self.num_mel_bins = config.num_mel_bins
-        self.padding_idx = config.pad_token_id
         self.max_source_positions = config.max_source_positions
         self.embed_scale = math.sqrt(self.embed_dim) if config.scale_embedding else 1.0

lmdeploy/pytorch/models/module_map.py

Lines changed: 10 additions & 0 deletions
@@ -186,6 +186,16 @@
     f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_5_moe.Qwen3_5MoeForConditionalGeneration',
 })

+# interns2preview
+MODULE_MAP.update({
+    'InternS2PreviewForConditionalGeneration':
+    f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_5_moe.Qwen3_5MoeForConditionalGeneration',
+})
+MODULE_MAP.update({
+    'InternS2PreviewForCausalLM':
+    f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_5_moe.Qwen3_5MoeForConditionalGeneration',
+})
+
 MODULE_MAP.update({
     'Qwen3_5MTPModel': f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_5_mtp.Qwen3_5MTPModel',
 })
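Both InternS2 Preview architectures reuse the existing Qwen3_5MoeForConditionalGeneration implementation. Below is an illustrative resolver for such a dotted-path map; lmdeploy's actual model loader differs, and the literal path assumes LMDEPLOY_PYTORCH_MODEL_PATH points at lmdeploy.pytorch.models:

# Illustrative resolver for a dotted-path module map; this only sketches how
# the new entries would be consumed, not lmdeploy's real loading code.
import importlib

MODULE_MAP = {
    'InternS2PreviewForCausalLM': 'lmdeploy.pytorch.models.qwen3_5_moe.Qwen3_5MoeForConditionalGeneration',
}


def resolve(arch: str):
    """Import the module named by the dotted path and return the class."""
    path = MODULE_MAP[arch]
    module_name, _, class_name = path.rpartition('.')
    return getattr(importlib.import_module(module_name), class_name)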

lmdeploy/pytorch/models/qwen3_5.py

Lines changed: 43 additions & 11 deletions
@@ -26,6 +26,7 @@
 )
 from lmdeploy.pytorch.nn.rotary_embedding import get_rope_parameters
 from lmdeploy.pytorch.weight_loader.model_weight_loader import default_weight_loader, load_weight
+from lmdeploy.vl.constants import Modality

 from .patch import add_prefix, get_build_model_context
 from .qwen2_5_vl import Qwen2_5_VisionRotaryEmbedding as Qwen3_5VisionRotaryEmbedding
@@ -1026,6 +1027,10 @@ def forward(
         grid_thw: torch.Tensor | None = None,
         all_routed_experts: torch.Tensor | None = None,
         return_input_embeds: bool = False,
+        # for time series
+        ts_values: torch.Tensor = None,
+        ts_lens: torch.Tensor = None,
+        ts_sr: torch.Tensor = None,
     ):
         """Model forward, return logits."""

@@ -1052,6 +1057,11 @@ def forward(
             # mask and scatter to create final input embeddings
             multimodal_mask = multimodal_mask.unsqueeze(-1).expand_as(inputs_embeds)
             inputs_embeds = inputs_embeds.masked_scatter(multimodal_mask, image_embeds)
+        elif ts_values is not None:
+            if not hasattr(self, 'time_series'):
+                raise RuntimeError('Time-series inputs require a time_series module.')
+            ts_embeds = self.time_series(ts_values, ts_lens, ts_sr)  # [B, T, C]
+            inputs_embeds = inputs_embeds.masked_scatter(multimodal_mask[..., None], ts_embeds)

         output_inputs_embeds = inputs_embeds if return_input_embeds else None

@@ -1098,7 +1108,7 @@ def __init__(self,
         self.ctx_mgr = ctx_mgr

         # build preprocessor
-        self.input_processor = Qwen3_5InputProcessor(self.config)
+        self.input_processor = Qwen3_5InputProcessor(self.config, dtype)

         # build model
         self.model = Qwen3_5Model(config, dtype=dtype, device=device, prefix=add_prefix('model', prefix))
@@ -1129,6 +1139,10 @@ def forward(
         pos_embeds: torch.Tensor | None = None,
         grid_thw: torch.Tensor | None = None,
         return_input_embeds: bool = False,
+        # for time series
+        ts_values: torch.Tensor = None,
+        ts_lens: torch.Tensor = None,
+        ts_sr: torch.Tensor = None,
         **kwargs,
     ):
         """Model forward, return logits."""
@@ -1155,6 +1169,10 @@ def forward(
             grid_thw=grid_thw,
             all_routed_experts=all_routed_experts,
             return_input_embeds=return_input_embeds,
+            # for time series
+            ts_values=ts_values,
+            ts_lens=ts_lens,
+            ts_sr=ts_sr,
         )
         return dict(hidden_states=hidden_states,
                     all_routed_experts=all_routed_experts,
@@ -1194,23 +1212,33 @@ def prepare_inputs_for_generation(
         multimodal_mask = None
         grid_thw = None
         pos_embeds = None
+        # for time series
+        ts_values = None
+        ts_lens = None
+        ts_sr = None
         if context.input_multimodals is not None:
             mm_inputs = [input_mm.get('mm_data', []) for input_mm in context.input_multimodals]
             # flatten batch
             mm_inputs = [item for sublist in mm_inputs for item in sublist]

             if len(mm_inputs) > 0:
-                pixel_values = torch.cat([inp.data for inp in mm_inputs])
-
+                modality = mm_inputs[0].modality
                 multimodal_mask = self.get_multimodal_mask(input_ids, mm_inputs)
-                grid_thw = torch.stack([data.meta['grid_thw'] for data in mm_inputs]).cpu()
-                vis_pos_emb = self.model.visual.rot_pos_emb(grid_thw)
-                pos_embeds = self.model.visual.fast_pos_embed_interpolate(grid_thw)
-                vis_cu_seqlens = torch.repeat_interleave(grid_thw[:, 1] * grid_thw[:, 2],
-                                                         grid_thw[:, 0]).to(pixel_values.device)
-                vis_cu_seqlens = vis_cu_seqlens.cumsum(dim=0, dtype=torch.int32)
-                vis_pos_emb = vis_pos_emb.repeat(1, 2)
-                vis_pos_emb = (vis_pos_emb.cos(), vis_pos_emb.sin())
+
+                if modality == Modality.TIME_SERIES:
+                    ts_values = torch.cat([inp.data for inp in mm_inputs])
+                    ts_lens = torch.cat([inp.meta['ts_lens'] for inp in mm_inputs])
+                    ts_sr = torch.cat([inp.meta['ts_sr'] for inp in mm_inputs])
+                else:
+                    pixel_values = torch.cat([inp.data for inp in mm_inputs])
+                    grid_thw = torch.stack([data.meta['grid_thw'] for data in mm_inputs]).cpu()
+                    vis_pos_emb = self.model.visual.rot_pos_emb(grid_thw)
+                    pos_embeds = self.model.visual.fast_pos_embed_interpolate(grid_thw)
+                    vis_cu_seqlens = torch.repeat_interleave(grid_thw[:, 1] * grid_thw[:, 2],
+                                                             grid_thw[:, 0]).to(pixel_values.device)
+                    vis_cu_seqlens = vis_cu_seqlens.cumsum(dim=0, dtype=torch.int32)
+                    vis_pos_emb = vis_pos_emb.repeat(1, 2)
+                    vis_pos_emb = (vis_pos_emb.cos(), vis_pos_emb.sin())

         mrope_position_ids = getattr(context, 'mrope_position_ids', None)

@@ -1242,6 +1270,10 @@ def prepare_inputs_for_generation(
             grid_thw=grid_thw,
             pos_embeds=pos_embeds,
             return_input_embeds=return_input_embeds,
+            # for time series
+            ts_values=ts_values,
+            ts_lens=ts_lens,
+            ts_sr=ts_sr,
         )

     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
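The prepare_inputs_for_generation hunk also fixes time-series batching: ts_lens and ts_sr are now concatenated across all multimodal inputs instead of being read from the first one only. A toy check of the difference, with made-up values:

# Toy check of the batching fix: metadata must be concatenated across all
# time-series inputs, not taken from the first input only.
import torch

metas = [{'ts_lens': torch.tensor([128]), 'ts_sr': torch.tensor([50])},
         {'ts_lens': torch.tensor([256]), 'ts_sr': torch.tensor([100])}]

ts_lens = torch.cat([m['ts_lens'] for m in metas])   # tensor([128, 256])
ts_sr = torch.cat([m['ts_sr'] for m in metas])       # tensor([ 50, 100])

# the old behaviour (metas[0]['ts_lens']) would drop the second series' metadata
assert ts_lens.tolist() == [128, 256] and ts_sr.tolist() == [50, 100]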

lmdeploy/pytorch/models/qwen3_5_moe.py

Lines changed: 21 additions & 8 deletions
@@ -13,6 +13,7 @@
 from lmdeploy.pytorch.nn.moe import build_fused_moe
 from lmdeploy.pytorch.weight_loader.model_weight_loader import load_weight

+from .interns1_pro_time_series import InternS1ProTimeSeriesModel
 from .patch import add_prefix, get_build_model_context
 from .qwen3_5 import (
     Qwen3_5Attention,
@@ -232,6 +233,9 @@ def __init__(self,
                                       device=device,
                                       prefix=add_prefix('language_model', prefix))

+        # build time series model
+        if hasattr(config, 'ts_config'):
+            self.time_series = InternS1ProTimeSeriesModel(config.ts_config, dtype=dtype, device=device)

 class Qwen3_5MoeForConditionalGeneration(Qwen3_5ForConditionalGeneration):
     """ModelForCausalLM."""
@@ -259,7 +263,7 @@ def __init__(self,
         self.ctx_mgr = ctx_mgr

         # build preprocessor
-        self.input_processor = Qwen3_5MoeInputProcessor(self.config)
+        self.input_processor = Qwen3_5MoeInputProcessor(self.config, dtype)

         # build model
         self.model = Qwen3_5MoeModel(config, dtype=dtype, device=device, prefix=add_prefix('model', prefix))
@@ -351,6 +355,7 @@ def __skip_layers(name):
         rms_norm_keys = ['model.norm', '.input_layernorm', '.post_attention_layernorm', '.q_norm', '.k_norm']

         params_dict = dict(self.named_parameters())
+        buffers_dict = dict(self.named_buffers())
         for name, loaded_weight in weights:

             if __skip_layers(name):
@@ -369,7 +374,9 @@ def __skip_layers(name):
                 self._load_weight_experts(name, loaded_weight, params_dict)
             else:
                 for (param_name, weight_name, shard_id) in stacked_params_mapping:
-                    if weight_name not in name:
+                    # include dot to avoid partial match
+                    # e.g. in_proj_ba (in linear attn) vs in_proj_bias (in time series)
+                    if f'{weight_name}.' not in name:
                         continue
                     name = name.replace(weight_name, param_name)
                     param = params_dict[name]
@@ -384,9 +391,15 @@ def __skip_layers(name):
                     load_weight(param, k, shard_id='k')
                     load_weight(param, v, shard_id='v')
                 else:
-                    for rms_norm_key in rms_norm_keys:
-                        if rms_norm_key in name and 'weight' in name:
-                            loaded_weight = loaded_weight + 1
-                            break
-                    param = params_dict[name]
-                    load_weight(param, loaded_weight)
+                    if name in params_dict:
+                        for rms_norm_key in rms_norm_keys:
+                            if rms_norm_key in name and 'weight' in name:
+                                loaded_weight = loaded_weight + 1
+                                break
+                        param = params_dict[name]
+                        load_weight(param, loaded_weight)
+                    elif name in buffers_dict:
+                        param = buffers_dict[name]
+                        load_weight(param, loaded_weight)
+                    else:
+                        raise KeyError(f'Unexpected weight name: {name}')
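The trailing dot in the stacked-params match makes a mapping key match only a complete attribute segment, so a key cannot accidentally capture a longer, unrelated weight name. A generic illustration with hypothetical names (not taken from the model):

# Why the trailing dot helps: a bare substring test on a short key can match
# a longer attribute name; requiring 'key.' matches only the full segment.
# All names below are hypothetical.
weight_name = 'in_proj'

full_match = 'layers.0.attn.in_proj.weight'
longer_name = 'layers.0.attn.in_proj_bias'

assert weight_name in longer_name            # bare substring test: false positive
assert f'{weight_name}.' in full_match       # dot-suffixed test: real match
assert f'{weight_name}.' not in longer_name  # dot-suffixed test: no false positive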
