Add layer skipping in load_weights for debugging

grimoire · grimoire · commit 0b0c1c80c434 · 2026-05-09T17:03:34.000+08:00
diff --git a/lmdeploy/pytorch/models/deepseek_v4.py b/lmdeploy/pytorch/models/deepseek_v4.py
@@ -661,6 +661,7 @@ def __init__(self,
                  device: torch.device | str | None):
         super().__init__()
         self.norm_eps = args.norm_eps
+        self.layer_id = layer_id
         self.attn = Attention(config, layer_id, args, dtype=dtype, device=device)
         self.ffn = MoE(config, layer_id, args, dtype=dtype, device=device)
         self.attn_norm = RMSNorm(args.dim, args.norm_eps, dtype=dtype, device=device)
@@ -986,10 +987,23 @@ def _load_expert(self, name: str, weight: torch.Tensor, params_dict: dict[str, n
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
         params_dict = dict(self.named_parameters())
 
+        def __skip_layers():
+            """We might reduce the number of layers so we can debug the model
+            with fewer GPUs; weights of the dropped layers are skipped."""
+            import re
+            matches = re.findall(r'layers\.(\d+)\.', name)
+            if not matches:
+                return False
+            layer_id = int(matches[0])
+            return layer_id >= self.config.num_hidden_layers
+
         for name, loaded_weight in weights:
             if name.startswith('mtp.'):
                 continue
 
+            if __skip_layers():
+                continue
+
             if name.endswith('tie2eid'):
                 name = name.replace('tie2eid', 'tid2eid')
             if '.ffn.' in name: