Skip to content

Commit fc87519

Browse files
committed
cleanup
Signed-off-by: Olya Kozlova <okozlova@nvidia.com>
1 parent 2e47843 commit fc87519

File tree

4 files changed

+15
-21
lines changed

4 files changed

+15
-21
lines changed

tensorrt_llm/_torch/models/checkpoints/mistral/weight_mapper.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from torch import nn
2-
31
from tensorrt_llm._torch.models.checkpoints.hf.weight_mapper import HfWeightMapper
42
from tensorrt_llm._torch.models.modeling_utils import register_mapper
53

@@ -92,20 +90,14 @@ def permute(w, n_heads: int, head_dim: int, hidden_size: int):
9290
# If using quantized model in mistral format,
9391
# quantization scales (qscale_weight) also need to be sliced
9492
for name in weights.keys():
95-
# TODO: add scales if dequant is necessary
93+
# TODO: add scales if dequant is necessary
9694
if ".wq.weight" in name:
9795
weights[name] = permute(
98-
weights[name],
99-
config.num_attention_heads,
100-
config.head_dim,
101-
config.hidden_size
96+
weights[name], config.num_attention_heads, config.head_dim, config.hidden_size
10297
)
10398
elif ".wk.weight" in name:
10499
weights[name] = permute(
105-
weights[name],
106-
config.num_key_value_heads,
107-
config.head_dim,
108-
config.hidden_size
100+
weights[name], config.num_key_value_heads, config.head_dim, config.hidden_size
109101
)
110102
return weights
111103

tensorrt_llm/_torch/models/modeling_mistral.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -623,13 +623,14 @@ def load_weights(self, weights: Dict, weight_mapper=None, *args, **kwargs):
623623

624624
llm_weights = filter_weights(weights=weights, prefix="language_model")
625625
logger.debug(f"Loading weights for {type(self.llm)}")
626-
if weight_mapper:
627-
weight_mapper.permute_qk(weights=llm_weights, config=self.llm.config)
628-
self.llm.load_weights(llm_weights,
629-
weight_mapper=weight_mapper,
630-
params_map=weight_mapper.mistral_llm_mapping)
631-
else:
632-
self.llm.load_weights(llm_weights)
626+
if weight_mapper:
627+
weight_mapper.permute_qk(weights=llm_weights,
628+
config=self.llm.config)
629+
self.llm.load_weights(llm_weights,
630+
weight_mapper=weight_mapper,
631+
params_map=weight_mapper.mistral_llm_mapping)
632+
else:
633+
self.llm.load_weights(llm_weights)
633634
logger.debug(f"Successfully loaded weights for {type(self.llm)}")
634635

635636
vit_weights = filter_weights(weights=weights, prefix="vision_tower")
@@ -638,7 +639,8 @@ def load_weights(self, weights: Dict, weight_mapper=None, *args, **kwargs):
638639
if vit_params_map is not None:
639640
# Pixtral uses num_attention_heads = num_key_value_heads
640641
self._vision_tower.config.num_key_value_heads = self._vision_tower.config.num_attention_heads
641-
weight_mapper.permute_qk(weights=vit_weights, config=self._vision_tower.config)
642+
weight_mapper.permute_qk(weights=vit_weights,
643+
config=self._vision_tower.config)
642644
vit_weights = weight_mapper.rename_by_params_map(
643645
weights=vit_weights, params_map=vit_params_map)
644646

tensorrt_llm/_torch/pyexecutor/config_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ def load_pretrained_config(model_name_or_path: str,
283283

284284
elif model_type == "mistral3" and "layer_types" in config_dict:
285285
# TODO: update this for transformers v5.0
286-
config_class = "MinistralConfig"
286+
config_class = "MinistralConfig"
287287
model_config = config_class.from_pretrained(model_name_or_path,
288288
**kwargs)
289289

tensorrt_llm/llmapi/llm_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ def _update_from_hf_quant_config(self) -> bool:
430430

431431
if hf_quant_config is not None:
432432
# DeepSeek V3 FP8 ckpt
433-
if hf_quant_config.get("quant_method") == "fp8":
433+
if hf_quant_config.get("quant_method") == "fp8":
434434
if hf_quant_config.get("weight_block_size"):
435435
quant_config.quant_algo = QuantAlgo.FP8_BLOCK_SCALES
436436
quant_config.exclude_modules = ["*eh_proj"]

0 commit comments

Comments (0)