Skip to content

Commit 2e47843

Browse files
committed
move permute from callbacks to weight loading
Signed-off-by: Olya Kozlova <okozlova@nvidia.com>
1 parent 9d08467 commit 2e47843

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

tensorrt_llm/_torch/models/modeling_mistral.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import dataclasses
33
from typing import Any, Dict, List, Tuple
44

5-
import math
65
import torch
76
import torchvision
87
from mistral_common.tokens.tokenizers.multimodal import ImageEncoder
@@ -371,7 +370,7 @@ def __init__(
371370
use_fast=self.use_fast,
372371
trust_remote_code=trust_remote_code)
373372
self._model_path = model_path
374-
if model_type in ("mistral_large_3", "mistral3"):
373+
if model_type == "mistral_large_3":
375374
# For mistral large 3, we add chat template in the model forward, and the
376375
# MistralCommonImageProcessor is used to process the input when both text and images are provided.
377376
# When the input only contains text, we use the text processor to process the input.
@@ -507,7 +506,7 @@ def __init__(
507506
def load_tokenizer(model_path: str,
508507
config: PretrainedConfig,
509508
tokenizer: AutoTokenizer | None = None):
510-
if getattr(config, "input_processor_type", None) == "mistral_large_3":
509+
if getattr(config, "input_processor_type", None) == "mistral_large_3":
511510
try:
512511
return MistralTokenizer.from_pretrained(model_path)
513512

0 commit comments

Comments
 (0)