[1/2] Address security concerns in code (#626)

kevalmorabia97 · kevalmorabia97 · commit 58a5f1e71b2b · 2026-01-14T02:02:59.000+05:30
- [x] Address feedback on Threat and Vuln Analysis (TAVA) doc by ProdSec team - [x] Add note on safe usage of pickle deserialization of modelopt-generated state files **TODO: [Separate PR]** Replace pickle usage in `modelopt/torch/opt/plugins/megatron.py` - Needs fix on TransformerEngine first as we copy from https://github.com/NVIDIA/TransformerEngine/blob/3ff0b8d4/transformer_engine/pytorch/module/base.py#L863  ## Summary by CodeRabbit * **New Features** * Added `--trust_calibration_data` CLI flag for secure ONNX quantization with pickle data files. * **Improvements** * Enhanced security validation for generated quantization code. * Simplified data loading by removing pickle-based caching—data is now always loaded fresh. * Added security guidance throughout model state loading operations. * **Documentation** * Updated guides with security best practices for model state handling. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>  --------- Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/docs/source/guides/2_save_load.rst b/docs/source/guides/2_save_load.rst
@@ -129,6 +129,7 @@ Here is the example workflow of restoring the ModelOpt-modified model architectu
     model = ...
 
     # Restore the model architecture using the saved `modelopt_state`
+    # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
     modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
     model = mto.restore_from_modelopt_state(model, modelopt_state)
 
diff --git a/examples/llm_qat/export.py b/examples/llm_qat/export.py
@@ -51,6 +51,7 @@ def get_model(
 
     # Restore modelopt state for LoRA models. For QAT/QAD models from_pretrained call handles this
     if hasattr(model, "peft_config"):
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         modelopt_state = torch.load(f"{ckpt_path}/modelopt_state_train.pth", weights_only=False)
         restore_from_modelopt_state(model, modelopt_state)
         print_rank_0("Restored modelopt state")
diff --git a/examples/llm_sparsity/weight_sparsity/README.md b/examples/llm_sparsity/weight_sparsity/README.md
@@ -84,7 +84,7 @@ python data_prep.py --save_path data
 
 The following command demonstrates how to perform SAT on the Llama2-7B model on 8 GPUs.
 The model is finetuned on the [cnn_dailymail](https://huggingface.co/datasets/abisee/cnn_dailymail) dataset for 3 epochs.
-The input data is tokenized to a maximum length of 1024 tokens. The tokenized data is saved as a pickle file for faster data loading. The one-time process takes less than an hour to finish depending on the CPU. The resulting pickle file can be utilized for future training sessions.
+The input data is tokenized to a maximum length of 1024 tokens.
 
 ```sh
 bash launch_finetune.sh --model meta-llama/Llama-2-7b-hf \
diff --git a/examples/llm_sparsity/weight_sparsity/finetune.py b/examples/llm_sparsity/weight_sparsity/finetune.py
@@ -32,7 +32,6 @@
 import argparse
 import copy
 import os
-import pickle
 from collections.abc import Sequence
 from dataclasses import dataclass, field
 
@@ -232,27 +231,17 @@ def __init__(
     ):
         super().__init__()
 
-        pickle_name = f"dict_{split}_{tokenizer.model_max_length}.pickle"
         with training_args.main_process_first():
-            if os.path.isfile(pickle_name):
-                with open(pickle_name, "rb") as f:
-                    print_rank_0("Reuse pickled data")
-                    data_dict = pickle.load(f)
-            else:
-                print_rank_0("Loading data...")
-                list_data_dict = utils.jload(data_path)
-
-                print_rank_0("Formatting inputs...")
-                prompt_input = PROMPT_DICT["prompt_input"]
-                sources = [prompt_input.format_map(example) for example in list_data_dict]
-                targets = [
-                    f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict
-                ]
-
-                print_rank_0("Tokenizing inputs... This may take some time...")
-                data_dict = preprocess(sources, targets, tokenizer)
-                with open(pickle_name, "wb") as f:
-                    pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)
+            print_rank_0("Loading data...")
+            list_data_dict = utils.jload(data_path)
+
+            print_rank_0("Formatting inputs...")
+            prompt_input = PROMPT_DICT["prompt_input"]
+            sources = [prompt_input.format_map(example) for example in list_data_dict]
+            targets = [f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict]
+
+            print_rank_0("Tokenizing inputs... This may take some time...")
+            data_dict = preprocess(sources, targets, tokenizer)
 
         self.input_ids = data_dict["input_ids"]
         self.labels = data_dict["labels"]
diff --git a/modelopt/onnx/quantization/__main__.py b/modelopt/onnx/quantization/__main__.py
@@ -16,6 +16,7 @@
 """Command-line entrypoint for ONNX PTQ."""
 
 import argparse
+import os
 
 import numpy as np
 
@@ -24,6 +25,31 @@
 __all__ = ["main"]
 
 
+def validate_file_size(file_path: str, max_size_bytes: int) -> None:
+    """Validate that a file exists and does not exceed the maximum allowed size.
+
+    Args:
+        file_path: Path to the file to validate
+        max_size_bytes: Maximum allowed file size in bytes
+
+    Raises:
+        FileNotFoundError: If the file does not exist
+        ValueError: If the file exceeds the maximum allowed size
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    file_size = os.path.getsize(file_path)
+    if file_size > max_size_bytes:
+        max_size_gb = max_size_bytes / (1024 * 1024 * 1024)
+        actual_size_gb = file_size / (1024 * 1024 * 1024)
+        raise ValueError(
+            f"File size validation failed: {file_path} ({actual_size_gb:.2f}GB) exceeds "
+            f"maximum allowed size of {max_size_gb:.2f}GB. This limit helps prevent potential "
+            f"denial-of-service attacks."
+        )
+
+
 def get_parser() -> argparse.ArgumentParser:
     """Get the argument parser for ONNX PTQ."""
     argparser = argparse.ArgumentParser("python -m modelopt.onnx.quantization")
@@ -52,6 +78,11 @@ def get_parser() -> argparse.ArgumentParser:
         type=str,
         help="Calibration data in npz/npy format. If None, random data for calibration will be used.",
     )
+    group.add_argument(
+        "--trust_calibration_data",
+        action="store_true",
+        help="If True, trust the calibration data and allow pickle deserialization.",
+    )
     group.add_argument(
         "--calibration_cache_path",
         type=str,
@@ -261,12 +292,35 @@ def get_parser() -> argparse.ArgumentParser:
 def main():
     """Command-line entrypoint for ONNX PTQ."""
     args = get_parser().parse_args()
+
+    # Security: Validate onnx model size is under 2GB by default
+    if not args.use_external_data_format:
+        try:
+            validate_file_size(args.onnx_path, 2 * (1024**3))
+        except ValueError as e:
+            raise ValueError(
+                "Onnx model size larger than 2GB. Please set --use_external_data_format flag to bypass this validation."
+            ) from e
+
     calibration_data = None
     if args.calibration_data_path:
-        calibration_data = np.load(args.calibration_data_path, allow_pickle=True)
-        if args.calibration_data_path.endswith(".npz"):
-            # Convert the NpzFile object to a Python dictionary
-            calibration_data = {key: calibration_data[key] for key in calibration_data.files}
+        # Security: Disable pickle deserialization for untrusted sources to prevent RCE attacks
+        try:
+            calibration_data = np.load(
+                args.calibration_data_path, allow_pickle=args.trust_calibration_data
+            )
+            if args.calibration_data_path.endswith(".npz"):
+                # Convert the NpzFile object to a Python dictionary
+                calibration_data = {key: calibration_data[key] for key in calibration_data.files}
+        except ValueError as e:
+            if "allow_pickle" in str(e) and not args.trust_calibration_data:
+                raise ValueError(
+                    "Calibration data file contains pickled objects which pose a security risk. "
+                    "For trusted sources, you may enable pickle deserialization by setting the "
+                    "--trust_calibration_data flag."
+                ) from e
+            else:
+                raise
 
     quantize(
         args.onnx_path,
diff --git a/modelopt/torch/export/distribute.py b/modelopt/torch/export/distribute.py
@@ -91,6 +91,7 @@ def read_configs_and_weights_from_rank(
             raise ValueError("NFSWorkspace is not initialized!")
         state_path = self._get_state_path(target_rank)
         if state_path.exists():
+            # Security NOTE: weights_only=False is used here on ModelOpt-generated ckpt, not on untrusted user input
             state = torch.load(state_path, map_location="cpu", weights_only=False)
             return state["config"], state["weight"]
         else:
diff --git a/modelopt/torch/opt/conversion.py b/modelopt/torch/opt/conversion.py
@@ -526,6 +526,7 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
         model = ...  # Create the model-like object
 
         # Restore the previously saved modelopt state followed by model weights
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         mto.restore_from_modelopt_state(
             model, torch.load("modelopt_state.pt", weights_only=False)
         )  # Restore modelopt state
diff --git a/modelopt/torch/opt/plugins/huggingface.py b/modelopt/torch/opt/plugins/huggingface.py
@@ -79,6 +79,7 @@ def new_init_fn(self, *args, **kwargs):
         modelopt_state_path = _get_modelopt_state_path(model_path)
         _original__init__(self, *args, **kwargs)
         if os.path.isfile(modelopt_state_path):
+            # Security NOTE: weights_only=False is used on ModelOpt-generated state_dict, not on untrusted user input
             modelopt_state = torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
             with extra_context() if extra_context else nullcontext():
                 restore_from_modelopt_state(self, modelopt_state)
diff --git a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
@@ -242,6 +242,7 @@ def restore_sharded_modelopt_state(
         return
 
     # Loading the common modelopt_state (replicated on all ranks)
+    # Security NOTE: weights_only=False is used here on NVIDIA-generated file, not on untrusted user input
     common_modelopt_state = torch.load(
         modelopt_checkpoint_name + "/" + COMMON_STATE_FNAME, weights_only=False
     )
diff --git a/modelopt/torch/opt/plugins/megatron.py b/modelopt/torch/opt/plugins/megatron.py
@@ -102,7 +102,7 @@ def _modelopt_set_extra_state(self, state: Any):
         # Default format: byte tensor with pickled data
         #
         # TODO: possible deserialization improvement
-        # https://github.com/NVIDIA/TensorRT-LLM/commits/main/tensorrt_llm/serialization.py
+        # https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/serialization.py
         extra_state = pickle.loads(state.detach().cpu().numpy().tobytes())  # nosec
     else:
         raise RuntimeError("Unsupported extra_state format.")
diff --git a/modelopt/torch/opt/plugins/peft.py b/modelopt/torch/opt/plugins/peft.py
@@ -72,6 +72,7 @@ def _new_load_adapter(self, model_id, adapter_name, *args, **kwargs):
         assert adapter_name in self.peft_config, (
             f"ModelOpt modified model should have adapter_name={adapter_name} in peft_config"
         )
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         restore_from_modelopt_state(
             self, torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
         )
@@ -85,6 +86,7 @@ def _new_load_adapter(self, model_id, adapter_name, *args, **kwargs):
     if os.path.isfile(_get_quantizer_state_save_path(model_id)):
         from modelopt.torch.quantization.nn import TensorQuantizer
 
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         quantizer_state_dict = torch.load(
             _get_quantizer_state_save_path(model_id), map_location="cpu", weights_only=False
         )
diff --git a/modelopt/torch/opt/searcher.py b/modelopt/torch/opt/searcher.py
@@ -249,6 +249,7 @@ def load_search_checkpoint(self) -> bool:
 
         # iterate through state dict and load keys
         print(f"Loading searcher state from {checkpoint}...")
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated ckpt, not on untrusted user input
         state_dict = torch.load(checkpoint, weights_only=False)
         assert state_dict.keys() == self.state_dict().keys(), "Keys in checkpoint don't match!"
         for key, state in state_dict.items():
diff --git a/modelopt/torch/quantization/plugins/attention.py b/modelopt/torch/quantization/plugins/attention.py
@@ -207,22 +207,24 @@ def patch_binop(node, quantizer_names, transpose=False):
     head = ast.fix_missing_locations(head)
     org_class = model_module.__dict__[org_class_name]
 
-    quant_class = _create_quantized_class_from_ast(head, org_class, new_class_name, model_module)
+    quant_class = _create_quantized_class_from_ast(head, org_class, new_class_name)
     register(original_cls=org_class, quantized_cls=quant_class)
     print(f"Successfully registered {org_class_name} for quantization")
     return True
 
 
 def _create_quantized_class_from_ast(
-    head, org_class, new_class_name, model_module, temp_file_name=None
+    head: ast.Module,
+    org_class: type,
+    new_class_name: str,
+    temp_file_name: str | None = None,
 ):
     """Create a quantized class from an AST representation.
 
     Args:
         head: The AST head containing the modified class definition
         org_class: The original class to be quantized
         new_class_name: Name for the new quantized class
-        model_module: The module containing the original class
         temp_file_name: Optional file name to save the generated code
 
     Returns:
@@ -232,6 +234,19 @@ def _create_quantized_class_from_ast(
 
     # Save the generated code to a temporary file if requested
     module_code_str = ast.unparse(head)
+
+    # Security: Validate generated code doesn't contain suspicious patterns
+    suspicious_patterns = ["__import__", "eval", "exec", "compile", "open(", "os.system"]
+    for pattern in suspicious_patterns:
+        if pattern in module_code_str:
+            # Allow compile for specific trusted ModelOpt internal use
+            if pattern == "compile" and "torch.compile" in module_code_str:
+                continue
+            raise ValueError(
+                f"Generated code contains suspicious pattern '{pattern}'. "
+                f"This may indicate a security issue in AST transformation."
+            )
+
     if temp_file_name is None:
         with tempfile.NamedTemporaryFile(
             prefix="modelopt_", suffix=".py", delete=False
@@ -253,6 +268,11 @@ def _create_quantized_class_from_ast(
     # )  # bandit throws error here
     # quant_class = model_module.__dict__[new_class_name]
 
+    # Security NOTE: compile() is used here on internally-generated AST,
+    # not on untrusted user input. The AST is created by ModelOpt's quantization
+    # logic and has been validated above. This is safer than exec() but still
+    # requires the AST transformation logic to be secure.
+
     # Extract the bytecode and create a new class on the fly
     # This is more tricky but doesn't require runtime execution
     module_code = compile(head, filename=f"{temp_file_name}", mode="exec")
diff --git a/modelopt/torch/quantization/plugins/transformers_trainer.py b/modelopt/torch/quantization/plugins/transformers_trainer.py
@@ -186,6 +186,7 @@ def _save_modelopt_state_with_weights(self):
         print_rank_0(f"Saved modelopt state to {self._modelopt_state_path}")
 
     def _restore_modelopt_state_with_weights(self):
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         modelopt_state = torch.load(self._modelopt_state_path, weights_only=False)
         modelopt_weights = modelopt_state.pop("modelopt_state_weights", None)
         restore_from_modelopt_state(self.model, modelopt_state)
diff --git a/modelopt/torch/utils/distributed.py b/modelopt/torch/utils/distributed.py
@@ -101,6 +101,7 @@ def _deserialize(tensor: torch.Tensor, size: int | None = None) -> Any:
     buffer = tensor.numpy().tobytes()
     if size is not None:
         buffer = buffer[:size]
+    # Security NOTE: weights_only=False is used here on internally-generated buffer, not on untrusted user input
     obj = torch.load(io.BytesIO(buffer), weights_only=False)
     return obj
 

Original file line number	Diff line number	Diff line change
`@@ -242,6 +242,7 @@ def restore_sharded_modelopt_state(`
`242`	`242`	`return`
`243`	`243`
`244`	`244`	`# Loading the common modelopt_state (replicated on all ranks)`
	`245`	`+ # Security NOTE: weights_only=False is used here on NVIDIA-generated file, not on untrusted user input`
`245`	`246`	`common_modelopt_state = torch.load(`
`246`	`247`	`modelopt_checkpoint_name + "/" + COMMON_STATE_FNAME, weights_only=False`
`247`	`248`	`)`
Original file line number	Diff line number	Diff line change
`@@ -102,7 +102,7 @@ def _modelopt_set_extra_state(self, state: Any):`
`102`	`102`	`# Default format: byte tensor with pickled data`
`103`	`103`	`#`
`104`	`104`	`# TODO: possible deserialization improvement`
`105`		`- # https://github.com/NVIDIA/TensorRT-LLM/commits/main/tensorrt_llm/serialization.py`
	`105`	`+ # https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/serialization.py`
`106`	`106`	`extra_state = pickle.loads(state.detach().cpu().numpy().tobytes()) # nosec`
`107`	`107`	`else:`
`108`	`108`	`raise RuntimeError("Unsupported extra_state format.")`