diff --git a/.coderabbit.yaml b/.coderabbit.yaml
index 2dba1d2a3c8..7c896941df4 100644
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@@ -4,7 +4,35 @@ reviews:
   profile: chill
   collapse_walkthrough: true
   poem: false
+  path_instructions:
+    - path: "modelopt/**/*.py"
+      instructions: &security_instructions |
+        Review all modelopt package and examples Python changes against the security coding practices in
+        SECURITY.md. Flag any of the following as CRITICAL security issues,
+        request changes, and fail the check if ANY are present:
+        1. torch.load(..., weights_only=False) with no inline comment justifying why it is safe
+           (e.g. confirming the file is internally-generated and not user-supplied).
+        2. numpy.load(..., allow_pickle=True) with no inline comment justifying why it is safe.
+           Should expose allow_pickle as a caller-configurable parameter defaulting to False, not hardcode True.
+        3. trust_remote_code=True hardcoded for transformers model or tokenizer loading.
+           Code should expose it as a caller-configurable parameter defaulting to False, not hardcode True.
+        4. eval() or exec() on any input that could originate from outside the process.
+        5. Any use of "# nosec" comments to bypass Bandit security checks is not allowed.
+           If a security-sensitive pattern is genuinely necessary, the PR must be reviewed and approved
+           by @NVIDIA/modelopt-setup-codeowners with an explicit justification in the PR description.
+    - path: "examples/**/*.py"
+      instructions: *security_instructions
   auto_review:
-    auto_incremental_review: false
+    auto_incremental_review: true
     drafts: false
     base_branches: ["main", "release/.*", "feature/.*"]
+  pre_merge_checks:
+    custom_checks:
+      - name: "Security anti-patterns"
+        mode: "error"
+        instructions: *security_instructions
+knowledge_base:
+  code_guidelines:
+    filePatterns:
+      - "CONTRIBUTING.md"
+      - "SECURITY.md"
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index e3c4dd45161..746f84ee6dc 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,27 +1,28 @@
-## What does this PR do?
+### What does this PR do?
 
-**Type of change:** ? <!-- Use one of the following: Bug fix, new feature, new example, new tests, documentation. -->
+Type of change: ? <!-- Use one of the following: Bug fix, new feature, new example, new tests, documentation. -->
 
-**Overview:** ?
+<!-- Details about the change. -->
 
-## Usage
-<!-- You can potentially add a usage example below. -->
+### Usage
 
 ```python
 # Add a code snippet demonstrating how to use this
 ```
 
-## Testing
+### Testing
 <!-- Mention how have you tested your change if applicable. -->
 
-## Before your PR is "*Ready for review*"
-<!-- If you haven't finished some of the above items you can still open `Draft` PR. -->
+### Before your PR is "*Ready for review*"
 
-- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
-- **Is this change backward compatible?**: Yes/No <!--- If No, explain why. -->
-- **Did you write any new necessary tests?**: Yes/No
-- **Did you add or update any necessary documentation?**: Yes/No
-- **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes/No <!--- Only for new features, API changes, critical bug fixes or bw breaking changes. -->
+Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md) and your commits are signed (`git commit -s -S`).
 
-## Additional Information
+Make sure you read and follow the [Security Best Practices](https://github.com/NVIDIA/Model-Optimizer/blob/main/SECURITY.md#security-coding-practices-for-contributors) (e.g. avoiding hardcoded `trust_remote_code=True`, using `torch.load(..., weights_only=True)`, avoiding `pickle`, etc.).
+
+- Is this change backward compatible?: ✅ / ❌ / N/A <!--- If ❌, explain why. -->
+- If you copied code from any other source, did you follow the IP policy in [CONTRIBUTING.md](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md#-copying-code-from-other-sources)?: ✅ / ❌ / N/A <!--- Mandatory -->
+- Did you write any new necessary tests?: ✅ / ❌ / N/A <!--- Mandatory for new features or examples. -->
+- Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?: ✅ / ❌ / N/A <!--- Only for new features, API changes, critical bug fixes or backward incompatible changes. -->
+
+### Additional Information
 <!-- E.g. related issue. -->
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3ace50ada39..eaffe813a51 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -39,36 +39,41 @@ To run the pre-commit hooks without committing, use:
 pre-commit run --all-files
 ```
 
-## 📝 Writing tests
+## 🔒 Security coding practices
 
-We use [pytest](https://docs.pytest.org/) for all tests. The tests are organized into the following directories:
+All contributors must follow the security coding practices documented in the *Security Coding Practices for
+Contributors* section of [SECURITY.md](./SECURITY.md#security-coding-practices-for-contributors).
 
-- `tests/unit`: Fast cpu-based unit tests for the core ModelOpt library. They should not take more than a few seconds to run.
-- `tests/gpu`: Fast GPU-based unit tests for the core ModelOpt library. In most cases, they should not take more than a few seconds to run.
-- `tests/examples`: Integration tests for ModelOpt examples. They should not take more than a few minutes to run. Please refer to [example test README](./tests/examples/README.md) for more details.
+Any security-sensitive exception requires review and approval from `@NVIDIA/modelopt-setup-codeowners`.
 
-Please refer to [tox.ini](./tox.ini) for more details on how to run the tests and their dependencies.
+## 📋 Copying code from other sources
 
-### Code Coverage
+Using third-party code requires authorization from the Open Source Review Board (OSRB) team and must follow the guidance below on contributing copied code.
 
-For any new features / examples, make sure to they are covered by the tests and that the Codecov coverage check in your PR passes.
+If you are an external contributor, seek guidance from `@NVIDIA/modelopt-setup-codeowners` for next steps. For internal contributors, follow the steps below:
 
-## Submitting your code
+- **File NVBug for use of open-source code:**
+  Clone NVBug 2885977 and add your use case. Copying code from permissively licensed repositories (e.g. MIT, Apache 2) is generally self-checkout, but for other licenses you must get expert guidance before merging your PR.
+- **License header format:** The file which has code copied from another third-party GitHub repository should have the following in order:
+  1. A reference link (with commit hash) to the source from which the code was copied.
+  1. The original repository's Copyright / License.
+  1. The NVIDIA Apache 2.0 Copyright / License header.
 
-- If you are an external contributor, create a fork of the repository.
-- Rebase (not merge) your code to the most recent commit of the `main` branch. We want to ensure a linear history;
-  see [Merge vs Rebase](https://www.atlassian.com/git/tutorials/merging-vs-rebasing). Remember to test again locally after rebasing to catch any new issues before pushing to your PR.
+  See [`modelopt/torch/speculative/eagle/utils.py`](./modelopt/torch/speculative/eagle/utils.py)
+  for an example of the correct license header format.
+- **Exclude from license pre-commit hook:** Exclude copied files from the license pre-commit hook so it doesn't auto-add the NVIDIA Apache 2.0 license on top of the file. Add the file path to the `exclude` list in the `insert-license` hook in [`.pre-commit-config.yaml`](./.pre-commit-config.yaml).
 
-```bash
-git pull
-git rebase origin/main
-git push origin <branch> --force-with-lease
-```
+## 📝 Writing tests
 
-- When pushing the rebased (or any) branch, use `git push --force-with-lease` instead of `git push --force`.
-- Submit a pull request and let auto-assigned reviewers (based on [CODEOWNERS](./.github/CODEOWNERS)) review your PR.
-- If any CI/CD checks fail, fix the issues and push again.
-- Once your PR is approved and all checks pass, one of the reviewers will merge the PR.
+We use [pytest](https://docs.pytest.org/) for all tests. For any new features / examples, make sure to add tests and confirm that the coverage check in your PR passes. The tests are organized into the following directories:
+
+- `tests/unit`: Fast cpu-based unit tests for the core ModelOpt library. They should not take more than a few seconds to run.
+- `tests/gpu`: Fast GPU-based unit tests for the core ModelOpt library. In most cases, they should not take more than a few seconds to run.
+- `tests/gpu_megatron`: Fast GPU-based unit tests for the core ModelOpt library for Megatron-Core features. In most cases, they should not take more than a few seconds to run.
+- `tests/gpu_trtllm`: Fast GPU-based unit tests for the core ModelOpt library for TensorRT-LLM features. In most cases, they should not take more than a few seconds to run.
+- `tests/examples`: Integration tests for ModelOpt examples. They should not take more than a few minutes to run. Please refer to [example test README](./tests/examples/README.md) for more details.
+
+Please refer to [tox.ini](./tox.ini) for more details on how to run the tests and their dependencies.
 
 ## ✍️ Signing your work
 
@@ -135,3 +140,9 @@ git push origin <branch> --force-with-lease
 
     (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
   ```
+
+## Submitting your code
+
+- Submit a pull request and let auto-assigned reviewers (based on [CODEOWNERS](./.github/CODEOWNERS)) review your PR.
+- If any CI/CD checks fail, fix the issues and push again.
+- Once your PR is approved and all checks pass, one of the reviewers will merge the PR.
diff --git a/SECURITY.md b/SECURITY.md
index bba6893c5e5..503e6e2b0f6 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -22,4 +22,150 @@ While NVIDIA currently does not have a bug bounty program, we do offer acknowled
 
 ## NVIDIA Product Security
 
-For all security-related concerns, please visit NVIDIA's [Product Security portal](https://www.nvidia.com/en-us/security)
+For all security-related concerns, please visit NVIDIA's [Product Security portal](https://www.nvidia.com/en-us/security).
+
+---
+
+## Security Considerations
+
+### Overview
+
+NVIDIA Model Optimizer (ModelOpt) is a library used to optimize ML models and may load and process user-provided artifacts (models, weights, configs, calibration data) and their dependencies. Secure deployment depends on how you source artifacts, validate inputs, and harden the environment where ModelOpt runs.
+
+### What to Be Aware Of
+
+#### Untrusted model and data inputs
+
+- Models, weights, configs and data may be malicious or corrupted.
+
+#### Deserialization and code-execution risks
+
+- Unsafe deserialization can lead to arbitrary code execution if fed untrusted inputs.
+- Avoid using serialization formats/settings that can deserialize arbitrary objects.
+
+#### Input validation and resource exhaustion
+
+- Large or malformed inputs can trigger crashes or excessive CPU/GPU/memory use.
+- Missing size/type checks can increase DoS risk.
+
+#### Data in transit and at rest
+
+- If fetching models or dependencies over the network, insecure transport can enable tampering.
+- Stored artifacts, logs, and caches may contain sensitive data.
+
+#### Logging and observability
+
+- Logs may inadvertently contain sensitive inputs, paths, tokens, or proprietary model details.
+- Overly verbose logs can leak operational and security-relevant information.
+
+#### Supply chain and third-party components
+
+- Dependencies may include known vulnerabilities or be compromised.
+- Third-party plugins/components loaded at runtime may not have the same security assurances.
+
+### Example Security Approaches
+
+#### Artifact integrity
+
+- Only load artifacts from trusted sources.
+- Prefer signed artifacts; verify signatures before loading.
+
+#### Safe parsing and deserialization
+
+- Prefer safer storage formats (avoid object deserialization for untrusted inputs).
+- Avoid `pickle`, `torch.load()` with untrusted weights, or YAML `unsafe_load`.
+- Treat any unverified artifact as untrusted and block/guard its loading.
+
+#### Hardening and least privilege
+
+- Run with least privilege and isolate workloads.
+
+#### Data protection
+
+- Encrypt sensitive data at rest; use TLS 1.3 for data in transit.
+- Never hardcode or log credentials.
+
+#### Resilience
+
+- Validate inputs and enforce limits (file size, timeouts, quotas, etc.).
+- Keep OS, containers, and dependencies patched; scan for known vulnerabilities.
+
+---
+
+## Security Coding Practices for Contributors
+
+ModelOpt processes model checkpoints and weights from various sources. Contributors must avoid patterns that can introduce security vulnerabilities. These rules apply to all code except tests and cover the following key security considerations:
+
+### Deserializing untrusted data
+
+**Do not use `torch.load(..., weights_only=False)`** unless a documented exception is provided. It uses pickle under the hood and can execute arbitrary code from a malicious checkpoint.
+
+```python
+# Bad — allows arbitrary code execution from the checkpoint file
+state = torch.load(path, weights_only=False)
+
+# Good
+state = torch.load(path, weights_only=True, map_location="cpu")
+
+# Acceptable only with an inline comment explaining why weights_only=False
+# is required and confirming the file is internally-generated / trusted.
+state = torch.load(
+    path,
+    weights_only=False,  # loaded file is generated internally by ModelOpt and not supplied by the user
+    map_location="cpu",
+)
+```
+
+**Do not use `numpy.load(..., allow_pickle=True)`** unless a documented exception is provided. It uses pickle under the hood and can execute arbitrary code from a malicious file.
+
+```python
+# Bad — allows arbitrary code execution from the checkpoint file
+state = numpy.load(path, allow_pickle=True)
+
+# Good — let the caller decide; default to False
+def load_data(path: str, trust_data: bool = False):
+    return numpy.load(path, allow_pickle=trust_data)
+```
+
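+**Do not use `yaml.load()`** — always use `yaml.safe_load()`. With an unsafe loader (e.g. `yaml.Loader` or `yaml.UnsafeLoader`), `yaml.load()` can construct arbitrary Python objects embedded in YAML, which can lead to arbitrary code execution.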
+
+### Loading transformers models with `trust_remote_code`
+
+**Do not hardcode `trust_remote_code=True`.** This flag tells Transformers to execute arbitrary Python shipped with a checkpoint, which is an RCE vector if the model source is untrusted.
+
+```python
+# Bad — silently opts every user into remote code execution
+model = AutoModel.from_pretrained(name, trust_remote_code=True)
+
+# Good — let the caller decide; default to False
+def load_model(name: str, trust_remote_code: bool = False):
+    return AutoModel.from_pretrained(name, trust_remote_code=trust_remote_code)
+```
+
+### Subprocess and shell commands
+
+**Never use `shell=True` with string interpolation or user-supplied input.** This is a command-injection vector.
+
+```python
+# Bad — command injection if model_name contains shell metacharacters
+subprocess.run(f"python convert.py --model {model_name}", shell=True)
+
+# Good — pass arguments as a list
+subprocess.run(["python", "convert.py", "--model", model_name])
+```
+
+### Other patterns to avoid
+
+- **`eval()` / `exec()`** on strings derived from external input. If you must generate and execute code dynamically, validate the input against an allowlist of safe patterns.
+- **Hardcoded secrets or credentials** — never commit tokens, passwords, or API keys. Use environment variables or config files listed in `.gitignore`.
+
+### Bandit security checks
+
+Bandit is used as a pre-commit hook to check for security-sensitive patterns in the code. **`# nosec` comments are not allowed** as a bypass for security checks.
+
+### Creating a security exception
+
+If a security-sensitive pattern (e.g. `pickle`, `subprocess`) is genuinely required, the contributor must:
+
+1. **Add an inline comment** explaining *why* the pattern is necessary and *why* it is safe in this specific context (e.g. "loaded file is generated internally by ModelOpt").
+1. **Request review from [@NVIDIA/modelopt-setup-codeowners](https://github.com/orgs/NVIDIA/teams/modelopt-setup-codeowners)** and include a clear justification in the PR description.
diff --git a/docs/source/guides/2_save_load.rst b/docs/source/guides/2_save_load.rst
index e097e3f8067..9deb82f8db2 100644
--- a/docs/source/guides/2_save_load.rst
+++ b/docs/source/guides/2_save_load.rst
@@ -129,9 +129,7 @@ Here is the example workflow of restoring the ModelOpt-modified model architectu
     model = ...
 
     # Restore the model architecture using the saved `modelopt_state`
-    # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
-    modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
-    model = mto.restore_from_modelopt_state(model, modelopt_state)
+    model = mto.restore_from_modelopt_state(model, modelopt_state_path="modelopt_state.pth")
 
     # Load the model weights separately after restoring the model architecture
     custom_method_to_load_model_weights(model)
diff --git a/docs/source/reference/2_security.rst b/docs/source/reference/2_security.rst
deleted file mode 100644
index 5a6e37af0ec..00000000000
--- a/docs/source/reference/2_security.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-Security Considerations
-=======================
-
-Overview
---------
-
-NVIDIA Model Optimizer (ModelOpt) is a library used to optimize ML models and
-may load and process user-provided artifacts (models, weights, configs,
-calibration data) and their dependencies. Secure deployment depends on how you
-source artifacts, validate inputs, and harden the environment where ModelOpt
-runs.
-
-What to Be Aware Of
--------------------
-
-**Untrusted model and data inputs**
-
-- Models, weights, configs and data may be malicious or corrupted.
-
-**Deserialization and code-execution risks**
-
-- Unsafe deserialization can lead to arbitrary code execution if fed untrusted
-  inputs.
-- Avoid using serialization formats/settings that can deserialize arbitrary
-  objects.
-
-**Input validation and resource exhaustion**
-
-- Large or malformed inputs can trigger crashes or excessive CPU/GPU/memory use.
-- Missing size/type checks can increase DoS risk.
-
-**Data in transit and at rest**
-
-- If fetching models or dependencies over the network, insecure transport can
-  enable tampering.
-- Stored artifacts, logs, and caches may contain sensitive data.
-
-**Logging and observability**
-
-- Logs may inadvertently contain sensitive inputs, paths, tokens, or proprietary
-  model details.
-- Overly verbose logs can leak operational and security-relevant information.
-
-**Supply chain and third-party components**
-
-- Dependencies may include known vulnerabilities or be compromised.
-- Third-party plugins/components loaded at runtime may not have the same
-  security assurances.
-
-Example Security Approaches
----------------------------
-
-**Artifact integrity**
-
-- Only load artifacts from trusted sources.
-- Prefer signed artifacts; verify signatures before loading.
-
-**Safe parsing and deserialization**
-
-- Prefer safer storage formats (avoid object deserialization for untrusted
-  inputs).
-- Avoid ``pickle``, ``torch.load()`` with untrusted weights, or YAML
-  ``unsafe_load``.
-- Treat any unverified artifact as untrusted and block/guard its loading.
-
-**Hardening and least privilege**
-
-- Run with least privilege and isolate workloads.
-
-**Data protection**
-
-- Encrypt sensitive data at rest; use TLS 1.3 for data in transit.
-- Never hardcode or log credentials.
-
-**Resilience**
-
-- Validate inputs and enforce limits (file size, timeouts, quotas,..).
-- Keep OS, containers, and dependencies patched; scan for known vulnerabilities.
diff --git a/examples/diffusers/distillation/distillation_trainer.py b/examples/diffusers/distillation/distillation_trainer.py
index d98278b9afb..9cd5c0d142f 100644
--- a/examples/diffusers/distillation/distillation_trainer.py
+++ b/examples/diffusers/distillation/distillation_trainer.py
@@ -591,10 +591,9 @@ def _apply_modelopt_quantization(self) -> None:
                         f"Resuming: restoring quantization architecture from "
                         f"{modelopt_state_path} (weights loaded later by accelerator)"
                     )
-                    # Security NOTE: weights_only=False is used on ModelOpt-generated state,
-                    # not on untrusted user input.
-                    state = torch.load(modelopt_state_path, weights_only=False, map_location="cpu")
-                    self._transformer = mto.restore_from_modelopt_state(self._transformer, state)
+                    self._transformer = mto.restore_from_modelopt_state(
+                        self._transformer, modelopt_state_path=modelopt_state_path
+                    )
                     logger.info("Quantization architecture restored for resume")
                     return
                 else:
diff --git a/examples/diffusers/requirements.txt b/examples/diffusers/requirements.txt
index 3cdac70f8ca..b762ec314ee 100644
--- a/examples/diffusers/requirements.txt
+++ b/examples/diffusers/requirements.txt
@@ -1,4 +1,3 @@
 cuda-python<13
 nvtx
 opencv-python>=4.8.1.78,<4.12.0.88
-sentencepiece
diff --git a/examples/gpt-oss/README.md b/examples/gpt-oss/README.md
index 62f1435f9b5..372fdbcc494 100644
--- a/examples/gpt-oss/README.md
+++ b/examples/gpt-oss/README.md
@@ -20,6 +20,7 @@ Performing finetuning with Quantization Aware Training solves these issues. The
 Install the necessary dependencies:
 
 ```bash
+pip install -U "nvidia-modelopt[hf]"
 pip install -r requirements.txt
 ```
 
diff --git a/examples/gpt-oss/requirements.txt b/examples/gpt-oss/requirements.txt
index 4d75b59c373..368097d3376 100644
--- a/examples/gpt-oss/requirements.txt
+++ b/examples/gpt-oss/requirements.txt
@@ -1,8 +1,4 @@
-accelerate
-datasets
-deepspeed
 kernels>=0.9.0
-peft>=0.17.0
 torch>2.7.1
 trackio
 transformers>=4.55.0
diff --git a/examples/llm_qat/README.md b/examples/llm_qat/README.md
index c8d0a542afe..cd4b103f30c 100644
--- a/examples/llm_qat/README.md
+++ b/examples/llm_qat/README.md
@@ -81,7 +81,7 @@ torch.save(mto.modelopt_state(model), "modelopt_quantizer_states.pt")
 
 # To resume training from a checkpoint or load the final QAT model for evaluation,
 # load the quantizer states before loading the model weights
-# mto.restore_from_modelopt_state(model, torch.load("modelopt_quantizer_states.pt", weights_only=False))
+# mto.restore_from_modelopt_state(model, modelopt_state_path="modelopt_quantizer_states.pt")
 # After loading the quantizer states, load the model weights
 # model.load_state_dict(state_dict_from_last_checkpoint)
 
diff --git a/examples/llm_qat/export.py b/examples/llm_qat/export.py
index 1c9e6f4b11d..f48e85c3ee4 100644
--- a/examples/llm_qat/export.py
+++ b/examples/llm_qat/export.py
@@ -18,7 +18,6 @@
 import warnings
 from pathlib import Path
 
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 import modelopt.torch.opt as mto
@@ -51,8 +50,7 @@ def get_model(
 
     # Restore modelopt state for LoRA models. For QAT/QAD models from_pretrained call handles this
     if hasattr(model, "peft_config"):
-        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
-        modelopt_state = torch.load(f"{ckpt_path}/modelopt_state_train.pth", weights_only=False)
+        modelopt_state = mto.load_modelopt_state(f"{ckpt_path}/modelopt_state_train.pth")
         restore_from_modelopt_state(model, modelopt_state)
         print_rank_0("Restored modelopt state")
 
diff --git a/examples/llm_qat/requirements.txt b/examples/llm_qat/requirements.txt
index 8d44913bd60..b8da4e088f5 100644
--- a/examples/llm_qat/requirements.txt
+++ b/examples/llm_qat/requirements.txt
@@ -1,5 +1,3 @@
 flash-attn
-peft
 py7zr
-sentencepiece>=0.2.0
 tensorboardX
diff --git a/examples/llm_sparsity/weight_sparsity/README.md b/examples/llm_sparsity/weight_sparsity/README.md
index ca4df236ffa..97563aff007 100644
--- a/examples/llm_sparsity/weight_sparsity/README.md
+++ b/examples/llm_sparsity/weight_sparsity/README.md
@@ -4,6 +4,17 @@ In this tutorial, we demonstrate how to use Nvidia Model Optimizer to perform Po
 
 To learn more about the sparsity feature, please refer to the [documentation](https://nvidia.github.io/Model-Optimizer/guides/6_sparsity.html).
 
+## Pre-Requisites
+
+### Installation
+
+Install Model Optimizer with `hf` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/) and install the requirements for the example:
+
+```bash
+pip install -U "nvidia-modelopt[hf]"
+pip install -r requirements.txt
+```
+
 ## Getting Started
 
 ### Post-Training Sparsification (PTS) for PyTorch models
diff --git a/examples/llm_sparsity/weight_sparsity/requirements.txt b/examples/llm_sparsity/weight_sparsity/requirements.txt
index e4d43ea0e70..e4021b0194f 100644
--- a/examples/llm_sparsity/weight_sparsity/requirements.txt
+++ b/examples/llm_sparsity/weight_sparsity/requirements.txt
@@ -1,3 +1,2 @@
 flash-attn
-sentencepiece>=0.2.0
 tensorboardX
diff --git a/examples/onnx_ptq/requirements.txt b/examples/onnx_ptq/requirements.txt
index 01f7f6dd0ec..166c7675700 100644
--- a/examples/onnx_ptq/requirements.txt
+++ b/examples/onnx_ptq/requirements.txt
@@ -1,5 +1,5 @@
 datasets>=2.14.4
 optimum
-sentencepiece
+sentencepiece>=0.2.1
 timm
 torchvision
diff --git a/examples/windows/accuracy_benchmark/perplexity_metrics/requirements.txt b/examples/windows/accuracy_benchmark/perplexity_metrics/requirements.txt
index 73bb392b00a..4bdac071cf6 100644
--- a/examples/windows/accuracy_benchmark/perplexity_metrics/requirements.txt
+++ b/examples/windows/accuracy_benchmark/perplexity_metrics/requirements.txt
@@ -5,8 +5,7 @@ datasets
 numpy
 onnxruntime-genai
 pandas
-sentencepiece
+sentencepiece>=0.2.1
 tokenizers>=0.14.1
-
-torch>=2.0.0
-transformers>=4.36
+torch>=2.6.0
+transformers>=4.53
diff --git a/modelopt/torch/opt/conversion.py b/modelopt/torch/opt/conversion.py
index 874c51b5990..6ec7a172981 100644
--- a/modelopt/torch/opt/conversion.py
+++ b/modelopt/torch/opt/conversion.py
@@ -51,6 +51,7 @@
 __all__ = [
     "ModeloptStateManager",
     "apply_mode",
+    "load_modelopt_state",
     "modelopt_state",
     "restore",
     "restore_from_modelopt_state",
@@ -512,7 +513,29 @@ def save(model: nn.Module, f: str | os.PathLike | BinaryIO, **kwargs) -> None:
     torch.save(ckpt_dict, f, **kwargs)
 
 
-def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]) -> nn.Module:
+def load_modelopt_state(modelopt_state_path: str | os.PathLike, **kwargs) -> dict[str, Any]:
+    """Load a modelopt state dictionary from a file via ``torch.load()``.
+
+    Args:
+        modelopt_state_path: Path of the modelopt state file to read; it should be ModelOpt-generated and trusted.
+        **kwargs: Extra ``torch.load()`` args; defaults are ``weights_only=False`` and ``map_location="cpu"``.
+
+    Returns:
+        The object returned by ``torch.load()``, expected to be a modelopt state dictionary (not validated yet).
+    """
+    # Security NOTE: weights_only=False unpickles the file, so only trusted ModelOpt-generated state should be loaded
+    kwargs.setdefault("weights_only", False)
+    kwargs.setdefault("map_location", "cpu")
+    # TODO: Add some validation to ensure the file is a valid modelopt state file.
+    modelopt_state = torch.load(modelopt_state_path, **kwargs)
+    return modelopt_state
+
+
+def restore_from_modelopt_state(
+    model: ModelLike,
+    modelopt_state: dict[str, Any] | None = None,
+    modelopt_state_path: str | os.PathLike | None = None,
+) -> nn.Module:
     """Restore the model architecture from the modelopt state dictionary based on the user-provided model.
 
     This method does not restore the model parameters such as weights, biases and quantization scales.
@@ -526,10 +549,7 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
         model = ...  # Create the model-like object
 
         # Restore the previously saved modelopt state followed by model weights
-        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
-        mto.restore_from_modelopt_state(
-            model, torch.load("modelopt_state.pt", weights_only=False)
-        )  # Restore modelopt state
+        mto.restore_from_modelopt_state(model, modelopt_state_path="modelopt_state.pt")
         model.load_state_dict(torch.load("model_weights.pt"), ...)  # Load the model weights
 
     If you want to restore the model weights and the modelopt state with saved scales, please use
@@ -543,11 +563,21 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
         modelopt_state: The modelopt state dict describing the modelopt modifications to the model. The
             ``modelopt_state`` can be generated via
             :meth:`mto.modelopt_state()<modelopt.torch.opt.conversion.modelopt_state>`.
+            Cannot be used with modelopt_state_path.
+        modelopt_state_path: The path to the modelopt state file.
+            Cannot be used with modelopt_state.
 
     Returns:
         A modified model architecture based on the restored modifications with the unmodified
         weights as stored in the provided ``model`` argument.
     """
+    assert (modelopt_state is not None) != (modelopt_state_path is not None), (
+        "Either modelopt_state or modelopt_state_path must be provided, but not both."
+    )
+    if modelopt_state_path is not None:
+        modelopt_state = load_modelopt_state(modelopt_state_path)
+    assert modelopt_state, "modelopt_state is required!"
+
     # initialize ModelLikeModule if needed.
     model = model if isinstance(model, nn.Module) else ModelLikeModule(model)
 
diff --git a/modelopt/torch/opt/plugins/huggingface.py b/modelopt/torch/opt/plugins/huggingface.py
index 99bab772576..8b6396f3e79 100644
--- a/modelopt/torch/opt/plugins/huggingface.py
+++ b/modelopt/torch/opt/plugins/huggingface.py
@@ -79,10 +79,8 @@ def new_init_fn(self, *args, **kwargs):
         modelopt_state_path = _get_modelopt_state_path(model_path)
         _original__init__(self, *args, **kwargs)
         if os.path.isfile(modelopt_state_path):
-            # Security NOTE: weights_only=False is used on ModelOpt-generated state_dict, not on untrusted user input
-            modelopt_state = torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
             with extra_context() if extra_context else nullcontext():
-                restore_from_modelopt_state(self, modelopt_state)
+                restore_from_modelopt_state(self, modelopt_state_path=modelopt_state_path)
 
             print_rank_0(f"Restored ModelOpt state from {modelopt_state_path}")
 
diff --git a/modelopt/torch/opt/plugins/peft.py b/modelopt/torch/opt/plugins/peft.py
index c3fd268a588..de1218917f9 100644
--- a/modelopt/torch/opt/plugins/peft.py
+++ b/modelopt/torch/opt/plugins/peft.py
@@ -72,10 +72,7 @@ def _new_load_adapter(self, model_id, adapter_name, *args, **kwargs):
         assert adapter_name in self.peft_config, (
             f"ModelOpt modified model should have adapter_name={adapter_name} in peft_config"
         )
-        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
-        restore_from_modelopt_state(
-            self, torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
-        )
+        restore_from_modelopt_state(self, modelopt_state_path=modelopt_state_path)
 
     outputs = self._modelopt_cache["load_adapter"](self, model_id, adapter_name, *args, **kwargs)
 
diff --git a/modelopt/torch/quantization/plugins/transformers_trainer.py b/modelopt/torch/quantization/plugins/transformers_trainer.py
index b92b240c0da..b0d27865095 100644
--- a/modelopt/torch/quantization/plugins/transformers_trainer.py
+++ b/modelopt/torch/quantization/plugins/transformers_trainer.py
@@ -28,7 +28,6 @@
 import modelopt.torch.opt as mto
 import modelopt.torch.quantization as mtq
 from modelopt.torch.distill.plugins.huggingface import KDTrainer
-from modelopt.torch.opt.conversion import restore_from_modelopt_state
 from modelopt.torch.opt.plugins import ModelOptHFTrainer
 from modelopt.torch.utils import print_rank_0
 
@@ -233,10 +232,9 @@ def _save_modelopt_state_with_weights(self):
         print_rank_0(f"Saved modelopt state to {self._modelopt_state_path}")
 
     def _restore_modelopt_state_with_weights(self):
-        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
-        modelopt_state = torch.load(self._modelopt_state_path, weights_only=False)
+        modelopt_state = mto.load_modelopt_state(self._modelopt_state_path)
         modelopt_weights = modelopt_state.pop("modelopt_state_weights", None)
-        restore_from_modelopt_state(self.model, modelopt_state)
+        mto.restore_from_modelopt_state(self.model, modelopt_state)
         if modelopt_weights is not None:
             set_quantizer_state_dict(self.model, modelopt_weights)
         print_rank_0("Restored modelopt state with weights.")
diff --git a/pyproject.toml b/pyproject.toml
index 61319f8cf42..7cee60d309f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,7 @@ hf = [
     "huggingface_hub>=0.24.0",
     "nltk",
     "peft>=0.17.0",
+    "sentencepiece>=0.2.1",                                                           # Also implicitly used in test_unified_export_megatron, test_vllm_fakequant_megatron_export
     "transformers>=4.53,<5.0",                                                        # Should match modelopt/torch/__init__.py and tox.ini
     "wonderwords",
 ]
@@ -98,7 +99,6 @@ dev-test = [
     "pytest-cov",
     "pytest-instafail",
     "pytest-timeout",
-    "sentencepiece",
     "timm",
     "torchprofile>=0.0.4",     # optional dependency for modelopt.torch
     "torchvision",