Skip to content

Commit 2ba536b

Browse files
authored
Merge branch 'main' into chore/agents-skills-canonical-location
2 parents 510f36d + d738995 commit 2ba536b

187 files changed

Lines changed: 16286 additions & 6298 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/CODEOWNERS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ modelopt_recipes @NVIDIA/modelopt-recipes-codeowners
3535
# Examples
3636
/README.md @NVIDIA/modelopt-examples-codeowners
3737
/examples @NVIDIA/modelopt-examples-codeowners
38-
/examples/chained_optimizations @NVIDIA/modelopt-torch-nas-prune-codeowners
3938
/examples/cnn_qat @NVIDIA/modelopt-examples-cnn_qat-codeowners
4039
/examples/deepseek @NVIDIA/modelopt-deploy-codeowners
4140
/examples/diffusers @NVIDIA/modelopt-examples-diffusers-codeowners

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Make sure you read and follow the [Security Best Practices](https://github.com/N
2323
- If you copied code from any other sources or added a new PIP dependency, did you follow guidance in `CONTRIBUTING.md`: ✅ / ❌ / N/A <!--- Mandatory -->
2424
- Did you write any new necessary tests?: ✅ / ❌ / N/A <!--- Mandatory for new features or examples. -->
2525
- Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?: ✅ / ❌ / N/A <!--- Only for new features, API changes, critical bug fixes or backward incompatible changes. -->
26+
- Did you get Claude approval on this PR?: ✅ / ❌ / N/A <!--- Run `/claude review`. NVIDIA org members can self-trigger for complex changes; orthogonal to CodeRabbit. -->
2627

2728
### Additional Information
2829
<!-- E.g. related issue. -->

.github/workflows/claude.yml

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
name: Claude

# Interactive assistant that answers `@claude` mentions. A run is considered
# when the mention appears in any of:
#   - a PR or issue comment
#   - a PR review comment (line-level review thread)
#   - a PR review body
#   - an issue body or title (evaluated when the issue is opened or assigned)
#
# The repo is public, so only NVIDIA org members / collaborators may trigger
# it; this prevents abuse from external contributors.
#
# `contents: write` is granted so Claude can push branches and open PRs.

on:
  issue_comment:
    types:
      - created
  pull_request_review_comment:
    types:
      - created
  pull_request_review:
    types:
      - submitted
  issues:
    types:
      - opened
      - assigned

jobs:
  claude:
    name: Claude (interactive)
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      contents: write
      pull-requests: write
      issues: write
      id-token: write
    # Run only for an `@claude` mention whose author is an OWNER, MEMBER, or
    # COLLABORATOR. One clause per event type, because the text and the
    # author association live in a different payload field for each event.
    # Issue comments that also contain `/claude review` are skipped here —
    # claude_review.yml handles those.
    if: |
      (
        github.event_name == 'issue_comment' &&
        contains(github.event.comment.body, '@claude') &&
        !contains(github.event.comment.body, '/claude review') &&
        contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)
      ) || (
        github.event_name == 'pull_request_review_comment' &&
        contains(github.event.comment.body, '@claude') &&
        contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)
      ) || (
        github.event_name == 'pull_request_review' &&
        contains(github.event.review.body, '@claude') &&
        contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.review.author_association)
      ) || (
        github.event_name == 'issues' &&
        (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')) &&
        contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.issue.author_association)
      )
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 1

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      # Registers the pre-commit git hook, so every `git commit` Claude makes
      # goes through the same lint/format/license-header checks as a local
      # developer commit. This avoids PRs that fail CI on code quality.
      - name: Install and enable pre-commit hooks
        run: |
          pip install pre-commit
          pre-commit install

      - name: Run Claude
        uses: anthropics/claude-code-action@v1
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          claude_args: |
            --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr create:*),Bash(gh issue view:*),Bash(gh issue comment:*),Bash(git diff:*),Bash(git show:*),Bash(git log:*),Bash(git status:*),Bash(git checkout:*),Bash(git add:*),Bash(git commit:*),Bash(git push:*),Bash(pre-commit:*),Read,Edit,Write,Grep,Glob"
            --model "${{ vars.CLAUDE_MODEL }}"
        env:
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
          # The NVIDIA inference proxy (LiteLLM-based) rejects two fields that
          # the Claude Code SDK sends by default. Both workarounds follow
          # NVIDIA/OSMO's workflow, which hit and solved the same issues:
          #   - `context_management`            → disable experimental betas
          #   - `cache_control.ephemeral.scope` → disable prompt caching
          CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1"
          DISABLE_PROMPT_CACHING: "1"
.github/workflows/claude_review.yml

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
name: Claude Code Review

on:
  issue_comment:
    types: [created]

jobs:
  # ──────────────────────────────────────────────────────────────────
  # Deep ModelOpt-focused review covering numerical correctness,
  # mode/state composition, export compatibility, and backward
  # compatibility for saved checkpoints / recipes.
  #
  # CodeRabbit (.coderabbit.yaml) auto-reviews every PR for routine
  # bugs, typos, style, and security anti-patterns — this Claude job
  # is on-demand and complements that with deeper architectural
  # analysis. Trigger: /claude review
  # ──────────────────────────────────────────────────────────────────
  review:
    name: Claude Review
    # Run only for `/claude review` comments on pull requests (not plain
    # issues) written by an OWNER, MEMBER, or COLLABORATOR.
    if: |
      github.event_name == 'issue_comment' &&
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '/claude review') &&
      contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      contents: read
      pull-requests: write
      issues: write
      id-token: write
    # Job-level variables: read by the `gh` CLI in the shell steps below and
    # referenced as $PR_NUMBER / $REPO by the commands in the review prompt.
    env:
      GH_TOKEN: ${{ github.token }}
      REPO: ${{ github.repository }}
      PR_NUMBER: ${{ github.event.issue.number }}
    steps:
      # `issue_comment` payloads do not carry the PR head/base, so look them
      # up with `gh`. All shell variables are quoted so their values are
      # never word-split or glob-expanded.
      - name: Get PR info
        id: pr-info
        run: |
          PR_DATA=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json headRefOid,baseRefName)
          {
            echo "sha=$(jq -r .headRefOid <<< "$PR_DATA")"
            echo "base_ref=$(jq -r .baseRefName <<< "$PR_DATA")"
          } >> "$GITHUB_OUTPUT"

      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 1
          ref: ${{ steps.pr-info.outputs.sha }}

      # The branch name is handed to the script through the environment
      # rather than expanded inline in `run:`, so it is always treated as
      # data and can never be parsed as shell syntax.
      - name: Fetch base branch for diff analysis
        env:
          BASE_REF: ${{ steps.pr-info.outputs.base_ref }}
        run: git fetch origin "$BASE_REF"

      # Adds an "eyes" reaction to the comment that triggered this run.
      - name: React to trigger comment
        env:
          COMMENT_ID: ${{ github.event.comment.id }}
        run: |
          gh api "repos/$REPO/issues/comments/$COMMENT_ID/reactions" \
            --method POST \
            -f content='eyes'

      - name: Run Claude Review
        uses: anthropics/claude-code-action@v1
        env:
          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
          # NVIDIA inference proxy (LiteLLM-based) rejects two fields
          # the Claude Code SDK sends by default. Set per NVIDIA/OSMO's
          # workflow which has hit and solved both issues:
          #   - `context_management` → disable experimental betas
          #   - `cache_control.ephemeral.scope` → disable prompt caching
          CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1"
          DISABLE_PROMPT_CACHING: "1"
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          trigger_phrase: "/claude review"
          show_full_output: true
          claude_args: |
            --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr review:*),Bash(git diff:*),Bash(git show:*),Bash(git log:*),Read,Grep,Glob"
            --model "${{ vars.CLAUDE_MODEL }}"
          prompt: |
            REPO: ${{ env.REPO }}
            PR NUMBER: ${{ env.PR_NUMBER }}
            BASE REF: origin/${{ steps.pr-info.outputs.base_ref }}

            Mandatory workflow — never skip or reorder:
            1. Read the PR diff first (gh pr diff).
            2. Read CLAUDE.md and CONTRIBUTING.md for project conventions and architecture.
            3. For changed files under `modelopt/torch/<sub-package>/`, read the sub-package's
               `__init__.py` plus any `mode.py` / `config.py` to understand mode registration
               and config schema.
            4. Only then perform the review using that context.

            You are performing a deep code review on a **NVIDIA Model Optimizer (ModelOpt)** PR.
            ModelOpt is NVIDIA's open-source library for model optimization (quantization, pruning,
            distillation, sparsity, speculative decoding, NAS, PEFT) targeting PyTorch / ONNX /
            HuggingFace / Megatron, with deployment into TRT-LLM / vLLM / SGLang.

            Apply general correctness reasoning to whichever ModelOpt sub-package the diff touches —
            you already know the algorithms (quantization formulas, distillation losses, N:M sparsity
            mask selection, speculative draft-token alignment, etc.). The prompt below covers the
            **ModelOpt-specific structural concerns** that you can't infer from the diff alone.

            ## Division of labor with CodeRabbit

            CodeRabbit (`.coderabbit.yaml`) already auto-reviews every PR with the `chill` profile
            and runs a hard pre-merge gate on security anti-patterns. **Do NOT duplicate its work.**
            Specifically, do NOT comment on:
            - Style, formatting, or naming nits (handled by ruff + CodeRabbit)
            - Simple typos in code/comments/strings (CodeRabbit catches these)
            - The security anti-patterns enumerated in `.coderabbit.yaml`:
              `torch.load(weights_only=False)` without justification, `numpy.load(allow_pickle=True)`
              without justification, hardcoded `trust_remote_code=True`, `eval`/`exec` on external
              input, `# nosec` bypasses, non-permissive new PIP dependencies — these are already
              gated. Skip them entirely.
            - Generic "consider adding a test" suggestions for trivial changes.

            Your value is in things CodeRabbit's pattern-matching cannot do well: tracing dataflow
            across multiple files, reasoning about mode/state composition, judging export
            compatibility, and catching algorithm-level correctness bugs.

            ## Review Procedure

            1. Get PR metadata: `gh pr view $PR_NUMBER --repo $REPO --json title,body,baseRefName,headRefName,files,additions,deletions,changedFiles,author`
            2. Get the full diff: `gh pr diff $PR_NUMBER --repo $REPO`
               - For large PRs (>50 files), prioritize source code over config/lock/auto-generated files.
            3. For each significant changed file, read the full file for surrounding context.
            4. Trace the algorithm end-to-end through the diff. Verify the math/logic matches the
               intended technique (whatever sub-package it belongs to).
            5. For each newly introduced variable/argument/field, verify it has a meaningful runtime
               use path — not just declaration/docstring or discard assignment (`_ = new_arg`).
               Use Grep to search for usage beyond declaration sites.
            6. Post findings as inline comments with severity and category tags.

            ## Critical Issues (Must Fix)

            ### Algorithm Correctness
            - Verify the implementation matches the intended technique. Apply your knowledge of the
              relevant algorithm family (quantization scales/rounding/saturation, distillation loss
              composition, sparsity mask selection, pruning importance scoring, speculative draft
              acceptance, NAS supernet weight sharing, etc.).
            - Watch for silent numerical bugs: missing fp32 upcast in reductions, wrong reduction
              dimension, division-by-zero guards, casts that wrap instead of saturate, gradient
              flow through stop_gradient boundaries.
            - Watch for state corruption across calibration / search / training passes — leftover
              statistics from a previous run are a common foot-gun.

            ### Mode & State Composability (ModelOpt-specific)
            - **Mode registration**: New modes must register correctly with `apply_mode()` /
              `restore()`, declare their dependencies, and produce a `modelopt_state` entry that
              round-trips through save/restore.
            - **State dict schema**: Modified `modelopt_state` schema must include a migration path
              or version bump — silently changing keys breaks restore for existing checkpoints.
            - **Restore fidelity**: After `restore(model, state)`, the model must be functionally
              identical to the saved one. Verify module replacements, hooks, and parameters are
              re-applied.
            - **Plugin laziness**: Optional integrations (HF, Megatron, TRT-LLM, ONNX) must not
              hard-import at module load — gate behind `import_plugin()` so users without those
              extras don't break.

            ### Export Compatibility (ModelOpt-specific)
            - HF export (`unified_export_hf.py`) must produce a checkpoint that loads cleanly in
              transformers and matches the on-device dtype.
            - TRT-LLM export (`model_config_export.py`) must emit a valid `config.json` with
              correct `quant_algo`, `kv_cache_quant_algo`, scale tensor names, and weight layout.
            - ONNX export must use opsets and operator versions supported by the target consumer
              (TRT, ORT).

            ## Important Issues (Should Fix)

            ### Backward Compatibility
            - Renamed or removed arguments / config fields without deprecation path — breaks
              existing user scripts.
            - `modelopt_recipes/*.yaml` schema changes without a version bump — old recipes
              silently misparse.
            - Changed defaults silently alter behavior for users relying on them.
            - Changed function signatures, return types, or side effects in
              `modelopt/torch/*/__init__.py` (public API) without a backward-compat shim.
            - Modified `modelopt_state` keys/structure without migration — makes existing
              optimized checkpoints unloadable.

            ### Performance
            - Unnecessary CPU-GPU synchronization in hot paths: `.item()`, `.cpu()`,
              `torch.cuda.synchronize()`, Python-side tensor value checks.
            - Memory regressions: double-allocating weights, holding tensors past their lifetime.

            ## Suggestions (Nice to Have)
            - Stale, imprecise, or misleading comments/docstrings — a wrong docstring is worse
              than none.
            - Missing shape/dtype assertions at module/parallelism boundaries where they would
              catch real bugs.
            - Functions mixing many unrelated responsibilities that would benefit from splitting.

            ## Comment Format

            Prefix each comment with severity and category tag:
            - `**[CRITICAL Algorithm]**`, `**[CRITICAL ModeState]**`, `**[CRITICAL Export]**`
            - `**[IMPORTANT Compatibility]**`, `**[IMPORTANT Performance]**`
            - `**[SUGGESTION]**`

            For each finding, explain: (1) what the issue is, (2) why it matters (impact/risk), (3) specific suggestion for fix.

            Only use inline ```suggestion blocks for simple, self-contained line replacements (typos,
            renames, single-line fixes). For structural changes that add, remove, or reorganize blocks
            of code, use a top-level PR comment with a code block showing the proposed change instead.

            ## Completion

            After posting all inline comments, post a summary PR comment:
            - List total findings by severity (CRITICAL: N, IMPORTANT: N, SUGGESTION: N)
            - Highlight the most impactful findings
            - Overall assessment of the PR's risk level

            If no significant issues are found, approve the PR:
              gh pr review $PR_NUMBER --repo $REPO --approve --body "Claude review passed — no significant issues found. LGTM"

.github/workflows/example_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ jobs:
8686
uses: ./.github/workflows/_example_tests_runner.yml
8787
secrets: inherit
8888
with:
89-
docker_image: "nvcr.io/nvidia/nemo:26.02"
89+
docker_image: "nvcr.io/nvidia/nemo:26.04"
9090
example: megatron_bridge
9191
timeout_minutes: 30
9292
pip_install_extras: "[hf,puzzletron,dev-test]"

.github/workflows/gpu_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
matrix:
4040
include:
4141
- example: gpu
42-
timeout: 60
42+
timeout: 75
4343
container_image: pytorch:26.03-py3
4444
- example: gpu_megatron
4545
timeout: 45

.github/workflows/unit_tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ jobs:
9999
- {nox_session: "unit-3.10(torch_211, tf_latest)", python_version: "3.10"}
100100
- {nox_session: "unit-3.11(torch_211, tf_latest)", python_version: "3.11"}
101101
- {nox_session: "unit-3.13(torch_211, tf_latest)", python_version: "3.13"}
102+
- {nox_session: "unit-3.14(torch_211, tf_latest)", python_version: "3.14"}
102103
- {nox_session: "unit-3.12(torch_28, tf_latest)", python_version: "3.12"}
103104
- {nox_session: "unit-3.12(torch_29, tf_latest)", python_version: "3.12"}
104105
- {nox_session: "unit-3.12(torch_210, tf_latest)", python_version: "3.12"}

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ repos:
6868
entry: python tools/precommit/check_modelopt_recipes.py
6969
language: system
7070
files: ^modelopt_recipes/
71+
# configs/ contains reusable snippets (not full recipes) — skip recipe validation
72+
exclude: ^modelopt_recipes/configs/
7173

7274
# Instructions to change license file if ever needed:
7375
# https://github.com/Lucas-C/pre-commit-hooks#removing-old-license-and-replacing-it-with-a-new-one
@@ -101,7 +103,6 @@ repos:
101103
modelopt/torch/speculative/eagle/utils.py|
102104
modelopt/torch/speculative/plugins/hf_medusa.py|
103105
modelopt/torch/utils/plugins/megatron_mmlu.py|
104-
examples/chained_optimizations/bert_prune_distill_quantize.py|
105106
examples/deepseek/quantize_to_nvfp4.py|
106107
examples/deepseek/ptq.py|
107108
examples/diffusers/quantization/onnx_utils/export.py|

0 commit comments

Comments
 (0)