Skip to content

Commit 0409ddb

Browse files
Copy remote code files to output hf ckpt
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent 38d9522 commit 0409ddb

File tree

4 files changed

+26
-75
lines changed

4 files changed

+26
-75
lines changed

modelopt/torch/export/plugins/hf_checkpoint_utils.py

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -21,42 +21,38 @@
2121
from pathlib import Path
2222

2323
import torch
24-
from huggingface_hub import hf_hub_download, list_repo_files
24+
from huggingface_hub import snapshot_download
2525
from safetensors.torch import safe_open
2626
from tqdm import tqdm
2727

2828

29-
def copy_remote_code(
30-
pretrained_model_path: str | os.PathLike,
31-
save_directory: str | os.PathLike,
32-
):
29+
def copy_remote_code(pretrained_model_path: str | os.PathLike, save_directory: str | os.PathLike):
3330
"""Copy remote code from pretrained model to save directory.
3431
3532
For models that keep configuration and modeling files as part of the checkpoint,
3633
we need to copy them to the export directory for seamless integration with inference
3734
frameworks.
3835
3936
If ``pretrained_model_path`` is a local directory, Python files are copied directly.
40-
If it is a HuggingFace Hub model ID, Python files are downloaded from the Hub first.
37+
If it's a HF Hub model ID (e.g. ``nvidia/NVIDIA-Nemotron-Nano-12B-v2``), files are downloaded from the Hub.
4138
4239
Args:
4340
pretrained_model_path: Local path to the pretrained model or HuggingFace Hub model ID.
4441
save_directory: Path to the save directory.
4542
"""
4643
hf_checkpoint_path = Path(pretrained_model_path)
4744
save_dir = Path(save_directory)
45+
save_dir.mkdir(parents=True, exist_ok=True)
4846

4947
if hf_checkpoint_path.is_dir():
5048
for py_file in hf_checkpoint_path.glob("*.py"):
51-
if py_file.is_file():
52-
shutil.copy(py_file, save_dir / py_file.name)
49+
shutil.copy2(py_file, save_dir / py_file.name)
5350
else:
54-
# Hub model ID: download any top-level .py files (custom modeling code)
55-
repo_id = str(pretrained_model_path)
56-
for filename in list_repo_files(repo_id):
57-
if "/" not in filename and filename.endswith(".py"):
58-
local_path = hf_hub_download(repo_id=repo_id, filename=filename)
59-
shutil.copy(local_path, save_dir / filename)
51+
snapshot_download(
52+
repo_id=str(pretrained_model_path),
53+
local_dir=str(save_dir),
54+
allow_patterns=["*.py"],
55+
)
6056

6157

6258
def load_multimodal_components(

modelopt/torch/puzzletron/tools/bypassed_training/init_child_from_parent.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@
2424
import yaml
2525
from transformers import AutoModelForCausalLM
2626

27+
from modelopt.torch.export.plugins.hf_checkpoint_utils import copy_remote_code
28+
2729
from ...anymodel.model_descriptor import ModelDescriptor, ModelDescriptorFactory
2830
from ...anymodel.puzzformer import deci_x_patcher
2931
from ..checkpoint_utils import copy_tokenizer, load_state_dict
@@ -87,6 +89,9 @@ def init_child_from_parent(
8789
trust_remote_code=descriptor.requires_trust_remote_code(),
8890
)
8991

92+
if descriptor.requires_trust_remote_code():
93+
copy_remote_code(parent_checkpoint_dir, output_checkpoint_dir)
94+
9095
parent_model_config = load_model_config(
9196
parent_checkpoint_dir, trust_remote_code=descriptor.requires_trust_remote_code()
9297
)

tests/_test_utils/torch/puzzletron/utils.py

Lines changed: 3 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,11 @@
1919
import torch
2020
from _test_utils.torch.transformers_models import get_tiny_tokenizer
2121
from datasets import Dataset, DatasetDict
22-
from huggingface_hub import snapshot_download
2322
from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedTokenizerBase
2423

2524
import modelopt.torch.puzzletron as mtpz
2625
import modelopt.torch.utils.distributed as dist
26+
from modelopt.torch.export.plugins.hf_checkpoint_utils import copy_remote_code
2727

2828

2929
def setup_test_model_and_data(
@@ -189,21 +189,11 @@ def create_and_save_small_hf_model(
189189
submodule._tied_weights_keys = None
190190
model.save_pretrained(output_path, save_original_format=False)
191191

192-
# Save tokenizer
192+
# Save tokenizer, config, and custom code files
193193
tokenizer.save_pretrained(output_path)
194-
195-
# Save config
196194
config.save_pretrained(output_path)
197-
198-
# Download trust_remote_code .py files from HF hub into the checkpoint directory so that
199-
# force_cache_dynamic_modules can resolve classes from the local path.
200-
# save_pretrained only saves weights + config, not these .py files.
201195
if hasattr(config, "auto_map") and isinstance(config.auto_map, dict):
202-
snapshot_download(
203-
repo_id=hf_model_name,
204-
local_dir=output_path,
205-
allow_patterns=["*.py"],
206-
)
196+
copy_remote_code(hf_model_name, output_path)
207197

208198

209199
def save_dummy_dataset(dataset_path: Path | str):

tests/gpu_megatron/torch/export/test_hf_checkpoint_utils.py renamed to tests/unit/torch/export/test_hf_checkpoint_utils.py

Lines changed: 8 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -56,54 +56,14 @@ def test_copy_remote_code_local_dir_no_py_files(tmp_path):
5656

5757

5858
def test_copy_remote_code_hub_id(tmp_path):
59-
"""copy_remote_code downloads and copies top-level .py files from a Hub model ID."""
59+
"""copy_remote_code delegates to snapshot_download for a Hub model ID."""
6060
dst_dir = tmp_path / "dst"
61-
dst_dir.mkdir()
62-
63-
# Create a fake cached file that hf_hub_download would return
64-
cached_py = tmp_path / "cached_modeling_custom.py"
65-
cached_py.write_text("# custom hub model")
66-
67-
repo_files = [
68-
"modeling_custom.py", # top-level .py — should be downloaded
69-
"config.json", # non-.py — skip
70-
"model.safetensors", # non-.py — skip
71-
"subdir/nested.py", # subdirectory .py — skip (contains "/")
72-
]
73-
74-
with (
75-
patch(
76-
"modelopt.torch.export.plugins.hf_checkpoint_utils.list_repo_files",
77-
return_value=repo_files,
78-
) as mock_list,
79-
patch(
80-
"modelopt.torch.export.plugins.hf_checkpoint_utils.hf_hub_download",
81-
return_value=str(cached_py),
82-
) as mock_download,
83-
):
84-
copy_remote_code("meta-llama/Llama-3.2-1B", dst_dir)
85-
86-
mock_list.assert_called_once_with("meta-llama/Llama-3.2-1B")
87-
# Only the top-level .py should have been downloaded
88-
mock_download.assert_called_once_with(
89-
repo_id="meta-llama/Llama-3.2-1B", filename="modeling_custom.py"
90-
)
91-
assert (dst_dir / "modeling_custom.py").read_text() == "# custom hub model"
9261

62+
with patch("modelopt.torch.export.plugins.hf_checkpoint_utils.snapshot_download") as mock_sd:
63+
copy_remote_code("nvidia/NVIDIA-Nemotron-Nano-12B-v2", dst_dir)
9364

94-
def test_copy_remote_code_hub_id_no_py_files(tmp_path):
95-
"""copy_remote_code is a no-op when the Hub repo has no top-level .py files."""
96-
dst_dir = tmp_path / "dst"
97-
dst_dir.mkdir()
98-
99-
with (
100-
patch(
101-
"modelopt.torch.export.plugins.hf_checkpoint_utils.list_repo_files",
102-
return_value=["config.json", "model.safetensors"],
103-
),
104-
patch("modelopt.torch.export.plugins.hf_checkpoint_utils.hf_hub_download") as mock_download,
105-
):
106-
copy_remote_code("meta-llama/Llama-3.2-1B", dst_dir)
107-
108-
mock_download.assert_not_called()
109-
assert list(dst_dir.iterdir()) == []
65+
mock_sd.assert_called_once_with(
66+
repo_id="nvidia/NVIDIA-Nemotron-Nano-12B-v2",
67+
local_dir=str(dst_dir),
68+
allow_patterns=["*.py"],
69+
)

0 commit comments

Comments (0)