Skip to content

Commit 7e04df8

Browse files
ChenhanYu authored and
kevalmorabia97 committed
chg: passing through trust_remote_code (#778)
## What does this PR do?

**Type of change:** Bug fix <!-- Use one of the following: Bug fix, new feature, new example, new tests, documentation. -->

**Overview:** Pass `trust_remote_code` all the way through during export and import. This is needed because `DeepSeek` errors out if `trust_remote_code=True`, whereas `Nemotron-H` errors out if `trust_remote_code=False`.

## Usage

<!-- You can potentially add a usage example below. -->

```python
# Add a code snippet demonstrating how to use this
```

## Testing

<!-- Mention how you have tested your change, if applicable. -->

## Before your PR is "*Ready for review*"

<!-- If you haven't finished some of the above items you can still open a `Draft` PR. -->

- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: Yes/No <!--- If No, explain why. -->
- **Did you write any new necessary tests?**: Yes/No
- **Did you add or update any necessary documentation?**: Yes/No
- **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes/No <!--- Only for new features, API changes, critical bug fixes or backward-breaking changes. -->

## Additional Information

<!-- E.g. related issue. -->

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Bug Fixes**
  * Updated default `trust_remote_code` parameter from `True` to `False` in GPT model export and import functionality.

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Chenhan Yu <chenhany@nvidia.com>
1 parent d39146e commit 7e04df8

2 files changed

Lines changed: 9 additions & 2 deletions

File tree

modelopt/torch/export/plugins/megatron_importer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def __init__(
7575
workspace_dir: str | None = None,
7676
dtype=torch.bfloat16,
7777
dequantize: bool = True,
78-
trust_remote_code: bool = False,
78+
trust_remote_code: bool = True,
7979
verbose: bool = False,
8080
moe_router_dtype: torch.dtype | None = None,
8181
):

modelopt/torch/export/unified_export_megatron.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def __init__(
137137
pretrained_model_name_or_path: str | os.PathLike | None = None,
138138
export_extra_modules: bool = False,
139139
dtype=torch.bfloat16,
140-
trust_remote_code: bool = True,
140+
trust_remote_code: bool = False,
141141
moe_router_dtype: torch.dtype | None = None,
142142
):
143143
"""Create a GPTModel exporter instance."""
@@ -1205,6 +1205,7 @@ def export_mcore_gpt_to_hf(
12051205
export_extra_modules: bool = False,
12061206
dtype: torch.dtype = torch.bfloat16,
12071207
export_dir: Path | str = tempfile.gettempdir(),
1208+
trust_remote_code: bool = False,
12081209
moe_router_dtype: torch.dtype | None = None,
12091210
):
12101211
"""Export Megatron Core GPTModel to unified checkpoint and save to export_dir.
@@ -1225,6 +1226,7 @@ def export_mcore_gpt_to_hf(
12251226
pretrained_model_name_or_path,
12261227
export_extra_modules=export_extra_modules,
12271228
dtype=dtype,
1229+
trust_remote_code=trust_remote_code,
12281230
moe_router_dtype=moe_router_dtype,
12291231
)
12301232
exporter.save_pretrained(export_dir, pretrained_model_name_or_path)
@@ -1235,6 +1237,7 @@ def import_mcore_gpt_from_hf(
12351237
pretrained_model_path: str,
12361238
workspace_dir: str | None = None,
12371239
dtype: torch.dtype = torch.bfloat16,
1240+
trust_remote_code: bool = False,
12381241
moe_router_dtype: torch.dtype | None = None,
12391242
):
12401243
"""Import GPTModel state_dict from supported HuggingFace pretrained model path.
@@ -1243,13 +1246,17 @@ def import_mcore_gpt_from_hf(
12431246
model: The Megatron Core GPTModel instance.
12441247
pretrained_model_path: A path to a *directory* containing model weights saved using
12451248
[`~PreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`.
1249+
workspace_dir: The directory to save the workspace.
12461250
dtype: The weights data type to import.
1251+
trust_remote_code: If True, this allows importing from a wider range of sources.
1252+
moe_router_dtype: The data type to import the moe router weights.
12471253
"""
12481254
importer = GPTModelImporter(
12491255
model,
12501256
pretrained_model_path,
12511257
workspace_dir=workspace_dir,
12521258
dtype=dtype,
1259+
trust_remote_code=trust_remote_code,
12531260
moe_router_dtype=moe_router_dtype,
12541261
)
12551262
importer._import_state_dict()

0 commit comments

Comments
 (0)