Skip to content

Commit a5b715f

Browse files
Fix trust_remote_code=true issue
Signed-off-by: Daniel Korzekwa <dkorzekwa@nvidia.com>
1 parent 81f6d4e commit a5b715f

2 files changed

Lines changed: 4 additions & 1 deletion

File tree

examples/puzzletron/mbridge_distillation/distill_hf.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,7 @@ def _build_model_provider(hf_path):
309309
train_iters=args.train_iters,
310310
hf_export_path=args.hf_export_path,
311311
hf_model=args.hf_model,
312+
trust_remote_code=args.trust_remote_code,
312313
)
313314
except Exception as e:
314315
print(f"⚠️ Export failed: {e}")

modelopt/torch/puzzletron/export/mbridge/export_mbridge_to_hf.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ def export_to_hf_and_copy_config(
2828
train_iters: int,
2929
hf_export_path: str,
3030
hf_model: str,
31+
trust_remote_code: bool = False,
3132
) -> None:
3233
"""
3334
Export Megatron checkpoint to HuggingFace format and copy config.json from student model.
@@ -43,6 +44,7 @@ def export_to_hf_and_copy_config(
4344
train_iters: Number of training iterations (used to construct final checkpoint path)
4445
hf_export_path: Directory path where the HuggingFace model will be saved
4546
hf_model: HuggingFace model ID to use as template for export (e.g., meta-llama/Llama-3.1-8B-Instruct)
47+
trust_remote_code: Whether to trust remote modeling code when loading the HF template model
4648
"""
4749
print_rank_0(f"\n{'=' * 80}")
4850
print_rank_0("Exporting to HuggingFace format...")
@@ -58,7 +60,7 @@ def export_to_hf_and_copy_config(
5860
# Create bridge using standard model ID (not AnyModel checkpoint) to avoid sharding structure issues
5961
print_rank_0("🌉 Creating bridge...")
6062
print_rank_0(f" Using model ID: {hf_model}")
61-
bridge = AutoBridge.from_hf_pretrained(hf_model, trust_remote_code=True)
63+
bridge = AutoBridge.from_hf_pretrained(hf_model, trust_remote_code=trust_remote_code)
6264

6365
print_rank_0("📤 Exporting to HuggingFace format...")
6466
# Use strict=False for test_distill_hf.py which uses small models (2 layers) with fewer layers

0 commit comments

Comments
 (0)