Skip to content

Commit ffc045c

Browse files
authored
Merge branch 'main' into ernie-upstream
2 parents a817a99 + ad27e2c commit ffc045c

22 files changed

Lines changed: 1466 additions & 260 deletions

File tree

.main.commit

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
59fc89485f18c47038c0cb9aed65a35850030d34
1+
cc4cb01198875453c40cf6e3453ee3a9f88adeff

docs/skills-index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and verification steps.
1111
1212
skills/developer-guide/SKILL
1313
skills/mlm-bridge-training/SKILL
14+
skills/recipe-recommender/SKILL
1415
```
1516

1617
```{toctree}
@@ -32,7 +33,6 @@ skills/perf-techniques/parallelism-strategies/SKILL
3233
skills/perf-techniques/cuda-graphs/SKILL
3334
skills/perf-techniques/tp-dp-comm-overlap/SKILL
3435
skills/perf-techniques/megatron-fsdp/SKILL
35-
skills/perf-techniques/packed-sequences-long-context/SKILL
3636
skills/perf-techniques/sequence-packing/SKILL
3737
skills/perf-techniques/hybrid-context-parallel/SKILL
3838
skills/perf-techniques/expert-parallel-overlap/SKILL

docs/training/packed-sequences.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,5 +92,3 @@ The most stable caveats to remember are:
9292
- [docs/training/hybrid-context-parallel.md](hybrid-context-parallel.md)
9393
- [skills/perf-techniques/sequence-packing/SKILL.md](../skills/perf-techniques/sequence-packing/SKILL.md)
9494
- [skills/perf-techniques/sequence-packing/card.yaml](../skills/perf-techniques/sequence-packing/card.yaml)
95-
- [skills/perf-techniques/packed-sequences-long-context/SKILL.md](../skills/perf-techniques/packed-sequences-long-context/SKILL.md)
96-
- [skills/perf-techniques/packed-sequences-long-context/card.yaml](../skills/perf-techniques/packed-sequences-long-context/card.yaml)

examples/conversion/compare_hf_and_megatron/compare.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ def _load_hf_model(args, is_vl_model: bool):
446446
hf_model = model_class.from_pretrained(
447447
args.hf_model_path,
448448
torch_dtype=torch.bfloat16,
449-
device_map="auto",
449+
device_map="cuda",
450450
trust_remote_code=is_safe_repo(
451451
trust_remote_code=args.trust_remote_code,
452452
hf_path=args.hf_model_path,

examples/conversion/hf_to_megatron_generate_nemotron_vlm.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ def vlm_forward_step(data_iterator, model, **kwargs) -> torch.Tensor:
114114
def loss_func(x, **kwargs):
115115
return x
116116

117-
return model(**forward_args), loss_func
117+
output = model(**forward_args)
118+
if isinstance(output, tuple):
119+
output = output[0]
120+
return output, loss_func
118121

119122

120123
def load_image(image_path: str) -> Image.Image:

scripts/performance/configs/qwen_vl/__init__.py

Lines changed: 121 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,22 @@
66
HAVE_MEGATRON_BRIDGE = False
77

88
if HAVE_MEGATRON_BRIDGE:
9-
from .qwen3_vl_pretrain import (
10-
qwen3_vl_30b_a3b_pretrain_config_b200,
11-
qwen3_vl_30b_a3b_pretrain_config_gb200,
12-
qwen3_vl_30b_a3b_pretrain_config_gb300,
13-
qwen3_vl_30b_a3b_pretrain_config_h100,
14-
qwen3_vl_235b_a22b_pretrain_config_b200,
15-
qwen3_vl_235b_a22b_pretrain_config_gb200,
16-
qwen3_vl_235b_a22b_pretrain_config_gb300,
17-
qwen3_vl_235b_a22b_pretrain_config_h100,
9+
from .qwen35_vl_pretrain import (
10+
qwen35_vl_35b_a3b_pretrain_config_b200,
11+
qwen35_vl_35b_a3b_pretrain_config_b300,
12+
qwen35_vl_35b_a3b_pretrain_config_gb200,
13+
qwen35_vl_35b_a3b_pretrain_config_gb300,
14+
qwen35_vl_35b_a3b_pretrain_config_h100,
15+
qwen35_vl_122b_a10b_pretrain_config_b200,
16+
qwen35_vl_122b_a10b_pretrain_config_b300,
17+
qwen35_vl_122b_a10b_pretrain_config_gb200,
18+
qwen35_vl_122b_a10b_pretrain_config_gb300,
19+
qwen35_vl_122b_a10b_pretrain_config_h100,
20+
qwen35_vl_397b_a17b_pretrain_config_b200,
21+
qwen35_vl_397b_a17b_pretrain_config_b300,
22+
qwen35_vl_397b_a17b_pretrain_config_gb200,
23+
qwen35_vl_397b_a17b_pretrain_config_gb300,
24+
qwen35_vl_397b_a17b_pretrain_config_h100,
1825
)
1926

2027
from .qwen3_vl_workload_base_configs import (
@@ -41,6 +48,50 @@
4148
QWEN3_VL_235B_A22B_PRETRAIN_CONFIG_H100_BF16,
4249
QWEN3_VL_235B_A22B_PRETRAIN_CONFIG_H100_FP8_CS,
4350
)
51+
from .qwen35_vl_workload_base_configs import (
52+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B200_BF16,
53+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B200_FP8_CS,
54+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B200_FP8_MX,
55+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B300_BF16,
56+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B300_FP8_CS,
57+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B300_FP8_MX,
58+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB200_BF16,
59+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB200_FP8_CS,
60+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB200_FP8_MX,
61+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB300_BF16,
62+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB300_FP8_CS,
63+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB300_FP8_MX,
64+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_H100_BF16,
65+
QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_H100_FP8_CS,
66+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B200_BF16,
67+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B200_FP8_CS,
68+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B200_FP8_MX,
69+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B300_BF16,
70+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B300_FP8_CS,
71+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B300_FP8_MX,
72+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB200_BF16,
73+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB200_FP8_CS,
74+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB200_FP8_MX,
75+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB300_BF16,
76+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB300_FP8_CS,
77+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB300_FP8_MX,
78+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_H100_BF16,
79+
QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_H100_FP8_CS,
80+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B200_BF16,
81+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B200_FP8_CS,
82+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B200_FP8_MX,
83+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B300_BF16,
84+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B300_FP8_CS,
85+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B300_FP8_MX,
86+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB200_BF16,
87+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB200_FP8_CS,
88+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB200_FP8_MX,
89+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB300_BF16,
90+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB300_FP8_CS,
91+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB300_FP8_MX,
92+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_H100_BF16,
93+
QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_H100_FP8_CS,
94+
)
4495

4596

4697
__all__ = [
@@ -66,18 +117,71 @@
66117
"QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_B200_FP8_MX",
67118
"QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_H100_BF16",
68119
"QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_H100_FP8_CS",
120+
# Qwen3.5-VL 35B-A3B
121+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB300_BF16",
122+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB300_FP8_CS",
123+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB300_FP8_MX",
124+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B300_BF16",
125+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B300_FP8_CS",
126+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B300_FP8_MX",
127+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB200_BF16",
128+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB200_FP8_CS",
129+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_GB200_FP8_MX",
130+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B200_BF16",
131+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B200_FP8_CS",
132+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_B200_FP8_MX",
133+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_H100_BF16",
134+
"QWEN35_VL_35B_A3B_PRETRAIN_CONFIG_H100_FP8_CS",
135+
# Qwen3.5-VL 122B-A10B
136+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB300_BF16",
137+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB300_FP8_CS",
138+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB300_FP8_MX",
139+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B300_BF16",
140+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B300_FP8_CS",
141+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B300_FP8_MX",
142+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB200_BF16",
143+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB200_FP8_CS",
144+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_GB200_FP8_MX",
145+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B200_BF16",
146+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B200_FP8_CS",
147+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_B200_FP8_MX",
148+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_H100_BF16",
149+
"QWEN35_VL_122B_A10B_PRETRAIN_CONFIG_H100_FP8_CS",
150+
# Qwen3.5-VL 397B-A17B
151+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB300_BF16",
152+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB300_FP8_CS",
153+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB300_FP8_MX",
154+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B300_BF16",
155+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B300_FP8_CS",
156+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B300_FP8_MX",
157+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB200_BF16",
158+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB200_FP8_CS",
159+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_GB200_FP8_MX",
160+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B200_BF16",
161+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B200_FP8_CS",
162+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_B200_FP8_MX",
163+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_H100_BF16",
164+
"QWEN35_VL_397B_A17B_PRETRAIN_CONFIG_H100_FP8_CS",
69165
]
70166

71167
if HAVE_MEGATRON_BRIDGE:
72168
__all__.extend(
73169
[
74-
"qwen3_vl_30b_a3b_pretrain_config_b200",
75-
"qwen3_vl_30b_a3b_pretrain_config_gb200",
76-
"qwen3_vl_30b_a3b_pretrain_config_gb300",
77-
"qwen3_vl_30b_a3b_pretrain_config_h100",
78-
"qwen3_vl_235b_a22b_pretrain_config_b200",
79-
"qwen3_vl_235b_a22b_pretrain_config_gb200",
80-
"qwen3_vl_235b_a22b_pretrain_config_gb300",
81-
"qwen3_vl_235b_a22b_pretrain_config_h100",
170+
# Qwen3.5-VL
171+
"qwen35_vl_35b_a3b_pretrain_config_b200",
172+
"qwen35_vl_35b_a3b_pretrain_config_b300",
173+
"qwen35_vl_35b_a3b_pretrain_config_gb200",
174+
"qwen35_vl_35b_a3b_pretrain_config_gb300",
175+
"qwen35_vl_35b_a3b_pretrain_config_h100",
176+
"qwen35_vl_122b_a10b_pretrain_config_b200",
177+
"qwen35_vl_122b_a10b_pretrain_config_b300",
178+
"qwen35_vl_122b_a10b_pretrain_config_gb200",
179+
"qwen35_vl_122b_a10b_pretrain_config_gb300",
180+
"qwen35_vl_122b_a10b_pretrain_config_h100",
181+
"qwen35_vl_397b_a17b_pretrain_config_b200",
182+
"qwen35_vl_397b_a17b_pretrain_config_b300",
183+
"qwen35_vl_397b_a17b_pretrain_config_gb200",
184+
"qwen35_vl_397b_a17b_pretrain_config_gb300",
185+
"qwen35_vl_397b_a17b_pretrain_config_h100",
82186
]
83187
)

0 commit comments

Comments
 (0)