We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 689b107 commit 41fc91dCopy full SHA for 41fc91d
1 file changed
nemo_rl/models/generation/vllm/vllm_backend.py
@@ -38,6 +38,14 @@
38
39
40
def fix_gpt_oss_export_transpose(key: str, weight: torch.Tensor) -> torch.Tensor:
    """Transpose GPT-OSS expert down_proj weights into vLLM's expected layout.

    The down_proj weight layout differs between frameworks:
    - HF needs [in, out] layout.
    - Megatron needs [in, out] layout.
    - vLLM needs [out, in] layout.
    Weights whose key does not identify an expert down_proj are passed
    through untouched.

    See https://github.com/NVIDIA-NeMo/Megatron-Bridge/pull/3271 for more details.

    Args:
        key: Fully-qualified parameter name of the weight being exported.
        weight: The weight tensor associated with ``key``.

    Returns:
        The weight with its last two dims swapped (made contiguous) when
        ``key`` ends with ``"mlp.experts.down_proj"``; otherwise ``weight``
        unchanged.
    """
    # Guard clause: only expert down_proj tensors need the layout fix.
    if not key.endswith("mlp.experts.down_proj"):
        return weight
    # Swap the trailing two dims and materialize contiguously for vLLM.
    return weight.transpose(-2, -1).contiguous()
0 commit comments