Skip to content

Commit d930bc1

Browse files
seonjinn and terrykong authored
Update nemo_rl/models/policy/__init__.py
Co-authored-by: Terry Kong <terryk@nvidia.com> Signed-off-by: Seonjin <sna@nvidia.com>
1 parent feee53e commit d930bc1

1 file changed

Lines changed: 6 additions & 3 deletions

File tree

nemo_rl/models/policy/__init__.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -237,12 +237,15 @@ class MegatronConfig(TypedDict):
237237
# Can be used only with 'alltoall' token dispatcher
238238
moe_shared_expert_overlap: bool
239239
# Offload specific module activations to CPU to reduce peak GPU memory.
240-
# Works with MoE models (offloads MoE expert activations). Different from
240+
# Works with both dense and MoE models. Different from
241241
# optimizer_cpu_offload which offloads optimizer states.
242+
# Requires transformer_engine implementation.
242243
fine_grained_activation_offloading: NotRequired[bool]
243244
# Modules to offload when fine_grained_activation_offloading is True.
244-
# Defaults to ["moe_act"] if not specified. Valid values include:
245-
# "moe_act", "core_attn", "qkv_linear", "mlp_norm", "attn_norm".
245+
# Required (no default). Valid values:
246+
# "attn_norm", "qkv_linear", "core_attn", "attn_proj", "mlp_norm",
247+
# "expert_fc1", "moe_act". Note: "attn_proj" requires "core_attn".
248+
# See: https://github.com/NVIDIA/Megatron-LM/blob/d30c3ae5469fe3f6a64d4fd2e63b6e7f7844ea81/megatron/core/transformer/transformer_config.py#L1440-L1448
246249
offload_modules: NotRequired[list[str]]
247250
peft: NotRequired[MegatronPeftConfig | MegatronPeftConfigDisabled]
248251
optimizer: MegatronOptimizerConfig

0 commit comments

Comments
 (0)