Skip to content

Commit d318709

Browse files
committed
fix(rm): raise clear error when context parallelism is used with DTensor RM training
Context parallelism (context_parallel_size > 1) is not supported for reward model training on the DTensor backend because the log_sigmoid operator lacks a DTensor sharding strategy for CP meshes. Instead of letting users hit cryptic runtime errors, raise a clear ValueError during setup with a link to the tracking issue. Signed-off-by: Terry Kong <terryk@nvidia.com>
1 parent a760f1c commit d318709

2 files changed

Lines changed: 79 additions & 1 deletion

File tree

nemo_rl/algorithms/rm.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,21 @@ def setup(
116116

117117
# Extract individual configs for easier access
118118
policy_config = master_config.policy
119+
120+
# TODO(https://github.com/NVIDIA-NeMo/RL/issues/2482): remove once CP is supported for RM training.
121+
dtensor_cfg = policy_config.get("dtensor_cfg", {})
122+
if (
123+
dtensor_cfg.get("enabled", False)
124+
and dtensor_cfg.get("context_parallel_size", 1) > 1
125+
):
126+
raise ValueError(
127+
"Context parallelism (context_parallel_size > 1) is not supported for reward model "
128+
"training on the DTensor backend. The log_sigmoid operator used in the RM loss does "
129+
"not have a DTensor sharding strategy registered for CP meshes. "
130+
"Please set policy.dtensor_cfg.context_parallel_size=1. "
131+
"See https://github.com/NVIDIA-NeMo/RL/issues/2482 for tracking."
132+
)
133+
119134
data_config = master_config.data
120135
rm_config = master_config.rm
121136
logger_config = master_config.logger

tests/unit/algorithms/test_rm.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from torchdata.stateful_dataloader import StatefulDataLoader
2020

2121
from nemo_rl.algorithms.loss import PreferenceLossFn
22-
from nemo_rl.algorithms.rm import MasterConfig, _default_rm_save_state, rm_train
22+
from nemo_rl.algorithms.rm import MasterConfig, _default_rm_save_state, rm_train, setup
2323

2424

2525
@pytest.fixture
@@ -125,6 +125,69 @@ def val_iter(self):
125125
}
126126

127127

128+
def test_context_parallel_rejected_for_dtensor_rm():
    """DTensor RM setup must fail fast when context parallelism is requested.

    A config with context_parallel_size > 1 on an enabled DTensor backend
    should raise ValueError during setup rather than surfacing a cryptic
    runtime error later.

    TODO(https://github.com/NVIDIA-NeMo/RL/issues/2482): remove when CP is supported for RM.
    """
    # Minimal config: only the dtensor_cfg fields the CP check inspects matter;
    # the remaining sections are stubbed so model_construct succeeds.
    dtensor_cfg = {
        "enabled": True,
        "context_parallel_size": 2,
        "tensor_parallel_size": 1,
        "sequence_parallel": False,
        "activation_checkpointing": False,
        "cpu_offload": False,
    }
    raw_config = {
        "policy": {"dtensor_cfg": dtensor_cfg},
        "rm": {"seed": 42},
        "data": {},
        "logger": {},
        "cluster": {},
        "checkpointing": {},
    }
    config = MasterConfig.model_construct(**raw_config)

    # The error must come from the CP validation, matched by message.
    expected_msg = "Context parallelism.*is not supported for reward model training"
    with pytest.raises(ValueError, match=expected_msg):
        setup(config, MagicMock(), MagicMock(), {})
157+
158+
159+
def test_context_parallel_allowed_when_one():
    """DTensor RM setup must not reject context_parallel_size=1.

    The CP validation should pass; we confirm any failure raised by the
    stubbed setup call originates from a later stage, not from the CP check.

    TODO(https://github.com/NVIDIA-NeMo/RL/issues/2482): remove when CP is supported for RM.
    """
    # Same stubbed config as the rejection test, but with CP disabled (size 1).
    dtensor_cfg = {
        "enabled": True,
        "context_parallel_size": 1,
        "tensor_parallel_size": 1,
        "sequence_parallel": False,
        "activation_checkpointing": False,
        "cpu_offload": False,
    }
    raw_config = {
        "policy": {"dtensor_cfg": dtensor_cfg},
        "rm": {"seed": 42},
        "data": {},
        "logger": {},
        "cluster": {},
        "checkpointing": {},
    }
    config = MasterConfig.model_construct(**raw_config)

    # setup() will still fail on the incomplete mocks — but the failure must
    # not be the CP validation error.
    with pytest.raises(Exception) as excinfo:
        setup(config, MagicMock(), MagicMock(), {})
    assert "Context parallelism" not in str(excinfo.value)
189+
190+
128191
def test_exit_on_max_steps(mock_components):
129192
"""Test that training loop exits when max_num_steps is reached"""
130193
# Set max steps to 12, which is less than len(train_dataloader) * max_num_epochs

0 commit comments

Comments (0)