From d3187097558f536f02cf5d80a6cd6c698236005a Mon Sep 17 00:00:00 2001
From: Terry Kong <terryk@nvidia.com>
Date: Wed, 13 May 2026 00:23:15 -0700
Subject: [PATCH 1/3] fix(rm): raise clear error when context parallelism is
 used with DTensor RM training

Context parallelism (context_parallel_size > 1) is not supported for
reward model training on the DTensor backend because the log_sigmoid
operator lacks a DTensor sharding strategy for CP meshes. Instead of
letting users hit cryptic runtime errors, raise a clear ValueError
during setup with a link to the tracking issue.

Signed-off-by: Terry Kong <terryk@nvidia.com>
---
 nemo_rl/algorithms/rm.py         | 15 ++++++++
 tests/unit/algorithms/test_rm.py | 65 +++++++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/nemo_rl/algorithms/rm.py b/nemo_rl/algorithms/rm.py
index 80cfc51bc9..e11bc813ab 100644
--- a/nemo_rl/algorithms/rm.py
+++ b/nemo_rl/algorithms/rm.py
@@ -116,6 +116,21 @@ def setup(
 
     # Extract individual configs for easier access
     policy_config = master_config.policy
+
+    # TODO(https://github.com/NVIDIA-NeMo/RL/issues/2482): remove once CP is supported for RM training.
+    dtensor_cfg = policy_config.get("dtensor_cfg", {})
+    if (
+        dtensor_cfg.get("enabled", False)
+        and dtensor_cfg.get("context_parallel_size", 1) > 1
+    ):
+        raise ValueError(
+            "Context parallelism (context_parallel_size > 1) is not supported for reward model "
+            "training on the DTensor backend. The log_sigmoid operator used in the RM loss does "
+            "not have a DTensor sharding strategy registered for CP meshes. "
+            "Please set policy.dtensor_cfg.context_parallel_size=1. "
+            "See https://github.com/NVIDIA-NeMo/RL/issues/2482 for tracking."
+        )
+
     data_config = master_config.data
     rm_config = master_config.rm
     logger_config = master_config.logger
diff --git a/tests/unit/algorithms/test_rm.py b/tests/unit/algorithms/test_rm.py
index d00b31a1aa..dc922cc659 100644
--- a/tests/unit/algorithms/test_rm.py
+++ b/tests/unit/algorithms/test_rm.py
@@ -19,7 +19,7 @@
 from torchdata.stateful_dataloader import StatefulDataLoader
 
 from nemo_rl.algorithms.loss import PreferenceLossFn
-from nemo_rl.algorithms.rm import MasterConfig, _default_rm_save_state, rm_train
+from nemo_rl.algorithms.rm import MasterConfig, _default_rm_save_state, rm_train, setup
 
 
 @pytest.fixture
@@ -125,6 +125,69 @@ def val_iter(self):
     }
 
 
+def test_context_parallel_rejected_for_dtensor_rm():
+    """Check that setup() raises ValueError when DTensor RM training sets CP > 1.
+
+    TODO(https://github.com/NVIDIA-NeMo/RL/issues/2482): remove when CP is supported for RM.
+    """
+    # DTensor backend enabled with a context-parallel size above 1.
+    dtensor_cfg = {
+        "enabled": True,
+        "context_parallel_size": 2,
+        "tensor_parallel_size": 1,
+        "sequence_parallel": False,
+        "activation_checkpointing": False,
+        "cpu_offload": False,
+    }
+
+    # Every other section is left (nearly) empty.
+    master_config = MasterConfig.model_construct(
+        policy={"dtensor_cfg": dtensor_cfg},
+        rm={"seed": 42},
+        data={},
+        logger={},
+        cluster={},
+        checkpointing={},
+    )
+
+    # `match` is a regex searched against the raised error's message.
+    error_pattern = "Context parallelism.*is not supported for reward model training"
+    with pytest.raises(ValueError, match=error_pattern):
+        setup(master_config, MagicMock(), MagicMock(), {})
+
+
+def test_context_parallel_allowed_when_one():
+    """Test that context_parallel_size=1 is not rejected by the DTensor RM CP check.
+
+    Later setup stages may fail on the mocks; only the CP error fails this test.
+
+    TODO(https://github.com/NVIDIA-NeMo/RL/issues/2482): remove when CP is supported for RM.
+    """
+    config = MasterConfig.model_construct(
+        **{
+            "policy": {
+                "dtensor_cfg": {
+                    "enabled": True,
+                    "context_parallel_size": 1,
+                    "tensor_parallel_size": 1,
+                    "sequence_parallel": False,
+                    "activation_checkpointing": False,
+                    "cpu_offload": False,
+                },
+            },
+            "rm": {"seed": 42},
+            "data": {},
+            "logger": {},
+            "cluster": {},
+            "checkpointing": {},
+        }
+    )
+    try:
+        setup(config, MagicMock(), MagicMock(), {})
+    except Exception as exc:  # broad on purpose: any non-CP failure is acceptable
+        assert "Context parallelism" not in str(exc)
+
+
 def test_exit_on_max_steps(mock_components):
     """Test that training loop exits when max_num_steps is reached"""
     # Set max steps to 12, which is less than len(train_dataloader) * max_num_epochs

From 9c93d8d507a4ea46cd747d8a8ad4571a524bf236 Mon Sep 17 00:00:00 2001
From: Terry Kong <terryk@nvidia.com>
Date: Thu, 14 May 2026 23:23:36 -0700
Subject: [PATCH 2/3] ci: ignore NeMo Gym docs in sphinx linkcheck

The NeMo Gym docs URL returns 404, causing sphinx-build CI to fail.
Add the URL pattern to linkcheck_ignore since the external docs site
is not under our control.

Signed-off-by: Terry Kong <terryk@nvidia.com>
---
 docs/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/conf.py b/docs/conf.py
index 99a3d3f5ae..2995d7d26e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -117,6 +117,7 @@
 linkcheck_ignore = [
     ".*github\\.com.*",
     ".*githubusercontent\\.com.*",
+    "https://docs\\.nvidia\\.com/nemo/gym/.*",
 ]
 
 # PyTorch docs anchor IDs change between stable versions; verify the page

From 4c24d9eaab1cf4df86295ae4bbf05c5aab9cf2e0 Mon Sep 17 00:00:00 2001
From: Terry Kong <terryk@nvidia.com>
Date: Fri, 15 May 2026 00:26:17 -0700
Subject: [PATCH 3/3] fix(docs): pin gym doc links to v0.2.1 instead of
 blanket-ignoring

Replace the blanket linkcheck_ignore for all NeMo Gym docs with
pinned v0.2.1 URLs so linkcheck still validates them.

Signed-off-by: Terry Kong <terryk@nvidia.com>
---
 docs/conf.py                             | 1 -
 docs/design-docs/nemo-gym-integration.md | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 2995d7d26e..99a3d3f5ae 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -117,7 +117,6 @@
 linkcheck_ignore = [
     ".*github\\.com.*",
     ".*githubusercontent\\.com.*",
-    "https://docs\\.nvidia\\.com/nemo/gym/.*",
 ]
 
 # PyTorch docs anchor IDs change between stable versions; verify the page
diff --git a/docs/design-docs/nemo-gym-integration.md b/docs/design-docs/nemo-gym-integration.md
index 33e324547b..c83ae276d3 100644
--- a/docs/design-docs/nemo-gym-integration.md
+++ b/docs/design-docs/nemo-gym-integration.md
@@ -1,6 +1,6 @@
 # NeMo Gym Integration
 
-This document describes how NeMo RL integrates with [NeMo Gym](https://docs.nvidia.com/nemo/gym/latest/index.html) for multi-step and multi-turn reinforcement learning training.
+This document describes how NeMo RL integrates with [NeMo Gym](https://docs.nvidia.com/nemo/gym/v0.2.1/index.html) for multi-step and multi-turn reinforcement learning training.
 
 ## Overview
 
@@ -181,7 +181,7 @@ sequenceDiagram
     GRPO->>Policy: Compute loss and train
 ```
 
-> **NeMo Gym server types** (see [Core Components](https://docs.nvidia.com/nemo/gym/latest/about/concepts/core-components.html)):
+> **NeMo Gym server types** (see [Core Components](https://docs.nvidia.com/nemo/gym/v0.2.1/about/concepts/core-components/)):
 > - **Agent Server**: Orchestrates the rollout loop
 > - **Model Server**: HTTP proxy to vLLM; translates Responses API ↔ Chat Completions
 > - **Resource Server**: Provides tools and rewards
@@ -254,4 +254,4 @@ Token IDs are extracted at the NeMo RL vLLM layer via the `/tokenize` endpoint.
 - Tokenization matches the exact model and tokenizer used for generation
 - No re-tokenization drift between generation and training
 
-For details on on-policy token ID handling, see {doc}`../guides/environments` and the [NeMo Gym on-policy corrections documentation](https://docs.nvidia.com/nemo/gym/latest/contribute/rl-framework-integration/openai-compatible-http-server-on-policy-correction.html).
+For details on on-policy token ID handling, see {doc}`../guides/environments` and the [NeMo Gym on-policy corrections documentation](https://docs.nvidia.com/nemo/gym/v0.2.1/contribute/rl-framework-integration/openai-compatible-http-server-on-policy-correction.html).