Skip to content

Commit 27784e7

Browse files
committed
Raise a clear error when a chat template fails to load (is None), instead of failing later with an opaque TypeError.
1 parent 24a3c79 commit 27784e7

3 files changed

Lines changed: 48 additions & 2 deletions

File tree

src/maxtext/trainers/post_train/rl/train_rl.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ def get_dataset(
114114
)
115115

116116
template_config = load_data_template_from_file(tmvp_config.chat_template_path)
117+
if template_config is None:
118+
raise ValueError(
119+
f"Chat template is required for processing dataset but failed to load from {tmvp_config.chat_template_path}"
120+
)
117121

118122
loaded_dataset = (
119123
grain.MapDataset.source(data)
@@ -231,6 +235,10 @@ def prepare_openinstructmath2_dataset(
231235
split_name = trainer_config.train_split if trainer_config.train_split != "train" else "train_1M"
232236
splits = prepare_openinstructmath2_dataset(split=split_name)
233237
template_config = load_data_template_from_file(trainer_config.chat_template_path)
238+
if template_config is None:
239+
raise ValueError(
240+
f"Chat template is required for processing dataset but failed to load from {trainer_config.chat_template_path}"
241+
)
234242

235243
train_dataset = (
236244
grain.MapDataset.source(splits["train"])
@@ -401,7 +409,6 @@ def create_rl_components(
401409
rollout_vllm_model_version=trainer_config.tokenizer_path,
402410
rollout_vllm_hbm_utilization=trainer_config.hbm_utilization_vllm,
403411
rollout_vllm_tpu_backend_type="jax",
404-
rollout_vllm_swap_space_size_gb=trainer_config.swap_space_vllm_gb,
405412
rollout_vllm_hf_config_path=trainer_config.vllm_hf_config_path,
406413
rollout_vllm_additional_config=rollout_additional_config,
407414
rollout_vllm_init_with_random_weights=True,
@@ -495,6 +502,10 @@ def _reward_fn(**kwargs):
495502
)
496503
# Instantiate the custom MaxText chat parser
497504
template_config = load_data_template_from_file(trainer_config.chat_template_path)
505+
if template_config is None:
506+
raise ValueError(
507+
f"Chat template is required for AgenticGRPOLearner but failed to load from {trainer_config.chat_template_path}"
508+
)
498509
chat_parser = utils_rl.MaxTextChatParser(
499510
model_tokenizer=model_tokenizer, template_config=template_config, tmvp_config=trainer_config
500511
)

src/maxtext/trainers/post_train/rl/utils_rl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,11 @@ def make_optimizer(learning_rate):
526526
return optax.inject_hyperparams(make_optimizer)(learning_rate=schedule)
527527

528528

529-
def format_maxtext_messages(messages: list[dict[str, str]], template_config: dict, tmvp_config) -> list[dict[str, str]]:
529+
def format_maxtext_messages(messages: list[str], template_config: dict, tmvp_config) -> list[dict[str, str]]:
530530
"""Helper to inject MaxText's system prompt into the input user messages."""
531+
if template_config is None:
532+
raise ValueError("template_config cannot be None for format_maxtext_messages.")
533+
531534
formatted_messages = []
532535
for msg in messages:
533536
formatted_content = template_config["TEMPLATE"].format(

tests/post_training/unit/rl_utils_test.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,5 +370,37 @@ def test_returns_optimizer_with_clipping(self):
370370
self.assertIn("learning_rate", state.hyperparams)
371371

372372

373+
class TestFormatMaxTextMessages(unittest.TestCase):
374+
"""Tests for utils_rl.format_maxtext_messages."""
375+
376+
def setUp(self):
377+
self.config = _make_config()
378+
self.template_config = {
379+
"SYSTEM_PROMPT": "Reason between {reasoning_start_token} and {reasoning_end_token}. "
380+
+ "Solution between {solution_start_token} and {solution_end_token}.",
381+
"TEMPLATE": "system: {system_prompt}\nquestion: {question}",
382+
}
383+
384+
@pytest.mark.cpu_only
385+
def test_format_with_template(self):
386+
"""Test formatting when a template is provided."""
387+
messages = ["What is 2+2?"]
388+
formatted = utils_rl.format_maxtext_messages(messages, self.template_config, self.config)
389+
self.assertEqual(len(formatted), 1)
390+
self.assertEqual(formatted[0]["role"], "user")
391+
expected_content = (
392+
"system: Reason between <reasoning> and </reasoning>. "
393+
"Solution between <answer> and </answer>.\n"
394+
"question: What is 2+2?"
395+
)
396+
self.assertEqual(formatted[0]["content"], expected_content)
397+
398+
@pytest.mark.cpu_only
399+
def test_format_without_template(self):
400+
"""Test formatting when template_config is None (the fix)."""
401+
messages = ["What is 2+2?"]
402+
self.assertRaises(ValueError, lambda: utils_rl.format_maxtext_messages(messages, None, self.config))
403+
404+
373405
if __name__ == "__main__":
374406
unittest.main()

0 commit comments

Comments
 (0)