Enhance error logging around tracker.tokenize() for easier debugging; add an assertion message and fix the `split_prompt_reponse_index` typo

binary-husky · binary-husky · commit 4cb513bc3a54 · 2026-02-06T13:18:16.000+08:00
diff --git a/ajet/context_tracker/basic_tracker.py b/ajet/context_tracker/basic_tracker.py
@@ -262,30 +262,28 @@ def tokenize_steps(
 
         # check reward structure
         self.reward_structure: Reward  # type: ignore
-        assert (
-            self.reward_structure.step_reward_arr is not None
-        ), "must call `process_reward` before tokenize_steps"
-        assert len(self.reward_structure.step_reward_arr) == total_steps
+        assert self.reward_structure.step_reward_arr is not None, "must call `process_reward` before tokenize_steps"
+        assert len(self.reward_structure.step_reward_arr) == total_steps, f"reward step count {len(self.reward_structure.step_reward_arr)} != total_steps {total_steps}"
 
         # mapping
         input_ids = []
         input_logprobs = []
         attention_mask = []
         loss_mask = []
-        split_prompt_reponse_index = -1
+        split_prompt_response_index = -1
         split_point_message_left_index = -1
         input_ids_len = []
 
         # cat all messages
         for i, ext_msg in enumerate(ext_steps):
             # find split index, this have to be done before input_ids += ext_msg.token_arr
-            if (split_prompt_reponse_index == -1) and (ext_msg.need_training):
-                split_prompt_reponse_index = len(input_ids)
+            if (split_prompt_response_index == -1) and (ext_msg.need_training):
+                split_prompt_response_index = len(input_ids)
                 split_point_message_left_index = i - 1
                 assert (
                     split_point_message_left_index >= 0
                 ), "There should be at least one message before the first training message"
-                assert split_prompt_reponse_index == input_ids_len[split_point_message_left_index]
+                assert split_prompt_response_index == input_ids_len[split_point_message_left_index]
                 assert (
                     ext_msg.author == "llm"
                 ), "The first message after initialization should be from LLM, not from env or user"
@@ -304,37 +302,37 @@ def tokenize_steps(
         # move the split index forward
         MAX_FORWARD_STEPS = 100
         for i in range(MAX_FORWARD_STEPS):
-            if loss_mask[split_prompt_reponse_index] == 0:
-                split_prompt_reponse_index += 1
+            if loss_mask[split_prompt_response_index] == 0:
+                split_prompt_response_index += 1
             else:
                 break
 
         # no matter what, the split index should not exceed max prompt length
         # make sure that the prompt length does not exceed `config.ajet.data.max_prompt_length`
-        if split_prompt_reponse_index > self.config.ajet.data.max_prompt_length:
-            split_prompt_reponse_index = self.config.ajet.data.max_prompt_length
+        if split_prompt_response_index > self.config.ajet.data.max_prompt_length:
+            split_prompt_response_index = self.config.ajet.data.max_prompt_length
 
         # check
         assert len(ext_steps) == len(
             input_ids_len
         ), "length of ext_steps and input_ids_len should be equal"
         assert (
-            split_prompt_reponse_index != -1
-        ), "split_prompt_reponse_index should not be -1, at least one message should be in the context"
+            split_prompt_response_index != -1
+        ), "split_prompt_response_index should not be -1, at least one message should be in the context"
         position_ids = compute_position_id_with_mask(torch.tensor(attention_mask)).tolist()
 
         # sperate prompt and response
-        prompt_ids = input_ids[:split_prompt_reponse_index]
-        prompt_attention_mask = attention_mask[:split_prompt_reponse_index]
-        prompt_position_ids = position_ids[:split_prompt_reponse_index]
-        prompt_loss_mask = loss_mask[:split_prompt_reponse_index]
-        prompt_logprobs = input_logprobs[:split_prompt_reponse_index]
-
-        response_ids = input_ids[split_prompt_reponse_index:]
-        response_attention_mask = attention_mask[split_prompt_reponse_index:]
-        response_position_ids = position_ids[split_prompt_reponse_index:]
-        response_loss_mask = loss_mask[split_prompt_reponse_index:]
-        response_logprobs = input_logprobs[split_prompt_reponse_index:]
+        prompt_ids = input_ids[:split_prompt_response_index]
+        prompt_attention_mask = attention_mask[:split_prompt_response_index]
+        prompt_position_ids = position_ids[:split_prompt_response_index]
+        prompt_loss_mask = loss_mask[:split_prompt_response_index]
+        prompt_logprobs = input_logprobs[:split_prompt_response_index]
+
+        response_ids = input_ids[split_prompt_response_index:]
+        response_attention_mask = attention_mask[split_prompt_response_index:]
+        response_position_ids = position_ids[split_prompt_response_index:]
+        response_loss_mask = loss_mask[split_prompt_response_index:]
+        response_logprobs = input_logprobs[split_prompt_response_index:]
 
         tracker_tokenized = {}
         tracker_tokenized["input_ids"] = input_ids
diff --git a/ajet/task_rollout/native_parallel_worker.py b/ajet/task_rollout/native_parallel_worker.py
@@ -606,6 +606,7 @@ def trajectories_to_samples(self, tracker_array: List[BaseContextTracker]) -> Li
             except Exception as e:
                 raise e
             finally:
+                logger.bind(exception=True).exception("Error during tracker.tokenize()")  # for debugging
                 tracker.generate_log(global_step=self.current_global_steps)
                 if os.environ.get("BEST_LOGGER_PATH", None) and os.environ.get(
                     "AJET_DEBUG", None