alibaba · tiankongdeguiji · Jun 8, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/tzrec/models/dlrm_hstu.py b/tzrec/models/dlrm_hstu.py
@@ -201,6 +201,10 @@ def predict(self, batch: Batch) -> Dict[str, torch.Tensor]:
         with record_function("## preprocess ##"):
             grouped_features = self.build_input(batch)
 
+        # Capture num_targets before the descending-timestamp flip below, so the
+        # output split key stays in the original (un-flipped) request order.
+        num_targets = grouped_features["candidate.sequence_length"]
+
         if not self._model_config.sequence_timestamp_is_ascending:
             # if timestamp of sequence is descending,
             # we should reverse all features
@@ -235,9 +239,7 @@ def predict(self, batch: Batch) -> Dict[str, torch.Tensor]:
                         suffix=f"_{task_name}",
                     )
                 )
-        predictions[TARGET_REPEAT_INTERLEAVE_KEY] = grouped_features[
-            "candidate.sequence_length"
-        ]
+        predictions[TARGET_REPEAT_INTERLEAVE_KEY] = num_targets
 
         return predictions
 

diff --git a/tzrec/models/dlrm_hstu_test.py b/tzrec/models/dlrm_hstu_test.py
@@ -24,6 +24,7 @@
 from tzrec.features.feature import create_features
 from tzrec.models.dlrm_hstu import DlrmHSTU
 from tzrec.models.model import TrainWrapper
+from tzrec.models.rank_model import TARGET_REPEAT_INTERLEAVE_KEY
 from tzrec.ops import Kernel
 from tzrec.protos import (
     feature_pb2,
@@ -445,6 +446,34 @@ def test_dlrm_hstu(
         self.assertEqual(predictions["logits_is_comment"].size(), (6,))
         self.assertEqual(predictions["probs_is_comment"].size(), (6,))
 
+    @unittest.skipIf(*gpu_unavailable)
+    def test_dlrm_hstu_predict_num_targets_order(self) -> None:
+        """num_targets split key must stay in input order.
+
+        ``_write_predictions`` regroups predictions per request via
+        ``cumsum(predictions[TARGET_REPEAT_INTERLEAVE_KEY])``. With
+        ``sequence_timestamp_is_ascending=False``, ``predict()`` flips features
+        (reversing request order) and flips predictions back, so the key must
+        also be un-flipped; reading it from the still-flipped
+        ``candidate.sequence_length`` returns [4, 2] for the [2, 4] test batch,
+        misassigning whole-request blocks. A ``size()`` check (== 6) can't catch it.
+        """
+        device = torch.device("cuda")
+        # candidate (cand_seq) counts in _build_batch, in input order.
+        expected_num_targets = [2, 4]
+        for ascending in (True, False):
+            model = _build_model(
+                device=device, sequence_timestamp_is_ascending=ascending
+            )
+            batch = _build_batch(device=device)
+            with torch.no_grad():
+                predictions = model.predict(batch)
+            self.assertEqual(
+                predictions[TARGET_REPEAT_INTERLEAVE_KEY].cpu().tolist(),
+                expected_num_targets,
+                msg=f"num_targets order wrong for ascending={ascending}",
+            )
+
     @unittest.skipIf(*gpu_unavailable)
     def test_dlrm_hstu_task_weight(self) -> None:
         device = torch.device("cuda")

diff --git a/tzrec/version.py b/tzrec/version.py
@@ -9,4 +9,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.2.16"
+__version__ = "1.2.17"