alibaba
diff --git a/‎docs/source/models/hstu_match.md‎
Lines changed: 12 additions & 0 deletions b/‎docs/source/models/hstu_match.md‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎scripts/ci/ci_data.sh‎
Lines changed: 2 additions & 2 deletions b/‎scripts/ci/ci_data.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tzrec/models/hstu.py‎
Lines changed: 15 additions & 0 deletions b/‎tzrec/models/hstu.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎tzrec/models/hstu_test.py‎
Lines changed: 32 additions & 1 deletion b/‎tzrec/models/hstu_test.py‎
Lines changed: 32 additions & 1 deletion
diff --git a/‎tzrec/modules/gr/hstu_transducer.py‎
Lines changed: 19 additions & 0 deletions b/‎tzrec/modules/gr/hstu_transducer.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎tzrec/modules/gr/positional_encoder.py‎
Lines changed: 5 additions & 0 deletions b/‎tzrec/modules/gr/positional_encoder.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎tzrec/ops/_pytorch/pt_position.py‎
Lines changed: 14 additions & 5 deletions b/‎tzrec/ops/_pytorch/pt_position.py‎
Lines changed: 14 additions & 5 deletions
@@ -104,6 +104,12 @@ feature_configs {
         }
     }
 }
+feature_configs {
+    raw_feature {
+        feature_name: "request_time"
+        expression: "user:request_time"
+    }
+}
 model_config {
     feature_groups {
         group_name: "contextual"
@@ -140,6 +146,11 @@ model_config {
         feature_names: "uih_seq__action_timestamp"
         group_type: JAGGED_SEQUENCE
     }
+    feature_groups {
+        group_name: "query_time"
+        feature_names: "request_time"
+        group_type: DEEP
+    }
     hstu_match {
         user_tower {
             input: "uih"
@@ -221,6 +232,7 @@ model_config {
   - uih_action: 用户历史交互的行为事件序列，注: 该行为事件按位存储，如 expr, click, add, buy 四个行为，则一般 expr=0, click=1, add=2, buy=4；类型为 JAGGED_SEQUENCE，当 `uih_preprocessor.action_encoder` 配置时必填
   - uih_watchtime: 用户历史交互的行为时长序列；类型为 JAGGED_SEQUENCE，当 action encoder 需要 watchtime 时必填
   - uih_timestamp: 用户历史交互的行为时间戳序列；类型为 JAGGED_SEQUENCE，当 `positional_encoder.use_time_encoding=true` 时必填
+  - query_time: 每行一个标量的请求时间 raw 特征 (需与 uih_timestamp 同单位)；类型为 DEEP，可选。配置后时间编码以请求时间为基准 (`ts_gap = query_time - 行为时间戳`)，否则回退到最后一个 UIH 行为时间
 
   **group_name 不能变**，user_tower/item_tower 通过 group_name 索引对应的 feature_group
 
 
@@ -10,7 +10,7 @@ wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-rtp-eval-c4
 wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-mot-1k-train-c4096-s100-e28061f3c88f543b9e18f40be6ddb94d.parquet -O data/test/kuairand-mot-1k-train-c4096-s100.parquet
 wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-mot-1k-eval-c4096-s100-f185f38e3b4a49cb791d2e4302087a1f.parquet -O data/test/kuairand-mot-1k-eval-c4096-s100.parquet
 # kuairand-1k-match (HSTUMatch integration test fixtures)
-wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-match-train-c4096-s100-f1892eabc70ae3407afe9ff5bca8cb5f.parquet -O data/test/kuairand-1k-match-train-c4096-s100.parquet
-wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-match-eval-c4096-s100-e4ca5e15d157efa723041cd05c127228.parquet -O data/test/kuairand-1k-match-eval-c4096-s100.parquet
+wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-match-train-c4096-s100-aa77964ed7f50ca30645f8dd08dbf10d.parquet -O data/test/kuairand-1k-match-train-c4096-s100.parquet
+wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-match-eval-c4096-s100-8678a3ac699fb08f0602f4c06cef2edf.parquet -O data/test/kuairand-1k-match-eval-c4096-s100.parquet
 wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-match-item-gl-3d459148303acd9f838da108efcc40e5.txt -O data/test/kuairand-1k-match-item-gl.txt
 wget https://tzrec.oss-accelerate.aliyuncs.com/data/test/kuairand-1k-match-item-c1-8dcadabdc3e9049ed9c2250565b4b134.parquet -O data/test/kuairand-1k-match-item-c1.parquet
@@ -96,6 +96,17 @@ def __init__(
             contextual_feature_dim = contextual_dims[0]
             max_contextual_seq_len = len(contextual_dims)
 
+        # Optional `query_time` DEEP group: per-row request-time anchor for the
+        # HSTU time bias (absent -> anchor on the last UIH timestamp).
+        query_time_key = next(
+            (
+                feature_group.group_name
+                for feature_group in feature_groups
+                if feature_group.group_name == "query_time"
+            ),
+            "",
+        )
+
         self._hstu_encoder: HSTUMatchEncoder = HSTUMatchEncoder(
             uih_embedding_dim=embedding_group.group_total_dim(
                 f"{tower_config.input}.sequence"
@@ -105,6 +116,7 @@ def __init__(
             contextual_group_name=contextual_group_name,
             scaling_seqlen=tower_config.max_seq_len,
             is_inference=False,
+            query_time_key=query_time_key,
             **config_to_kwargs(tower_config.hstu),
         )
         if self._output_dim > 0:
@@ -266,6 +278,9 @@ class HSTUMatch(MatchModel):
           UIHPreprocessor's action_encoder and the HSTU positional
           encoder's time bias. Required when `uih_preprocessor.action_encoder`
           is configured.
+        - "query_time" (optional, DEEP): a single per-row scalar request-time
+          raw feature used as the HSTU time-bias anchor; absent, the anchor
+          falls back to the last UIH timestamp.
 
     User tower returns the last-position UIH embedding per user; it is compared
     against candidate embeddings via the configured similarity at both train and
 
@@ -14,7 +14,7 @@
 import torch
 from hypothesis import Verbosity, assume, given, settings
 from hypothesis import strategies as st
-from torchrec import JaggedTensor, KeyedJaggedTensor
+from torchrec import JaggedTensor, KeyedJaggedTensor, KeyedTensor
 
 from tzrec.datasets.utils import BASE_DATA_GROUP, CAND_POS_LENGTHS, Batch
 from tzrec.features.feature import create_features
@@ -43,6 +43,10 @@ def _build_model(device: torch.device) -> HSTUMatch:
     dim / `embedding_name` so the two flattened features share one
     embedding table. `uih_seq` also carries the `historical_ts` raw
     sub-feature for the timestamp dense path.
+
+    Time encoding is on, with a scalar ``request_time`` raw feature exposed
+    through a ``query_time`` DEEP group — the per-row time-bias anchor
+    (mirrors the production config).
     """
     feature_cfgs = [
         feature_pb2.FeatureConfig(
@@ -84,6 +88,11 @@ def _build_model(device: torch.device) -> HSTUMatch:
             )
         ),
     ]
+    feature_cfgs.append(
+        feature_pb2.FeatureConfig(
+            raw_feature=feature_pb2.RawFeature(feature_name="request_time")
+        )
+    )
     features = create_features(feature_cfgs)
     feature_groups = [
         model_pb2.FeatureGroupConfig(
@@ -102,6 +111,13 @@ def _build_model(device: torch.device) -> HSTUMatch:
             group_type=model_pb2.FeatureGroupType.JAGGED_SEQUENCE,
         ),
     ]
+    feature_groups.append(
+        model_pb2.FeatureGroupConfig(
+            group_name="query_time",
+            feature_names=["request_time"],
+            group_type=model_pb2.FeatureGroupType.DEEP,
+        )
+    )
     model_config = model_pb2.ModelConfig(
         feature_groups=feature_groups,
         hstu_match=match_model_pb2.HSTUMatch(
@@ -120,6 +136,8 @@ def _build_model(device: torch.device) -> HSTUMatch:
                     attn_num_layers=2,
                     positional_encoder=module_pb2.GRPositionalEncoder(
                         num_position_buckets=512,
+                        num_time_buckets=512,
+                        use_time_encoding=True,
                     ),
                     input_preprocessor=module_pb2.GRInputPreprocessor(
                         uih_preprocessor=module_pb2.GRUIHPreprocessor(),
@@ -160,6 +178,9 @@ def _build_batch(device: torch.device) -> Batch:
     Candidates: row 0 = [pos_0]; row 1 (last) = [pos_1, simple_neg_0,
     simple_neg_1] -- the shared simple-neg pool sits in the last row's suffix.
     pos_lengths = [1, 1].
+
+    A per-row ``request_time`` dense scalar (strictly after each user's last
+    UIH event at ts 3 / 7) is included as the time-bias anchor.
     """
     sparse_feature = KeyedJaggedTensor.from_lengths_sync(
         keys=["uih_seq__video_id", "cand_seq__video_id"],
@@ -172,7 +193,14 @@ def _build_batch(device: torch.device) -> Batch:
             lengths=torch.tensor([3, 4]),
         ),
     }
+    dense_features = {
+        BASE_DATA_GROUP: KeyedTensor.from_tensor_list(
+            keys=["request_time"],
+            tensors=[torch.tensor([[100.0], [100.0]])],
+        )
+    }
     return Batch(
+        dense_features=dense_features,
         sparse_features={BASE_DATA_GROUP: sparse_feature},
         sequence_dense_features=sequence_dense_features,
         jagged_labels={
@@ -217,6 +245,9 @@ def test_hstu_match(self, graph_type, kernel, device_str) -> None:
 
         device = torch.device(device_str)
         hstu = _build_model(device=device)
+        # The query_time DEEP group is detected and threaded as the per-row
+        # time-bias anchor (request-time anchoring, not the last UIH event).
+        self.assertEqual(hstu.user_tower._hstu_encoder._query_time_key, "query_time")
         hstu.set_kernel(kernel)
         batch = _build_batch(device=device)
 
 
@@ -71,8 +71,13 @@ def __init__(
         attn_truncation_split_layer: int = 0,
         attn_truncation_tail_len: int = 0,
         name: str = "",
+        query_time_key: str = "",
     ) -> None:
         super().__init__(is_inference=is_inference)
+        # Grouped-feature key of the per-row request time used as the time-bias
+        # anchor. Empty -> anchor on the last in-sequence timestamp (canonical
+        # HSTU / DLRM-HSTU, which concatenates the candidate request time).
+        self._query_time_key: str = query_time_key
         self._input_preprocessor: InputPreprocessor = create_input_preprocessor(
             input_preprocessor,
             uih_embedding_dim=uih_embedding_dim,
@@ -129,6 +134,13 @@ def _preprocess(
                 output_num_targets,
             ) = self._input_preprocessor(grouped_features)
 
+        # Per-row request time anchor (HSTUMatch). Read from grouped_features
+        # rather than the preprocessor tuple so the shared ranking path is
+        # untouched. `[B, 1]` raw values -> the op reshapes to `[B]`.
+        query_time: Optional[torch.Tensor] = None
+        if self._query_time_key != "":
+            query_time = grouped_features[self._query_time_key]
+
         with record_function("hstu_positional_encoder"):
             if self._positional_encoder is not None:
                 output_seq_embeddings = self._positional_encoder(
@@ -138,6 +150,7 @@ def _preprocess(
                     seq_timestamps=output_seq_timestamps,
                     seq_embeddings=output_seq_embeddings,
                     num_targets=output_num_targets,
+                    query_time=query_time,
                 )
 
         output_seq_embeddings = torch.nn.functional.dropout(
@@ -468,6 +481,10 @@ class HSTUMatchEncoder(_HSTUPipelineBase):
         is_inference (bool): whether to run in inference mode.
         attn_truncation_split_layer (int): see `HSTUTransducer`.
         attn_truncation_tail_len (int): see `HSTUTransducer`.
+        query_time_key (str): grouped-feature key of the per-row request time
+            used as the time-bias anchor. Empty (default) anchors on the last
+            UIH timestamp; pass a scalar request-time group to anchor on the
+            actual request time (decoupled from UIH staleness).
     """
 
     def __init__(
@@ -487,6 +504,7 @@ def __init__(
         attn_truncation_split_layer: int = 0,
         attn_truncation_tail_len: int = 0,
         name: str = "",
+        query_time_key: str = "",
     ) -> None:
         super().__init__(
             uih_embedding_dim=uih_embedding_dim,
@@ -504,6 +522,7 @@ def __init__(
             attn_truncation_split_layer=attn_truncation_split_layer,
             attn_truncation_tail_len=attn_truncation_tail_len,
             name=name,
+            query_time_key=query_time_key,
         )
         self._output_postprocessor: OutputPostprocessor = create_output_postprocessor(
             output_postprocessor, embedding_dim=stu["embedding_dim"]
 
@@ -76,6 +76,7 @@ def forward(
         seq_timestamps: torch.Tensor,
         seq_embeddings: torch.Tensor,
         num_targets: Optional[torch.Tensor],
+        query_time: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         """Forward the module.
 
@@ -86,6 +87,9 @@ def forward(
             seq_timestamps (torch.Tensor): input sequence timestamps.
             seq_embeddings (torch.Tensor): input sequence embeddings.
             num_targets (int): number of targets.
+            query_time (torch.Tensor, optional): per-row request time used as
+                the time-bias anchor (``ts_gap = query_time - timestamp``).
+                When ``None``, the last in-sequence timestamp is used.
 
         Returns:
             torch.Tensor: output sequence embedding with position embedding.
@@ -106,6 +110,7 @@ def forward(
                 time_bucket_fn=self._time_bucket_fn,
                 time_bucket_increments=self._time_bucket_increments,
                 kernel=self.kernel(),
+                query_time=query_time,
             )
         else:
             seq_embeddings = add_positional_embeddings(
 
@@ -93,6 +93,7 @@ def pytorch_add_timestamp_positional_embeddings(
     interleave_targets: bool,
     time_bucket_fn: str,
     time_bucket_increments: float,
+    query_time: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
     max_pos_ind = pos_embeddings.size(0)
     # position encoding
@@ -115,11 +116,19 @@ def pytorch_add_timestamp_positional_embeddings(
         max_lengths=[max_seq_len],
         padding_value=0.0,
     ).squeeze(-1)
-    query_time = torch.gather(
-        timestamps,
-        dim=1,
-        index=(seq_lengths - 1).unsqueeze(1).clamp(min=0).to(torch.int64),
-    )
+    if query_time is None:
+        # No explicit anchor: use the last in-sequence timestamp. For
+        # DLRM-HSTU the candidate is concatenated last, so this is the
+        # request time; for any UIH-only sequence it is the most-recent event.
+        query_time = torch.gather(
+            timestamps,
+            dim=1,
+            index=(seq_lengths - 1).unsqueeze(1).clamp(min=0).to(torch.int64),
+        )
+    else:
+        # Explicit per-row request time (HSTUMatch two-tower: no candidate is
+        # concatenated, so the anchor cannot be derived from the sequence).
+        query_time = query_time.view(-1, 1).to(timestamps.dtype)
     ts = query_time - timestamps
     ts = ts + time_delta
     ts = ts.clamp(min=1e-6) / time_bucket_increments