@@ -531,7 +531,8 @@ def match_prefix(
531531 # Step 2: Match Storage (if enabled and not skipped)
532532 if not skip_storage and self ._storage_scheduler and remaining_hashes :
533533 storage_matches = self ._match_storage (remaining_hashes )
534- result .storage_nodes = self .prepare_prefetch_metadata (storage_matches )
534+ start_node = matched_nodes [- 1 ] if matched_nodes else None
535+ result .storage_nodes = self .prepare_prefetch_metadata (storage_matches , start_node = start_node )
535536
536537 # Step 3: Increment ref count for matched blocks(only scheduling phase)
537538 if skip_storage :
@@ -562,11 +563,13 @@ def _match_storage(self, hash_values: List[str]) -> List[str]:
562563 consecutive prefix of hashes that are all present (prefix semantics
563564 are required because a cache miss in the middle breaks prefetch continuity).
564565
565- Uses rank=0 key as a probe: if rank 0 has the block, all ranks
566- are assumed to have it (all ranks write storage synchronously).
566+ Probes both rank=0 "key" and "value" kinds: a block is considered present
567+ only when both exist. This avoids false positives from partial writes where
568+ only one kind was stored, and prevents LRU asymmetry (probing only "key"
569+ would keep it hot while "value" gets evicted by Mooncake).
567570
568571 Storage key format (see cache_utils.storage_key_for_block):
569- "{hash_value}_0_key"
572+ "{hash_value}_0_key" / "{hash_value}_0_value"
570573
571574 Args:
572575 hash_values: List of block hash values to check, in prefix order.
@@ -584,21 +587,27 @@ def _match_storage(self, hash_values: List[str]) -> List[str]:
584587 logger .warning ("_match_storage: storage scheduler disconnected, skipping storage match" )
585588 return []
586589
587- # Build probe keys using rank=0 (same format as storage_key_for_block)
588- probe_keys = [storage_key_for_block (h , 0 , "key" ) for h in hash_values ]
590+ # Probe both key and value kinds for rank=0.
591+ # Interleaved: [h0_key, h0_value, h1_key, h1_value, ...]
592+ probe_keys = []
593+ for h in hash_values :
594+ probe_keys .append (storage_key_for_block (h , 0 , "key" ))
595+ probe_keys .append (storage_key_for_block (h , 0 , "value" ))
589596
590- # batch_exists returns a bool list aligned with probe_keys
591597 exist_flags = self ._storage_scheduler .batch_exists (probe_keys )
592598
593- # Return only the leading consecutive hit run
599+ # A block is present only when both key and value exist.
594600 matched = []
595- for h , exists in zip (hash_values , exist_flags ):
596- if not exists :
601+ for i , h in enumerate (hash_values ):
602+ key_ok = exist_flags [i * 2 ]
603+ val_ok = exist_flags [i * 2 + 1 ]
604+ if not (key_ok and val_ok ):
597605 break
598606 matched .append (h )
599607
600608 logger .debug (
601- f"[CacheManager] _match_storage: probing { len (probe_keys )} keys, matched hashes: { len (matched )} "
609+ f"[CacheManager] _match_storage: probing { len (hash_values )} blocks "
610+ f"({ len (probe_keys )} keys), matched={ len (matched )} "
602611 )
603612 return matched
604613 except Exception :
@@ -1001,6 +1010,7 @@ def drain_pending_prefetches(self) -> List[PendingPrefetch]:
10011010 def prepare_prefetch_metadata (
10021011 self ,
10031012 storage_hashes : List [str ],
1013+ start_node : Optional ["BlockNode" ] = None ,
10041014 ) -> Optional [List ["BlockNode" ]]:
10051015 """
10061016 Prepare metadata for storage prefetch operation.
@@ -1010,6 +1020,10 @@ def prepare_prefetch_metadata(
10101020
10111021 Args:
10121022 storage_hashes: List of storage hash values to prefetch
1023+ start_node: Node to start insertion from in the radix tree.
1024+ Must be the last matched node from find_prefix so that
1025+ the new LOADING_FROM_STORAGE nodes are attached as proper
1026+ extensions of the existing prefix chain.
10131027
10141028 Returns:
10151029 List of BlockNode objects if successful, None or empty list otherwise.
@@ -1032,17 +1046,24 @@ def prepare_prefetch_metadata(
10321046
10331047 blocks = list (zip (storage_hashes , host_block_ids ))
10341048 prefetch_nodes , wasted_block_ids = self ._radix_tree .insert (
1035- blocks = blocks , cache_status = CacheStatus .LOADING_FROM_STORAGE
1049+ blocks = blocks , cache_status = CacheStatus .LOADING_FROM_STORAGE , start_node = start_node
10361050 )
10371051 # Release any blocks that were wasted due to node reuse
10381052 if wasted_block_ids :
10391053 self ._host_pool .release (wasted_block_ids )
10401054
1041- # Register nodes in prefetch_node_map for fast status update on done
1055+ # Register only truly new LOADING_FROM_STORAGE nodes.
1056+ # insert() reuses existing nodes without updating their status, so nodes
1057+ # that were already HOST/DEVICE must be excluded — they don't need a
1058+ # storage transfer and would trigger a spurious "unexpected status" warning
1059+ # in update_storage_blocks_to_host.
1060+ actual_prefetch_nodes = []
10421061 for node in prefetch_nodes :
1043- self ._prefetch_node_map [node .block_id ] = node
1062+ if node .cache_status == CacheStatus .LOADING_FROM_STORAGE :
1063+ self ._prefetch_node_map [node .block_id ] = node
1064+ actual_prefetch_nodes .append (node )
10441065
1045- return prefetch_nodes
1066+ return actual_prefetch_nodes
10461067 except Exception as e :
10471068 logger .error (f"prepare_prefetch_metadata error: { e } , { str (traceback .format_exc ())} " )
10481069 return []
0 commit comments