Skip to content

Commit b275e52

Browse files
committed
up
1 parent d208a47 commit b275e52

2 files changed

Lines changed: 6 additions & 6 deletions

File tree

fastdeploy/cache_manager/prefix_cache_manager.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -881,7 +881,7 @@ def request_match_blocks(self, task: Request, block_size, *args):
881881
read_storage_task = ReadStorageTask(
882882
task_id=req_id,
883883
keys=no_match_block_keys,
884-
token_ids=input_token_ids,
884+
token_ids=input_token_ids if self.kvcache_storage_backend == "attention_store" else None,
885885
gpu_block_ids=gpu_recv_storage_block_ids,
886886
start_read_block_idx=match_token_num // block_size,
887887
)
@@ -1162,7 +1162,7 @@ def write_cache_to_storage(self, request: Request):
11621162
write_storage_task = WriteStorageTask(
11631163
task_id=req_id,
11641164
keys=keys,
1165-
token_ids=input_token_ids,
1165+
token_ids=input_token_ids if self.kvcache_storage_backend == "attention_store" else None,
11661166
gpu_block_ids=gpu_block_ids,
11671167
)
11681168
logger.debug(f"issue write storage task: {write_storage_task}")
@@ -1241,7 +1241,7 @@ def write_cache_to_storage_decode(self, request: Request):
12411241
write_storage_task = WriteStorageTask(
12421242
task_id=req_id,
12431243
keys=keys,
1244-
token_ids=input_token_ids,
1244+
token_ids=input_token_ids if self.kvcache_storage_backend == "attention_store" else None,
12451245
gpu_block_ids=gpu_block_ids,
12461246
)
12471247

@@ -2171,7 +2171,7 @@ def recv_data_transfer_result(self):
21712171
event_type = data[0]
21722172

21732173
if event_type.value == CacheStatus.STORAGE2GPU.value:
2174-
logger.info(f"recv_data_transfer_result: {data}")
2174+
logger.debug(f"recv_data_transfer_result: {data}")
21752175
task_id, hash_keys, block_ids = data[1:]
21762176
if task_id not in self.storage_prefetch_block_ids:
21772177
self.storage_prefetch_block_ids[task_id] = []
@@ -2182,7 +2182,7 @@ def recv_data_transfer_result(self):
21822182
if task_id in self.task_prefetch_event:
21832183
self.task_prefetch_event[task_id].set()
21842184
elif event_type.value == CacheStatus.GPU2STORAGE.value:
2185-
logger.info(f"recv_data_transfer_result: {data}")
2185+
logger.debug(f"recv_data_transfer_result: {data}")
21862186
task_id, hash_keys, block_ids = data[1:]
21872187
if task_id in self.task_write_back_event:
21882188
self.task_write_back_event[task_id].set()

fastdeploy/envs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ def _validate_split_kv_size(value: int) -> int:
254254
# When v1 is enabled, the legacy /clear_load_weight and /update_model_weight
255255
# will adopt this new communication pattern.
256256
"FD_ENABLE_V1_UPDATE_WEIGHTS": lambda: bool(int(os.getenv("FD_ENABLE_V1_UPDATE_WEIGHTS", "0"))),
257-
# Whether to save the cache of output token for preemted request to radix tree or storage.
257+
# Whether to save the cache of output token for preempted request to storage.
258258
"FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST": lambda: bool(
259259
int(os.getenv("FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST", "1"))
260260
),

0 commit comments

Comments
 (0)