We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 814d267 commit eb57a00 (Copy full SHA for eb57a00)
1 file changed
lightllm/server/httpserver/manager.py
@@ -704,7 +704,7 @@ async def _wait_to_token_package(
704
prompt_cache_len = metadata.pop("prompt_cache_len", 0)
705
cpu_prompt_cache_len = metadata.pop("cpu_prompt_cache_len", 0)
706
disk_prompt_cache_len = metadata.pop("disk_prompt_cache_len", 0)
707
- metadata["prompt_cache_len"] = prompt_cache_len
+ metadata["prompt_cache_len"] = prompt_cache_len + cpu_prompt_cache_len + disk_prompt_cache_len
708
sub_req_id_to_mtp_accepted_token_num[sub_req_id] = metadata.get("mtp_accepted_token_num", 0)
709
710
if is_first_token:
0 commit comments