Skip to content

Commit d731723

Browse files
Update test to stop referencing metrics by index and to drop the result-count assertions
Signed-off-by: BenjaminBraunDev <benjaminbraun@google.com>
1 parent 8a268e3 commit d731723

1 file changed

Lines changed: 22 additions & 8 deletions

File tree

tests/unittest/llmapi/test_llm_pytorch.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1739,18 +1739,22 @@ def test_llm_context_only_timed_out(transceiver_runtime):
17391739
disaggregated_params=disaggregated_params):
17401740
print(output)
17411741

1742+
# Wait until the context-only request has allocated KV cache blocks
17421743
max_retries = 10
1744+
all_results = []
17431745
for _ in range(max_retries):
17441746
results = llm.get_stats(2)
1745-
if len(results) == 1:
1747+
all_results.extend(results)
1748+
if all_results and all_results[-1]["kvCacheStats"]["usedNumBlocks"] > 0:
17461749
break
17471750
time.sleep(1)
17481751
else:
17491752
pytest.fail(
1750-
f"Failed to get stats with len==1 after {max_retries} retries")
1753+
f"Context-only KV cache blocks not allocated after {max_retries} retries"
1754+
)
1755+
results = all_results
17511756

1752-
assert len(results) == 1
1753-
context_only_used_num_blocks = results[0]["kvCacheStats"]["usedNumBlocks"]
1757+
context_only_used_num_blocks = results[-1]["kvCacheStats"]["usedNumBlocks"]
17541758
print(f"Context only used num blocks: {context_only_used_num_blocks}")
17551759

17561760
# Sleep 5 seconds to allow context only request to time out
@@ -1760,11 +1764,21 @@ def test_llm_context_only_timed_out(transceiver_runtime):
17601764
for output in llm.generate(prompts0, sampling_params=sampling_params):
17611765
print(output)
17621766

1763-
# Get number of allocated blocks
1764-
results = llm.get_stats(2)
1765-
assert len(results) == 1
1766-
final_used_num_blocks = results[0]["kvCacheStats"]["usedNumBlocks"]
1767+
# Wait until KV cache blocks are released (usedNumBlocks == 0)
1768+
max_retries = 10
1769+
all_results = []
1770+
for _ in range(max_retries):
1771+
results = llm.get_stats(2)
1772+
all_results.extend(results)
1773+
if all_results and all_results[-1]["kvCacheStats"]["usedNumBlocks"] == 0:
1774+
break
1775+
time.sleep(1)
1776+
else:
1777+
pytest.fail(
1778+
f"KV cache blocks not released after {max_retries} retries")
1779+
results = all_results
17671780

1781+
final_used_num_blocks = results[-1]["kvCacheStats"]["usedNumBlocks"]
17681782
assert final_used_num_blocks == 0
17691783

17701784

0 commit comments

Comments
 (0)