|
| 1 | +"""Tests for KV cache offloading configuration.""" |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +from aphrodite.config import AphroditeConfig, CacheConfig, KVTransferConfig, ParallelConfig |
| 6 | + |
| 7 | +pytestmark = pytest.mark.cpu_test |
| 8 | + |
| 9 | + |
@pytest.mark.parametrize(
    "kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
    [
        # Single rank, native backend: the expected value is the size in bytes.
        ("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * (1 << 30)),
        # Four ranks (tp=2, pp=2): 8.0 GiB / (2 * 2) = 2.0 GiB, in bytes.
        ("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * (1 << 30) / 4),
        # Single rank, lmcache backend: the expected value stays in GiB.
        ("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
        # Four ranks (tp=2, pp=2): 8.0 GiB / (2 * 2) = 2.0 GiB per rank.
        ("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
        # No offloading requested: no KV transfer config is expected.
        (None, None, 1, 1, None, None),
    ],
)
def test_kv_connector(kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes):
    """Check the KV transfer config produced for each offloading backend.

    An ``AphroditeConfig`` is built from the parametrized offloading backend,
    offloading size and parallel sizes. When a backend is expected, a
    ``KVTransferConfig`` carrying one pre-existing extra-config entry is
    supplied so the test can check whether that entry is still present
    afterwards.

    Note that ``expected_bytes`` is compared against ``kv_bytes_per_rank`` for
    the ``native`` backend and against ``lmcache.max_local_cpu_size`` for the
    ``lmcache`` backend.
    """
    # Seed a transfer config only for the cases that expect a connector.
    seeded_transfer_config = None
    if expected_backend is not None:
        seeded_transfer_config = KVTransferConfig(
            kv_connector_extra_config={"existing_key": "existing_value"},
        )

    cache_config = CacheConfig(
        kv_offloading_backend=kv_offloading_backend,
        kv_offloading_size=kv_offloading_size,
    )
    parallel_config = ParallelConfig(
        tensor_parallel_size=tp,
        pipeline_parallel_size=pp,
    )
    aphrodite_config = AphroditeConfig(
        cache_config=cache_config,
        kv_transfer_config=seeded_transfer_config,
        parallel_config=parallel_config,
    )

    # Without an offloading backend there must be no KV transfer config.
    if expected_backend is None:
        assert aphrodite_config.kv_transfer_config is None
        return

    resolved = aphrodite_config.kv_transfer_config
    extra_config = resolved.kv_connector_extra_config

    assert resolved.kv_connector == expected_backend
    assert resolved.kv_role == "kv_both"

    if kv_offloading_backend == "native":
        assert extra_config["kv_bytes_per_rank"] == expected_bytes
        assert extra_config["num_cpu_blocks"] == 0
        # The pre-existing extra-config entry is expected to be kept.
        assert extra_config["existing_key"] == "existing_value"
    elif kv_offloading_backend == "lmcache":
        assert extra_config["lmcache.local_cpu"] is True
        assert extra_config["lmcache.max_local_cpu_size"] == expected_bytes
        # The pre-existing extra-config entry is expected to be gone.
        assert "existing_key" not in extra_config
0 commit comments