Skip to content

Commit 9785177

Browse files
authored
[Usage] Move use layerwise and hit ratio into config file (#784)
## Purpose Move the `use_layerwise` and `hit_ratio` options out of `kv_connector_extra_config` and into the UCM config file. ## Modifications Load the UCM config inside `UCMConnector` and use it to choose the actual connector. ## Test Co-authored-by: harrisonyhq <harrisonyhq@users.noreply.github.com>
1 parent 866ba78 commit 9785177

5 files changed

Lines changed: 18 additions & 32 deletions

File tree

docs/source/user-guide/prefix-cache/pipeline_store.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,16 @@ vllm serve Qwen/Qwen2.5-14B-Instruct \
151151
"kv_connector_extra_config": {"UCM_CONFIG_FILE": "/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml"}
152152
}'
153153
```
154-
You can also use the Layerwise Connector by adding `"use_layerwise": true` to the `kv_connector_extra_config`.
154+
You can also use the Layerwise Connector by setting `use_layerwise: true` in the YAML file that `UCM_CONFIG_FILE` points to, at the same level as `ucm_connectors`.
155155
For example:
156156

157-
```bash
158-
"kv_connector_extra_config": {
159-
"use_layerwise": true,
160-
"UCM_CONFIG_FILE": "/home/qiuyuhao1/unified-cache-management/examples/ucm_config_example.yaml"
161-
}
157+
```yaml
158+
ucm_connectors:
159+
- ucm_connector_name: "UcmPipelineStore"
160+
ucm_connector_config:
161+
store_pipeline: "Cache|Posix"
162+
storage_backends: "/mnt/test"
163+
use_layerwise: true
162164
```
163165
164166
**⚠️ Make sure to replace `"/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml"` with your actual config file path.**

examples/deployments/scripts/vllm/config.properties

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ enable_ascend_scheduler=false
9191
#****************************************
9292
# set true to enable UCM
9393
ucm_enable=true
94-
use_layerwise=false
9594
ucm_config_yaml_path=/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml
9695
export ENABLE_UCM_PATCH=1
9796
export ENABLE_SPARSE=FALSE

examples/deployments/scripts/vllm/run_vllm.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ start_server() {
3737
echo "enable_prefix_caching = $enable_prefix_caching"
3838
echo "async_scheduling = $async_scheduling"
3939
echo "graph_mode = $graph_mode"
40-
echo "use_layerwise = $use_layerwise"
4140
if [[ "$ucm_enable" == "true" ]]; then
4241
echo "ucm_config_file = $ucm_config_yaml_path"
4342
fi
@@ -109,7 +108,6 @@ start_server() {
109108
\"kv_connector_module_path\":\"ucm.integration.vllm.ucm_connector\",
110109
\"kv_role\":\"kv_both\",
111110
\"kv_connector_extra_config\":{
112-
\"use_layerwise\": $use_layerwise,
113111
\"UCM_CONFIG_FILE\":\"$ucm_config_yaml_path\"
114112
}
115113
}"

examples/deployments/scripts/vllm/run_vllm_dp.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ start_server() {
8282
echo "enable_prefix_caching = $enable_prefix_caching"
8383
echo "async_scheduling = $async_scheduling"
8484
echo "graph_mode = $graph_mode"
85-
echo "use_layerwise = $use_layerwise"
8685
if [[ "$ucm_enable" == "true" ]]; then
8786
echo "ucm_config_file = $ucm_config_yaml_path"
8887
fi
@@ -155,7 +154,6 @@ start_server() {
155154
\"kv_connector_module_path\":\"ucm.integration.vllm.ucm_connector\",
156155
\"kv_role\":\"kv_both\",
157156
\"kv_connector_extra_config\":{
158-
\"use_layerwise\":$use_layerwise,
159157
\"UCM_CONFIG_FILE\":\"$ucm_config_yaml_path\"
160158
}
161159
}"

ucm/integration/vllm/ucm_connector.py

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,6 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
237237
ucm_config = Config(vllm_config.kv_transfer_config)
238238
self.engine_id = vllm_config.kv_transfer_config.engine_id
239239
self.launch_config = ucm_config.get_config()
240-
logger.info(f"self.launch_config: {self.launch_config}")
241240
self.connector_configs = self.launch_config.get("ucm_connectors", [])
242241
self.enable_event_sync = self.launch_config.get("enable_event_sync", True)
243242
assert len(self.connector_configs) > 0, "no storage connector name in config."
@@ -853,11 +852,7 @@ def wait_for_save(self) -> None:
853852
class UCMCPConnector(UCMLayerWiseConnector):
854853
def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
855854
super().__init__(vllm_config, role)
856-
self.use_layerwise = (
857-
self._vllm_config.kv_transfer_config.kv_connector_extra_config.get(
858-
"use_layerwise", False
859-
)
860-
)
855+
self.use_layerwise = self.launch_config.get("use_layerwise", False)
861856

862857
try:
863858
from vllm.distributed import get_dcp_group, get_pcp_group
@@ -1120,22 +1115,21 @@ class UCMConnector(KVConnectorBase_V1):
11201115
def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
11211116
super().__init__(vllm_config=vllm_config, role=role)
11221117
self.connector: KVConnectorBase_V1
1123-
# TODO new conn by config
1118+
ucm_config = Config(vllm_config.kv_transfer_config)
1119+
self.launch_config = ucm_config.get_config()
1120+
logger.info(f"self.launch_config: {self.launch_config}")
1121+
11241122
use_layerwise = (
1125-
self._vllm_config.kv_transfer_config.kv_connector_extra_config.get(
1126-
"use_layerwise", False
1127-
)
1123+
self.launch_config.get("use_layerwise", False)
1124+
if self.launch_config is not None
1125+
else False
11281126
)
11291127
pp_enabled = self._vllm_config.parallel_config.pipeline_parallel_size > 1
11301128
if pp_enabled and not use_layerwise:
11311129
raise RuntimeError(
11321130
"Pipeline parallelism is not supported in UCMDirectConnector, please set use_layerwise=True."
11331131
)
1134-
if (
1135-
self._vllm_config.kv_transfer_config is not None
1136-
and "hit_ratio"
1137-
in self._vllm_config.kv_transfer_config.kv_connector_extra_config
1138-
):
1132+
if self.launch_config is not None and "hit_ratio" in self.launch_config:
11391133
self.connector = UCMMockConnector(vllm_config, role)
11401134
elif (
11411135
hasattr(self._vllm_config.parallel_config, "prefill_context_parallel_size")
@@ -1147,12 +1141,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
11471141
> 1
11481142
):
11491143
self.connector = UCMCPConnector(vllm_config, role)
1150-
elif (
1151-
self._vllm_config.kv_transfer_config is not None
1152-
and self._vllm_config.kv_transfer_config.kv_connector_extra_config.get(
1153-
"use_layerwise", False
1154-
)
1155-
):
1144+
elif use_layerwise:
11561145
self.connector = UCMLayerWiseConnector(vllm_config, role)
11571146
else:
11581147
self.connector = UCMDirectConnector(vllm_config, role)

0 commit comments

Comments
 (0)