diff --git a/ucm/integration/vllm/blend_connector.py b/ucm/integration/vllm/blend_connector.py index 39e5fdb06..d18effcd9 100644 --- a/ucm/integration/vllm/blend_connector.py +++ b/ucm/integration/vllm/blend_connector.py @@ -133,7 +133,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): self.enable_blend = True self.chunk_end_token_id = blend_config["chunk_end_token_id"] else: - raise "UCMBlendConnector init failed, please check your config" + raise ValueError("UCMBlendConnector init failed, please check your config") self.requests_blend_meta: dict[str, BlendRequestMeta] = {} self.cos_sin_cache: torch.Tensor = None @@ -327,7 +327,7 @@ def _post_process_chunk_cache(self, k_cache, vllm_ids, positions) -> None: post process loaded chunk kcache """ if self.cos_sin_cache is None: - raise "Please call setup model first." + raise RuntimeError("Please call setup model first.") # triton kernl for block-wise delta rope block_wise_rope_forward(k_cache, vllm_ids, positions, self.cos_sin_cache) @@ -336,7 +336,9 @@ def _register_cos_sin_cache(self, model: "Model") -> None: rotary_emb = model.model.layers[0].self_attn.rotary_emb self.cos_sin_cache = rotary_emb.cos_sin_cache except Exception: - raise "get cos_sin_cache from model failed! current not implemented for this model" + raise RuntimeError( + "get cos_sin_cache from model failed! current not implemented for this model" + ) def setup_model(self, model: "Model") -> None: self._register_cos_sin_cache(model)