diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py index 743b12aab..6061269fb 100644 --- a/ucm/integration/vllm/ucm_connector.py +++ b/ucm/integration/vllm/ucm_connector.py @@ -2900,8 +2900,6 @@ def __init__( self.connector = UCMHybridLinearAttentionConnector( vllm_config, role, kv_cache_config ) - # elif use_hma: - # self.connector = UCMHMAConnector(vllm_config, role, kv_cache_config) else: self.connector = UCMDirectConnector(vllm_config, role, kv_cache_config) diff --git a/ucm/sandbox/sparse/retake/retake/longvideo_cache.py b/ucm/sandbox/sparse/retake/retake/longvideo_cache.py index d66f7affc..228684c0f 100644 --- a/ucm/sandbox/sparse/retake/retake/longvideo_cache.py +++ b/ucm/sandbox/sparse/retake/retake/longvideo_cache.py @@ -485,13 +485,10 @@ def update( position_ids = cache_kwargs.pop("position_ids", None) compression_ratio = self.budget_allocation(layer_idx) + no_compression = math.isclose(compression_ratio, 1.0, rel_tol=1e-9) # print('compression_ratio of layer %d: %.4f' % (layer_idx, compression_ratio)) - if ( - self.kvcache_compression - and self.compression_ratio < 1.0 - and compression_ratio == 1.0 - ): + if self.kvcache_compression and self.compression_ratio < 1.0 and no_compression: # Truncate the prompts directly when no compression key_states = key_states[:, :, : -self.prompt_length] value_states = value_states[:, :, : -self.prompt_length] @@ -503,7 +500,7 @@ def update( ) if ( - self.kvcache_compression and compression_ratio < 1.0 + self.kvcache_compression and compression_ratio < 1.0 and not no_compression ): # when compression is enabled query_states = cache_kwargs.pop("query_states") rotary_emb_fn = cache_kwargs.pop("rotary_emb") diff --git a/ucm/shared/trans/cuda/gdr/gdr_copy.cc b/ucm/shared/trans/cuda/gdr/gdr_copy.cc index 3af8ea562..febf2bf41 100644 --- a/ucm/shared/trans/cuda/gdr/gdr_copy.cc +++ b/ucm/shared/trans/cuda/gdr/gdr_copy.cc @@ -742,6 +742,8 @@ void GdrCopyLib::RegisterHostBuffer(void* host, size_t size) try { channel.RegisterHostBuffer(reinterpret_cast(host), size); } catch (...) { + // Ignore registration failure on existing channels, as the new registration will be + // picked up later. } }); } diff --git a/ucm/sparse/esa/retrieval/cpy/retrieval_backend.cpp b/ucm/sparse/esa/retrieval/cpy/retrieval_backend.cpp index 17d1e92af..56418e810 100644 --- a/ucm/sparse/esa/retrieval/cpy/retrieval_backend.cpp +++ b/ucm/sparse/esa/retrieval/cpy/retrieval_backend.cpp @@ -92,7 +92,7 @@ class RetrievalWorkerBackend { for (size_t i = 0; i < n_requests; ++i) { for (size_t j = 0; j < max_index_number; ++j) { int index = idx_ptr[i * max_index_number + j]; - if (index != -1) idxvec[i].push_back(index); + if (index != -1) { idxvec[i].push_back(index); } } } diff --git a/ucm/sparse/gsa_on_device/csrc/cuda/hash_retrieval/cpy/hash_retrieval_backend.cpp b/ucm/sparse/gsa_on_device/csrc/cuda/hash_retrieval/cpy/hash_retrieval_backend.cpp index fdbe9d88e..8cf1de9e1 100644 --- a/ucm/sparse/gsa_on_device/csrc/cuda/hash_retrieval/cpy/hash_retrieval_backend.cpp +++ b/ucm/sparse/gsa_on_device/csrc/cuda/hash_retrieval/cpy/hash_retrieval_backend.cpp @@ -241,7 +241,7 @@ class HashRetrievalWorkerBackend { for (size_t i = 0; i < n_requests; ++i) { for (size_t j = 0; j < max_index_number; ++j) { int index = idx_ptr[i * max_index_number + j]; - if (index != -1) idxvec[i].push_back(index); + if (index != -1) { idxvec[i].push_back(index); } } } diff --git a/ucm/store/cache/cc/trans_buffer.cc b/ucm/store/cache/cc/trans_buffer.cc index 835ec743b..8692c7a93 100644 --- a/ucm/store/cache/cc/trans_buffer.cc +++ b/ucm/store/cache/cc/trans_buffer.cc @@ -427,7 +427,8 @@ class SharedBufferStrategy : public BufferStrategy { class SharedBufferWatcherStrategy : public SharedBufferStrategy { public: - SharedBufferWatcherStrategy(const std::string& uuid) : SharedBufferStrategy(uuid, -1, 0, 0, 0) + explicit SharedBufferWatcherStrategy(const std::string& uuid) + : SharedBufferStrategy(uuid, -1, 0, 0, 0) { } Status Setup() override diff --git a/ucm/store/nfsstore/cc/domain/space/space_shard_layout.cc b/ucm/store/nfsstore/cc/domain/space/space_shard_layout.cc index 88f145665..5cbaf40c7 100644 --- a/ucm/store/nfsstore/cc/domain/space/space_shard_layout.cc +++ b/ucm/store/nfsstore/cc/domain/space/space_shard_layout.cc @@ -64,15 +64,17 @@ struct SpaceShardLayout::DataIterator : public SpaceLayout::DataIterator { this->stk.pop(); } } - Status Setup(const SpaceLayout* layout, const std::string& root) { - this->layout = layout; - this->root = root; - auto dir = OpenDir(root); + Status Setup(const SpaceLayout* newLayout, const std::string& newRoot) + { + this->layout = newLayout; + this->root = newRoot; + auto dir = OpenDir(newRoot); if (!dir) { return Status::OsApiError(); } - this->stk.emplace(dir, root); + this->stk.emplace(dir, newRoot); return Status::OK(); } - Status Next() { + Status Next() + { this->current.clear(); while (!this->stk.empty()) { auto entry = ::readdir64(this->stk.top().first); diff --git a/ucm/store/pipeline/connector.py b/ucm/store/pipeline/connector.py index 0b3864f9b..947ca8d99 100644 --- a/ucm/store/pipeline/connector.py +++ b/ucm/store/pipeline/connector.py @@ -197,7 +197,8 @@ def _build_cache_compress_posix_pipeline( if config.get("device_id", -1) >= 0: if (posix_config["block_size"] % posix_config["shard_size"]) != 0: print( - f'_build_cache_compress_posix_pipeline: error paraments {posix_config["block_size"]} {posix_config["shard_size"]}' + "_build_cache_compress_posix_pipeline: error paraments " + f"{posix_config['block_size']} {posix_config['shard_size']}" ) return layers = posix_config["block_size"] // posix_config["shard_size"] diff --git a/ucm/store/posix/cc/io_engine_aio.h b/ucm/store/posix/cc/io_engine_aio.h index 7d283bff4..e661b7a86 100644 --- a/ucm/store/posix/cc/io_engine_aio.h +++ b/ucm/store/posix/cc/io_engine_aio.h @@ -83,7 +83,7 @@ class IoEngineAio : public Detail::TaskWrapper { { const auto last = shard.index + 1 == nShardPerBlock_; const auto& id = shard.owner; - auto handleFailure = [&](int32_t fd) { + auto handleFailure = [this, tid, w, last, id](int32_t fd) { failureSet_.Insert(tid); if (fd >= 0) { ::close(fd); } if constexpr (dump) {