Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ucm/integration/vllm/ucm_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2900,8 +2900,6 @@ def __init__(
self.connector = UCMHybridLinearAttentionConnector(
vllm_config, role, kv_cache_config
)
# elif use_hma:
# self.connector = UCMHMAConnector(vllm_config, role, kv_cache_config)
else:
self.connector = UCMDirectConnector(vllm_config, role, kv_cache_config)

Expand Down
9 changes: 3 additions & 6 deletions ucm/sandbox/sparse/retake/retake/longvideo_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,13 +485,10 @@ def update(

position_ids = cache_kwargs.pop("position_ids", None)
compression_ratio = self.budget_allocation(layer_idx)
no_compression = math.isclose(compression_ratio, 1.0, rel_tol=1e-9)
# print('compression_ratio of layer %d: %.4f' % (layer_idx, compression_ratio))

if (
self.kvcache_compression
and self.compression_ratio < 1.0
and compression_ratio == 1.0
):
if self.kvcache_compression and self.compression_ratio < 1.0 and no_compression:
# Truncate the prompts directly when no compression
key_states = key_states[:, :, : -self.prompt_length]
value_states = value_states[:, :, : -self.prompt_length]
Expand All @@ -503,7 +500,7 @@ def update(
)

if (
self.kvcache_compression and compression_ratio < 1.0
self.kvcache_compression and compression_ratio < 1.0 and not no_compression
): # when compression is enabled
query_states = cache_kwargs.pop("query_states")
rotary_emb_fn = cache_kwargs.pop("rotary_emb")
Expand Down
2 changes: 2 additions & 0 deletions ucm/shared/trans/cuda/gdr/gdr_copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,8 @@ void GdrCopyLib::RegisterHostBuffer(void* host, size_t size)
try {
channel.RegisterHostBuffer(reinterpret_cast<uint64_t>(host), size);
} catch (...) {
Comment thread
ygwpz marked this conversation as resolved.
// Ignore registration failure on existing channels, as the new registration will be
// picked up later.
}
});
}
Expand Down
2 changes: 1 addition & 1 deletion ucm/sparse/esa/retrieval/cpy/retrieval_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class RetrievalWorkerBackend {
for (size_t i = 0; i < n_requests; ++i) {
for (size_t j = 0; j < max_index_number; ++j) {
int index = idx_ptr[i * max_index_number + j];
if (index != -1) idxvec[i].push_back(index);
if (index != -1) { idxvec[i].push_back(index); }
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class HashRetrievalWorkerBackend {
for (size_t i = 0; i < n_requests; ++i) {
for (size_t j = 0; j < max_index_number; ++j) {
int index = idx_ptr[i * max_index_number + j];
if (index != -1) idxvec[i].push_back(index);
if (index != -1) { idxvec[i].push_back(index); }
}
}

Expand Down
3 changes: 2 additions & 1 deletion ucm/store/cache/cc/trans_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,8 @@ class SharedBufferStrategy : public BufferStrategy {

class SharedBufferWatcherStrategy : public SharedBufferStrategy {
public:
SharedBufferWatcherStrategy(const std::string& uuid) : SharedBufferStrategy(uuid, -1, 0, 0, 0)
explicit SharedBufferWatcherStrategy(const std::string& uuid)
: SharedBufferStrategy(uuid, -1, 0, 0, 0)
{
}
Status Setup() override
Expand Down
14 changes: 8 additions & 6 deletions ucm/store/nfsstore/cc/domain/space/space_shard_layout.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,17 @@ struct SpaceShardLayout::DataIterator : public SpaceLayout::DataIterator {
this->stk.pop();
}
}
Status Setup(const SpaceLayout* layout, const std::string& root) {
this->layout = layout;
this->root = root;
auto dir = OpenDir(root);
Status Setup(const SpaceLayout* newLayout, const std::string& newRoot)
{
this->layout = newLayout;
this->root = newRoot;
auto dir = OpenDir(newRoot);
if (!dir) { return Status::OsApiError(); }
this->stk.emplace(dir, root);
this->stk.emplace(dir, newRoot);
return Status::OK();
}
Status Next() {
Status Next()
{
this->current.clear();
while (!this->stk.empty()) {
auto entry = ::readdir64(this->stk.top().first);
Expand Down
3 changes: 2 additions & 1 deletion ucm/store/pipeline/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def _build_cache_compress_posix_pipeline(
if config.get("device_id", -1) >= 0:
if (posix_config["block_size"] % posix_config["shard_size"]) != 0:
print(
f'_build_cache_compress_posix_pipeline: error paraments {posix_config["block_size"]} {posix_config["shard_size"]}'
"_build_cache_compress_posix_pipeline: error paraments "
f"{posix_config['block_size']} {posix_config['shard_size']}"
)
return
layers = posix_config["block_size"] // posix_config["shard_size"]
Expand Down
2 changes: 1 addition & 1 deletion ucm/store/posix/cc/io_engine_aio.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class IoEngineAio : public Detail::TaskWrapper<TransTask, Detail::TaskHandle> {
{
const auto last = shard.index + 1 == nShardPerBlock_;
const auto& id = shard.owner;
auto handleFailure = [&](int32_t fd) {
auto handleFailure = [this, tid, w, last, id](int32_t fd) {
failureSet_.Insert(tid);
if (fd >= 0) { ::close(fd); }
if constexpr (dump) {
Expand Down
Loading