|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * This source code is licensed under the BSD-style license found in the |
| 6 | + * LICENSE file in the root directory of this source tree. |
| 7 | + */ |
| 8 | + |
| 9 | +#include "deeplearning/fbgemm/fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache.h" |
| 10 | + |
| 11 | +#include <fmt/format.h> |
| 12 | +#include <glog/logging.h> |
| 13 | +#include <gtest/gtest.h> |
| 14 | +#include <vector> |
| 15 | + |
| 16 | +namespace kv_mem { |
| 17 | + |
namespace {

/// Test-local view of the fixed-size header found at the start of every row
/// returned by `get_kv_metadata_rows`. The tests `memcpy` the leading
/// `sizeof(MetaHeader)` bytes of a row into this struct and inspect the fields.
///
/// NOTE(review): presumably this mirrors the header layout owned by
/// `FixedBlockPool`; keep the two in sync if the production layout changes.
///
/// Kept in an anonymous namespace so this helper cannot collide (ODR) with a
/// same-named type defined in another translation unit of the test binary.
struct MetaHeader {
  int64_t key; // id the row was stored under
  uint32_t timestamp; // last-update time; the tests treat it as seconds
  uint32_t count : 31; // 31-bit counter; its semantics belong to the cache
  bool used : 1; // expected to be set for every inserted key
};

// How `uint32_t`/`bool` bit-fields are packed is implementation-defined, so
// pin the size here, next to the definition, for every test that decodes raw
// bytes through this struct.
static_assert(sizeof(MetaHeader) == 16, "MetaHeader must be 16 bytes");

} // namespace
| 24 | + |
| 25 | +class DramKVEmbeddingCacheTest : public ::testing::Test { |
| 26 | + protected: |
| 27 | + static constexpr int EMBEDDING_DIM = 16; |
| 28 | + static constexpr int NUM_SHARDS = 4; |
| 29 | + |
| 30 | + void SetUp() override { |
| 31 | + FLAGS_logtostderr = true; |
| 32 | + FLAGS_minloglevel = 0; |
| 33 | + |
| 34 | + auto hash_size_cumsum = at::tensor({0, 100000}, at::kLong); |
| 35 | + |
| 36 | + dram_cache_ = std::make_shared<DramKVEmbeddingCache<float>>( |
| 37 | + EMBEDDING_DIM, |
| 38 | + /*uniform_init_lower=*/-0.1, |
| 39 | + /*uniform_init_upper=*/0.1, |
| 40 | + /*feature_evict_config=*/std::nullopt, |
| 41 | + NUM_SHARDS, |
| 42 | + /*num_threads=*/4, |
| 43 | + /*row_storage_bitwidth=*/32, |
| 44 | + /*backend_return_whole_row=*/false, |
| 45 | + /*enable_async_update=*/false, |
| 46 | + /*table_dims=*/std::nullopt, |
| 47 | + hash_size_cumsum, |
| 48 | + /*is_training=*/false, |
| 49 | + /*disable_random_init=*/true); |
| 50 | + } |
| 51 | + |
| 52 | + void TearDown() override { |
| 53 | + dram_cache_.reset(); |
| 54 | + } |
| 55 | + |
| 56 | + void insertEmbedding(int64_t id, float value = 1.0f) { |
| 57 | + auto indices = at::tensor({id}, at::kLong); |
| 58 | + std::vector<float> emb(EMBEDDING_DIM, value); |
| 59 | + auto weights = at::from_blob( |
| 60 | + emb.data(), {1, EMBEDDING_DIM}, at::TensorOptions().dtype(at::kFloat)); |
| 61 | + auto count = at::tensor({1}, at::kLong); |
| 62 | + folly::coro::blockingWait( |
| 63 | + dram_cache_->set_kv_db_async(indices, weights.clone(), count)); |
| 64 | + } |
| 65 | + |
| 66 | + void insertEmbeddings(const std::vector<int64_t>& ids, float value = 1.0f) { |
| 67 | + auto num = static_cast<int64_t>(ids.size()); |
| 68 | + auto indices = at::tensor(ids, at::kLong); |
| 69 | + auto weights = at::full( |
| 70 | + {num, EMBEDDING_DIM}, value, at::TensorOptions().dtype(at::kFloat)); |
| 71 | + auto count = at::tensor({num}, at::kLong); |
| 72 | + folly::coro::blockingWait( |
| 73 | + dram_cache_->set_kv_db_async(indices, weights, count)); |
| 74 | + } |
| 75 | + |
| 76 | + std::shared_ptr<DramKVEmbeddingCache<float>> dram_cache_; |
| 77 | +}; |
| 78 | + |
| 79 | +// Test: get_kv_metadata_rows returns correct shape and key for single inserted |
| 80 | +// id |
| 81 | +TEST_F(DramKVEmbeddingCacheTest, SingleKeyMetadata) { |
| 82 | + const int64_t test_id = 42; |
| 83 | + insertEmbedding(test_id, 2.5f); |
| 84 | + |
| 85 | + auto indices = at::tensor({test_id}, at::kLong); |
| 86 | + auto count = at::tensor({1}, at::kLong); |
| 87 | + auto metadata = dram_cache_->get_kv_metadata_rows(indices, count); |
| 88 | + |
| 89 | + const int64_t expected_dim = |
| 90 | + static_cast<int64_t>(FixedBlockPool::get_metaheader_dim<float>()); |
| 91 | + EXPECT_EQ(metadata.dim(), 2); |
| 92 | + EXPECT_EQ(metadata.size(0), 1); |
| 93 | + EXPECT_EQ(metadata.size(1), expected_dim); |
| 94 | + EXPECT_EQ(metadata.dtype(), at::kFloat); |
| 95 | + static_assert(sizeof(MetaHeader) == 16, "MetaHeader must be 16 bytes"); |
| 96 | + |
| 97 | + MetaHeader header{}; |
| 98 | + std::memcpy(&header, metadata.data_ptr<float>(), sizeof(MetaHeader)); |
| 99 | + |
| 100 | + EXPECT_EQ(header.key, test_id); |
| 101 | + EXPECT_TRUE(header.used); |
| 102 | + EXPECT_GT(header.timestamp, 0u); |
| 103 | + // count may be 0 initially or updated depending on implementation |
| 104 | + EXPECT_GE(header.count, 0u); |
| 105 | +} |
| 106 | + |
| 107 | +// Test: get_kv_metadata_rows returns correct metadata for multiple keys across |
| 108 | +// shards |
| 109 | +TEST_F(DramKVEmbeddingCacheTest, MultipleKeysMetadata) { |
| 110 | + std::vector<int64_t> keys = {1, 2, 3, 10, 100, 1000}; |
| 111 | + insertEmbeddings(keys, 1.0f); |
| 112 | + |
| 113 | + auto indices = at::tensor(keys, at::kLong); |
| 114 | + auto count = at::tensor({static_cast<int64_t>(keys.size())}, at::kLong); |
| 115 | + auto metadata = dram_cache_->get_kv_metadata_rows(indices, count); |
| 116 | + |
| 117 | + const int64_t expected_dim = |
| 118 | + static_cast<int64_t>(FixedBlockPool::get_metaheader_dim<float>()); |
| 119 | + EXPECT_EQ( |
| 120 | + metadata.sizes(), |
| 121 | + at::IntArrayRef({static_cast<int64_t>(keys.size()), expected_dim})); |
| 122 | + |
| 123 | + auto* md_ptr = metadata.data_ptr<float>(); |
| 124 | + const int64_t stride = expected_dim; |
| 125 | + for (size_t i = 0; i < keys.size(); ++i) { |
| 126 | + MetaHeader header{}; |
| 127 | + std::memcpy(&header, md_ptr + i * stride, sizeof(MetaHeader)); |
| 128 | + EXPECT_EQ(header.key, keys[i]) << "Mismatch at index " << i; |
| 129 | + EXPECT_TRUE(header.used) << "Used flag false for key " << keys[i]; |
| 130 | + EXPECT_GT(header.timestamp, 0u) << "Timestamp not set for key " << keys[i]; |
| 131 | + } |
| 132 | +} |
| 133 | + |
| 134 | +// Test: get_kv_metadata_rows with empty input returns empty tensor with correct |
| 135 | +// dim |
| 136 | +TEST_F(DramKVEmbeddingCacheTest, EmptyInputReturnsEmpty) { |
| 137 | + auto indices = at::empty({0}, at::kLong); |
| 138 | + auto count = at::tensor({0}, at::kLong); |
| 139 | + auto metadata = dram_cache_->get_kv_metadata_rows(indices, count); |
| 140 | + |
| 141 | + const int64_t expected_dim = |
| 142 | + static_cast<int64_t>(FixedBlockPool::get_metaheader_dim<float>()); |
| 143 | + EXPECT_EQ(metadata.dim(), 2); |
| 144 | + EXPECT_EQ(metadata.size(0), 0); |
| 145 | + EXPECT_EQ(metadata.size(1), expected_dim); |
| 146 | +} |
| 147 | + |
| 148 | +// Test: get_kv_metadata_rows reflects updated timestamp after re-insert |
| 149 | +TEST_F(DramKVEmbeddingCacheTest, TimestampUpdatesOnReinsert) { |
| 150 | + const int64_t test_id = 7; |
| 151 | + insertEmbedding(test_id, 1.0f); |
| 152 | + |
| 153 | + auto indices = at::tensor({test_id}, at::kLong); |
| 154 | + auto count = at::tensor({1}, at::kLong); |
| 155 | + auto metadata1 = dram_cache_->get_kv_metadata_rows(indices, count); |
| 156 | + MetaHeader h1{}; |
| 157 | + std::memcpy(&h1, metadata1.data_ptr<float>(), sizeof(MetaHeader)); |
| 158 | + |
| 159 | + // Sleep to ensure timestamp advances (timestamp is in seconds) |
| 160 | + std::this_thread::sleep_for(std::chrono::seconds(2)); |
| 161 | + |
| 162 | + // Re-insert same key to update timestamp |
| 163 | + insertEmbedding(test_id, 3.0f); |
| 164 | + auto metadata2 = dram_cache_->get_kv_metadata_rows(indices, count); |
| 165 | + MetaHeader h2{}; |
| 166 | + std::memcpy(&h2, metadata2.data_ptr<float>(), sizeof(MetaHeader)); |
| 167 | + |
| 168 | + EXPECT_EQ(h2.key, test_id); |
| 169 | + EXPECT_TRUE(h2.used); |
| 170 | + EXPECT_GE(h2.timestamp, h1.timestamp); |
| 171 | +} |
| 172 | + |
| 173 | +// Test: get_kv_metadata_rows works with float16 weight type via separate cache |
| 174 | +// instance |
| 175 | +TEST_F(DramKVEmbeddingCacheTest, HalfPrecisionMetadataDim) { |
| 176 | + auto hash_size_cumsum = at::tensor({0, 100000}, at::kLong); |
| 177 | + auto dram_cache_half = std::make_shared<DramKVEmbeddingCache<at::Half>>( |
| 178 | + EMBEDDING_DIM, |
| 179 | + -0.1, |
| 180 | + 0.1, |
| 181 | + std::nullopt, |
| 182 | + NUM_SHARDS, |
| 183 | + 4, |
| 184 | + 16, |
| 185 | + false, |
| 186 | + false, |
| 187 | + std::nullopt, |
| 188 | + hash_size_cumsum, |
| 189 | + false, |
| 190 | + true); |
| 191 | + |
| 192 | + // Insert one key |
| 193 | + auto indices = at::tensor({5}, at::kLong); |
| 194 | + auto weights = |
| 195 | + at::full({1, EMBEDDING_DIM}, 1.0, at::TensorOptions().dtype(at::kHalf)); |
| 196 | + auto count = at::tensor({1}, at::kLong); |
| 197 | + folly::coro::blockingWait( |
| 198 | + dram_cache_half->set_kv_db_async(indices, weights, count)); |
| 199 | + |
| 200 | + auto metadata = dram_cache_half->get_kv_metadata_rows(indices, count); |
| 201 | + const int64_t expected_dim = |
| 202 | + static_cast<int64_t>(FixedBlockPool::get_metaheader_dim<at::Half>()); |
| 203 | + // 16 bytes / 2 bytes per half = 8 |
| 204 | + EXPECT_EQ(expected_dim, 8); |
| 205 | + EXPECT_EQ(metadata.sizes(), at::IntArrayRef({1, expected_dim})); |
| 206 | + EXPECT_EQ(metadata.dtype(), at::kHalf); |
| 207 | + |
| 208 | + // Decode first 8 bytes as int64 key from half tensor raw bytes |
| 209 | + int64_t decoded_key = 0; |
| 210 | + std::memcpy(&decoded_key, metadata.data_ptr<at::Half>(), sizeof(int64_t)); |
| 211 | + EXPECT_EQ(decoded_key, 5); |
| 212 | +} |
| 213 | + |
| 214 | +} // namespace kv_mem |
0 commit comments