Skip to content

Commit a49f83c

Browse files
branch-4.1: [improvement](filecache) limit file cache LRU replay queues #64381 (#64387)
Cherry-picked from #64381 Co-authored-by: zhengyu <zhangzhengyu@selectdb.com>
1 parent 94f4c3d commit a49f83c

9 files changed

Lines changed: 323 additions & 35 deletions

be/src/common/config.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,7 @@ DEFINE_mInt64(file_cache_remove_block_qps_limit, "1000");
12311231
DEFINE_mInt64(file_cache_background_gc_interval_ms, "100");
12321232
DEFINE_mInt64(file_cache_background_block_lru_update_interval_ms, "5000");
12331233
DEFINE_mInt64(file_cache_background_block_lru_update_qps_limit, "1000");
1234+
DEFINE_mInt64(file_cache_background_block_lru_update_queue_max_size, "500000");
12341235
DEFINE_mBool(enable_file_cache_async_touch_on_get_or_set, "false");
12351236
DEFINE_mBool(enable_reader_dryrun_when_download_file_cache, "true");
12361237
DEFINE_mInt64(file_cache_background_monitor_interval_ms, "5000");
@@ -1242,7 +1243,8 @@ DEFINE_mInt64(file_cache_background_lru_dump_interval_ms, "60000");
12421243
// dump queue only if the queue update specific times through several dump intervals
12431244
DEFINE_mInt64(file_cache_background_lru_dump_update_cnt_threshold, "1000");
12441245
DEFINE_mInt64(file_cache_background_lru_dump_tail_record_num, "5000000");
1245-
DEFINE_mInt64(file_cache_background_lru_log_replay_interval_ms, "1000");
1246+
DEFINE_mInt64(file_cache_background_lru_log_queue_max_size, "500000");
1247+
DEFINE_mInt64(file_cache_background_lru_log_replay_interval_ms, "1");
12461248
DEFINE_mBool(enable_evaluate_shadow_queue_diff, "false");
12471249

12481250
DEFINE_mBool(file_cache_enable_only_warm_up_idx, "false");

be/src/common/config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,7 @@ DECLARE_mInt64(file_cache_remove_block_qps_limit);
12681268
DECLARE_mInt64(file_cache_background_gc_interval_ms);
12691269
DECLARE_mInt64(file_cache_background_block_lru_update_interval_ms);
12701270
DECLARE_mInt64(file_cache_background_block_lru_update_qps_limit);
1271+
DECLARE_mInt64(file_cache_background_block_lru_update_queue_max_size);
12711272
DECLARE_mBool(enable_file_cache_async_touch_on_get_or_set);
12721273
DECLARE_mBool(enable_reader_dryrun_when_download_file_cache);
12731274
DECLARE_mInt64(file_cache_background_monitor_interval_ms);
@@ -1282,6 +1283,7 @@ DECLARE_mInt64(file_cache_background_lru_dump_interval_ms);
12821283
// dump queue only if the queue update specific times through several dump intervals
12831284
DECLARE_mInt64(file_cache_background_lru_dump_update_cnt_threshold);
12841285
DECLARE_mInt64(file_cache_background_lru_dump_tail_record_num);
1286+
DECLARE_mInt64(file_cache_background_lru_log_queue_max_size);
12851287
DECLARE_mInt64(file_cache_background_lru_log_replay_interval_ms);
12861288
DECLARE_mBool(enable_evaluate_shadow_queue_diff);
12871289

be/src/io/cache/block_file_cache.cpp

Lines changed: 90 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,24 +59,53 @@
5959
namespace doris::io {
6060
#include "common/compile_check_begin.h"
6161

62+
namespace {
63+
64+
constexpr std::array<FileCacheType, 4> LRU_LOG_REPLAY_TYPES = {
65+
FileCacheType::TTL, FileCacheType::INDEX, FileCacheType::NORMAL, FileCacheType::DISPOSABLE};
66+
67+
size_t file_cache_type_index(FileCacheType type) {
68+
return static_cast<size_t>(type);
69+
}
70+
71+
} // namespace
72+
6273
// Insert a block pointer into one shard while swallowing allocation failures.
63-
bool NeedUpdateLRUBlocks::insert(FileBlockSPtr block) {
64-
if (!block) {
74+
bool NeedUpdateLRUBlocks::insert(FileBlockSPtr block, size_t max_queue_size) {
75+
if (!block || max_queue_size == 0) {
6576
return false;
6677
}
78+
bool reserved = false;
6779
try {
6880
auto* raw_ptr = block.get();
6981
auto idx = shard_index(raw_ptr);
7082
auto& shard = _shards[idx];
7183
std::lock_guard lock(shard.mutex);
72-
auto [_, inserted] = shard.entries.emplace(raw_ptr, std::move(block));
73-
if (inserted) {
74-
_size.fetch_add(1, std::memory_order_relaxed);
84+
if (shard.entries.contains(raw_ptr)) {
85+
return false;
7586
}
76-
return inserted;
87+
size_t cur_size = _size.load(std::memory_order_relaxed);
88+
while (cur_size < max_queue_size) {
89+
if (_size.compare_exchange_weak(cur_size, cur_size + 1, std::memory_order_relaxed)) {
90+
reserved = true;
91+
break;
92+
}
93+
}
94+
if (!reserved) {
95+
return false;
96+
}
97+
auto [_, inserted] = shard.entries.emplace(raw_ptr, std::move(block));
98+
DORIS_CHECK(inserted);
99+
return true;
77100
} catch (const std::exception& e) {
101+
if (reserved) {
102+
decrease_size(1);
103+
}
78104
LOG(WARNING) << "Failed to enqueue block for LRU update: " << e.what();
79105
} catch (...) {
106+
if (reserved) {
107+
decrease_size(1);
108+
}
80109
LOG(WARNING) << "Failed to enqueue block for LRU update: unknown error";
81110
}
82111
return false;
@@ -103,7 +132,7 @@ size_t NeedUpdateLRUBlocks::drain(size_t limit, std::vector<FileBlockSPtr>* outp
103132
++shard_drained;
104133
}
105134
if (shard_drained > 0) {
106-
_size.fetch_sub(shard_drained, std::memory_order_relaxed);
135+
decrease_size(shard_drained);
107136
drained += shard_drained;
108137
}
109138
}
@@ -123,7 +152,7 @@ void NeedUpdateLRUBlocks::clear() {
123152
if (!shard.entries.empty()) {
124153
auto removed = shard.entries.size();
125154
shard.entries.clear();
126-
_size.fetch_sub(removed, std::memory_order_relaxed);
155+
decrease_size(removed);
127156
}
128157
}
129158
} catch (const std::exception& e) {
@@ -133,6 +162,16 @@ void NeedUpdateLRUBlocks::clear() {
133162
}
134163
}
135164

165+
void NeedUpdateLRUBlocks::decrease_size(size_t delta) {
166+
size_t cur_size = _size.load(std::memory_order_relaxed);
167+
while (true) {
168+
DORIS_CHECK(cur_size >= delta);
169+
if (_size.compare_exchange_weak(cur_size, cur_size - delta, std::memory_order_relaxed)) {
170+
return;
171+
}
172+
}
173+
}
174+
136175
size_t NeedUpdateLRUBlocks::shard_index(FileBlock* ptr) const {
137176
DCHECK(ptr != nullptr);
138177
return std::hash<FileBlock*> {}(ptr)&kShardMask;
@@ -348,12 +387,30 @@ BlockFileCache::BlockFileCache(const std::string& cache_base_path,
348387
_cache_base_path.c_str(), "file_cache_recycle_keys_length");
349388
_need_update_lru_blocks_length_recorder = std::make_shared<bvar::LatencyRecorder>(
350389
_cache_base_path.c_str(), "file_cache_need_update_lru_blocks_length");
390+
_need_update_lru_blocks_produce_metrics = std::make_shared<bvar::Adder<size_t>>(
391+
_cache_base_path.c_str(), "file_cache_need_update_lru_blocks_produce");
392+
_need_update_lru_blocks_consume_metrics = std::make_shared<bvar::Adder<size_t>>(
393+
_cache_base_path.c_str(), "file_cache_need_update_lru_blocks_consume");
351394
_update_lru_blocks_latency_us = std::make_shared<bvar::LatencyRecorder>(
352395
_cache_base_path.c_str(), "file_cache_update_lru_blocks_latency_us");
353396
_ttl_gc_latency_us = std::make_shared<bvar::LatencyRecorder>(_cache_base_path.c_str(),
354397
"file_cache_ttl_gc_latency_us");
355398
_shadow_queue_levenshtein_distance = std::make_shared<bvar::LatencyRecorder>(
356399
_cache_base_path.c_str(), "file_cache_shadow_queue_levenshtein_distance");
400+
for (FileCacheType type : {FileCacheType::DISPOSABLE, FileCacheType::NORMAL,
401+
FileCacheType::INDEX, FileCacheType::TTL}) {
402+
size_t idx = file_cache_type_index(type);
403+
std::string metric_prefix =
404+
"file_cache_lru_recorder_" + cache_type_to_string(type) + "_record_queue";
405+
_lru_recorder_queue_length_recorder[idx] = std::make_shared<bvar::LatencyRecorder>(
406+
_cache_base_path.c_str(), metric_prefix + "_length");
407+
_lru_recorder_queue_produce_metrics[idx] = std::make_shared<bvar::Adder<size_t>>(
408+
_cache_base_path.c_str(), metric_prefix + "_produce");
409+
_lru_recorder_queue_consume_metrics[idx] = std::make_shared<bvar::Adder<size_t>>(
410+
_cache_base_path.c_str(), metric_prefix + "_consume");
411+
}
412+
_lru_recorder_log_replay_idle_metrics = std::make_shared<bvar::Adder<size_t>>(
413+
_cache_base_path.c_str(), "file_cache_lru_recorder_log_replay_idle");
357414

358415
_disposable_queue = LRUQueue(cache_settings.disposable_queue_size,
359416
cache_settings.disposable_queue_elements, 60 * 60);
@@ -648,7 +705,10 @@ FileBlocks BlockFileCache::get_impl(const UInt128Wrapper& hash, const CacheConte
648705
}
649706

650707
void BlockFileCache::add_need_update_lru_block(FileBlockSPtr block) {
651-
if (_need_update_lru_blocks.insert(std::move(block))) {
708+
int64_t queue_limit = config::file_cache_background_block_lru_update_queue_max_size;
709+
size_t max_queue_size = queue_limit <= 0 ? 0 : static_cast<size_t>(queue_limit);
710+
if (_need_update_lru_blocks.insert(std::move(block), max_queue_size)) {
711+
*_need_update_lru_blocks_produce_metrics << 1;
652712
*_need_update_lru_blocks_length_recorder << _need_update_lru_blocks.size();
653713
}
654714
}
@@ -2093,6 +2153,7 @@ void BlockFileCache::run_background_block_lru_update() {
20932153
*_need_update_lru_blocks_length_recorder << _need_update_lru_blocks.size();
20942154
continue;
20952155
}
2156+
*_need_update_lru_blocks_consume_metrics << drained;
20962157

20972158
int64_t duration_ns = 0;
20982159
{
@@ -2307,19 +2368,28 @@ void BlockFileCache::run_background_lru_log_replay() {
23072368
}
23082369
}
23092370

2310-
_lru_recorder->replay_queue_event(FileCacheType::TTL);
2311-
_lru_recorder->replay_queue_event(FileCacheType::INDEX);
2312-
_lru_recorder->replay_queue_event(FileCacheType::NORMAL);
2313-
_lru_recorder->replay_queue_event(FileCacheType::DISPOSABLE);
2371+
replay_lru_logs_once();
2372+
}
2373+
}
2374+
2375+
size_t BlockFileCache::replay_lru_logs_once() {
2376+
size_t replayed = 0;
2377+
for (FileCacheType type : LRU_LOG_REPLAY_TYPES) {
2378+
replayed += _lru_recorder->replay_queue_event(type);
2379+
}
23142380

2315-
if (config::enable_evaluate_shadow_queue_diff) {
2316-
SCOPED_CACHE_LOCK(_mutex, this);
2317-
_lru_recorder->evaluate_queue_diff(_ttl_queue, "ttl", cache_lock);
2318-
_lru_recorder->evaluate_queue_diff(_index_queue, "index", cache_lock);
2319-
_lru_recorder->evaluate_queue_diff(_normal_queue, "normal", cache_lock);
2320-
_lru_recorder->evaluate_queue_diff(_disposable_queue, "disposable", cache_lock);
2321-
}
2381+
if (replayed == 0) {
2382+
*_lru_recorder_log_replay_idle_metrics << 1;
2383+
}
2384+
2385+
if (config::enable_evaluate_shadow_queue_diff) {
2386+
SCOPED_CACHE_LOCK(_mutex, this);
2387+
_lru_recorder->evaluate_queue_diff(_ttl_queue, "ttl", cache_lock);
2388+
_lru_recorder->evaluate_queue_diff(_index_queue, "index", cache_lock);
2389+
_lru_recorder->evaluate_queue_diff(_normal_queue, "normal", cache_lock);
2390+
_lru_recorder->evaluate_queue_diff(_disposable_queue, "disposable", cache_lock);
23222391
}
2392+
return replayed;
23232393
}
23242394

23252395
void BlockFileCache::dump_lru_queues(bool force) {

be/src/io/cache/block_file_cache.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <atomic>
2626
#include <boost/lockfree/spsc_queue.hpp>
2727
#include <functional>
28+
#include <limits>
2829
#include <memory>
2930
#include <mutex>
3031
#include <optional>
@@ -91,7 +92,7 @@ class NeedUpdateLRUBlocks {
9192

9293
// Insert a block into the pending set. Returns true only when the block
9394
// was not already queued. Null inputs are ignored.
94-
bool insert(FileBlockSPtr block);
95+
bool insert(FileBlockSPtr block, size_t max_queue_size = std::numeric_limits<size_t>::max());
9596

9697
// Drain up to `limit` unique blocks into `output`. The method returns how
9798
// many blocks were actually drained and shrinks the internal size
@@ -114,6 +115,7 @@ class NeedUpdateLRUBlocks {
114115
};
115116

116117
size_t shard_index(FileBlock* ptr) const;
118+
void decrease_size(size_t delta);
117119

118120
std::array<Shard, kShardCount> _shards;
119121
std::atomic<size_t> _size {0};
@@ -464,6 +466,7 @@ class BlockFileCache {
464466
void run_background_monitor();
465467
void run_background_gc();
466468
void run_background_lru_log_replay();
469+
size_t replay_lru_logs_once();
467470
void run_background_lru_dump();
468471
void restore_lru_queues_from_disk(std::lock_guard<std::mutex>& cache_lock);
469472
void run_background_evict_in_advance();
@@ -611,9 +614,15 @@ class BlockFileCache {
611614
std::shared_ptr<bvar::LatencyRecorder> _recycle_keys_length_recorder;
612615
std::shared_ptr<bvar::LatencyRecorder> _update_lru_blocks_latency_us;
613616
std::shared_ptr<bvar::LatencyRecorder> _need_update_lru_blocks_length_recorder;
617+
std::shared_ptr<bvar::Adder<size_t>> _need_update_lru_blocks_produce_metrics;
618+
std::shared_ptr<bvar::Adder<size_t>> _need_update_lru_blocks_consume_metrics;
614619
std::shared_ptr<bvar::LatencyRecorder> _ttl_gc_latency_us;
615620

616621
std::shared_ptr<bvar::LatencyRecorder> _shadow_queue_levenshtein_distance;
622+
std::array<std::shared_ptr<bvar::LatencyRecorder>, 4> _lru_recorder_queue_length_recorder;
623+
std::array<std::shared_ptr<bvar::Adder<size_t>>, 4> _lru_recorder_queue_produce_metrics;
624+
std::array<std::shared_ptr<bvar::Adder<size_t>>, 4> _lru_recorder_queue_consume_metrics;
625+
std::shared_ptr<bvar::Adder<size_t>> _lru_recorder_log_replay_idle_metrics;
617626
// keep _storage last so it will deconstruct first
618627
// otherwise, load_cache_info_into_memory might crash
619628
// coz it will use other members of BlockFileCache

be/src/io/cache/lru_queue_recorder.cpp

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,53 @@
1717

1818
#include "io/cache/lru_queue_recorder.h"
1919

20+
#include "common/config.h"
21+
#include "common/status.h"
2022
#include "io/cache/block_file_cache.h"
2123
#include "io/cache/file_cache_common.h"
2224

2325
namespace doris::io {
2426

27+
namespace {
28+
29+
size_t file_cache_type_index(FileCacheType type) {
30+
return static_cast<size_t>(type);
31+
}
32+
33+
} // namespace
34+
2535
void LRUQueueRecorder::record_queue_event(FileCacheType type, CacheLRULogType log_type,
2636
const UInt128Wrapper hash, const size_t offset,
2737
const size_t size) {
28-
CacheLRULogQueue& log_queue = get_lru_log_queue(type);
29-
log_queue.enqueue(std::make_unique<CacheLRULog>(log_type, hash, offset, size));
38+
if (config::file_cache_background_lru_dump_tail_record_num <= 0) {
39+
return;
40+
}
3041
++(_lru_queue_update_cnt_from_last_dump[type]);
42+
auto log = std::make_unique<CacheLRULog>(log_type, hash, offset, size);
43+
if (!reserve_lru_log_queue_slot(type)) {
44+
return;
45+
}
46+
CacheLRULogQueue& log_queue = get_lru_log_queue(type);
47+
if (!log_queue.enqueue(std::move(log))) {
48+
release_lru_log_queue_slot(type);
49+
return;
50+
}
51+
size_t idx = file_cache_type_index(type);
52+
*(_mgr->_lru_recorder_queue_produce_metrics[idx]) << 1;
53+
*(_mgr->_lru_recorder_queue_length_recorder[idx]) << lru_log_queue_size(type);
3154
}
3255

33-
void LRUQueueRecorder::replay_queue_event(FileCacheType type) {
56+
size_t LRUQueueRecorder::replay_queue_event(FileCacheType type) {
3457
// we don't need the real cache lock for the shadow queue, but we do need a lock to prevent read/write contension
3558
CacheLRULogQueue& log_queue = get_lru_log_queue(type);
3659
LRUQueue& shadow_queue = get_shadow_queue(type);
3760

3861
std::lock_guard<std::mutex> lru_log_lock(_mutex_lru_log);
3962
std::unique_ptr<CacheLRULog> log;
63+
size_t replayed = 0;
4064
while (log_queue.try_dequeue(log)) {
65+
release_lru_log_queue_slot(type);
66+
++replayed;
4167
try {
4268
switch (log->type) {
4369
case CacheLRULogType::ADD: {
@@ -79,6 +105,12 @@ void LRUQueueRecorder::replay_queue_event(FileCacheType type) {
79105
LOG(WARNING) << "Failed to replay queue event: " << e.what();
80106
}
81107
}
108+
size_t idx = file_cache_type_index(type);
109+
if (replayed > 0) {
110+
*(_mgr->_lru_recorder_queue_consume_metrics[idx]) << replayed;
111+
}
112+
*(_mgr->_lru_recorder_queue_length_recorder[idx]) << lru_log_queue_size(type);
113+
return replayed;
82114
}
83115

84116
// we evaluate the diff between two queue by calculate how many operation is
@@ -137,4 +169,34 @@ void LRUQueueRecorder::reset_lru_queue_update_cnt_from_last_dump(FileCacheType t
137169
_lru_queue_update_cnt_from_last_dump[type] = 0;
138170
}
139171

172+
size_t LRUQueueRecorder::lru_log_queue_size(FileCacheType type) const {
173+
return _lru_log_queue_size[file_cache_type_index(type)].load(std::memory_order_relaxed);
174+
}
175+
176+
bool LRUQueueRecorder::reserve_lru_log_queue_slot(FileCacheType type) {
177+
int64_t queue_limit = config::file_cache_background_lru_log_queue_max_size;
178+
if (queue_limit <= 0) {
179+
return false;
180+
}
181+
auto& queue_size = _lru_log_queue_size[file_cache_type_index(type)];
182+
size_t cur_size = queue_size.load(std::memory_order_relaxed);
183+
while (cur_size < static_cast<size_t>(queue_limit)) {
184+
if (queue_size.compare_exchange_weak(cur_size, cur_size + 1, std::memory_order_relaxed)) {
185+
return true;
186+
}
187+
}
188+
return false;
189+
}
190+
191+
void LRUQueueRecorder::release_lru_log_queue_slot(FileCacheType type) {
192+
auto& queue_size = _lru_log_queue_size[file_cache_type_index(type)];
193+
size_t cur_size = queue_size.load(std::memory_order_relaxed);
194+
while (true) {
195+
DORIS_CHECK(cur_size > 0);
196+
if (queue_size.compare_exchange_weak(cur_size, cur_size - 1, std::memory_order_relaxed)) {
197+
return;
198+
}
199+
}
200+
}
201+
140202
} // end of namespace doris::io

0 commit comments

Comments
 (0)