Skip to content

Commit 668805a

Browse files
authored
[fix](filecache) reject oversized block size in add_cell (apache#62878)
When directory_entry::file_size(ec) fails, it returns (uintmax_t)-1. load_cache_info_into_memory did not check ec, so UINT64_MAX flowed into add_cell, which only warned and then registered the cell — corrupting _files, LRUQueue::cache_size, _cur_cache_size, and the LRU dump (survives restart). - fs_file_cache_storage: check ec after file_size, skip on error - add_cell: turn the >1GiB check into a hard reject (return nullptr) - UT: add_cell_rejects_oversized_size
1 parent ceeb9bc commit 668805a

4 files changed

Lines changed: 84 additions & 1 deletion

File tree

be/src/io/cache/block_file_cache.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -881,9 +881,10 @@ FileBlockCell* BlockFileCache::add_cell(const UInt128Wrapper& hash, const CacheC
881881
<< " tablet_id=" << context.tablet_id;
882882

883883
if (size > 1024 * 1024 * 1024) {
884-
LOG(WARNING) << "File block size is too large for a block. size=" << size
884+
LOG(WARNING) << "File block size is too large for a block, reject. size=" << size
885885
<< " hash=" << hash.to_string() << " offset=" << offset
886886
<< " stack:" << get_stack_trace();
887+
return nullptr;
887888
}
888889

889890
auto& offsets = _files[hash];

be/src/io/cache/block_file_cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ class BlockFileCache {
169169
friend class CacheLRUDumper;
170170
friend class LRUQueueRecorder;
171171
friend struct FileBlockCell;
172+
friend class BlockFileCacheTest;
172173

173174
public:
174175
// hash the file_name to uint128

be/src/io/cache/fs_file_cache_storage.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,11 @@ void FSFileCacheStorage::load_cache_info_into_memory_from_fs(BlockFileCache* mgr
695695
context.expiration_time = expiration_time;
696696
for (; offset_it != std::filesystem::directory_iterator(); ++offset_it) {
697697
size_t size = offset_it->file_size(ec);
698+
if (ec) [[unlikely]] {
699+
LOG(WARNING) << "skip cache file, file_size failed, file="
700+
<< offset_it->path().native() << " err=" << ec.message();
701+
continue;
702+
}
698703
size_t offset = 0;
699704
bool is_tmp = false;
700705
FileCacheType cache_type = FileCacheType::NORMAL;

be/test/io/cache/block_file_cache_test.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8058,4 +8058,80 @@ TEST_F(BlockFileCacheTest, set_downloaded_empty_block_branch) {
80588058
ASSERT_EQ(block.get_downloader(), 0);
80598059
}
80608060

8061+
// Reproduces OPENSOURCE-325: when fs_file_cache_storage's directory scan reads
8062+
// a file size via std::filesystem::directory_entry::file_size(ec) and the
8063+
// underlying syscall fails (e.g. file removed concurrently by clear/GC), the
8064+
// returned value is (uintmax_t)-1 == UINT64_MAX. Before the fix, that bogus
8065+
// size flowed into BlockFileCache::add_cell, which only logged a WARNING and
8066+
// then registered the cell, polluting _files, the LRU queue's cache_size, and
8067+
// _cur_cache_size. After the fix, add_cell rejects sizes > 1GiB and returns
8068+
// nullptr without touching any internal state.
8069+
TEST_F(BlockFileCacheTest, add_cell_rejects_oversized_size) {
8070+
if (fs::exists(cache_base_path)) {
8071+
fs::remove_all(cache_base_path);
8072+
}
8073+
fs::create_directories(cache_base_path);
8074+
8075+
io::FileCacheSettings settings;
8076+
settings.query_queue_size = 30_mb;
8077+
settings.query_queue_elements = 30;
8078+
settings.capacity = 30_mb;
8079+
settings.max_file_block_size = 1_mb;
8080+
settings.max_query_cache_size = 30;
8081+
8082+
io::BlockFileCache cache(cache_base_path, settings);
8083+
ASSERT_TRUE(cache.initialize());
8084+
for (int i = 0; i < 100; ++i) {
8085+
if (cache.get_async_open_success()) {
8086+
break;
8087+
}
8088+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
8089+
}
8090+
ASSERT_TRUE(cache.get_async_open_success());
8091+
8092+
auto hash = io::BlockFileCache::hash("opensource_325_key");
8093+
io::CacheContext ctx;
8094+
TUniqueId qid;
8095+
qid.hi = 1;
8096+
qid.lo = 1;
8097+
ctx.query_id = qid;
8098+
ctx.cache_type = io::FileCacheType::NORMAL;
8099+
ReadStatistics rstats;
8100+
ctx.stats = &rstats;
8101+
8102+
const size_t kBadSize = std::numeric_limits<size_t>::max(); // (uintmax_t)-1
8103+
const size_t kOffset = 1128267776; // matches the JIRA report
8104+
8105+
size_t cur_cache_size_before = 0;
8106+
size_t normal_queue_size_before = 0;
8107+
{
8108+
std::lock_guard<std::mutex> cache_lock(cache._mutex);
8109+
cur_cache_size_before = cache._cur_cache_size;
8110+
normal_queue_size_before =
8111+
cache.get_queue(io::FileCacheType::NORMAL).get_capacity(cache_lock);
8112+
}
8113+
8114+
{
8115+
std::lock_guard<std::mutex> cache_lock(cache._mutex);
8116+
auto* cell = cache.add_cell(hash, ctx, kOffset, kBadSize, io::FileBlock::State::DOWNLOADED,
8117+
cache_lock);
8118+
// Defensive guard must reject oversized blocks instead of polluting state.
8119+
ASSERT_EQ(cell, nullptr);
8120+
}
8121+
8122+
{
8123+
std::lock_guard<std::mutex> cache_lock(cache._mutex);
8124+
// _files index untouched: no entry for (hash, kOffset).
8125+
ASSERT_EQ(cache._files.find(hash), cache._files.end());
8126+
// Global and per-queue size counters untouched (no UINT64_MAX added).
8127+
ASSERT_EQ(cache._cur_cache_size, cur_cache_size_before);
8128+
ASSERT_EQ(cache.get_queue(io::FileCacheType::NORMAL).get_capacity(cache_lock),
8129+
normal_queue_size_before);
8130+
}
8131+
8132+
if (fs::exists(cache_base_path)) {
8133+
fs::remove_all(cache_base_path);
8134+
}
8135+
}
8136+
80618137
} // namespace doris::io

0 commit comments

Comments
 (0)