Skip to content

Commit a558318

Browse files
committed
Add async WAL precreation
Add an immutable async_wal_precreate DB option that reserves and creates the next WAL on a background thread, then consumes it during WAL rotation when available. Fall back to synchronous WAL creation when precreation is unavailable, wait for pending precreation at rotation, and release unused precreated WAL writers on close without deleting the empty future WAL. Add option plumbing, db_bench/db_stress flags, crash-test coverage, unit tests, docs, and release note.
1 parent 330962b commit a558318

28 files changed

Lines changed: 806 additions & 31 deletions

db/c.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5286,6 +5286,15 @@ size_t rocksdb_options_get_recycle_log_file_num(rocksdb_options_t* opt) {
52865286
return opt->rep.recycle_log_file_num;
52875287
}
52885288

5289+
// C API setter for the async_wal_precreate DB option: assigns `v` to
// opt->rep.async_wal_precreate.
// NOTE(review): the option field is presumably a bool, in which case any
// non-zero `v` enables the feature -- confirm against the option declaration.
void rocksdb_options_set_async_wal_precreate(rocksdb_options_t* opt,
5290+
unsigned char v) {
5291+
opt->rep.async_wal_precreate = v;
5292+
}
5293+
5294+
// C API getter for the async_wal_precreate DB option: returns the current
// value of opt->rep.async_wal_precreate converted to unsigned char.
unsigned char rocksdb_options_get_async_wal_precreate(rocksdb_options_t* opt) {
5295+
return opt->rep.async_wal_precreate;
5296+
}
5297+
52895298
void rocksdb_options_set_soft_pending_compaction_bytes_limit(
52905299
rocksdb_options_t* opt, size_t v) {
52915300
opt->rep.soft_pending_compaction_bytes_limit = v;

db/c_test.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2869,6 +2869,9 @@ int main(int argc, char** argv) {
28692869
rocksdb_options_set_track_and_verify_wals_in_manifest(o, 42);
28702870
CheckCondition(1 ==
28712871
rocksdb_options_get_track_and_verify_wals_in_manifest(o));
2872+
CheckCondition(0 == rocksdb_options_get_async_wal_precreate(o));
2873+
rocksdb_options_set_async_wal_precreate(o, 1);
2874+
CheckCondition(1 == rocksdb_options_get_async_wal_precreate(o));
28722875

28732876
/* Blob Options */
28742877
rocksdb_options_set_enable_blob_files(o, 1);

db/db_impl/db_impl.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,12 +788,27 @@ Status DBImpl::CloseHelper() {
788788
bg_flush_scheduled_ || bg_purge_scheduled_ ||
789789
bg_pressure_callback_in_progress_ ||
790790
bg_async_file_open_state_ == AsyncFileOpenState::kScheduled ||
791+
async_wal_precreate_state_ == AsyncWALPrecreateState::kScheduled ||
791792
pending_purge_obsolete_files_ ||
792793
error_handler_.IsRecoveryInProgress()) {
793794
TEST_SYNC_POINT("DBImpl::~DBImpl:WaitJob");
794795
bg_cv_.Wait();
795796
}
796797

798+
// Release any opened-but-unpublished WAL writer after the in-flight worker
799+
// has published its result. Clear the DB-owned async slot while holding
800+
// mutex_, but destroy the detached writer after dropping mutex_ because
801+
// log::Writer / WritableFileWriter destruction can flush and close the file.
802+
// The file itself can be left behind as an empty future WAL; recovery already
803+
// tolerates it and marks its file number used if observed.
804+
UnpublishedWAL unused_async_wal = std::move(async_wal_precreate_wal_);
805+
async_wal_precreate_state_ = AsyncWALPrecreateState::kNotScheduled;
806+
if (unused_async_wal.writer) {
807+
mutex_.Unlock();
808+
unused_async_wal.Reset();
809+
mutex_.Lock();
810+
}
811+
797812
// Ensure subclasses don't forget to schedule async file opening
798813
assert(!immutable_db_options_.open_files_async || !opened_successfully_ ||
799814
bg_async_file_open_state_ != AsyncFileOpenState::kNotScheduled);

db/db_impl/db_impl.h

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include <limits>
1616
#include <list>
1717
#include <map>
18+
#include <memory>
19+
#include <optional>
1820
#include <set>
1921
#include <string>
2022
#include <unordered_map>
@@ -2809,7 +2811,68 @@ class DBImpl : public DB {
28092811
size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
28102812
Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; }
28112813

2812-
IOStatus CreateWAL(const WriteOptions& write_options, uint64_t log_file_num,
2814+
// Returns true when async WAL precreation is enabled and compatible with the
2815+
// active WAL strategy. WAL recycling already avoids file creation latency, so
2816+
// precreation is disabled when recycle_log_file_num is non-zero.
2817+
bool AsyncWALPrecreateEnabled() const;
2818+
2819+
// A WAL file that has a reserved file number and may have an opened writer,
2820+
// but has not been added to DBImpl's in-memory logical WAL tracking lists
2821+
// (logs_ and alive_wal_files_).
2822+
struct UnpublishedWAL {
2823+
// Reserved WAL file number. Zero after Reset() and in a moved-from object.
uint64_t log_number = 0;
2824+
// Writer for the opened WAL file, or null when no writer is held.
std::unique_ptr<log::Writer> writer;
2825+
2826+
UnpublishedWAL() = default;
2827+
// Move-only: the writer is held through a unique_ptr, so copying is
// explicitly disabled.
UnpublishedWAL(const UnpublishedWAL&) = delete;
2828+
UnpublishedWAL& operator=(const UnpublishedWAL&) = delete;
2829+
2830+
// Delegates to move assignment. Both members have default member
// initializers, so *this is already in the empty state when the assignment
// runs.
UnpublishedWAL(UnpublishedWAL&& other) noexcept {
2831+
*this = std::move(other);
2832+
}
// Takes over `other`'s file number and writer, then resets `other` so it no
// longer reports a reserved number. A writer previously held by *this is
// destroyed by the unique_ptr move assignment. Self-assignment is a no-op.
UnpublishedWAL& operator=(UnpublishedWAL&& other) noexcept {
2834+
if (this != &other) {
2835+
log_number = other.log_number;
2836+
writer = std::move(other.writer);
2837+
// log_number is a plain integer and is not cleared by the move above, so
// reset `other` explicitly.
other.Reset();
2838+
}
2839+
return *this;
2840+
}
2841+
2842+
// Returns the object to the empty state: clears the file number and
// destroys the held writer, if any.
void Reset() {
2843+
log_number = 0;
2844+
writer.reset();
2845+
}
2846+
};
2847+
2848+
// Reserves the next WAL file number and schedules a HIGH-priority background
2849+
// task to precreate that WAL file. A precreated WAL is not a logical WAL
2850+
// until a foreground WAL rotation consumes it.
2851+
void MaybeScheduleAsyncWALPrecreate(size_t preallocate_block_size);
2852+
2853+
// Background task for opening the reserved future WAL and publishing the
2854+
// result under mutex_.
2855+
static void BGWorkAsyncWALPrecreate(void* arg);
2856+
2857+
// Waits for an in-flight async WAL precreation and returns a prepared WAL if
2858+
// one is available. If precreation failed, returns an empty WAL and lets the
2859+
// foreground rotation create the WAL synchronously. Caller must hold mutex_.
2860+
UnpublishedWAL WaitForAsyncWALPrecreate();
2861+
2862+
// Opens and preallocates a WAL writer without writing logical WAL records.
2863+
// Used by async WAL precreation and by synchronous WAL creation.
2864+
IOStatus CreateWALWriter(const DBOptions& db_options, uint64_t log_file_num,
2865+
uint64_t recycle_log_number,
2866+
size_t preallocate_block_size,
2867+
UnpublishedWAL* new_wal);
2868+
2869+
// Starts an opened WAL file by writing the initial records required before it
2870+
// can be installed as the current WAL for foreground writes.
2871+
IOStatus StartWALFile(const WriteOptions& write_options,
2872+
const PredecessorWALInfo& predecessor_wal_info,
2873+
log::Writer* new_log);
2874+
IOStatus CreateWAL(const DBOptions& db_options,
2875+
const WriteOptions& write_options, uint64_t log_file_num,
28132876
uint64_t recycle_log_number, size_t preallocate_block_size,
28142877
const PredecessorWALInfo& predecessor_wal_info,
28152878
log::Writer** new_log);
@@ -3282,6 +3345,28 @@ class DBImpl : public DB {
32823345
AsyncFileOpenState bg_async_file_open_state_ =
32833346
AsyncFileOpenState::kNotScheduled;
32843347

3348+
// State machine for the single async WAL precreation slot protected by
3349+
// mutex_. Background precreation failure returns to kNotScheduled; foreground
3350+
// rotation handles it the same as no prepared WAL and creates one
3351+
// synchronously. kScheduled owns a reserved file number; kReady owns an
3352+
// opened writer that has not been started or added to logical WAL tracking.
3353+
// Stored as a single byte (uint8_t underlying type).
enum class AsyncWALPrecreateState : uint8_t {
3354+
// Initial state. CloseHelper() also resets the slot to this value after
// taking over any unused precreated WAL.
kNotScheduled = 0, // No WAL precreate work is in-flight or ready.
3355+
// CloseHelper() keeps waiting on bg_cv_ while the slot is in this state.
kScheduled, // Background task owns creation of the reserved WAL.
3356+
kReady, // Reserved WAL writer is open but not logically live.
3357+
};
3358+
3359+
// Protected by mutex_. Tracks at most one background precreated WAL. A
3360+
// precreated WAL is only reserved empty storage until SwitchMemtable()
3361+
// consumes it and installs it in DBImpl's in-memory logical WAL tracking
3362+
// lists (logs_ and alive_wal_files_).
3363+
AsyncWALPrecreateState async_wal_precreate_state_ =
3364+
AsyncWALPrecreateState::kNotScheduled;
3365+
3366+
// Reserved in-flight/ready precreated WAL. The writer is populated only while
3367+
// state is kReady.
3368+
UnpublishedWAL async_wal_precreate_wal_;
3369+
32853370
std::deque<ManualCompactionState*> manual_compaction_dequeue_;
32863371

32873372
// shall we disable deletion of obsolete files

0 commit comments

Comments
 (0)