Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5286,6 +5286,15 @@ size_t rocksdb_options_get_recycle_log_file_num(rocksdb_options_t* opt) {
return opt->rep.recycle_log_file_num;
}

// C API setter: stores `v` into the async_wal_precreate field of the options
// object wrapped by `opt`. `opt` must be non-null; it is dereferenced
// unconditionally.
void rocksdb_options_set_async_wal_precreate(rocksdb_options_t* opt,
                                             unsigned char v) {
  auto& wrapped_options = opt->rep;
  wrapped_options.async_wal_precreate = v;
}

// C API getter: returns the async_wal_precreate field of the options object
// wrapped by `opt`, converted to unsigned char. `opt` must be non-null; it is
// dereferenced unconditionally.
unsigned char rocksdb_options_get_async_wal_precreate(rocksdb_options_t* opt) {
  const unsigned char current_value = opt->rep.async_wal_precreate;
  return current_value;
}

void rocksdb_options_set_soft_pending_compaction_bytes_limit(
rocksdb_options_t* opt, size_t v) {
opt->rep.soft_pending_compaction_bytes_limit = v;
Expand Down
3 changes: 3 additions & 0 deletions db/c_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2869,6 +2869,9 @@ int main(int argc, char** argv) {
rocksdb_options_set_track_and_verify_wals_in_manifest(o, 42);
CheckCondition(1 ==
rocksdb_options_get_track_and_verify_wals_in_manifest(o));
CheckCondition(0 == rocksdb_options_get_async_wal_precreate(o));
rocksdb_options_set_async_wal_precreate(o, 1);
CheckCondition(1 == rocksdb_options_get_async_wal_precreate(o));

/* Blob Options */
rocksdb_options_set_enable_blob_files(o, 1);
Expand Down
15 changes: 15 additions & 0 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -788,12 +788,27 @@ Status DBImpl::CloseHelper() {
bg_flush_scheduled_ || bg_purge_scheduled_ ||
bg_pressure_callback_in_progress_ ||
bg_async_file_open_state_ == AsyncFileOpenState::kScheduled ||
async_wal_precreate_state_ == AsyncWALPrecreateState::kScheduled ||
pending_purge_obsolete_files_ ||
error_handler_.IsRecoveryInProgress()) {
TEST_SYNC_POINT("DBImpl::~DBImpl:WaitJob");
bg_cv_.Wait();
}

// Release any opened-but-unpublished WAL writer after the in-flight worker
// has published its result. Clear the DB-owned async slot while holding
// mutex_, but destroy the detached writer after dropping mutex_ because
// log::Writer / WritableFileWriter destruction can flush and close the file.
// The file itself can be left behind as an empty future WAL; recovery already
// tolerates it and marks its file number used if observed.
UnpublishedWAL unused_async_wal = std::move(async_wal_precreate_wal_);
async_wal_precreate_state_ = AsyncWALPrecreateState::kNotScheduled;
if (unused_async_wal.writer) {
mutex_.Unlock();
unused_async_wal.Reset();
mutex_.Lock();
}

// Ensure subclasses don't forget to schedule async file opening
assert(!immutable_db_options_.open_files_async || !opened_successfully_ ||
bg_async_file_open_state_ != AsyncFileOpenState::kNotScheduled);
Expand Down
87 changes: 86 additions & 1 deletion db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
Expand Down Expand Up @@ -2809,7 +2811,68 @@ class DBImpl : public DB {
size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; }

IOStatus CreateWAL(const WriteOptions& write_options, uint64_t log_file_num,
// Returns true when async WAL precreation is enabled and compatible with the
// active WAL strategy. WAL recycling already avoids file creation latency, so
// precreation is disabled when recycle_log_file_num is non-zero.
bool AsyncWALPrecreateEnabled() const;

// A WAL file that has a reserved file number and may have an opened writer,
// but has not been added to DBImpl's in-memory logical WAL tracking lists
// (logs_ and alive_wal_files_).
struct UnpublishedWAL {
uint64_t log_number = 0;
std::unique_ptr<log::Writer> writer;

UnpublishedWAL() = default;
UnpublishedWAL(const UnpublishedWAL&) = delete;
UnpublishedWAL& operator=(const UnpublishedWAL&) = delete;

UnpublishedWAL(UnpublishedWAL&& other) noexcept {
*this = std::move(other);
}
UnpublishedWAL& operator=(UnpublishedWAL&& other) noexcept {
if (this != &other) {
log_number = other.log_number;
writer = std::move(other.writer);
other.Reset();
}
return *this;
}

void Reset() {
log_number = 0;
writer.reset();
}
};

// Reserves the next WAL file number and schedules a HIGH-priority background
// task to precreate that WAL file. A precreated WAL is not a logical WAL
// until a foreground WAL rotation consumes it.
void MaybeScheduleAsyncWALPrecreate(size_t preallocate_block_size);

// Background task for opening the reserved future WAL and publishing the
// result under mutex_.
static void BGWorkAsyncWALPrecreate(void* arg);

// Waits for an in-flight async WAL precreation and returns a prepared WAL if
// one is available. If precreation failed, returns an empty WAL and lets the
// foreground rotation create the WAL synchronously. Caller must hold mutex_.
UnpublishedWAL WaitForAsyncWALPrecreate();

// Opens and preallocates a WAL writer without writing logical WAL records.
// Used by async WAL precreation and by synchronous WAL creation.
IOStatus CreateWALWriter(const DBOptions& db_options, uint64_t log_file_num,
uint64_t recycle_log_number,
size_t preallocate_block_size,
UnpublishedWAL* new_wal);

// Starts an opened WAL file by writing the initial records required before it
// can be installed as the current WAL for foreground writes.
IOStatus StartWALFile(const WriteOptions& write_options,
const PredecessorWALInfo& predecessor_wal_info,
log::Writer* new_log);
IOStatus CreateWAL(const DBOptions& db_options,
const WriteOptions& write_options, uint64_t log_file_num,
uint64_t recycle_log_number, size_t preallocate_block_size,
const PredecessorWALInfo& predecessor_wal_info,
log::Writer** new_log);
Expand Down Expand Up @@ -3282,6 +3345,28 @@ class DBImpl : public DB {
AsyncFileOpenState bg_async_file_open_state_ =
AsyncFileOpenState::kNotScheduled;

// Lifecycle of the one-and-only async WAL precreation slot. The slot is
// protected by mutex_.
//
//  * kNotScheduled - nothing is in flight and nothing is ready. A failed
//    background precreation also lands here, so foreground rotation treats a
//    failure exactly like "no prepared WAL" and creates the WAL synchronously.
//  * kScheduled    - a reserved file number is owned and a background task is
//    responsible for creating that WAL.
//  * kReady        - an opened writer is owned, but it has not been started or
//    added to logical WAL tracking.
enum class AsyncWALPrecreateState : uint8_t {
  // No WAL precreate work is in-flight or ready.
  kNotScheduled = 0,
  // Background task owns creation of the reserved WAL.
  kScheduled = 1,
  // Reserved WAL writer is open but not logically live.
  kReady = 2,
};

// Protected by mutex_. Tracks at most one background precreated WAL. A
// precreated WAL is only reserved empty storage until SwitchMemtable()
// consumes it and installs it in DBImpl's in-memory logical WAL tracking
// lists (logs_ and alive_wal_files_).
AsyncWALPrecreateState async_wal_precreate_state_ =
AsyncWALPrecreateState::kNotScheduled;

// Reserved in-flight/ready precreated WAL. The writer is populated only while
// state is kReady.
UnpublishedWAL async_wal_precreate_wal_;

std::deque<ManualCompactionState*> manual_compaction_dequeue_;

// shall we disable deletion of obsolete files
Expand Down
Loading
Loading