Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions fdbclient/KeyRangeMap.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/IKnobCollection.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // has to be last include

Expand Down Expand Up @@ -221,6 +222,15 @@ ACTOR Future<Void> krmSetRange(Reference<ReadYourWritesTransaction> tr, Key mapP
}

// Sets a range of keys in a key range map, coalescing with adjacent regions if the values match
static bool shouldTolerateUncoalescedKRM() {
try {
return IKnobCollection::getGlobalKnobCollection().getServerKnobs().DD_TOLERATE_UNCOALESCED_KRM;
} catch (Error&) {
// Client-only processes (e.g. fdbbackup) don't have server knobs.
return false;
}
}

// Ranges outside of maxRange will not be coalesced
// CAUTION: use care when attempting to coalesce multiple ranges in the same prefix in a single transaction
ACTOR template <class Transaction>
Expand Down Expand Up @@ -292,12 +302,26 @@ static Future<Void> krmSetRangeCoalescing_(Transaction* tr,
endValue = existingValue;
}

tr->clear(KeyRangeRef(beginKey, endKey));
// Check for uncoalesced data: if value equals endValue but we're not at maxWithPrefix.end,
// there are redundant entries beyond what we read. With the knob enabled, warn and continue
// (may write an uncoalesced boundary at endKey, but dropping the operation is worse).
// Without knob, ASSERT as before.
if (value == endValue && endKey != maxWithPrefix.end) {
if (shouldTolerateUncoalescedKRM()) {
TraceEvent(SevWarnAlways, "KRMToleratingUncoalescedEntries")
.detail("MapPrefix", mapPrefix)
.detail("BeginKey", beginKey)
.detail("EndKey", endKey)
.detail("MaxEnd", maxWithPrefix.end)
.detail("Value", value);
} else {
ASSERT(false); // Uncoalesced KRM entries detected; set DD_TOLERATE_UNCOALESCED_KRM=true to tolerate
Copy link

Copilot AI Apr 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the non-tolerating path this switches from ASSERT(value != endValue || endKey == maxWithPrefix.end) to ASSERT(false), which loses the original condition string and makes failures harder to diagnose. Consider asserting the original condition (possibly after a TraceEvent with details) so the crash output still reflects the invariant being violated.

Suggested change
ASSERT(false); // Uncoalesced KRM entries detected; set DD_TOLERATE_UNCOALESCED_KRM=true to tolerate
ASSERT(value != endValue ||
endKey == maxWithPrefix.end); // Uncoalesced KRM entries detected; set DD_TOLERATE_UNCOALESCED_KRM=true to tolerate

Copilot uses AI. Check for mistakes.
}
}

ASSERT(value != endValue || endKey == maxWithPrefix.end);
tr->clear(KeyRangeRef(beginKey, endKey));
tr->set(beginKey, value);
tr->set(endKey, endValue);

return Void();
}
Future<Void> krmSetRangeCoalescing(Transaction* const& tr,
Expand Down
1 change: 1 addition & 0 deletions fdbclient/ServerKnobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( DD_VALIDATE_LOCALITY, true ); if( randomize && BUGGIFY ) DD_VALIDATE_LOCALITY = false;
init( DD_CHECK_INVALID_LOCALITY_DELAY, 60 ); if( randomize && BUGGIFY ) DD_CHECK_INVALID_LOCALITY_DELAY = 1 + deterministicRandom()->random01() * 600;
init( DD_ENABLE_VERBOSE_TRACING, true ); if( randomize && BUGGIFY ) DD_ENABLE_VERBOSE_TRACING = false;
init( DD_TOLERATE_UNCOALESCED_KRM, false ); // Off by default; enable to tolerate corrupted metadata instead of crashing
init( DD_SS_FAILURE_VERSIONLAG, 250000000 );
init( DD_SS_ALLOWED_VERSIONLAG, 200000000 ); if( randomize && BUGGIFY ) { DD_SS_FAILURE_VERSIONLAG = deterministicRandom()->randomInt(15000000, 500000000); DD_SS_ALLOWED_VERSIONLAG = 0.75 * DD_SS_FAILURE_VERSIONLAG; }
init( DD_SS_STUCK_TIME_LIMIT, 300.0 ); if( randomize && BUGGIFY ) { DD_SS_STUCK_TIME_LIMIT = 200.0 + deterministicRandom()->random01() * 100.0; }
Expand Down
1 change: 1 addition & 0 deletions fdbclient/include/fdbclient/ServerKnobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
bool DD_VALIDATE_LOCALITY;
int DD_CHECK_INVALID_LOCALITY_DELAY;
bool DD_ENABLE_VERBOSE_TRACING;
bool DD_TOLERATE_UNCOALESCED_KRM; // If true, tolerate uncoalesced KRM entries instead of crashing with ASSERT
Copy link

Copilot AI Apr 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR description mentions enabling the behavior with DD_COALESCE_UNCOALESCED_KRM=true, but the code/config knob added here is DD_TOLERATE_UNCOALESCED_KRM (toml: dd_tolerate_uncoalesced_krm). To avoid operational confusion during recovery, align the documentation/PR description with the actual knob name.

Copilot uses AI. Check for mistakes.
int64_t
DD_SS_FAILURE_VERSIONLAG; // Allowed SS version lag from the current read version before marking it as failed.
int64_t DD_SS_ALLOWED_VERSIONLAG; // SS will be marked as healthy if it's version lag goes below this value.
Expand Down
14 changes: 13 additions & 1 deletion fdbserver/MoveKeys.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,20 @@ ACTOR Future<Void> unassignServerKeys(Transaction* tr, UID ssId, KeyRange range,

tr->clear(KeyRangeRef(beginKey, endKey));

// Write entries in pairs, checking for uncoalesced entries (adjacent with same value).
Copy link

Copilot AI Apr 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new comment says “Write entries in pairs”, but the loop advances by 1 and writes overlapping adjacent pairs (i,i+1), so it’s not actually writing non-overlapping pairs. Consider rewording to clarify it’s checking adjacent entries for coalescing violations (or change the loop to step by 2 if that was the intent).

Suggested change
// Write entries in pairs, checking for uncoalesced entries (adjacent with same value).
// Write entries while checking adjacent entries for uncoalesced KRM values.

Copilot uses AI. Check for mistakes.
for (int i = 0; i < kvs.size() - 1; ++i) {
ASSERT(kvs[i].value != kvs[i + 1].value || kvs[i + 1].key.removePrefix(mapPrefix) == allKeys.end);
bool atEnd = kvs[i + 1].key.removePrefix(mapPrefix) == allKeys.end;
if (kvs[i].value == kvs[i + 1].value && !atEnd) {
if (SERVER_KNOBS->DD_TOLERATE_UNCOALESCED_KRM) {
TraceEvent(SevWarnAlways, "MoveKeysUncoalescedDetected", logId)
.detail("SSID", ssId)
.detail("Key1", kvs[i].key)
.detail("Key2", kvs[i + 1].key)
.detail("Value", kvs[i].value);
} else {
ASSERT(false); // Uncoalesced KRM entries detected; set DD_TOLERATE_UNCOALESCED_KRM=true to tolerate
Copy link

Copilot AI Apr 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the tolerance knob is disabled, this now does ASSERT(false) without preserving the original assertion condition or emitting any SevError trace details. This is a regression in debuggability compared to the previous ASSERT(kvs[i].value != kvs[i + 1].value || atEnd) condition. Consider keeping the original ASSERT condition (and/or logging the keys/value before asserting) in the non-tolerating path.

Suggested change
ASSERT(false); // Uncoalesced KRM entries detected; set DD_TOLERATE_UNCOALESCED_KRM=true to tolerate
TraceEvent(SevError, "MoveKeysUncoalescedDetected", logId)
.detail("SSID", ssId)
.detail("Key1", kvs[i].key)
.detail("Key2", kvs[i + 1].key)
.detail("Value", kvs[i].value);
ASSERT(kvs[i].value != kvs[i + 1].value ||
atEnd); // Uncoalesced KRM entries detected; set DD_TOLERATE_UNCOALESCED_KRM=true to tolerate

Copilot uses AI. Check for mistakes.
}
}
tr->set(kvs[i].key, kvs[i].value);
tr->set(kvs[i + 1].key, kvs[i + 1].value);
}
Expand Down
252 changes: 252 additions & 0 deletions fdbserver/workloads/KRMCoalesceTest.actor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
/*
* KRMCoalesceTest.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Test workload for KRM (KeyRangeMap) tolerance of uncoalesced entries.
//
// This test verifies that when DD_TOLERATE_UNCOALESCED_KRM is enabled,
// krmSetRangeCoalescing warns about adjacent entries with the same value
// and still applies the caller's mutation, rather than crashing with ASSERT.
//
// The test:
// 1. Creates a test KRM with a custom prefix
// 2. Manually injects uncoalesced entries (adjacent keys with same value)
// 3. Calls krmSetRangeCoalescing on an overlapping range
// 4. Verifies the operation completes without crashing AND the data is correct

#include "fdbclient/FDBTypes.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Knobs.h"
#include "flow/actorcompiler.h" // This must be the last #include.

struct KRMCoalesceTestWorkload : TestWorkload {
static constexpr auto NAME = "KRMCoalesceTest";

Key testPrefix;
bool enabled;

KRMCoalesceTestWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
enabled = !clientId && g_network->isSimulated();
testPrefix = "/krmcoalescetest/"_sr;
}

Future<Void> setup(Database const& cx) override { return Void(); }

Future<Void> start(Database const& cx) override {
if (!enabled)
return Void();
return runTest(this, cx);
}

Future<bool> check(Database const& cx) override { return true; }

void getMetrics(std::vector<PerfMetric>& m) override {}

ACTOR static Future<Void> runTest(KRMCoalesceTestWorkload* self, Database cx) {
wait(testCoalesceUncoalescedEntries(self, cx));
wait(testNormalCoalescing(self, cx));
TraceEvent("KRMCoalesceTestComplete");
return Void();
}

// Helper to read all entries under a prefix in range [startSuffix, endSuffix)
ACTOR static Future<RangeResult> readEntries(Database cx, Key prefix, Key startSuffix, Key endSuffix) {
state Transaction tr(cx);
loop {
try {
RangeResult r = wait(tr.getRange(
KeyRangeRef(prefix.withSuffix(startSuffix), prefix.withSuffix(endSuffix)), CLIENT_KNOBS->TOO_MANY));
return r;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

ACTOR static Future<Void> testCoalesceUncoalescedEntries(KRMCoalesceTestWorkload* self, Database cx) {
state Key prefix = self->testPrefix.withSuffix("uncoalesced/"_sr);
state Key keyA = "a"_sr;
state Key keyB = "b"_sr;
state Key keyC = "c"_sr;
state Key keyD = "d"_sr;
state Key keyE = "e"_sr;
state Key keyF = "f"_sr;
state Key keyG = "g"_sr;
state Value valueX = "X"_sr;
state Value valueY = "Y"_sr;

TraceEvent("KRMCoalesceTestStartUncoalesced").detail("Prefix", prefix);

if (!SERVER_KNOBS->DD_TOLERATE_UNCOALESCED_KRM) {
TraceEvent("KRMCoalesceTestSkipped").detail("Reason", "DD_TOLERATE_UNCOALESCED_KRM is false");
return Void();
}

// Step 1: Inject uncoalesced entries
// Create: prefix+A=X, prefix+B=X, prefix+C=X, prefix+D=X, prefix+E=X, prefix+F=Y
// B through E are uncoalesced (same value X as A)
state Transaction tr1(cx);
loop {
try {
tr1.set(prefix.withSuffix(keyA), valueX);
tr1.set(prefix.withSuffix(keyB), valueX);
tr1.set(prefix.withSuffix(keyC), valueX);
tr1.set(prefix.withSuffix(keyD), valueX);
tr1.set(prefix.withSuffix(keyE), valueX);
tr1.set(prefix.withSuffix(keyF), valueY);
wait(tr1.commit());
break;
} catch (Error& e) {
wait(tr1.onError(e));
}
}

// Step 2: Verify 6 entries exist
RangeResult beforeEntries = wait(readEntries(cx, prefix, keyA, keyG));
ASSERT_EQ(beforeEntries.size(), 6);
TraceEvent("KRMCoalesceTestInjectedUncoalesced").detail("EntryCount", beforeEntries.size());

// Step 3: Call krmSetRangeCoalescing — triggers tolerance path
// Set range [A, B) to valueX with maxRange [A, F)
// The function reads entries near B, finds B=X and C=X (uncoalesced),
// hits value(X)==endValue(X) && endKey(C)!=maxEnd(F), logs warning,
// then performs: clear([A,C)), set(A,X), set(C,X)
state Reference<ReadYourWritesTransaction> tr3 = makeReference<ReadYourWritesTransaction>(cx);
loop {
try {
wait(krmSetRangeCoalescing(tr3, prefix, KeyRangeRef(keyA, keyB), KeyRangeRef(keyA, keyF), valueX));
wait(tr3->commit());
break;
} catch (Error& e) {
wait(tr3->onError(e));
}
}

TraceEvent("KRMCoalesceTestToleratedUncoalesced");

// Step 4: Verify correctness — the mutation was applied, not silently dropped
RangeResult afterEntries = wait(readEntries(cx, prefix, keyA, keyG));
TraceEvent("KRMCoalesceTestAfterTolerate")
.detail("EntryCount", afterEntries.size())
.detail("OriginalCount", 6);

// Verify A=X is present (the mutation was applied)
ASSERT(afterEntries.size() >= 1);
ASSERT(afterEntries[0].key == prefix.withSuffix(keyA));
ASSERT(afterEntries[0].value == valueX);

// Verify F=Y is still present (untouched by our operation)
bool foundF = false;
for (int i = 0; i < afterEntries.size(); i++) {
if (afterEntries[i].key == prefix.withSuffix(keyF)) {
ASSERT(afterEntries[i].value == valueY);
foundF = true;
}
}
ASSERT(foundF);

// B should have been cleared (it was in the clear range [A, C))
for (int i = 0; i < afterEntries.size(); i++) {
ASSERT(afterEntries[i].key != prefix.withSuffix(keyB));
}

// Clean up
state Transaction tr5(cx);
loop {
try {
tr5.clear(KeyRangeRef(prefix, strinc(prefix)));
wait(tr5.commit());
break;
} catch (Error& e) {
wait(tr5.onError(e));
}
}

TraceEvent("KRMCoalesceTestUncoalescedComplete");
return Void();
}

ACTOR static Future<Void> testNormalCoalescing(KRMCoalesceTestWorkload* self, Database cx) {
state Key prefix = self->testPrefix.withSuffix("normal/"_sr);
state Key keyA = "a"_sr;
state Key keyB = "b"_sr;
state Key keyC = "c"_sr;
state Value valueX = "X"_sr;
state Value valueY = "Y"_sr;

TraceEvent("KRMCoalesceTestStartNormal").detail("Prefix", prefix);

// Set [A, B) = X
state Reference<ReadYourWritesTransaction> tr1 = makeReference<ReadYourWritesTransaction>(cx);
loop {
try {
wait(
krmSetRangeCoalescing(tr1, prefix, KeyRangeRef(keyA, keyB), KeyRangeRef(""_sr, "\xff"_sr), valueX));
wait(tr1->commit());
break;
} catch (Error& e) {
wait(tr1->onError(e));
}
}

// Set [B, C) = Y (different value, should not coalesce with A)
state Reference<ReadYourWritesTransaction> tr2 = makeReference<ReadYourWritesTransaction>(cx);
loop {
try {
wait(
krmSetRangeCoalescing(tr2, prefix, KeyRangeRef(keyB, keyC), KeyRangeRef(""_sr, "\xff"_sr), valueY));
wait(tr2->commit());
break;
} catch (Error& e) {
wait(tr2->onError(e));
}
}

// Verify entries
RangeResult entries = wait(readEntries(cx, prefix, keyA, "z"_sr));
TraceEvent("KRMCoalesceTestNormalEntries").detail("EntryCount", entries.size());

// Should have at least A=X and B=Y (plus possible end sentinel)
ASSERT(entries.size() >= 2);
ASSERT(entries[0].key == prefix.withSuffix(keyA));
ASSERT(entries[0].value == valueX);
ASSERT(entries[1].key == prefix.withSuffix(keyB));
ASSERT(entries[1].value == valueY);

// Clean up
state Transaction tr4(cx);
loop {
try {
tr4.clear(KeyRangeRef(prefix, strinc(prefix)));
wait(tr4.commit());
break;
} catch (Error& e) {
wait(tr4.onError(e));
}
}

TraceEvent("KRMCoalesceTestNormalComplete");
return Void();
}
};

WorkloadFactory<KRMCoalesceTestWorkload> KRMCoalesceTestWorkloadFactory;
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/InventoryTestAlmostReadOnly.toml)
add_fdb_test(TEST_FILES fast/InventoryTestSomeWrites.toml)
add_fdb_test(TEST_FILES fast/KillRegionCycle.toml)
add_fdb_test(TEST_FILES fast/KRMCoalesceTest.toml)
add_fdb_test(TEST_FILES fast/LocalRatekeeper.toml)
add_fdb_test(TEST_FILES fast/LongStackWriteDuringRead.toml)
add_fdb_test(TEST_FILES fast/LowLatency.toml)
Expand Down
14 changes: 14 additions & 0 deletions tests/fast/KRMCoalesceTest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[configuration]
allowDefaultTenant = false
storageEngineExcludeTypes = [5]

[[knobs]]
dd_tolerate_uncoalesced_krm = true

[[test]]
testTitle = 'KRMCoalesceTest'
clearAfterTest = true
simBackupAgents = 'Disabled'

[[test.workload]]
testName = 'KRMCoalesceTest'