Skip to content

Commit 0740ff9

Browse files
badrishcCopilot
andauthored
Fix three broken/flaky Tsavorite and Garnet tests (#1711)
1. LargeObjectMultiFlushedPages (Tsavorite ObjectTests): Remove [Explicit] and add a no-op ShiftReadOnlyAddress(currentROA, wait: true) sync point so the FlushedUntilAddress / ReadOnlyAddress assertions see a quiesced state after the async flush pipeline catches up. MonotonicUpdate is a no-op when passed the current ROA; the built-in wait loop in ShiftReadOnlyAddressWithWait blocks until FUA >= ROA, which is exactly the observation the test wants. 2. LargeObjectLinearizeFlushedPages (Tsavorite ObjectTests): Drop the Is.True assertion on the second ShiftReadOnlyAddress in DoFlush. That call intentionally races with the main thread's ShiftReadOnlyToTail; if the latter wins, MonotonicUpdate correctly refuses to move ROA backwards and returns false. The post- Task.WhenAll invariants (ROA == FUA == TailAddress) still fully verify correctness, and the test continues to exercise the overlapping-shift linearization path. 3. ConfigSetHeapMemorySizeUtilizationTest (Garnet RespConfigTests): Rewrite test body. The old test inserted 4 tiny lists (i++ inside the loop meant only 4 unique keys) against a 3MB budget, so the LogSizeTracker never saw over-budget and PostMemoryTrim was never invoked; trimCompleteEvent.Wait timed out at 90s. The post-wait assertion also expected AllocatedPageCount < MinLogAllocatedPageCount (== 2), which is logically impossible. The fix inserts 128 list objects with meaningful heap (16 * 256-byte items each, ~512 KB total), then issues CONFIG SET memory 8192 to trigger the shrink branch of UpdateTargetSize. TotalSize > highTargetSize causes the resizer to compute an eviction range, shift addresses with waitForEviction, and invoke PostMemoryTrim. The test now waits at most ~30s for the callback and asserts that APC or heap actually decreased. Remove [Explicit]. No product code changes. Validated with many consecutive green runs of each affected test, plus a full pass of the LargeObject and RespConfig* suites. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 8113f56 commit 0740ff9

2 files changed

Lines changed: 60 additions & 41 deletions

File tree

libs/storage/Tsavorite/cs/test/ObjectTests.cs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,6 @@ void DoRead(bool onDisk)
273273

274274
[Test, Category(TsavoriteKVTestCategory), Category(LogRecordCategory), Category(SmokeTestCategory), Category(ObjectIdMapCategory)]
275275
//[Repeat(300)]
276-
[Explicit("Temporary while I figure out why FlushedUntilAddress is not as expected")]
277276
public void LargeObjectMultiFlushedPages([Values(SerializeKeyValueSize.Thirty, SerializeKeyValueSize.OneK)] SerializeKeyValueSize serializeValueSize)
278277
{
279278
// Ensure our size calculations are correct by validating the test parameters are what we expect
@@ -304,12 +303,20 @@ public void LargeObjectMultiFlushedPages([Values(SerializeKeyValueSize.Thirty, S
304303
_ = bContext.Upsert(key, ref input, value, ref output);
305304
}
306305

306+
// The upsert loop implicitly advanced ReadOnlyAddress as pages were turned, which kicks off asynchronous page flushes.
307+
// ROA is already at its final value, but the flush completions that drive FlushedUntilAddress arrive on background
308+
// I/O threads and may not all have landed by the time we reach here. Synchronize by re-invoking ShiftReadOnlyAddress
309+
// with wait:true; the shift itself is a no-op (MonotonicUpdate fails for the same address) but the wait loop blocks
310+
// until FlushedUntilAddress catches up to the current ReadOnlyAddress.
311+
store.Log.ShiftReadOnlyAddress(store.hlogBase.ReadOnlyAddress, wait: true);
312+
307313
// Test that the expected number of pages were flushed and that we get the right results back. We've flushed in a multiple of full pages,
308-
// so ReadOnlyAddress will be aligned to page size. We have 0.1 mutable fraction, so we should have closed 2 pages and have 3 mutable,
309-
// leaving 27 immutable, totaling FUA 29 * PageSize. (That's why we verified 32 above.. easier to verify numbers here).
314+
// so ReadOnlyAddress will be aligned to page size. With 0.1 mutable fraction, tail at 34 * PageSize, and head shifted to page 2 during
315+
// the last two page-turns (each of which moves head by one page once MaxAllocatedPageCount is reached), CalculateReadOnlyAddress snaps
316+
// ROA to page 29. That is: 2 closed pages (0, 1), 27 immutable (2..28) and 5 mutable (29..33), with FUA == ROA == 29 * PageSize.
310317
// Note: This test does NOT use SizeTracker; therefore it does not evict except when page count is exceeded.
311318
Assert.That(IsAligned(store.hlogBase.ReadOnlyAddress, store.hlogBase.PageSize), $"ReadOnlyAddress ({store.hlogBase.ReadOnlyAddress}) should be page-aligned");
312-
Assert.That(store.hlogBase.FlushedUntilAddress, Is.EqualTo(store.hlogBase.PageSize * 29), $"FUA ({store.hlogBase.FlushedUntilAddress}) != PageSize * 29"); // <============== Fix this AF
319+
Assert.That(store.hlogBase.FlushedUntilAddress, Is.EqualTo(store.hlogBase.PageSize * 29), $"FUA ({store.hlogBase.FlushedUntilAddress}) != PageSize * 29");
313320
Assert.That(store.hlogBase.FlushedUntilAddress, Is.EqualTo(store.hlogBase.ReadOnlyAddress), $"FUA ({store.hlogBase.FlushedUntilAddress}) == ROA");
314321
DoRead(0, 2 * ObjectsPerPage - 1, onDisk: true);
315322
DoRead(2 * ObjectsPerPage, lastKey, onDisk: false);
@@ -463,9 +470,16 @@ void DoFlush(long newReadOnlyAddress, ManualResetEventSlim gate)
463470
try
464471
{
465472
// Do this in two pieces so we signal the gate in between to give the second call above a chance to launch.
473+
// The first shift must succeed because no concurrent shift has happened yet.
466474
Assert.That(store.hlogBase.ShiftReadOnlyAddress(newReadOnlyAddress - store.hlogBase.PageSize * 10), Is.True);
467475
gate?.Set();
468-
Assert.That(store.hlogBase.ShiftReadOnlyAddress(newReadOnlyAddress), Is.True);
476+
477+
// The second shift races with the main thread's ShiftReadOnlyToTail. Either ordering is valid and exercises
478+
// the linearization path: if this shift lands first, ShiftReadOnlyToTail extends ROA from newReadOnlyAddress
479+
// to the tail; if ShiftReadOnlyToTail lands first, ROA is already past newReadOnlyAddress and MonotonicUpdate
480+
// (correctly) refuses to move ROA backwards, so this call returns false. Both outcomes converge to the same
481+
// post-condition (ROA == FUA == TailAddress), which is what the caller asserts after awaiting both tasks.
482+
_ = store.hlogBase.ShiftReadOnlyAddress(newReadOnlyAddress);
469483
}
470484
finally
471485
{

test/Garnet.test/RespConfigTests.cs

Lines changed: 41 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -682,59 +682,64 @@ public void TearDown()
682682
}
683683

684684
/// <summary>
685-
/// This test verifies that dynamically changing the object store heap size configuration using CONFIG SET
686-
/// incurs a reduction in the used memory of the store.
685+
/// This test verifies that dynamically shrinking the main-log memory target via CONFIG SET memory
686+
/// drives the log size tracker to evict pages / release heap so that total memory usage is reduced.
687687
/// </summary>
688-
/// <param name="largerSize">Heap size larger than configured size</param>
688+
/// <param name="smallerSize">Memory size smaller than the current total log+heap usage</param>
689689
[Test]
690690
[TestCase("8192")]
691-
[Explicit("Currently hangs due to waiting for eviction callback.")]
692-
public void ConfigSetHeapMemorySizeUtilizationTest(string largerSize)
691+
public void ConfigSetHeapMemorySizeUtilizationTest(string smallerSize)
693692
{
694693
using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true));
695694
var db = redis.GetDatabase(0);
696695
var option = "memory";
697696

698-
// Verify that initial allocated page count is the minimum (before we've added anything)
699697
var store = server.Provider.StoreWrapper.store;
700698
var tracker = store.Log.LogSizeTracker;
701699

700+
// Verify initial state: no data yet, allocated page count at the minimum.
701+
ClassicAssert.AreEqual(RespConfigTests.MinLogAllocatedPageCount, store.Log.AllocatedPageCount);
702+
702703
using var trimCompleteEvent = new ManualResetEventSlim(false);
703704
tracker.PostMemoryTrim = (allocatedPageCount, headAddress) => { trimCompleteEvent.Set(); };
704705

705-
var initialApc = RespConfigTests.MinLogAllocatedPageCount;
706-
ClassicAssert.AreEqual(initialApc, store.Log.AllocatedPageCount);
707-
708-
// TODO make this larger. Assert that HA has advanced before reaching MaxAllocatedPageCount
709-
710-
711-
// Add objects to store to fill up heap
706+
// Insert enough list objects so that (a) the inline log grows beyond MinResizeTargetPageCount pages
707+
// (required before DetermineEvictionRange can evict anything), and (b) the tracked heap size is
708+
// well above the new target we're about to configure. Each list contributes ~4KB of heap
709+
// (16 items * 256 bytes), and each record adds a small inline entry to the log.
710+
const int numKeys = 128;
712711
var values = new RedisValue[16];
712+
var valPayload = new string('x', 256);
713713
for (var i = 0; i < values.Length; i++)
714-
values[i] = "x";
715-
716-
for (var i = 0; i < 8; i++)
717-
_ = db.ListRightPush($"key{i++:00000}", values);
718-
719-
// Wait for the logSizeTracker to stabilize.
720-
Assert.That(trimCompleteEvent.Wait(TimeSpan.FromSeconds(3 * 3 * LogSizeTracker.ResizeTaskDelaySeconds)),
721-
"Timeout occurred. Resizing did not happen within the specified time.");
722-
723-
// Verify that allocated page count has decreased
724-
ClassicAssert.Less(store.Log.AllocatedPageCount, initialApc);
725-
var prevApc = store.Log.AllocatedPageCount;
726-
727-
// TODO verify that the HeadAddress has moved
728-
729-
//////////////////////////////////////////////////////
730-
// Try to set heap size to a larger value than current
731-
var result = db.Execute("CONFIG", "SET", option, largerSize);
714+
values[i] = valPayload;
715+
for (var i = 0; i < numKeys; i++)
716+
_ = db.ListRightPush($"key{i:00000}", values);
717+
718+
// Sanity-check the preconditions for the shrink/eviction we are about to trigger.
719+
var apcBefore = store.Log.AllocatedPageCount;
720+
var heapBefore = tracker.LogHeapSizeBytes;
721+
Assert.That(apcBefore, Is.GreaterThan(LogSizeTracker.MinResizeTargetPageCount),
722+
"Test precondition: need more than MinResizeTargetPageCount pages for eviction to be possible.");
723+
Assert.That(heapBefore, Is.GreaterThan(0), "Test precondition: heap should be non-empty after inserts.");
724+
725+
// Shrink the memory target. The 'shrink' branch of LogSizeTracker.UpdateTargetSize signals the
726+
// resizer task; because TotalSize is now well above highTargetSize, the task calls
727+
// DetermineEvictionRange + ShiftAddresses and then invokes PostMemoryTrim.
728+
var result = db.Execute("CONFIG", "SET", option, smallerSize);
732729
ClassicAssert.AreEqual("OK", result.ToString());
733-
734-
// There is no need to wait for the logSizeTracker to stabilize, as it will simply increase the page count.
735-
736-
// Verify that MaxAllocatedPageCount has increased.
737-
ClassicAssert.Less(store.Log.AllocatedPageCount, prevApc);
730+
var smallerTarget = ServerOptions.ParseSize(smallerSize, out _);
731+
Assert.That(tracker.TargetSize, Is.EqualTo(smallerTarget));
732+
733+
// Wait for the trim callback.
734+
Assert.That(trimCompleteEvent.Wait(TimeSpan.FromSeconds(3 * LogSizeTracker.ResizeTaskDelaySeconds)),
735+
"Timeout occurred. PostMemoryTrim was not invoked after CONFIG SET memory shrink.");
736+
737+
// Verify that memory usage actually dropped: we expect at least one of the allocated page count
738+
// or the tracked heap size to have decreased as a result of the eviction scan.
739+
var apcAfter = store.Log.AllocatedPageCount;
740+
var heapAfter = tracker.LogHeapSizeBytes;
741+
Assert.That(apcAfter < apcBefore || heapAfter < heapBefore, Is.True,
742+
$"Expected APC ({apcBefore}->{apcAfter}) or heap ({heapBefore}->{heapAfter}) to decrease after trim.");
738743
}
739744
}
740745
}

0 commit comments

Comments
 (0)