Skip to content

Commit 826cdc4

Browse files
vkuttypclaude
andcommitted
v2.5.8: revert CosmoKvConnection RW lock — keep Executor AsyncLocal
The v2.5.7 attempt at allowing concurrent reads on CosmoKvConnection backfired badly on marivil: - First design ("drain N reader slots" for writes) starved writers under sustained read load. Marivil stress run: 252 audit-write failures, 21 RelayWorker failures, 16 WebhookWorker failures, nearly all "txn_busy" or 30s timeout. Throughput collapsed to ~0. - Second attempt (writer-priority gate) deadlocked subtly under xUnit's parallel-test harness — fine in isolation, intermittent failures under load. A correct async RW lock with bounded fairness needs more care than this session can give it. Revert CosmoKvConnection to its prior single SemaphoreSlim(1, 1). The Executor-side AsyncLocal<IKvAccess?> change stays: it's harmless under serial access (the connection lock already serialises everything) and removes the "shared mutable _kv field" hazard that would otherwise need re-doing whenever the RW lock returns. Same with the ConcurrentDictionary swap for _defaultExprCache — pure safety, no behaviour change under serial. Marivil is back on cosmokvd v0.3.4 + CosmoSQLClient 2.5.6 (the known-good combo); v2.5.7 + v0.3.5 stay on NuGet/Docker Hub as "known-bad — do not deploy". Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c74256e commit 826cdc4

4 files changed

Lines changed: 23 additions & 269 deletions

File tree

Directory.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
<Authors>vkuttyp</Authors>
88
<PackageLicenseExpression>MIT</PackageLicenseExpression>
99
<RepositoryUrl>https://github.com/vkuttyp/CosmoSQLClient-Dotnet</RepositoryUrl>
10-
<Version>2.5.7</Version>
10+
<Version>2.5.8</Version>
1111
</PropertyGroup>
1212
</Project>

src/CosmoSQLClient.CosmoKv/AsyncReaderWriterLock.cs

Lines changed: 0 additions & 64 deletions
This file was deleted.

src/CosmoSQLClient.CosmoKv/CosmoKvConnection.cs

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,18 @@ public sealed class CosmoKvConnection : ISqlDatabase
3636
private CosmoKvTxn? _txn;
3737

3838
/// <summary>
39-
/// Reader-writer lock — multiple QueryAsync calls run concurrently
40-
/// (CosmoKv's MVCC already gives them snapshot isolation; the
41-
/// Executor's per-call state is now AsyncLocal so concurrent reads
42-
/// don't trample each other). ExecuteAsync and Begin/Commit/Rollback
43-
/// take the write lock for exclusive access — they mutate <see cref="_txn"/>
44-
/// and the underlying writer task. Replaces the prior single
45-
/// SemaphoreSlim(1,1) that serialised everything.
39+
/// Connection-level serialisation lock. The v2.5.7 attempt at an
40+
/// AsyncReaderWriterLock here regressed marivil's stress run badly —
41+
/// the "drain N slots" writer pattern starved under sustained read
42+
/// load (audit/RelayWorker writes blocked indefinitely), and a
43+
/// "writer-priority gate" rewrite hit subtler timing issues under
44+
/// the .NET test suite's parallel-test load. Reverted to the single
45+
/// SemaphoreSlim until a properly-engineered async RW lock can be
46+
/// designed; the <see cref="Execution.Executor"/>-side AsyncLocal
47+
/// change stays (it's harmless under serial access and enables the
48+
/// eventual RW-lock work without another driver-side refactor).
4649
/// </summary>
47-
private readonly AsyncReaderWriterLock _rwLock = new();
50+
private readonly SemaphoreSlim _lock = new(1, 1);
4851
private bool _disposed;
4952

5053
/// <summary>
@@ -182,7 +185,7 @@ public static Task<CosmoKvConnection> OpenAsync(
182185
public async Task<IReadOnlyList<SqlRow>> QueryAsync(
183186
string sql, IReadOnlyList<SqlParameter>? parameters = null, CancellationToken ct = default)
184187
{
185-
await _rwLock.AcquireReadAsync(ct).ConfigureAwait(false);
188+
await _lock.WaitAsync(ct);
186189
try
187190
{
188191
EnsureOpen();
@@ -203,7 +206,7 @@ public async Task<IReadOnlyList<SqlRow>> QueryAsync(
203206
finally
204207
{
205208
_executor?.SetActiveAccess(null);
206-
_rwLock.ReleaseRead();
209+
_lock.Release();
207210
}
208211
}
209212

@@ -231,7 +234,7 @@ public async IAsyncEnumerable<SqlRow> QueryStreamAsync(
231234
public async Task<int> ExecuteAsync(
232235
string sql, IReadOnlyList<SqlParameter>? parameters = null, CancellationToken ct = default)
233236
{
234-
await _rwLock.AcquireWriteAsync(ct).ConfigureAwait(false);
237+
await _lock.WaitAsync(ct);
235238
try
236239
{
237240
EnsureOpen();
@@ -283,7 +286,7 @@ public async Task<int> ExecuteAsync(
283286
finally
284287
{
285288
_executor?.SetActiveAccess(null);
286-
_rwLock.ReleaseWrite();
289+
_lock.Release();
287290
}
288291
}
289292

@@ -333,23 +336,23 @@ private static IReadOnlyList<SqlRow> MaterializeRows(
333336

334337
public async Task BeginTransactionAsync(CancellationToken ct = default)
335338
{
336-
await _rwLock.AcquireWriteAsync(ct).ConfigureAwait(false);
339+
await _lock.WaitAsync(ct);
337340
try { await BeginTransactionAsyncLocked(ct); }
338-
finally { _rwLock.ReleaseWrite(); }
341+
finally { _lock.Release(); }
339342
}
340343

341344
public async Task CommitAsync(CancellationToken ct = default)
342345
{
343-
await _rwLock.AcquireWriteAsync(ct).ConfigureAwait(false);
346+
await _lock.WaitAsync(ct);
344347
try { await CommitAsyncLocked(ct); }
345-
finally { _rwLock.ReleaseWrite(); }
348+
finally { _lock.Release(); }
346349
}
347350

348351
public async Task RollbackAsync(CancellationToken ct = default)
349352
{
350-
await _rwLock.AcquireWriteAsync(ct).ConfigureAwait(false);
353+
await _lock.WaitAsync(ct);
351354
try { await RollbackAsyncLocked(ct); }
352-
finally { _rwLock.ReleaseWrite(); }
355+
finally { _lock.Release(); }
353356
}
354357

355358
// ── Transaction internals (lock already held by caller) ─────────────────
@@ -418,9 +421,7 @@ public async ValueTask DisposeAsync()
418421
if (_disposed) return;
419422
_disposed = true;
420423
await CloseAsync();
421-
// _rwLock holds a SemaphoreSlim; .NET will reclaim it on GC.
422-
// We don't expose a hard-disposal hook because outstanding async
423-
// operations might still be holding slots when DisposeAsync runs.
424+
_lock.Dispose();
424425
}
425426

426427
private void EnsureOpen()

tests/CosmoSQLClient.CosmoKv.Tests/Phase29ConcurrentReadsTests.cs

Lines changed: 0 additions & 183 deletions
This file was deleted.

0 commit comments

Comments
 (0)