Skip to content

Commit d137e52

Browse files
committed
remove (or #if) most of the trace info that was added re the write stall;
1 parent b2818bc commit d137e52

6 files changed

Lines changed: 70 additions & 12 deletions

File tree

docs/ReleaseNotes.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Release Notes
22

3+
## (unreleased)
4+
5+
- performance: resolve intermittent stall in the write-lock that could lead to unexpected timeouts even when at low/reasonable (but concurrent) load
6+
37
## 2.0.571
48

59
- performance: use new [arena allocation API](https://mgravell.github.io/Pipelines.Sockets.Unofficial/docs/arenas) to avoid `RawResult[]` overhead

src/StackExchange.Redis/ConnectionMultiplexer.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2152,15 +2152,15 @@ private static async Task<T> ExecuteAsyncImpl_Awaited<T>(ConnectionMultiplexer @
21522152
return tcs == null ? default(T) : await tcs.Task.ForAwait();
21532153
}
21542154

2155-
internal Exception GetException(WriteResult result, Message message, ServerEndPoint server, [CallerMemberName] string caller = null)
2155+
internal Exception GetException(WriteResult result, Message message, ServerEndPoint server)
21562156
{
21572157
switch (result)
21582158
{
21592159
case WriteResult.Success: return null;
21602160
case WriteResult.NoConnectionAvailable:
21612161
return ExceptionFactory.NoConnectionAvailable(IncludeDetailInExceptions, IncludePerformanceCountersInExceptions, message.Command, message, server, GetServerSnapshot());
21622162
case WriteResult.TimeoutBeforeWrite:
2163-
return ExceptionFactory.Timeout(this, "The timeout was reached before the message could be written to the output buffer, and it was not sent", message, server, result, origin: caller);
2163+
return ExceptionFactory.Timeout(this, "The timeout was reached before the message could be written to the output buffer, and it was not sent", message, server, result);
21642164
case WriteResult.WriteFailure:
21652165
default:
21662166
return ExceptionFactory.ConnectionFailure(IncludeDetailInExceptions, ConnectionFailureType.ProtocolFailure, "An unknown error occurred when writing the message", server);

src/StackExchange.Redis/ExceptionFactory.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ internal static string GetLibVersion()
181181
}
182182
return _libVersion;
183183
}
184-
internal static Exception Timeout(ConnectionMultiplexer mutiplexer, string baseErrorMessage, Message message, ServerEndPoint server, WriteResult? result = null, string origin = null)
184+
internal static Exception Timeout(ConnectionMultiplexer mutiplexer, string baseErrorMessage, Message message, ServerEndPoint server, WriteResult? result = null)
185185
{
186186
List<Tuple<string, string>> data = new List<Tuple<string, string>> { Tuple.Create("Message", message.CommandAndKey) };
187187
var sb = new StringBuilder();
@@ -203,16 +203,16 @@ void add(string lk, string sk, string v)
203203
}
204204
}
205205

206-
if (!string.IsNullOrWhiteSpace(origin)) add("Origin", null, origin);
207-
208206
// Add timeout data, if we have it
209207
if (result == WriteResult.TimeoutBeforeWrite)
210208
{
211209
add("Timeout", "timeout", Format.ToString(mutiplexer.TimeoutMilliseconds));
212210
try
213211
{
212+
#if DEBUG
214213
if (message.QueuePosition >= 0) add("QueuePosition", null, message.QueuePosition.ToString()); // the position the item was when added to the queue
215214
if ((int)message.ConnectionWriteState >= 0) add("WriteState", null, message.ConnectionWriteState.ToString()); // what the physical was doing when it was added to the queue
215+
#endif
216216
if (message.TryGetPhysicalState(out var state, out var sentDelta, out var receivedDelta))
217217
{
218218
add("PhysicalState", "phys", state.ToString());

src/StackExchange.Redis/Message.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Diagnostics;
34
using System.IO;
45
using System.Linq;
56
using System.Runtime.CompilerServices;
@@ -51,13 +52,18 @@ protected override void WriteImpl(PhysicalConnection physical)
5152
internal abstract class Message : ICompletable
5253
{
5354
public readonly int Db;
55+
56+
#if DEBUG
5457
internal int QueuePosition { get; private set; }
5558
internal PhysicalConnection.WriteStatus ConnectionWriteState { get; private set; }
56-
59+
#endif
60+
[Conditional("DEBUG")]
5761
internal void SetBacklogState(int position, PhysicalConnection physical)
5862
{
63+
#if DEBUG
5964
QueuePosition = position;
6065
ConnectionWriteState = physical?.Status ?? (PhysicalConnection.WriteStatus)(-1);
66+
#endif
6167
}
6268

6369
internal const CommandFlags InternalCallFlag = (CommandFlags)128;
@@ -623,8 +629,10 @@ internal bool TrySetResult<T>(T value)
623629

624630
internal void SetEnqueued(PhysicalConnection connection)
625631
{
632+
#if DEBUG
626633
QueuePosition = -1;
627634
ConnectionWriteState = (PhysicalConnection.WriteStatus)(-1);
635+
#endif
628636
SetWriteTime();
629637
performance?.SetEnqueued();
630638
_enqueuedTo = connection;

src/StackExchange.Redis/PhysicalBridge.cs

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -643,8 +643,10 @@ private WriteResult WriteMessageInsideLock(PhysicalConnection physical, Message
643643
Multiplexer?.OnInfoMessage($"reentrant call to WriteMessageTakingWriteLock for {message.CommandAndKey}, {existingMessage.CommandAndKey} is still active");
644644
return WriteResult.NoConnectionAvailable;
645645
}
646+
#if DEBUG
646647
int startWriteTime = Environment.TickCount;
647648
try
649+
#endif
648650
{
649651
physical.SetWriting();
650652
var messageIsSent = false;
@@ -679,6 +681,7 @@ private WriteResult WriteMessageInsideLock(PhysicalConnection physical, Message
679681
return WriteMessageToServerInsideWriteLock(physical, message);
680682
}
681683
}
684+
#if DEBUG
682685
finally
683686
{
684687
int endWriteTime = Environment.TickCount;
@@ -689,10 +692,12 @@ private WriteResult WriteMessageInsideLock(PhysicalConnection physical, Message
689692
_maxWriteCommand = message?.Command ?? default;
690693
}
691694
}
695+
#endif
692696
}
693-
697+
#if DEBUG
694698
private volatile int _maxWriteTime = -1;
695699
private RedisCommand _maxWriteCommand;
700+
#endif
696701

697702
[Obsolete("prefer async")]
698703
internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical, Message message)
@@ -755,10 +760,14 @@ private bool PushToBacklog(Message message, bool onlyIfExists)
755760
private void StartBacklogProcessor()
756761
{
757762
var sched = Multiplexer.SocketManager?.SchedulerPool ?? DedicatedThreadPoolPipeScheduler.Default;
763+
#if DEBUG
758764
_backlogProcessorRequestedTime = Environment.TickCount;
765+
#endif
759766
sched.Schedule(s_ProcessBacklog, _weakRefThis);
760767
}
768+
#if DEBUG
761769
private volatile int _backlogProcessorRequestedTime;
770+
#endif
762771

763772
private static readonly Action<object> s_ProcessBacklog = s =>
764773
{
@@ -807,20 +816,26 @@ private void ProcessBacklog()
807816
LockToken token = default;
808817
try
809818
{
819+
#if DEBUG
810820
int tryToAcquireTime = Environment.TickCount;
811821
var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime);
812822
int failureCount = 0;
823+
#endif
813824
while(true)
814825
{
815826
// try and get the lock; if unsuccessful, check for termination
816827
token = _singleWriterMutex.TryWait();
817828
if (token) break; // got the lock
818829
lock (_backlog) { if (_backlog.Count == 0) return; }
830+
#if DEBUG
819831
failureCount++;
832+
#endif
820833
}
821834
_backlogStatus = BacklogStatus.Started;
835+
#if DEBUG
822836
int acquiredTime = Environment.TickCount;
823837
var msToGetLock = unchecked(acquiredTime - tryToAcquireTime);
838+
#endif
824839

825840
// so now we are the writer; write some things!
826841
Message message;
@@ -841,14 +856,15 @@ private void ProcessBacklog()
841856
{
842857
_backlogStatus = BacklogStatus.RecordingTimeout;
843858
var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint);
859+
#if DEBUG // additional tracking
844860
ex.Data["Redis-BacklogStartDelay"] = msToStartWorker;
845861
ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock;
846862
if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount;
847863
if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString();
848864
var maxFlush = physical?.MaxFlushTime ?? -1;
849865
if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? -1).ToString();
850866
if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration;
851-
867+
#endif
852868
message.SetExceptionAndComplete(ex, this);
853869
}
854870
else
@@ -974,26 +990,33 @@ internal ValueTask<WriteResult> WriteMessageTakingWriteLockAsync(PhysicalConnect
974990
{
975991
if (releaseLock & token.Success)
976992
{
993+
#if DEBUG
977994
RecordLockDuration(lockTaken);
995+
#endif
978996
token.Dispose();
979997
}
980998
}
981999
}
1000+
#if DEBUG
9821001
private void RecordLockDuration(int lockTaken)
9831002
{
1003+
9841004
var lockDuration = unchecked(Environment.TickCount - lockTaken);
9851005
if (lockDuration > _maxLockDuration) _maxLockDuration = lockDuration;
9861006
}
9871007
volatile int _maxLockDuration = -1;
1008+
#endif
9881009

989-
private async ValueTask<WriteResult> WriteMessageTakingWriteLockAsync_Awaited(ValueTask<LockToken> pending, PhysicalConnection physical, Message message)
1010+
private async ValueTask<WriteResult> WriteMessageTakingWriteLockAsync_Awaited(ValueTask<LockToken> pending, PhysicalConnection physical, Message message)
9901011
{
9911012
try
9921013
{
9931014
using (var token = await pending.ForAwait())
9941015
{
9951016
if (!token.Success) return TimedOutBeforeWrite(message);
1017+
#if DEBUG
9961018
int lockTaken = Environment.TickCount;
1019+
#endif
9971020
var result = WriteMessageInsideLock(physical, message);
9981021

9991022
if (result == WriteResult.Success)
@@ -1004,7 +1027,9 @@ private async ValueTask<WriteResult> WriteMessageTakingWriteLockAsync_Awaited(Va
10041027
UnmarkActiveMessage(message);
10051028
physical.SetIdle();
10061029

1030+
#if DEBUG
10071031
RecordLockDuration(lockTaken);
1032+
#endif
10081033
return result;
10091034
}
10101035
}
@@ -1026,7 +1051,9 @@ private async ValueTask<WriteResult> CompleteWriteAndReleaseLockAsync(LockToken
10261051
return result;
10271052
}
10281053
catch (Exception ex) { return HandleWriteException(message, ex); }
1054+
#if DEBUG
10291055
finally { RecordLockDuration(lockTaken); }
1056+
#endif
10301057
}
10311058
}
10321059

src/StackExchange.Redis/PhysicalConnection.cs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -842,12 +842,18 @@ internal static int WriteRaw(Span<byte> span, long value, bool withLengthPrefix
842842
return WriteCrlf(span, offset);
843843
}
844844

845-
private async ValueTask<WriteResult> FlushAsync_Awaited(PhysicalConnection connection, ValueTask<FlushResult> flush, bool throwOnFailure, int startFlush, long flushBytes)
845+
private async ValueTask<WriteResult> FlushAsync_Awaited(PhysicalConnection connection, ValueTask<FlushResult> flush, bool throwOnFailure
846+
#if DEBUG
847+
, int startFlush, long flushBytes
848+
#endif
849+
)
846850
{
847851
try
848852
{
849853
await flush.ForAwait();
854+
#if DEBUG
850855
RecordEndFlush(startFlush, flushBytes);
856+
#endif
851857
connection._writeStatus = WriteStatus.Flushed;
852858
connection.UpdateLastWriteTime();
853859
return WriteResult.Success;
@@ -872,7 +878,9 @@ internal WriteResult FlushSync(bool throwOnFailure, int millisecondsTimeout)
872878

873879
void ThrowTimeout()
874880
{
881+
#if DEBUG
875882
if (millisecondsTimeout > _maxFlushTime) _maxFlushTime = millisecondsTimeout; // a fair bet even if we didn't measure
883+
#endif
876884
throw new TimeoutException("timeout while synchronously flushing");
877885
}
878886
}
@@ -883,12 +891,20 @@ internal ValueTask<WriteResult> FlushAsync(bool throwOnFailure)
883891
try
884892
{
885893
_writeStatus = WriteStatus.Flushing;
894+
#if DEBUG
886895
int startFlush = Environment.TickCount;
887896
long flushBytes = -1;
888897
if (_ioPipe is SocketConnection sc) flushBytes = sc.GetCounters().BytesWaitingToBeSent;
898+
#endif
889899
var flush = tmp.FlushAsync();
890-
if (!flush.IsCompletedSuccessfully) return FlushAsync_Awaited(this, flush, throwOnFailure, startFlush, flushBytes);
900+
if (!flush.IsCompletedSuccessfully) return FlushAsync_Awaited(this, flush, throwOnFailure
901+
#if DEBUG
902+
, startFlush, flushBytes
903+
#endif
904+
);
905+
#if DEBUG
891906
RecordEndFlush(startFlush, flushBytes);
907+
#endif
892908
_writeStatus = WriteStatus.Flushed;
893909
UpdateLastWriteTime();
894910
return new ValueTask<WriteResult>(WriteResult.Success);
@@ -899,8 +915,10 @@ internal ValueTask<WriteResult> FlushAsync(bool throwOnFailure)
899915
return new ValueTask<WriteResult>(WriteResult.WriteFailure);
900916
}
901917
}
918+
#if DEBUG
902919
private void RecordEndFlush(int start, long bytes)
903920
{
921+
904922
var end = Environment.TickCount;
905923
int taken = unchecked(end - start);
906924
if (taken > _maxFlushTime)
@@ -913,8 +931,9 @@ private void RecordEndFlush(int start, long bytes)
913931
private long _maxFlushBytes = -1;
914932
internal int MaxFlushTime => _maxFlushTime;
915933
internal long MaxFlushBytes => _maxFlushBytes;
934+
#endif
916935

917-
private static readonly ReadOnlyMemory<byte> NullBulkString = Encoding.ASCII.GetBytes("$-1\r\n"), EmptyBulkString = Encoding.ASCII.GetBytes("$0\r\n\r\n");
936+
private static readonly ReadOnlyMemory<byte> NullBulkString = Encoding.ASCII.GetBytes("$-1\r\n"), EmptyBulkString = Encoding.ASCII.GetBytes("$0\r\n\r\n");
918937

919938
private static void WriteUnifiedBlob(PipeWriter writer, byte[] value)
920939
{

0 commit comments

Comments
 (0)