Skip to content

Commit 9af4b75

Browse files
authored
Cluster: Proactively reconfigure when we hit a MOVED response (#2286)
Meant to help address #1520, #1660, #2074, and #2020. I'm not 100% sure about this: if `MOVED` responses keep arriving (e.g. because of a bad proxy somewhere), the reconfigure would re-run continually, though at most once every 5 seconds. Overall, though, today we linger in a bad state, retrying moves until a discovery happens, and with this change that could be resolved much faster.
1 parent f8303a6 commit 9af4b75

3 files changed

Lines changed: 21 additions & 8 deletions

File tree

docs/ReleaseNotes.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Current package versions:
88

99
## Unreleased
1010

11-
No pending changes for the next release yet.
11+
- Fix [#1520](https://github.com/StackExchange/StackExchange.Redis/issues/1520) & [#1660](https://github.com/StackExchange/StackExchange.Redis/issues/1660): When `MOVED` is encountered from a cluster, a reconfigure will happen proactively to react to cluster changes ASAP ([#2286 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2286))
1212

1313

1414
## 2.6.80

src/StackExchange.Redis/ConnectionMultiplexer.cs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ public sealed partial class ConnectionMultiplexer : IInternalConnectionMultiplex
4949

5050
ConfigurationOptions IInternalConnectionMultiplexer.RawConfig => RawConfig;
5151

52+
private int lastReconfigiureTicks = Environment.TickCount; // Environment.TickCount snapshot (milliseconds); starts at the value read when this field is initialized
53+
internal long LastReconfigureSecondsAgo => // whole seconds elapsed since lastReconfigiureTicks was last written
54+
unchecked(Environment.TickCount - Thread.VolatileRead(ref lastReconfigiureTicks)) / 1000; // unchecked int subtraction so a wrapped TickCount cannot throw; integer division truncates to whole seconds
55+
5256
private int _activeHeartbeatErrors, lastHeartbeatTicks;
5357
internal long LastHeartbeatSecondsAgo =>
5458
pulse is null
@@ -366,8 +370,19 @@ internal void CheckMessage(Message message)
366370
}
367371
}
368372

369-
internal bool TryResend(int hashSlot, Message message, EndPoint endpoint, bool isMoved) =>
370-
ServerSelectionStrategy.TryResend(hashSlot, message, endpoint, isMoved);
373+
internal bool TryResend(int hashSlot, Message message, EndPoint endpoint, bool isMoved)
374+
{
375+
// If we're being told to re-send something because the hash slot moved, that means our topology is out of date
376+
// ...and we should re-evaluate what's what.
377+
// Allow for a back-off so we don't hammer this in a loop though: LastReconfigureSecondsAgo is truncated to whole seconds, so "> 5" only passes once at least 6 seconds have elapsed
378+
if (isMoved && LastReconfigureSecondsAgo > 5)
379+
{
380+
// Async kickoff a reconfigure
381+
ReconfigureIfNeeded(endpoint, false, "MOVED encountered");
382+
}
383+
384+
return ServerSelectionStrategy.TryResend(hashSlot, message, endpoint, isMoved);
385+
}
371386

372387
/// <summary>
373388
/// Wait for a given asynchronous operation to complete (or timeout).
@@ -1214,6 +1229,7 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, LogP
12141229
}
12151230
Trace("Starting reconfiguration...");
12161231
Trace(blame != null, "Blaming: " + Format.ToString(blame));
1232+
Interlocked.Exchange(ref lastReconfigiureTicks, Environment.TickCount);
12171233

12181234
log?.WriteLine(RawConfig.ToString(includePassword: false));
12191235
log?.WriteLine();
@@ -1552,10 +1568,7 @@ public EndPoint[] GetEndPoints(bool configuredOnly = false) =>
15521568
foreach (EndPoint endpoint in clusterEndpoints)
15531569
{
15541570
serverEndpoint = GetServerEndPoint(endpoint);
1555-
if (serverEndpoint != null)
1556-
{
1557-
serverEndpoint.UpdateNodeRelations(clusterConfig);
1558-
}
1571+
serverEndpoint?.UpdateNodeRelations(clusterConfig);
15591572
}
15601573
return clusterEndpoints;
15611574
}

tests/StackExchange.Redis.Tests/CommandTimeoutTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public async Task DefaultHeartbeatLowTimeout()
4747
using var conn = ConnectionMultiplexer.Connect(options);
4848

4949
var pauseServer = GetServer(pauseConn);
50-
var pauseTask = pauseServer.ExecuteAsync("CLIENT", "PAUSE", 500);
50+
var pauseTask = pauseServer.ExecuteAsync("CLIENT", "PAUSE", 2000);
5151

5252
var key = Me();
5353
var db = conn.GetDatabase();

0 commit comments

Comments
 (0)