Skip to content

Commit f3ac74a

Browse files
authored
Fix #2249: Handle fail on cluster node responses appropriately (#2288)
Right now we don't pay attention to fail state (PFAIL == FAIL) and continue trying to connect in the main loop. I don't believe this was intended looking at the code, we just weren't handling the flag appropriately. Added now. Docs at: https://redis.io/commands/cluster-nodes/
1 parent 9af4b75 commit f3ac74a

5 files changed

Lines changed: 21 additions & 5 deletions

File tree

docs/ReleaseNotes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Current package versions:
99
## Unreleased
1010

1111
- Fix [#1520](https://github.com/StackExchange/StackExchange.Redis/issues/1520) & [#1660](https://github.com/StackExchange/StackExchange.Redis/issues/1660): When `MOVED` is encountered from a cluster, a reconfigure will happen proactively to react to cluster changes ASAP ([#2286 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2286))
12+
- Fix [#2249](https://github.com/StackExchange/StackExchange.Redis/issues/2249): Properly handle a `fail` state (new `ClusterNode.IsFail` property) for `CLUSTER NODES` and expose `fail?` as a property (`IsPossiblyFail`) as well ([#2288 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2288))
1213

1314

1415
## 2.6.80

src/StackExchange.Redis/ClusterConfiguration.cs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ internal ClusterConfiguration(ServerSelectionStrategy serverSelectionStrategy, s
171171
var node = new ClusterNode(this, line, origin);
172172

173173
// Be resilient to ":0 {primary,replica},fail,noaddr" nodes, and nodes where the endpoint doesn't parse
174-
if (node.IsNoAddr || node.EndPoint == null)
174+
if (node.IsNoAddr || node.IsFail || node.EndPoint == null)
175175
continue;
176176

177177
// Override the origin value with the endpoint advertised with the target node to
@@ -301,6 +301,8 @@ internal ClusterNode(ClusterConfiguration configuration, string raw, EndPoint or
301301
}
302302

303303
NodeId = parts[0];
304+
IsFail = flags.Contains("fail");
305+
IsPossiblyFail = flags.Contains("fail?");
304306
IsReplica = flags.Contains("slave") || flags.Contains("replica");
305307
IsNoAddr = flags.Contains("noaddr");
306308
ParentNodeId = string.IsNullOrWhiteSpace(parts[3]) ? null : parts[3];
@@ -345,6 +347,17 @@ public IList<ClusterNode> Children
345347
/// </summary>
346348
public EndPoint? EndPoint { get; }
347349

350+
/// <summary>
351+
/// Gets whether this node is in a failed state.
352+
/// </summary>
353+
public bool IsFail { get; }
354+
355+
/// <summary>
356+
/// Gets whether this node is possibly in a failed state.
357+
/// Possibly here means the node we're getting status from can't communicate with it, but doesn't mean it's down for sure.
358+
/// </summary>
359+
public bool IsPossiblyFail { get; }
360+
348361
/// <summary>
349362
/// Gets whether this is the node which responded to the CLUSTER NODES request.
350363
/// </summary>

src/StackExchange.Redis/PublicAPI/PublicAPI.Shipped.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,10 @@ StackExchange.Redis.ClusterNode.CompareTo(StackExchange.Redis.ClusterNode? other
140140
StackExchange.Redis.ClusterNode.EndPoint.get -> System.Net.EndPoint?
141141
StackExchange.Redis.ClusterNode.Equals(StackExchange.Redis.ClusterNode? other) -> bool
142142
StackExchange.Redis.ClusterNode.IsConnected.get -> bool
143+
StackExchange.Redis.ClusterNode.IsFail.get -> bool
143144
StackExchange.Redis.ClusterNode.IsMyself.get -> bool
144145
StackExchange.Redis.ClusterNode.IsNoAddr.get -> bool
146+
StackExchange.Redis.ClusterNode.IsPossiblyFail.get -> bool
145147
StackExchange.Redis.ClusterNode.IsReplica.get -> bool
146148
StackExchange.Redis.ClusterNode.IsSlave.get -> bool
147149
StackExchange.Redis.ClusterNode.NodeId.get -> string!

tests/StackExchange.Redis.Tests/CommandTimeoutTests.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ public class CommandTimeoutTests : TestBase
1010
{
1111
public CommandTimeoutTests(ITestOutputHelper output) : base (output) { }
1212

13-
[Fact]
13+
[FactLongRunning]
1414
public async Task DefaultHeartbeatTimeout()
1515
{
1616
var options = ConfigurationOptions.Parse(TestConfig.Current.PrimaryServerAndPort);
@@ -21,15 +21,15 @@ public async Task DefaultHeartbeatTimeout()
2121
using var conn = ConnectionMultiplexer.Connect(options);
2222

2323
var pauseServer = GetServer(pauseConn);
24-
var pauseTask = pauseServer.ExecuteAsync("CLIENT", "PAUSE", 2500);
24+
var pauseTask = pauseServer.ExecuteAsync("CLIENT", "PAUSE", 5000);
2525

2626
var key = Me();
2727
var db = conn.GetDatabase();
2828
var sw = ValueStopwatch.StartNew();
2929
var ex = await Assert.ThrowsAsync<RedisTimeoutException>(async () => await db.StringGetAsync(key));
3030
Log(ex.Message);
3131
var duration = sw.GetElapsedTime();
32-
Assert.True(duration < TimeSpan.FromSeconds(2100), $"Duration ({duration.Milliseconds} ms) should be less than 2100ms");
32+
Assert.True(duration < TimeSpan.FromSeconds(4000), $"Duration ({duration.Milliseconds} ms) should be less than 4000ms");
3333

3434
// Await as to not bias the next test
3535
await pauseTask;

tests/StackExchange.Redis.Tests/ConfigTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ public async Task TestAutomaticHeartbeat()
414414
await Task.Delay(8000).ForAwait();
415415

416416
var after = innerConn.OperationCount;
417-
Assert.True(after >= before + 2, $"after: {after}, before: {before}");
417+
Assert.True(after >= before + 1, $"after: {after}, before: {before}");
418418
}
419419
finally
420420
{

0 commit comments

Comments
 (0)