Skip to content

Commit a8b344d

Browse files
committed
integrate health-check
1 parent 41e2407 commit a8b344d

13 files changed

Lines changed: 372 additions & 156 deletions

src/StackExchange.Redis/ConfigurationOptions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,7 @@ public string TieBreaker
761761
public int WriteBuffer
762762
{
763763
get => 0;
764+
// ReSharper disable once ValueParameterNotUsed
764765
set { }
765766
}
766767

src/StackExchange.Redis/FrameworkShims.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ internal sealed class OverloadResolutionPriorityAttribute(int priority) : Attrib
2828
#endif
2929

3030
#if !NET
31-
3231
namespace System.Text
3332
{
3433
internal static class EncodingExtensions
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using System.Net;
2+
using System.Threading.Tasks;
3+
4+
namespace StackExchange.Redis;
5+
6+
public sealed partial class HealthCheck
{
    public partial class HealthCheckProbe
    {
        /// <summary>
        /// A probe that reports health purely from the <see cref="IServer.IsConnected"/> property; no further tests are run.
        /// </summary>
        public static HealthCheckProbe IsConnected
        {
            get { return ConnectedProbe.Instance; }
        }
    }

    /// <summary>
    /// Probe implementation exposed through <see cref="HealthCheckProbe.IsConnected"/>.
    /// </summary>
    private sealed class ConnectedProbe : HealthCheckProbe
    {
        // private constructor: instances are only created inside this class
        private ConnectedProbe()
        {
        }

        /// <summary>
        /// The shared probe instance.
        /// </summary>
        public static ConnectedProbe Instance { get; } = new ConnectedProbe();

        /// <inheritdoc/>
        public override Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IServer server)
        {
            // both outcomes are returned as pre-built tasks; nothing is awaited here
            if (server.IsConnected)
            {
                return HealthyTask;
            }

            return UnhealthyTask;
        }
    }
}

src/StackExchange.Redis/HealthCheck.Execute.cs

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,140 @@
11
using System;
2+
using System.Buffers;
23
using System.Diagnostics;
34
using System.Net;
5+
using System.Threading;
46
using System.Threading.Tasks;
57

68
namespace StackExchange.Redis;
79

810
public sealed partial class HealthCheck
911
{
12+
/// <summary>
/// Evaluate the health of the specified multiplexer, by evaluating all endpoints.
/// </summary>
/// <param name="multiplexer">The multiplexer whose servers are to be checked.</param>
/// <returns>
/// An unhealthy result straight away when the multiplexer reports that it is not connected;
/// otherwise the collated result of the per-server checks.
/// </returns>
public Task<HealthCheckResult> CheckHealthAsync(IConnectionMultiplexer multiplexer)
{
    // a disconnected multiplexer is reported without starting any probes
    if (!multiplexer.IsConnected)
    {
        return HealthCheckProbe.UnhealthyTask;
    }

    return CheckHealthCoreAsync(multiplexer);
}
17+
18+
// Starts one health check per server of the multiplexer, collates the results under the total
// timeout, and reports Unhealthy if anything in that process throws.
private async Task<HealthCheckResult> CheckHealthCoreAsync(IConnectionMultiplexer multiplexer)
{
    try
    {
        Task<HealthCheckResult>[] probes;
        if (multiplexer is not IInternalConnectionMultiplexer internalMultiplexer)
        {
            // public API route: enumerate the servers via GetServers()
            var servers = multiplexer.GetServers();
            probes = GetReusablePending(ref _reusablePending, servers.Length);
            for (int index = 0; index < probes.Length; index++)
            {
                probes[index] = CheckHealthAsync(servers[index]);
            }
        }
        else
        {
            // internal route: walk the server snapshot directly
            var snapshot = internalMultiplexer.GetServerSnapshot();
            probes = GetReusablePending(ref _reusablePending, snapshot.Length);
            for (int index = 0; index < probes.Length; index++)
            {
                probes[index] = CheckHealthAsync(snapshot[index].GetRedisServer(null));
            }
        }

        HealthCheckResult outcome = await CollateAsync(probes, TotalTimeoutMillis()).ForAwait();

        // collation finished without throwing (whatever the outcome), so the array can be offered
        // back for reuse; on the exception path it is deliberately not returned
        PutReusablePending(ref _reusablePending, ref probes);
        return outcome;
    }
    catch
    {
        // any failure (including the timeout raised by CollateAsync) counts as unhealthy
        return HealthCheckResult.Unhealthy;
    }
}
53+
54+
/// <summary>
/// Computes the overall time budget, in milliseconds, for a complete sequence of probes.
/// </summary>
/// <remarks>
/// The budget is <c>ProbeTimeout</c> for the first probe plus <c>ProbeTimeout + ProbeInterval</c> for each
/// of the remaining <c>ProbeCount - 1</c> probes. Fractional milliseconds are truncated, and a total that
/// does not fit in an <see cref="int"/> is capped at <see cref="int.MaxValue"/>.
/// </remarks>
/// <returns>The total timeout in milliseconds, or zero when the probe count is not positive.</returns>
internal int TotalTimeoutMillis()
{
    int count = ProbeCount;
    if (count <= 0)
    {
        Debug.Fail("We shouldn't get as far as calculating timeouts with a non-positive probe count.");
        return 0;
    }

    TimeSpan probeTimeout = ProbeTimeout, probeInterval = ProbeInterval;

    // the first probe doesn't have an interval before it, the rest do
    var totalTicks = probeTimeout.Ticks
        + ((probeTimeout.Ticks + probeInterval.Ticks) * (count - 1));
    double totalMillis = TimeSpan.FromTicks(totalTicks).TotalMilliseconds;

    // an unchecked double-to-int conversion of an out-of-range value yields an unspecified result,
    // so cap explicitly rather than letting a very long budget turn into garbage
    int millis = totalMillis >= int.MaxValue ? int.MaxValue : (int)totalMillis;
    Debug.Assert(millis > 0, "Total timeout should be positive");
    return millis;
}
72+
73+
// Apply timeout and collation logic to a group of probes.
// Any unhealthy probe makes the group Unhealthy; otherwise any healthy probe makes it Healthy;
// otherwise (no probes, or only other results) the group is Inconclusive.
// Throws TimeoutException when the group does not complete within timeoutMilliseconds.
internal static async Task<HealthCheckResult> CollateAsync(Task<HealthCheckResult>[] probes, int timeoutMilliseconds)
{
    var allProbes = Task.WhenAll(probes).ObserveErrors();

    bool finishedInTime = await allProbes.TimeoutAfter(timeoutMilliseconds).ForAwait();
    if (!finishedInTime)
    {
        // timeout
        foreach (var probe in probes)
        {
            _ = probe.ObserveErrors();
        }

        throw new TimeoutException();
    }

    // all completed inside timeout; all results should now be available
    int healthy = 0, unhealthy = 0;
    foreach (var probe in probes)
    {
        var outcome = await probe.ForAwait();
        switch (outcome)
        {
            case HealthCheckResult.Healthy:
                healthy++;
                break;
            case HealthCheckResult.Unhealthy:
                unhealthy++;
                break;
        }
    }

    if (unhealthy > 0)
    {
        return HealthCheckResult.Unhealthy;
    }

    return healthy > 0 ? HealthCheckResult.Healthy : HealthCheckResult.Inconclusive;
}
106+
107+
// Spare array of pending probe tasks kept between multiplexer-level health checks; it is handed
// by ref to GetReusablePending / PutReusablePending, which swap it using Interlocked.Exchange.
private Task<HealthCheckResult>[]? _reusablePending;
108+
109+
// The endpoint count - and therefore the number of pending tasks - rarely changes (if at all), so the
// buffer from a previous health-check can often be recycled, as long as we're careful.
// Atomically takes whatever array is stored in 'field' (leaving null behind) and returns it when its
// length equals 'count'; otherwise an array of the requested length is returned instead.
internal static Task<HealthCheckResult>[] GetReusablePending(ref Task<HealthCheckResult>[]? field, int count)
{
    var taken = Interlocked.Exchange(ref field, null);
    if (taken is not null && taken.Length == count)
    {
        return taken;
    }

    // nothing stored, or the stored array is the wrong size
    if (count == 0)
    {
        return [];
    }

    return new Task<HealthCheckResult>[count];
}
120+
121+
// Offers 'value' back for reuse: a non-empty array is cleared, stored into 'field', and the caller's
// reference is replaced with an empty array. A null or empty array is left untouched.
internal static void PutReusablePending(ref Task<HealthCheckResult>[]? field, ref Task<HealthCheckResult>[] value)
{
    if (value is not { Length: > 0 })
    {
        return;
    }

    // drop the task references before storing the array
    Array.Clear(value, 0, value.Length);
    Interlocked.Exchange(ref field, value);

    // the caller no longer owns the stored array
    value = [];
}
130+
10131
/// <summary>
11132
/// Evaluate the health of an endpoint.
12133
/// </summary>
13-
public async Task<HealthCheckResult> CheckHealthAsync(IConnectionMultiplexer multiplexer, EndPoint endpoint)
134+
public Task<HealthCheckResult> CheckHealthAsync(IServer server)
{
    // a server that already reports itself as disconnected is unhealthy without running any probes
    if (!server.IsConnected)
    {
        return HealthCheckProbe.UnhealthyTask;
    }

    return CheckHealthCoreAsync(server);
}
136+
137+
private async Task<HealthCheckResult> CheckHealthCoreAsync(IServer server)
14138
{
15139
try
16140
{
@@ -20,7 +144,7 @@ public async Task<HealthCheckResult> CheckHealthAsync(IConnectionMultiplexer mul
20144
HealthCheckResult probeResult;
21145
try
22146
{
23-
var pendingProbe = Probe.CheckHealthAsync(this, multiplexer, endpoint);
147+
var pendingProbe = Probe.CheckHealthAsync(this, server);
24148
probeResult = await pendingProbe.TimeoutAfter(timeout).ForAwait()
25149
? await pendingProbe.ForAwait() // completed
26150
: HealthCheckResult.Unhealthy; // timeout

src/StackExchange.Redis/HealthCheck.HealthCheckProbe.cs

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,24 @@ public abstract partial class HealthCheckProbe
1414
/// <summary>
1515
/// Check the health of the specified endpoint.
1616
/// </summary>
17-
public abstract Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IConnectionMultiplexer multiplexer, EndPoint endpoint);
17+
public abstract Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IServer server);
1818

19-
private static Task<HealthCheckResult>? _inconclusive;
19+
private static Task<HealthCheckResult>? _inconclusive, _healthy, _unhealthy;
2020

2121
/// <summary>
22-
/// Reports a probe that was skipped without being evaluated.
22+
/// Reports a memoized probe that was skipped without being evaluated.
2323
/// </summary>
24-
protected static Task<HealthCheckResult> Inconclusive => _inconclusive ??= Task.FromResult(HealthCheckResult.Inconclusive);
24+
protected internal static Task<HealthCheckResult> InconclusiveTask
{
    get
    {
        // created on first use, then served from the static field
        var cached = _inconclusive;
        if (cached is null)
        {
            cached = Task.FromResult(HealthCheckResult.Inconclusive);
            _inconclusive = cached;
        }

        return cached;
    }
}
25+
26+
/// <summary>
/// A memoized, already-completed task reporting a healthy probe.
/// </summary>
protected internal static Task<HealthCheckResult> HealthyTask
{
    get
    {
        // created on first use, then served from the static field
        var cached = _healthy;
        if (cached is null)
        {
            cached = Task.FromResult(HealthCheckResult.Healthy);
            _healthy = cached;
        }

        return cached;
    }
}
30+
31+
/// <summary>
/// A memoized, already-completed task reporting an unhealthy probe.
/// </summary>
protected internal static Task<HealthCheckResult> UnhealthyTask
{
    get
    {
        // created on first use, then served from the static field
        var cached = _unhealthy;
        if (cached is null)
        {
            cached = Task.FromResult(HealthCheckResult.Unhealthy);
            _unhealthy = cached;
        }

        return cached;
    }
}
2535
}
2636

2737
/// <summary>
@@ -30,15 +40,14 @@ public abstract partial class HealthCheckProbe
3040
public abstract class KeyWriteHealthCheckProbe : HealthCheckProbe
3141
{
3242
/// <inheritdoc/>
33-
public override Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IConnectionMultiplexer multiplexer, EndPoint endpoint)
43+
public override Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IServer server)
{
    // replicas are skipped by this probe
    if (!server.IsReplica)
    {
        RedisKey probeKey = server.InventKey("health-check/");
        if (!probeKey.IsNull)
        {
            Debug.Assert(server.Multiplexer.GetServer(probeKey).EndPoint == server.EndPoint, "Key was not routed to the correct endpoint");

            // hand over to the key-based overload using the multiplexer's database
            return CheckHealthAsync(healthCheck, server.Multiplexer.GetDatabase(), probeKey);
        }
    }

    // replica, or no key could be invented for this server
    return InconclusiveTask;
}
4352

4453
/// <summary>

src/StackExchange.Redis/HealthCheck.PingProbe.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ private sealed class PingProbe : HealthCheckProbe
1818
public static PingProbe Instance { get; } = new();
1919
private PingProbe() { }
2020

21-
public override async Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IConnectionMultiplexer multiplexer, EndPoint endpoint)
21+
/// <inheritdoc/>
public override async Task<HealthCheckResult> CheckHealthAsync(HealthCheck healthCheck, IServer server)
{
    // await via ForAwait(), as every other await in the health-check code does
    await server.PingAsync().ForAwait();

    // the ping completed without throwing
    return HealthCheckResult.Healthy;
}

0 commit comments

Comments
 (0)