Skip to content

Commit eb16a97

Browse files
committed
fix for tests
1 parent ff12c92 commit eb16a97

27 files changed

+686
-526
lines changed

AGENTS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ If no new rule is detected -> do not update the file.
115115
- Always run required builds and tests yourself; do not ask the user to execute them (explicit user directive)
116116
- Commit messages are short and imperative; common prefixes in history include `fix:`, `tests:`, and `code style`
117117

118+
### Reviews
119+
120+
- For code reviews, be extra thorough and explicitly call out low-value/AI-sounding changes and whether changes actually improve behavior, performance, or safety
121+
- Never run SignalR work on the Orleans scheduler/context; offload it to dedicated tasks/threads to avoid blocking
122+
118123
### Documentation (ALL TASKS)
119124

120125
- All docs live in `docs/` (or `.wiki/`)
@@ -143,6 +148,7 @@ If no new rule is detected -> do not update the file.
143148
### Testing (ALL TASKS)
144149

145150
- Framework: xUnit + Shouldly; tests live in `ManagedCode.Orleans.SignalR.Tests` and end with `*Tests.cs`
151+
- Avoid introducing new `[GenerateSerializer]` state types in tests; reuse production models to keep Orleans serialization types in product code.
146152
- Integration tests use Orleans TestingHost with fixtures in `ManagedCode.Orleans.SignalR.Tests/Cluster` and the minimal host in `ManagedCode.Orleans.SignalR.Tests/TestApp`
147153
- Prefer TDD for new behaviour and bugfixes: write a failing test first, then implement the smallest change to make it pass, then refactor safely
148154
- Every behaviour change needs sufficient automated tests to cover its cases; one is the minimum, not the target
@@ -180,6 +186,10 @@ If no new rule is detected -> do not update the file.
180186
- Never use `ConfigureAwait(false)`
181187
- No magic literals - extract to constants, enums, config
182188

189+
### Comments
190+
191+
- When offloading SignalR observer sends with `Task.Run`, add a critical comment explaining that the send must not run on the Orleans scheduler, to avoid blocking
192+
183193
### Critical (NEVER violate)
184194

185195
- Never commit secrets, keys, connection strings

ManagedCode.Orleans.SignalR.Core/Config/OrleansSignalROptions.cs

Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ public class OrleansSignalROptions
6363
public int MaxQueuedMessagesPerUser { get; set; } = 100;
6464

6565
/// <summary>
66-
/// Number of consecutive failures before an observer is considered dead and removed.
66+
/// Number of consecutive failures before the circuit breaker opens.
67+
/// When the circuit breaker is disabled or the grace period is zero, the observer is removed instead once this threshold is reached.
6768
/// Set to 0 to disable failure tracking.
6869
/// The default value is 3.
6970
/// </summary>
@@ -111,45 +112,4 @@ public class OrleansSignalROptions
111112
/// </summary>
112113
public int MaxBufferedMessagesPerObserver { get; set; } = 50;
113114

114-
/// <summary>
115-
/// Maximum number of connections allowed per partition grain.
116-
/// New connections are rejected when the limit is exceeded.
117-
/// Set to 0 to disable connection limits (not recommended for production).
118-
/// The default value is 100,000.
119-
/// </summary>
120-
public int MaxConnectionsPerPartition { get; set; } = 100_000;
121-
122-
/// <summary>
123-
/// Maximum number of groups per partition grain.
124-
/// New groups are rejected when the limit is exceeded.
125-
/// Set to 0 to disable group limits.
126-
/// The default value is 50,000.
127-
/// </summary>
128-
public int MaxGroupsPerPartition { get; set; } = 50_000;
129-
130-
/// <summary>
131-
/// Timeout for slow client message delivery.
132-
/// Connections that cannot receive messages within this time may be terminated.
133-
/// The default value is 10 seconds.
134-
/// </summary>
135-
public TimeSpan SlowClientTimeout { get; set; } = TimeSpan.FromSeconds(10);
136-
137-
/// <summary>
138-
/// Enables backpressure handling for slow clients.
139-
/// When enabled, messages to slow clients are dropped or the connection is terminated.
140-
/// The default value is true.
141-
/// </summary>
142-
public bool EnableSlowClientHandling { get; set; } = true;
143-
144-
/// <summary>
145-
/// Maximum number of pending messages allowed per connection before backpressure is applied.
146-
/// The default value is 1000.
147-
/// </summary>
148-
public int MaxPendingMessagesPerConnection { get; set; } = 1000;
149-
150-
/// <summary>
151-
/// Enables metrics collection for monitoring and diagnostics.
152-
/// The default value is true.
153-
/// </summary>
154-
public bool EnableMetrics { get; set; } = true;
155115
}

ManagedCode.Orleans.SignalR.Core/Helpers/RetryHelper.cs

Lines changed: 0 additions & 225 deletions
This file was deleted.

ManagedCode.Orleans.SignalR.Core/SignalR/Observers/ObserverHealthTracker.cs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ namespace ManagedCode.Orleans.SignalR.Core.SignalR.Observers;
88

99
/// <summary>
1010
/// Tracks observer health by monitoring delivery failures with circuit breaker support.
11-
/// Observers exceeding the failure threshold have their circuit opened to prevent cascade failures.
11+
/// When the circuit breaker is enabled, failures open the circuit and optionally buffer messages
12+
/// during a grace period before observers are removed.
1213
/// Supports graceful expiration with message buffering for timing edge cases.
1314
///
1415
/// Note: This class is designed to be used within Orleans grains which provide single-threaded
@@ -82,7 +83,8 @@ public FailureResult RecordFailure(string connectionId, Exception? exception = n
8283
CircuitBreakerEnabled,
8384
_failureThreshold,
8485
_circuitOpenDuration,
85-
_halfOpenTestInterval);
86+
_halfOpenTestInterval,
87+
markDeadOnCircuitOpen: !_gracePeriodBuffer.IsEnabled);
8688
_healthStates[connectionId] = state;
8789
}
8890

@@ -311,6 +313,7 @@ private sealed class ObserverHealthState
311313
private readonly TimeSpan _failureWindow;
312314
private readonly bool _circuitBreakerEnabled;
313315
private readonly int _failureThreshold;
316+
private readonly bool _markDeadOnCircuitOpen;
314317
private readonly List<long> _failureTimestamps = new();
315318
private readonly ObserverCircuitBreaker? _circuitBreaker;
316319

@@ -319,11 +322,13 @@ public ObserverHealthState(
319322
bool circuitBreakerEnabled,
320323
int failureThreshold,
321324
TimeSpan circuitOpenDuration,
322-
TimeSpan halfOpenTestInterval)
325+
TimeSpan halfOpenTestInterval,
326+
bool markDeadOnCircuitOpen)
323327
{
324328
_failureWindow = failureWindow;
325329
_circuitBreakerEnabled = circuitBreakerEnabled;
326330
_failureThreshold = failureThreshold;
331+
_markDeadOnCircuitOpen = markDeadOnCircuitOpen;
327332

328333
if (circuitBreakerEnabled)
329334
{
@@ -373,17 +378,28 @@ public FailureResult RecordFailure(Exception? exception)
373378
LastException = exception;
374379

375380
var failureCount = _failureTimestamps.Count;
376-
var circuitOpened = _circuitBreaker?.RecordFailure(exception) ?? false;
377381

378-
if (failureCount >= _failureThreshold)
382+
if (_circuitBreaker is not null)
379383
{
380-
IsDead = true;
381-
return FailureResult.Dead;
384+
var circuitOpened = _circuitBreaker.RecordFailure(exception);
385+
if (circuitOpened)
386+
{
387+
if (_markDeadOnCircuitOpen)
388+
{
389+
IsDead = true;
390+
return FailureResult.Dead;
391+
}
392+
393+
return FailureResult.CircuitOpened;
394+
}
395+
396+
return FailureResult.Healthy;
382397
}
383398

384-
if (circuitOpened)
399+
if (failureCount >= _failureThreshold)
385400
{
386-
return FailureResult.CircuitOpened;
401+
IsDead = true;
402+
return FailureResult.Dead;
387403
}
388404

389405
return FailureResult.Healthy;

0 commit comments

Comments
 (0)