Skip to content

Commit 5b2b7f5

Browse files
thomhurstclaude
andauthored
fix: Improve timeout handling and fix hanging tests (#1451)
* chore: Add .worktrees/ to .gitignore for parallel development 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: Improve pipeline context isolation and thread safety (#1417) This PR addresses issue #1417 by implementing proper module-scoped isolation: **Breaking Changes:** - `IEnvironmentContext.ContentDirectory` and `WorkingDirectory` are now readonly (changed from `{ get; set; }` to `{ get; }`) **Improvements:** - Add AsyncLocal cleanup in ModuleRunner to prevent potential context leaks - Replace `List<T>` with `ConcurrentBag<T>` in ModuleLoggerContainer for lock-free thread-safe operations during high-concurrency module execution - Add documentation explaining PipelineContext scoped lifetime and logger caching safety **Files Changed:** - ModuleRunner.cs: Added try/finally to clear AsyncLocal logger context - IEnvironmentContext.cs: Made ContentDirectory/WorkingDirectory readonly with docs - EnvironmentContext.cs: Implementation matches interface changes - PipelineContext.cs: Added XML documentation for scoped DI behavior - ModuleLoggerContainer.cs: Replaced List+locks with ConcurrentBag+Interlocked Closes #1417 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix: Improve timeout handling and fix hanging tests - Add TUnit-inspired TimeoutHelper with clean timeout pattern - Fast path for no timeout (avoids allocations) - Linked token sources for combining cancellation - 1-second grace period for cleanup after timeout - Distinguishes external cancellation from timeout - Refactor ModuleExecutionPipeline to use TimeoutHelper - Replaces redundant CancelAfter + Task.Delay pattern - Cleaner single-point timeout handling - Fix cancellation propagation in ModuleExecutionContext - Make ModuleCancellationTokenSource settable - Use CreateLinkedTokenSource for proper token linking - Fix hanging FolderTests that searched entire AppData folder - Replace with controlled temporary folder structure tests - Tests now complete in ~1.6s vs hanging indefinitely - Add test timeout infrastructure - Default 60s timeout in TestHostSettings - Timeout parameter in ExecutePipelineAsync overloads 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 8e7e39b commit 5b2b7f5

15 files changed

Lines changed: 315 additions & 83 deletions

File tree

Directory.Packages.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
<PackageVersion Include="Microsoft.Extensions.TimeProvider.Testing" Version="10.1.0" />
6161
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="18.0.1" />
6262
<PackageVersion Include="Microsoft.SourceLink.GitHub" Version="8.0.0" />
63+
<PackageVersion Include="Microsoft.Testing.Extensions.HangDump" Version="2.0.2" />
6364
<PackageVersion Include="Microsoft.VSSDK.BuildTools" Version="17.14.2120" />
6465
<PackageVersion Include="Moq" Version="4.20.72" />
6566
<PackageVersion Include="MSTest.TestAdapter" Version="4.0.2" />

src/ModularPipelines/Context/EnvironmentContext.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ public EnvironmentContext(IHostEnvironment hostEnvironment,
2727

2828
public Folder AppDomainDirectory { get; } = AppDomain.CurrentDomain.BaseDirectory!;
2929

30-
public Folder ContentDirectory { get; set; }
30+
public Folder ContentDirectory { get; }
3131

32-
public Folder WorkingDirectory { get; set; } = Environment.CurrentDirectory!;
32+
public Folder WorkingDirectory { get; } = Environment.CurrentDirectory!;
3333

3434
public IEnvironmentVariables EnvironmentVariables { get; }
3535

src/ModularPipelines/Context/IEnvironmentContext.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,20 @@ public interface IEnvironmentContext
3030
public Folder AppDomainDirectory { get; }
3131

3232
/// <inheritdoc cref="IHostEnvironment.ContentRootPath"/>
33-
public Folder ContentDirectory { get; set; }
33+
/// <remarks>
34+
/// This property is immutable after pipeline initialization.
35+
/// If you need to change the working directory for command execution,
36+
/// use command options or <see cref="System.Environment.CurrentDirectory"/> directly.
37+
/// </remarks>
38+
public Folder ContentDirectory { get; }
3439

3540
/// <inheritdoc cref="Environment.CurrentDirectory"/>
36-
public Folder WorkingDirectory { get; set; }
41+
/// <remarks>
42+
/// This property captures the working directory at pipeline initialization time and is immutable.
43+
/// If you need to change the working directory for command execution,
44+
/// use command options or <see cref="System.Environment.CurrentDirectory"/> directly.
45+
/// </remarks>
46+
public Folder WorkingDirectory { get; }
3747

3848
/// <inheritdoc cref="Environment.GetFolderPath(System.Environment.SpecialFolder)"/>
3949
public Folder? GetFolder(Environment.SpecialFolder specialFolder);

src/ModularPipelines/Context/PipelineContext.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,31 @@
1010

1111
namespace ModularPipelines.Context;
1212

13+
/// <summary>
14+
/// Provides context and services for module execution.
15+
/// </summary>
16+
/// <remarks>
17+
/// This class is registered as Scoped in the DI container, meaning each module execution
18+
/// gets its own instance. This ensures proper isolation between concurrent module executions.
19+
/// </remarks>
1320
internal class PipelineContext : IPipelineContext
1421
{
1522
private readonly IModuleLoggerProvider _moduleLoggerProvider;
23+
24+
/// <summary>
25+
/// Cached logger instance for this context.
26+
/// </summary>
27+
/// <remarks>
28+
/// This caching is safe because PipelineContext is Scoped (one instance per module execution).
29+
/// Each module gets a fresh PipelineContext, so the cached logger is inherently module-scoped.
30+
/// </remarks>
1631
private IModuleLogger? _logger;
1732

33+
/// <inheritdoc />
34+
/// <remarks>
35+
/// Logger is lazily initialized and cached for the lifetime of this context.
36+
/// Since PipelineContext is Scoped, this provides a module-specific logger.
37+
/// </remarks>
1838
public IModuleLogger Logger => _logger ??= _moduleLoggerProvider.GetLogger();
1939

2040
public IServiceProvider ServiceProvider { get; }

src/ModularPipelines/Engine/Execution/ModuleRunner.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,31 @@ private async Task ExecuteModuleWithPipeline(ModuleState moduleState, IServicePr
147147
var logger = GetOrCreateLogger(moduleType, scopedServiceProvider);
148148
var moduleContext = new ModuleContext(pipelineContext, module, executionContext, logger);
149149

150-
// Set up logging
150+
// Set up logging - use try/finally to ensure cleanup of AsyncLocal context
151151
ModuleLogger.Values.Value = logger;
152152

153+
try
154+
{
155+
await ExecuteModuleLifecycle(moduleState, scopedServiceProvider, pipelineContext, executionContext, moduleContext, cancellationToken).ConfigureAwait(false);
156+
}
157+
finally
158+
{
159+
// Clear AsyncLocal to prevent potential leaks in edge cases (thread pool reuse, long-running contexts)
160+
ModuleLogger.Values.Value = null;
161+
}
162+
}
163+
164+
private async Task ExecuteModuleLifecycle(
165+
ModuleState moduleState,
166+
IServiceProvider scopedServiceProvider,
167+
IPipelineContext pipelineContext,
168+
ModuleExecutionContext executionContext,
169+
IModuleContext moduleContext,
170+
CancellationToken cancellationToken)
171+
{
172+
var module = moduleState.Module;
173+
var moduleType = moduleState.ModuleType;
174+
153175
// Before module hooks
154176
await _pipelineSetupExecutor.OnBeforeModuleStartAsync(moduleState).ConfigureAwait(false);
155177

src/ModularPipelines/Engine/ModuleExecutionContext.cs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,13 @@ public ModuleExecutionContext(IModule module, Type moduleType)
7575
public Stopwatch Stopwatch { get; }
7676

7777
/// <summary>
78-
/// Gets the cancellation token source for this module.
78+
/// Gets or sets the cancellation token source for this module.
7979
/// </summary>
80-
public CancellationTokenSource ModuleCancellationTokenSource { get; }
80+
/// <remarks>
81+
/// This may be replaced with a linked token source during setup
82+
/// to combine external and engine-level cancellation.
83+
/// </remarks>
84+
public CancellationTokenSource ModuleCancellationTokenSource { get; set; }
8185

8286
/// <summary>
8387
/// Gets the list of sub-module trackers.
@@ -230,7 +234,7 @@ internal interface IModuleExecutionContext
230234

231235
Stopwatch Stopwatch { get; }
232236

233-
CancellationTokenSource ModuleCancellationTokenSource { get; }
237+
CancellationTokenSource ModuleCancellationTokenSource { get; set; }
234238

235239
List<SubModuleTracker> SubModules { get; }
236240

src/ModularPipelines/Engine/ModuleExecutionPipeline.cs

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,14 @@ private void SetupCancellation(
155155
var isAlwaysRun = module is IAlwaysRun || module.ModuleRunType == ModuleRunType.AlwaysRun;
156156
if (!isAlwaysRun)
157157
{
158-
engineCancellationToken.Register(() =>
159-
executionContext.ModuleCancellationTokenSource.Cancel());
158+
// Create a linked token source that cancels when:
159+
// - The engine singleton is cancelled (module failures, external cancellation via Ctrl+C or test timeout)
160+
// - The original module token is cancelled (preserves any existing cancellation on the module)
161+
// All external cancellation flows through _engineCancellationToken (see ExecutionOrchestrator line 108)
162+
var originalToken = executionContext.ModuleCancellationTokenSource.Token;
163+
executionContext.ModuleCancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(
164+
_engineCancellationToken.Token,
165+
originalToken);
160166
}
161167

162168
executionContext.ModuleCancellationTokenSource.Token.ThrowIfCancellationRequested();
@@ -207,34 +213,27 @@ private async Task<ModuleResult<T>> HandleSkipped<T>(
207213
var timeout = GetTimeout(module);
208214
var cancellationToken = executionContext.ModuleCancellationTokenSource.Token;
209215

210-
// Setup timeout
211-
if (timeout != TimeSpan.Zero)
212-
{
213-
executionContext.ModuleCancellationTokenSource.CancelAfter(timeout);
214-
}
215-
216216
// Get retry policy if applicable
217217
var retryPolicy = GetRetryPolicy(module, moduleContext);
218218

219-
// Execute with policies
220-
var executeTask = retryPolicy != null
221-
? retryPolicy.ExecuteAsync(() => module.ExecuteAsync(moduleContext, cancellationToken))
222-
: module.ExecuteAsync(moduleContext, cancellationToken);
219+
// Create the execution function that optionally includes retry
220+
Func<CancellationToken, Task<T?>> executeFunc = retryPolicy != null
221+
? ct => retryPolicy.ExecuteAsync(() => module.ExecuteAsync(moduleContext, ct))
222+
: ct => module.ExecuteAsync(moduleContext, ct);
223223

224-
if (timeout != TimeSpan.Zero)
224+
// Use TimeoutHelper for clean timeout handling with grace period
225+
try
225226
{
226-
// Race against timeout
227-
var timeoutTask = Task.Delay(timeout, cancellationToken);
228-
229-
var completedTask = await Task.WhenAny(executeTask, timeoutTask).ConfigureAwait(false);
230-
231-
if (completedTask == timeoutTask && executionContext.Status != Status.Successful)
232-
{
233-
throw new ModuleTimeoutException(executionContext.ModuleType, timeout);
234-
}
227+
return await TimeoutHelper.ExecuteWithTimeoutAsync(
228+
executeFunc,
229+
timeout == TimeSpan.Zero ? null : timeout,
230+
cancellationToken,
231+
$"Module {executionContext.ModuleType.Name} timed out after {timeout}").ConfigureAwait(false);
232+
}
233+
catch (TimeoutException)
234+
{
235+
throw new ModuleTimeoutException(executionContext.ModuleType, timeout);
235236
}
236-
237-
return await executeTask.ConfigureAwait(false);
238237
}
239238

240239
private TimeSpan GetTimeout(IModule module)

src/ModularPipelines/Extensions/HostExtensions.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ internal static class HostExtensions
99
{
1010
public static async Task<PipelineSummary> ExecutePipelineAsync(this IPipelineHost host)
1111
{
12-
return await host.Services.GetRequiredService<IExecutionOrchestrator>().ExecuteAsync().ConfigureAwait(false);
12+
return await host.ExecutePipelineAsync(CancellationToken.None).ConfigureAwait(false);
13+
}
14+
15+
public static async Task<PipelineSummary> ExecutePipelineAsync(this IPipelineHost host, CancellationToken cancellationToken)
16+
{
17+
return await host.Services.GetRequiredService<IExecutionOrchestrator>().ExecuteAsync(cancellationToken).ConfigureAwait(false);
1318
}
1419
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
namespace ModularPipelines.Helpers;
2+
3+
/// <summary>
4+
/// Reusable utility for executing tasks with timeout support that can return
5+
/// control immediately when timeout elapses.
6+
/// </summary>
7+
/// <remarks>
8+
/// This implementation is inspired by the TUnit testing framework's TimeoutHelper.
9+
/// </remarks>
10+
internal static class TimeoutHelper
11+
{
12+
/// <summary>
13+
/// Grace period to allow tasks to handle cancellation before throwing
14+
/// timeout exception.
15+
/// </summary>
16+
private static readonly TimeSpan GracePeriod = TimeSpan.FromSeconds(1);
17+
18+
/// <summary>
19+
/// Executes a task with an optional timeout. If the timeout elapses before
20+
/// the task completes, control is returned to the caller immediately with a
21+
/// TimeoutException.
22+
/// </summary>
23+
public static async Task ExecuteWithTimeoutAsync(
24+
Func<CancellationToken, Task> taskFactory,
25+
TimeSpan? timeout,
26+
CancellationToken cancellationToken,
27+
string? timeoutMessage = null)
28+
{
29+
await ExecuteWithTimeoutAsync(
30+
async ct =>
31+
{
32+
await taskFactory(ct).ConfigureAwait(false);
33+
return true;
34+
},
35+
timeout,
36+
cancellationToken,
37+
timeoutMessage).ConfigureAwait(false);
38+
}
39+
40+
/// <summary>
41+
/// Executes a task with an optional timeout and returns a result. If the
42+
/// timeout elapses before the task completes, control is returned to the
43+
/// caller immediately with a TimeoutException.
44+
/// </summary>
45+
public static async Task<T> ExecuteWithTimeoutAsync<T>(
46+
Func<CancellationToken, Task<T>> taskFactory,
47+
TimeSpan? timeout,
48+
CancellationToken cancellationToken,
49+
string? timeoutMessage = null)
50+
{
51+
// Fast path: no timeout specified
52+
if (!timeout.HasValue || timeout.Value == TimeSpan.Zero)
53+
{
54+
var task = taskFactory(cancellationToken);
55+
56+
// If the token can't be cancelled, just await directly (avoid allocations)
57+
if (!cancellationToken.CanBeCanceled)
58+
{
59+
return await task.ConfigureAwait(false);
60+
}
61+
62+
// Race against cancellation - TrySetCanceled makes the TCS throw
63+
// OperationCanceledException when awaited
64+
var tcs = new TaskCompletionSource<T>(
65+
TaskCreationOptions.RunContinuationsAsynchronously);
66+
using var reg = cancellationToken.Register(
67+
static state => ((TaskCompletionSource<T>)state!).TrySetCanceled(),
68+
tcs);
69+
70+
// await await: first gets winning task, then awaits it
71+
// (propagates result or exception)
72+
return await await Task.WhenAny(task, tcs.Task)
73+
.ConfigureAwait(false);
74+
}
75+
76+
// Timeout path: create linked token so task can observe both timeout
77+
// and external cancellation.
78+
using var timeoutCts = CancellationTokenSource
79+
.CreateLinkedTokenSource(cancellationToken);
80+
81+
// Set up cancellation detection BEFORE scheduling timeout to avoid race
82+
// condition where timeout fires before registration completes
83+
// (with very small timeouts)
84+
var cancelledTcs = new TaskCompletionSource<T>(
85+
TaskCreationOptions.RunContinuationsAsynchronously);
86+
using var registration = timeoutCts.Token.Register(
87+
static state => ((TaskCompletionSource<T>)state!)
88+
.TrySetCanceled(),
89+
cancelledTcs);
90+
91+
// Now schedule the timeout - registration is guaranteed to catch it
92+
timeoutCts.CancelAfter(timeout.Value);
93+
94+
var executionTask = taskFactory(timeoutCts.Token);
95+
96+
var winner = await Task.WhenAny(executionTask, cancelledTcs.Task)
97+
.ConfigureAwait(false);
98+
99+
if (winner == cancelledTcs.Task)
100+
{
101+
// Determine if it was external cancellation or timeout
102+
if (cancellationToken.IsCancellationRequested)
103+
{
104+
throw new OperationCanceledException(cancellationToken);
105+
}
106+
107+
// Timeout occurred - give the execution task a brief grace period
108+
// to clean up
109+
try
110+
{
111+
await executionTask.WaitAsync(GracePeriod, CancellationToken.None).ConfigureAwait(false);
112+
}
113+
catch
114+
{
115+
// Ignore all exceptions - task was cancelled, we're just giving
116+
// it time to clean up
117+
}
118+
119+
// Even if task completed during grace period, timeout already elapsed
120+
// so we throw
121+
throw new TimeoutException(
122+
timeoutMessage ?? $"Operation timed out after {timeout.Value}");
123+
}
124+
125+
return await executionTask.ConfigureAwait(false);
126+
}
127+
}

0 commit comments

Comments
 (0)