Skip to content

Commit 9da3f45

Browse files
ericstjCopilot
andcommitted
Fix race: double-sweep pending requests for non-atomic ConcurrentDictionary
When a server process exits immediately, ProcessMessagesCoreAsync's reading loop completes and sweeps _pendingRequests to fail them. But ConcurrentDictionary.GetEnumerator() is non-atomic: it traverses buckets without locks, so an entry added to an already-traversed bucket can be missed. This leaves the TCS orphaned and CreateAsync hangs. Fix by sweeping twice. The second pass catches entries the first pass missed (nothing modifies those entries between passes). Entries registered after the _messageProcessingComplete flag is set are self-handled by SendRequestAsync's flag check. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 37976fc commit 9da3f45

8 files changed

Lines changed: 13 additions & 104 deletions

File tree

.github/workflows/ci-build-test.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,6 @@ jobs:
6161
run: make build CONFIGURATION=${{ matrix.configuration }}
6262

6363
- name: 🧪 Test
64-
# TODO: Remove DOTNET_RuntimeAsync=0 workaround once the .NET runtime fix is available.
65-
# Tracked by: https://github.com/dotnet/runtime/issues/126325
66-
# xUnit v3 test host hangs intermittently on .NET 10 with RuntimeAsync enabled.
67-
env:
68-
DOTNET_RuntimeAsync: "0"
6964
run: make test CONFIGURATION=${{ matrix.configuration }}
7065

7166
- name: 🧪 AOT Compatibility
@@ -86,7 +81,6 @@ jobs:
8681
name: testresults-${{ matrix.os }}-${{ matrix.configuration }}
8782
path: |
8883
artifacts/testresults/**
89-
**/xunit-runtest-diag.log
9084
9185
publish-coverage:
9286
if: github.actor != 'dependabot[bot]'

src/ModelContextProtocol.Core/Client/McpClientImpl.cs

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ public async Task ConnectAsync(CancellationToken cancellationToken = default)
533533
{
534534
// We don't want the ConnectAsync token to cancel the message processing loop after we've successfully connected.
535535
// The session handler handles cancelling the loop upon its disposal.
536-
var processingTask = _sessionHandler.ProcessMessagesAsync(CancellationToken.None);
536+
_ = _sessionHandler.ProcessMessagesAsync(CancellationToken.None);
537537

538538
// Perform initialization sequence
539539
using var initializationCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
@@ -543,7 +543,7 @@ public async Task ConnectAsync(CancellationToken cancellationToken = default)
543543
{
544544
// Send initialize request
545545
string requestProtocol = _options.ProtocolVersion ?? McpSessionHandler.LatestProtocolVersion;
546-
var initializeTask = SendRequestAsync(
546+
var initializeResponse = await SendRequestAsync(
547547
RequestMethods.Initialize,
548548
new InitializeRequestParams
549549
{
@@ -553,22 +553,7 @@ public async Task ConnectAsync(CancellationToken cancellationToken = default)
553553
},
554554
McpJsonUtilities.JsonContext.Default.InitializeRequestParams,
555555
McpJsonUtilities.JsonContext.Default.InitializeResult,
556-
cancellationToken: initializationCts.Token).AsTask();
557-
558-
// Race the initialize request against the message processing loop.
559-
// If the processing loop exits first (e.g., the server process died and
560-
// its stdout closed), the initialize request will never get a response.
561-
// Detect this immediately rather than waiting for the initialization timeout.
562-
if (await Task.WhenAny(initializeTask, processingTask).ConfigureAwait(false) == processingTask)
563-
{
564-
// Observe the processing task so its exception isn't unobserved.
565-
try { await processingTask.ConfigureAwait(false); }
566-
catch { }
567-
568-
throw new IOException("Transport closed before initialization could complete.");
569-
}
570-
571-
var initializeResponse = await initializeTask.ConfigureAwait(false);
556+
cancellationToken: initializationCts.Token).ConfigureAwait(false);
572557

573558
// Store server information
574559
if (_logger.IsEnabled(LogLevel.Information))

src/ModelContextProtocol.Core/McpSessionHandler.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,9 +338,17 @@ ex is OperationCanceledException &&
338338
? innerException
339339
: new IOException("The server shut down unexpectedly.");
340340

341-
foreach (var entry in _pendingRequests)
341+
// ConcurrentDictionary.GetEnumerator() is non-atomic: it traverses buckets
342+
// one-by-one without locks. An entry added to an already-traversed bucket
343+
// during iteration can be missed. Sweep twice so the second pass catches any
344+
// entries the first pass skipped. Entries registered after the flag is set are
345+
// self-handled by SendRequestAsync's flag check.
346+
for (int pass = 0; pass < 2; pass++)
342347
{
343-
entry.Value.TrySetException(pendingException);
348+
foreach (var entry in _pendingRequests)
349+
{
350+
entry.Value.TrySetException(pendingException);
351+
}
344352
}
345353
}
346354
}
-175 KB
Binary file not shown.
-477 KB
Binary file not shown.

tests/Common/Utils/DiagnosticExceptionTracing.cs

Lines changed: 0 additions & 56 deletions
This file was deleted.

tests/ModelContextProtocol.AspNetCore.Tests/ModelContextProtocol.AspNetCore.Tests.csproj

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,4 @@
6262
<ProjectReference Include="..\ModelContextProtocol.ConformanceServer\ModelContextProtocol.ConformanceServer.csproj" />
6363
</ItemGroup>
6464

65-
<!-- TEMPORARY: Replace xUnit DLLs with instrumented versions that add tracing to
66-
TestRunner.RunTest to diagnose intermittent hang where finished TCS is never signaled.
67-
Remove this target once the root cause is identified. -->
68-
<Target Name="ReplaceXunitWithInstrumented" AfterTargets="Build"
69-
Condition="Exists('$(MSBuildThisFileDirectory)..\Common\InstrumentedXunit\xunit.v3.core.dll')">
70-
<Copy SourceFiles="$(MSBuildThisFileDirectory)..\Common\InstrumentedXunit\xunit.v3.core.dll"
71-
DestinationFolder="$(OutputPath)" OverwriteReadOnlyFiles="true" SkipUnchangedFiles="false" />
72-
<Copy SourceFiles="$(MSBuildThisFileDirectory)..\Common\InstrumentedXunit\xunit.v3.common.dll"
73-
DestinationFolder="$(OutputPath)" OverwriteReadOnlyFiles="true" SkipUnchangedFiles="false" />
74-
</Target>
75-
7665
</Project>

tests/ModelContextProtocol.Tests/ModelContextProtocol.Tests.csproj

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,4 @@
9898
</Content>
9999
</ItemGroup>
100100

101-
<!-- TEMPORARY: Replace xUnit DLLs with instrumented versions that add tracing to
102-
TestRunner.RunTest to diagnose intermittent hang where finished TCS is never signaled.
103-
Remove this target once the root cause is identified. -->
104-
<Target Name="ReplaceXunitWithInstrumented" AfterTargets="Build"
105-
Condition="Exists('$(MSBuildThisFileDirectory)..\Common\InstrumentedXunit\xunit.v3.core.dll')">
106-
<Copy SourceFiles="$(MSBuildThisFileDirectory)..\Common\InstrumentedXunit\xunit.v3.core.dll"
107-
DestinationFolder="$(OutputPath)" OverwriteReadOnlyFiles="true" SkipUnchangedFiles="false" />
108-
<Copy SourceFiles="$(MSBuildThisFileDirectory)..\Common\InstrumentedXunit\xunit.v3.common.dll"
109-
DestinationFolder="$(OutputPath)" OverwriteReadOnlyFiles="true" SkipUnchangedFiles="false" />
110-
</Target>
111-
112101
</Project>

0 commit comments

Comments
 (0)