Skip to content

Commit 5889432

Browse files
authored
Add watchdog instrumentation to OleTxTests.Recovery to capture hang diagnostics (dotnet#126666)
1 parent dc803e0 commit 5889432

1 file changed

Lines changed: 81 additions & 61 deletions

File tree

src/libraries/System.Transactions.Local/tests/OleTxTests.cs

Lines changed: 81 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -304,72 +304,92 @@ public void SinglePhaseCommit(SinglePhaseEnlistment singlePhaseEnlistment)
304304
[ConditionalFact(typeof(OleTxTests), nameof(IsRemoteExecutorSupportedAndNotNano))]
305305
public void Recovery()
306306
{
307-
Test(() =>
307+
// Start a watchdog thread to generate a crash dump if the test hangs longer than 5 minutes.
308+
// This helps diagnose the hang described in https://github.com/dotnet/runtime/issues/126304.
309+
var testCompleted = new ManualResetEventSlim(false);
310+
var watchdog = new Thread(() =>
308311
{
309-
// We are going to spin up an external process to also enlist in the transaction, and then to crash when it
310-
// receives the commit notification. We will then initiate the recovery flow.
311-
312-
using var tx = new CommittableTransaction();
313-
314-
var outcomeEvent1 = new AutoResetEvent(false);
315-
var enlistment1 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent1);
316-
var guid1 = Guid.NewGuid();
317-
tx.EnlistDurable(guid1, enlistment1, EnlistmentOptions.None);
318-
319-
// The propagation token is used to propagate the transaction to that process so it can enlist to our
320-
// transaction. We also provide the resource manager identifier GUID, and a path where the external process will
321-
// write the recovery information it will receive from the MSDTC when preparing.
322-
// We'll need these two elements later in order to Reenlist and trigger recovery.
323-
byte[] propagationToken = TransactionInterop.GetTransmitterPropagationToken(tx);
324-
string propagationTokenText = Convert.ToBase64String(propagationToken);
325-
var guid2 = Guid.NewGuid();
326-
string secondEnlistmentRecoveryFilePath = Path.GetTempFileName();
327-
328-
using var waitHandle = new EventWaitHandle(
329-
initialState: false,
330-
EventResetMode.ManualReset,
331-
"System.Transactions.Tests.OleTxTests.Recovery");
312+
if (!testCompleted.Wait(TimeSpan.FromMinutes(5)))
313+
{
314+
Environment.FailFast("OleTxTests.Recovery did not complete within 5 minutes. See https://github.com/dotnet/runtime/issues/126304");
315+
}
316+
});
317+
watchdog.IsBackground = true;
318+
watchdog.Start();
332319

333-
try
320+
try
321+
{
322+
Test(() =>
334323
{
335-
using (RemoteExecutor.Invoke(
336-
EnlistAndCrash,
337-
propagationTokenText, guid2.ToString(), secondEnlistmentRecoveryFilePath,
338-
// Bound the child process lifetime so that if MSDTC is unresponsive
339-
// and the process hangs, Dispose() will kill it instead of blocking indefinitely.
340-
new RemoteInvokeOptions { ExpectedExitCode = 42, TimeOut = 120_000 }))
341-
{
342-
// Wait for the external process to enlist in the transaction, it will signal this EventWaitHandle.
343-
Assert.True(waitHandle.WaitOne(Timeout));
324+
// We are going to spin up an external process to also enlist in the transaction, and then to crash when it
325+
// receives the commit notification. We will then initiate the recovery flow.
326+
327+
using var tx = new CommittableTransaction();
328+
329+
var outcomeEvent1 = new AutoResetEvent(false);
330+
var enlistment1 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent1);
331+
var guid1 = Guid.NewGuid();
332+
tx.EnlistDurable(guid1, enlistment1, EnlistmentOptions.None);
333+
334+
// The propagation token is used to propagate the transaction to that process so it can enlist to our
335+
// transaction. We also provide the resource manager identifier GUID, and a path where the external process will
336+
// write the recovery information it will receive from the MSDTC when preparing.
337+
// We'll need these two elements later in order to Reenlist and trigger recovery.
338+
byte[] propagationToken = TransactionInterop.GetTransmitterPropagationToken(tx);
339+
string propagationTokenText = Convert.ToBase64String(propagationToken);
340+
var guid2 = Guid.NewGuid();
341+
string secondEnlistmentRecoveryFilePath = Path.GetTempFileName();
342+
343+
using var waitHandle = new EventWaitHandle(
344+
initialState: false,
345+
EventResetMode.ManualReset,
346+
"System.Transactions.Tests.OleTxTests.Recovery");
344347

345-
tx.Commit();
348+
try
349+
{
350+
using (RemoteExecutor.Invoke(
351+
EnlistAndCrash,
352+
propagationTokenText, guid2.ToString(), secondEnlistmentRecoveryFilePath,
353+
// Bound the child process lifetime so that if MSDTC is unresponsive
354+
// and the process hangs, Dispose() will kill it instead of blocking indefinitely.
355+
new RemoteInvokeOptions { ExpectedExitCode = 42, TimeOut = 120_000 }))
356+
{
357+
// Wait for the external process to enlist in the transaction, it will signal this EventWaitHandle.
358+
Assert.True(waitHandle.WaitOne(Timeout));
359+
360+
tx.Commit();
361+
}
362+
363+
// The other has crashed when the MSDTC notified it to commit.
364+
// Load the recovery information the other process has written to disk for us and reenlist with
365+
// the failed RM's Guid to commit.
366+
var outcomeEvent3 = new AutoResetEvent(false);
367+
var enlistment3 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent3);
368+
byte[] secondRecoveryInformation = File.ReadAllBytes(secondEnlistmentRecoveryFilePath);
369+
_ = TransactionManager.Reenlist(guid2, secondRecoveryInformation, enlistment3);
370+
TransactionManager.RecoveryComplete(guid2);
371+
372+
Assert.True(outcomeEvent1.WaitOne(Timeout));
373+
Assert.True(outcomeEvent3.WaitOne(Timeout));
374+
Assert.Equal(EnlistmentOutcome.Committed, enlistment1.Outcome);
375+
Assert.Equal(EnlistmentOutcome.Committed, enlistment3.Outcome);
376+
Assert.Equal(TransactionStatus.Committed, tx.TransactionInformation.Status);
377+
378+
// Note: verify manually in the MSDTC console that the distributed transaction is gone
379+
// (i.e. successfully committed),
380+
// (Start -> Component Services -> Computers -> My Computer -> Distributed Transaction Coordinator ->
381+
// Local DTC -> Transaction List)
346382
}
347-
348-
// The other has crashed when the MSDTC notified it to commit.
349-
// Load the recovery information the other process has written to disk for us and reenlist with
350-
// the failed RM's Guid to commit.
351-
var outcomeEvent3 = new AutoResetEvent(false);
352-
var enlistment3 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent3);
353-
byte[] secondRecoveryInformation = File.ReadAllBytes(secondEnlistmentRecoveryFilePath);
354-
_ = TransactionManager.Reenlist(guid2, secondRecoveryInformation, enlistment3);
355-
TransactionManager.RecoveryComplete(guid2);
356-
357-
Assert.True(outcomeEvent1.WaitOne(Timeout));
358-
Assert.True(outcomeEvent3.WaitOne(Timeout));
359-
Assert.Equal(EnlistmentOutcome.Committed, enlistment1.Outcome);
360-
Assert.Equal(EnlistmentOutcome.Committed, enlistment3.Outcome);
361-
Assert.Equal(TransactionStatus.Committed, tx.TransactionInformation.Status);
362-
363-
// Note: verify manually in the MSDTC console that the distributed transaction is gone
364-
// (i.e. successfully committed),
365-
// (Start -> Component Services -> Computers -> My Computer -> Distributed Transaction Coordinator ->
366-
// Local DTC -> Transaction List)
367-
}
368-
finally
369-
{
370-
File.Delete(secondEnlistmentRecoveryFilePath);
371-
}
372-
});
383+
finally
384+
{
385+
File.Delete(secondEnlistmentRecoveryFilePath);
386+
}
387+
});
388+
}
389+
finally
390+
{
391+
testCompleted.Set();
392+
}
373393

374394
static void EnlistAndCrash(string propagationTokenText, string resourceManagerIdentifierGuid, string recoveryInformationFilePath)
375395
=> Test(() =>

0 commit comments

Comments
 (0)