@@ -8,6 +8,17 @@ parameters:
88 - Continuous
99 - name : AgentPool
1010 type : object
# Test-only switches for re-validating the crash-dump collection path.
# Queueing a PR-validation run with one of these set to true makes the E2E
# app deliberately crash (simulateCrashForTesting) or hang
# (simulateHangForTesting) so the resulting dump artifact can be checked.
# Both default to false: the test step is expected to fail by design
# whenever either switch is on.
- name: simulateCrashForTesting
  default: false
  type: boolean
- name: simulateHangForTesting
  default: false
  type: boolean
1122 - name : buildMatrix
1223 type : object
1324 default :
@@ -185,6 +196,12 @@ jobs:
185196 platform : ${{ matrix.BuildPlatform }}
186197 configuration : Release
187198 buildEnvironment : ${{ config.buildEnvironment }}
199+ # Capture crash dumps for the E2E test app (packaged UWP) and
200+ # the Metro bundler. ProcDump-as-AeDebug does not reliably fire
201+ # for packaged apps; WER LocalDumps does.
202+ localDumpsExeNames :
203+ - RNTesterApp-Fabric
204+ - node
188205
189206 - pwsh : |
190207 Write-Host "##vso[task.setvariable variable=BuildLogDirectory]$(Build.BinariesDirectory)\${{ matrix.BuildPlatform }}\BuildLogs"
@@ -209,11 +226,238 @@ jobs:
209226 echo ##vso[task.setvariable variable=StartedFabricTests]true
210227 displayName: Set StartedFabricTests
211228
212- - script : |
213- yarn e2etest
214- displayName: yarn e2etest
215- workingDirectory: packages/e2e-test-app-fabric
216- timeoutInMinutes: 10 # Time to wait for this task to complete before the server kills it.
# Test-only: create the crash-simulation sentinel file so that
# RNTesterApp-Fabric crashes on startup, which exercises the in-process
# minidump path. Dumps left behind in the in-process dump folder are
# removed here as well.
- ${{ if eq(parameters.simulateCrashForTesting, true) }}:
  - pwsh: |
      $sentinel = Join-Path $env:ProgramData 'rnw-e2e-simulate-crash.flag'
      $null = New-Item -Path $sentinel -ItemType File -Force
      Write-Host "Crash-simulation sentinel created at $sentinel"

      $staleDumpRoot = Join-Path $env:ProgramData 'RNW-E2E-Dumps'
      if (Test-Path $staleDumpRoot) {
        Remove-Item -Path "$staleDumpRoot\*" -Recurse -Force -ErrorAction SilentlyContinue
        Write-Host "Cleared stale dumps under $staleDumpRoot"
      }
    displayName: Arm crash-simulation sentinel (TEST ONLY)
242+
# Test-only: set the RNW_SIMULATE_HANG pipeline variable, which switches
# on HangSimulationTest.test.ts. That test sends the `HangForTesting`
# automation command, jamming the app's UI thread so the post-failure
# ProcDump step has a hung process to capture.
- ${{ if eq(parameters.simulateHangForTesting, true) }}:
  - pwsh: |
      Write-Host '##vso[task.setvariable variable=RNW_SIMULATE_HANG]1'
      Write-Host 'Hang simulation armed (RNW_SIMULATE_HANG=1)'
    displayName: Arm hang-simulation env var (TEST ONLY)
252+
# Hang simulation runs HangSimulationTest on its own. Jest's default
# sequencer schedules tests without timing history late, so an unfiltered
# run would time out before ever reaching the hang test.
# The 4-minute timeout covers app launch (~30 s), the test's 70 s jest
# testTimeout and jest's teardown attempt. If the hang keeps jest from
# exiting, ADO cuts the step off at 4 minutes; that is acceptable because
# the dump-capture step then finds the still-running, UI-hung app.
- ${{ if eq(parameters.simulateHangForTesting, true) }}:
  - script: |
      yarn e2etest --testPathPattern HangSimulationTest
    timeoutInMinutes: 4
    workingDirectory: packages/e2e-test-app-fabric
    displayName: yarn e2etest (hang simulation only)
266+
# Regular E2E run; used whenever hang simulation is not requested.
- ${{ if ne(parameters.simulateHangForTesting, true) }}:
  - script: |
      yarn e2etest
    workingDirectory: packages/e2e-test-app-fabric
    displayName: yarn e2etest
    # With crash simulation on, the app dies right at startup, so the
    # usual 10-minute allowance would be dead time; use 2 minutes instead.
    ${{ if eq(parameters.simulateCrashForTesting, true) }}:
      timeoutInMinutes: 2
    ${{ if ne(parameters.simulateCrashForTesting, true) }}:
      timeoutInMinutes: 10
278+
# Remove the crash sentinel regardless of how the earlier steps ended, so
# it cannot leak into a rerun on the same agent.
- ${{ if eq(parameters.simulateCrashForTesting, true) }}:
  - pwsh: |
      $sentinel = Join-Path $env:ProgramData 'rnw-e2e-simulate-crash.flag'
      if (Test-Path $sentinel) {
        Remove-Item $sentinel -Force
        Write-Host "Removed crash-simulation sentinel at $sentinel"
      }
    condition: always()
    displayName: Disarm crash-simulation sentinel (TEST ONLY)
290+
# Reset RNW_SIMULATE_HANG regardless of how the earlier steps ended. The
# post-failure `Update snapshots` step also runs `yarn e2etest`; without
# this it would trigger the hang again and waste 10 minutes. The variable
# is set to an empty value for all subsequent steps.
- ${{ if eq(parameters.simulateHangForTesting, true) }}:
  - pwsh: |
      Write-Host '##vso[task.setvariable variable=RNW_SIMULATE_HANG]'
      Write-Host 'Hang simulation disarmed (RNW_SIMULATE_HANG cleared)'
    condition: always()
    displayName: Disarm hang-simulation env var (TEST ONLY)
301+
# After a test failure, take full dumps of any RNTesterApp-Fabric / node
# processes that are still running, before later steps (or the agent)
# tear them down. WER LocalDumps only fires on real crashes; this step
# covers hangs (e.g. "Unable to enter correct text" timeouts) where the
# process is alive but unresponsive.
#
# Dumps are written to a subfolder of $(CrashDumpRootPath) on purpose.
# Files placed directly at the root were observed to vanish during the
# long `Update snapshots` step that follows a failed test, while files in
# a subfolder survive. The culprit is unknown (Defender, a 1ES cleanup
# script, or a side-effect of `yarn e2etest -u`).
- pwsh: |
    $procDumpExe = Join-Path "$(ProcDumpPath)" 'procdump64.exe'
    if (-not (Test-Path $procDumpExe)) {
      Write-Host "ProcDump not found at $procDumpExe; skipping live-process dump capture."
      exit 0
    }

    $hangDumpDir = Join-Path "$(CrashDumpRootPath)" 'hang'
    $null = New-Item -ItemType Directory -Path $hangDumpDir -Force

    foreach ($name in 'RNTesterApp-Fabric', 'node') {
      foreach ($proc in @(Get-Process -Name $name -ErrorAction SilentlyContinue)) {
        $procId = $proc.Id
        $dumpPath = Join-Path $hangDumpDir ("hang_{0}_{1}.dmp" -f $name, $procId)
        Write-Host "Capturing full dump of $name (pid $procId) to $dumpPath"
        & $procDumpExe -accepteula -ma $procId $dumpPath
        Write-Host ("ProcDump exit code: {0} (non-zero is normal - encodes the dump count written)" -f $LASTEXITCODE)
      }
    }

    # ProcDump reports the number of dumps written through a non-zero exit
    # code; exit 0 explicitly so the step is not flagged with a warning.
    exit 0
  displayName: Capture dumps of surviving test processes
  condition: and(failed(), eq(variables.StartedFabricTests, 'true'))
  continueOnError: true
339+
# Stage crash dumps into subfolders of $(CrashDumpRootPath) so they are
# published with the crash-dumps artifact:
#   in-process/  minidumps written by the app's unhandled-exception
#                filter to %ProgramData%\RNW-E2E-Dumps
#   recovered/   dumps modified within the last 2 hours that are found in
#                the standard WER fallback locations
# Subfolders are used because files written directly at the root were
# observed to disappear during the post-failure `Update snapshots` step.
# Copying is best-effort: a failed copy is reported as a warning and never
# logged as a successful recovery.
- pwsh: |
    # In-process minidumps (primary mechanism for actual crashes).
    $inProc = Join-Path $env:ProgramData 'RNW-E2E-Dumps'
    if (Test-Path $inProc) {
      $dest = Join-Path "$(CrashDumpRootPath)" 'in-process'
      New-Item -ItemType Directory -Path $dest -Force | Out-Null
      # Keep going on per-file failures, but collect them so they can be
      # surfaced instead of silently dropped.
      Copy-Item -Path "$inProc\*" -Destination $dest -Recurse -Force -ErrorAction SilentlyContinue -ErrorVariable copyErrors
      foreach ($err in $copyErrors) {
        Write-Warning ("Could not copy in-process dump item: {0}" -f $err)
      }
      Get-ChildItem -Path $dest -Recurse -Force -ErrorAction SilentlyContinue |
        Select-Object FullName, Length | Format-Table -AutoSize | Out-String | Write-Host
    }

    # Fallback search: if the agent image ever changes back to a
    # working WER LocalDumps configuration, dumps may land here.
    $searchRoots = @(
      "$env:LOCALAPPDATA\CrashDumps",
      "$env:ProgramData\Microsoft\Windows\WER\ReportQueue",
      "$env:ProgramData\Microsoft\Windows\WER\ReportArchive",
      "$env:ProgramData\Microsoft\Windows\WER\Temp"
    )
    # Only dumps touched in the last two hours are considered; the cutoff
    # is computed once rather than per file.
    $cutoff = (Get-Date).AddHours(-2)
    $found = @(
      foreach ($root in $searchRoots) {
        if (-not (Test-Path $root)) { continue }
        Get-ChildItem -Path $root -Recurse -File -Force -Include *.dmp,*.mdmp -ErrorAction SilentlyContinue |
          Where-Object { $_.LastWriteTime -gt $cutoff }
      }
    )
    if ($found.Count -gt 0) {
      $dest = Join-Path "$(CrashDumpRootPath)" 'recovered'
      New-Item -ItemType Directory -Path $dest -Force | Out-Null
      foreach ($h in $found) {
        # Flatten the source path into a single file name so dumps from
        # different folders cannot collide.
        $target = Join-Path $dest ($h.FullName -replace '[:\\/]', '_')
        Copy-Item -LiteralPath $h.FullName -Destination $target -Force -ErrorAction SilentlyContinue -ErrorVariable copyErrors
        if ($copyErrors.Count -gt 0) {
          Write-Warning ("Could not recover {0}: {1}" -f $h.FullName, $copyErrors[0])
        } else {
          Write-Host "Recovered $($h.FullName) ($($h.Length) bytes) -> $target"
        }
      }
    }
  displayName: Collect in-process and fallback crash dumps
  condition: and(failed(), eq(variables.StartedFabricTests, 'true'))
  continueOnError: true
383+
# Add matching PDBs and a debugging README to the crash-dumps artifact so
# a developer can analyze the dumps offline. The step does nothing when
# no .dmp/.mdmp files exist: $(CrashDumpRootPath) also receives MSBuild
# failure logs (MSBUILDDEBUGPATH points here), and those need neither
# symbols nor the README.
# PDB staging is best-effort; the log reports how many PDBs were actually
# copied, and each failed copy is surfaced as a warning.
- pwsh: |
    $dumps = @(Get-ChildItem -Path "$(CrashDumpRootPath)" -Recurse -Include *.dmp,*.mdmp -File -ErrorAction SilentlyContinue)
    if ($dumps.Count -eq 0) {
      Write-Host "No .dmp/.mdmp files in $(CrashDumpRootPath); skipping symbols + README bundling."
      exit 0
    }
    Write-Host "Found $($dumps.Count) dump file(s); bundling matching PDBs and README."

    $symbolsDir = Join-Path "$(CrashDumpRootPath)" 'symbols'
    $releaseRoot = "$(Build.SourcesDirectory)\packages\e2e-test-app-fabric\windows\${{ matrix.BuildPlatform }}\Release"
    if (Test-Path $releaseRoot) {
      $pdbs = @(Get-ChildItem -Path $releaseRoot -Recurse -Filter *.pdb -File -ErrorAction SilentlyContinue)
      $staged = 0
      foreach ($pdb in $pdbs) {
        # Mirror the PDB's location relative to the Release root.
        $rel = $pdb.FullName.Substring($releaseRoot.Length).TrimStart('\','/')
        $target = Join-Path $symbolsDir $rel
        New-Item -ItemType Directory -Path (Split-Path -Parent $target) -Force | Out-Null
        Copy-Item -LiteralPath $pdb.FullName -Destination $target -Force -ErrorAction SilentlyContinue -ErrorVariable copyErrors
        if ($copyErrors.Count -gt 0) {
          Write-Warning ("Could not stage {0}: {1}" -f $pdb.FullName, $copyErrors[0])
        } else {
          $staged++
        }
      }
      Write-Host ("Staged {0} of {1} PDB(s) under {2}" -f $staged, $pdbs.Count, $symbolsDir)
    } else {
      Write-Host "Release root not found at $releaseRoot; skipping PDB stage."
    }

    # Single-quoted here-string: the text is written verbatim, and the
    # closing '@ must stay at the start of its line.
    $readme = @'
    # Reading these crash dumps

    This artifact contains crash and/or hang dumps from a failed React
    Native Windows E2E test run, plus matching debug symbols.

    ## What is in here

    - `hang/` -- full-memory dumps captured by procdump64 from
      RNTesterApp-Fabric / node processes that were still alive when
      the test step timed out.
    - `in-process/` -- full-memory minidumps written by
      RNTesterApp-Fabric's own unhandled-exception filter when the app
      actually crashed.
    - `recovered/` -- dumps recovered from common WER fallback
      locations on the agent. Usually empty.
    - `symbols/` -- PDBs that match the binaries deployed to the test
      agent. Folder layout mirrors the test app's Release deploy tree.

    ## Opening in WinDbg

    1. Download and extract this artifact. Note the absolute path of
       the extracted `symbols/` folder.
    2. Open a dump:

           windbg -z hang\hang_RNTesterApp-Fabric_<pid>.dmp

    3. Set the symbol path (this artifact's symbols + Microsoft public
       symbol server) and reload:

           .sympath srv*C:\symbols*https://msdl.microsoft.com/download/symbols;<extracted-path>\symbols
           .reload /f

    4. Useful first commands:
       - `~* k` -- call stack of every thread (most useful for hangs)
       - `!analyze -v` -- automatic crash analysis (most useful for crashes)

    ## If you need the binaries too

    The PDBs alone are enough for stack walks and type info. If you
    need module bytes (e.g. to disassemble), download the matching
    `RNTesterApp-Fabric-<plat>-<attempt>` artifact from the same
    pipeline run; its layout matches `symbols/` here.
    '@
    Set-Content -LiteralPath "$(CrashDumpRootPath)\README.md" -Value $readme -Encoding utf8
    Write-Host "Wrote $(CrashDumpRootPath)\README.md"
  displayName: Bundle symbols and README with crash dumps
  condition: and(failed(), eq(variables.StartedFabricTests, 'true'))
  continueOnError: true
217461
218462 - script : npx jest --clearCache
219463 displayName : clear jest cache
0 commit comments