@@ -8,13 +8,26 @@ parameters:
88 - Continuous
99 - name : AgentPool
1010 type : object
11+ # When set to true on a PR-validation queue, the E2E app deliberately
12+ # crashes (simulateCrashForTesting) or hangs (simulateHangForTesting) so we
13+ # can re-validate that the crash-dump collection path still produces a
14+ # usable artifact. Disabled by default — the test step is doomed by design
15+ # when these are on.
16+ - name : simulateCrashForTesting
17+ type : boolean
18+ default : false
19+ - name : simulateHangForTesting
20+ type : boolean
21+ default : false
1122 - name : buildMatrix
1223 type : object
1324 default :
1425 - BuildEnvironment : PullRequest
1526 Matrix :
1627 - Name : X64Hermes
1728 BuildPlatform : x64
29+ - Name : X86Hermes
30+ BuildPlatform : x86
1831 - BuildEnvironment : Continuous
1932 Matrix :
2033 - Name : X64Hermes
4457 platform : ${{ matrix.BuildPlatform }}
4558 configuration : Release
4659 buildEnvironment : ${{ config.buildEnvironment }}
60+ # Capture crash dumps for the E2E test app (packaged UWP) and
61+ # the Metro bundler. ProcDump-as-AeDebug does not reliably fire
62+ # for packaged apps; WER LocalDumps does.
63+ localDumpsExeNames :
64+ - RNTesterApp-Fabric
65+ - node
4766
4867 - pwsh : |
4968 Write-Host "##vso[task.setvariable variable=BuildLogDirectory]$(Build.BinariesDirectory)\${{ matrix.BuildPlatform }}\BuildLogs"
@@ -68,11 +87,238 @@ jobs:
6887 echo ##vso[task.setvariable variable=StartedFabricTests]true
6988 displayName: Set StartedFabricTests
7089
71- - script : |
72- yarn e2etest
73- displayName: yarn e2etest
74- workingDirectory: packages/e2e-test-app-fabric
75- timeoutInMinutes: 10 # Time to wait for this task to complete before the server kills it.
90+ # Test-only: arm the crash-simulation sentinel so RNTesterApp-Fabric
91+ # crashes on startup. Validates the in-process minidump path.
92+ - ${{ if eq(parameters.simulateCrashForTesting, true) }} :
93+ - pwsh : |
94+ $flagPath = Join-Path $env:ProgramData 'rnw-e2e-simulate-crash.flag'
95+ New-Item -Path $flagPath -ItemType File -Force | Out-Null
96+ Write-Host "Crash-simulation sentinel created at $flagPath"
97+ $dumpDir = Join-Path $env:ProgramData 'RNW-E2E-Dumps'
98+ if (Test-Path $dumpDir) {
99+ Remove-Item -Path "$dumpDir\*" -Recurse -Force -ErrorAction SilentlyContinue
100+ Write-Host "Cleared stale dumps under $dumpDir"
101+ }
102+ displayName: Arm crash-simulation sentinel (TEST ONLY)
103+
104+ # Test-only: arm the hang-simulation env var, which switches on
105+ # the HangSimulationTest.test.ts test. That test invokes the
106+ # `HangForTesting` automation command, jamming the app's UI thread
107+ # so the post-failure ProcDump path captures a hang dump.
108+ - ${{ if eq(parameters.simulateHangForTesting, true) }} :
109+ - pwsh : |
110+ Write-Host "##vso[task.setvariable variable=RNW_SIMULATE_HANG]1"
111+ Write-Host "Hang simulation armed (RNW_SIMULATE_HANG=1)"
112+ displayName: Arm hang-simulation env var (TEST ONLY)
113+
114+ # When simulating a hang, run ONLY the HangSimulationTest. The default
115+ # jest sequencer puts brand-new (no-timing-history) tests late in the order,
116+ # so without filtering the test step times out before the hang test even
117+ # runs. 4-minute timeout: enough for app launch (~30 s) + the test's 70 s
118+ # jest testTimeout + jest teardown attempt; ADO will cut off at 4 min if the
119+ # hang prevents jest from exiting cleanly, which is fine — Capture step then
120+ # finds the still-alive UI-hung app.
121+ - ${{ if eq(parameters.simulateHangForTesting, true) }} :
122+ - script : |
123+ yarn e2etest --testPathPattern HangSimulationTest
124+ displayName: yarn e2etest (hang simulation only)
125+ workingDirectory: packages/e2e-test-app-fabric
126+ timeoutInMinutes: 4
127+
128+ - ${{ if not(eq(parameters.simulateHangForTesting, true)) }} :
129+ - script : |
130+ yarn e2etest
131+ displayName: yarn e2etest
132+ workingDirectory: packages/e2e-test-app-fabric
133+ # Drop to 2 min during crash simulation — the app crashes
134+ # immediately on startup, so a 10-minute wait is dead time.
135+ ${{ if eq(parameters.simulateCrashForTesting, true) }}:
136+ timeoutInMinutes: 2
137+ ${{ if not(eq(parameters.simulateCrashForTesting, true)) }}:
138+ timeoutInMinutes: 10
139+
140+ # Always disarm the crash sentinel so it cannot leak to a rerun on
141+ # the same agent.
142+ - ${{ if eq(parameters.simulateCrashForTesting, true) }} :
143+ - pwsh : |
144+ $flagPath = Join-Path $env:ProgramData 'rnw-e2e-simulate-crash.flag'
145+ if (Test-Path $flagPath) {
146+ Remove-Item $flagPath -Force
147+ Write-Host "Removed crash-simulation sentinel at $flagPath"
148+ }
149+ displayName: Disarm crash-simulation sentinel (TEST ONLY)
150+ condition: always()
151+
152+ # Always disarm the hang-simulation env var so the post-failure
153+ # `Update snapshots` step (which also runs `yarn e2etest`) does not
154+ # re-trigger the hang and burn 10 minutes of dead time. Setting an
155+ # ADO variable to empty string clears it for subsequent steps.
156+ - ${{ if eq(parameters.simulateHangForTesting, true) }} :
157+ - pwsh : |
158+ Write-Host "##vso[task.setvariable variable=RNW_SIMULATE_HANG]"
159+ Write-Host "Hang simulation disarmed (RNW_SIMULATE_HANG cleared)"
160+ displayName: Disarm hang-simulation env var (TEST ONLY)
161+ condition: always()
162+
163+ # On test failure, snapshot any lingering RNTesterApp-Fabric / node
164+ # processes before subsequent steps (or the agent) tear them down.
165+ # WER LocalDumps only fires on actual crashes; this catches hangs
166+ # (e.g. "Unable to enter correct text" timeouts) where the process
167+ # is alive but unresponsive.
168+ #
169+ # Dumps must go into a subfolder of $(CrashDumpRootPath). Files
170+ # written directly at the root were observed to disappear during
171+ # the long `Update snapshots` step that runs after a failed test;
172+ # files in a subfolder survive. We don't know which agent
173+ # behavior deletes them — Defender, a 1ES cleanup script, or a
174+ # side-effect of `yarn e2etest -u` — but a subfolder evades it.
175+ - pwsh : |
176+ $procDump = Join-Path "$(ProcDumpPath)" 'procdump64.exe'
177+ if (-not (Test-Path $procDump)) {
178+ Write-Host "ProcDump not found at $procDump; skipping live-process dump capture."
179+ exit 0
180+ }
181+
182+ $hangDir = Join-Path "$(CrashDumpRootPath)" 'hang'
183+ New-Item -ItemType Directory -Path $hangDir -Force | Out-Null
184+
185+ $targets = @('RNTesterApp-Fabric', 'node')
186+ foreach ($name in $targets) {
187+ Get-Process -Name $name -ErrorAction SilentlyContinue | ForEach-Object {
188+ $dumpPath = Join-Path $hangDir ("hang_{0}_{1}.dmp" -f $name, $_.Id)
189+ Write-Host "Capturing full dump of $name (pid $($_.Id)) to $dumpPath"
190+ & $procDump -accepteula -ma $_.Id $dumpPath
191+ Write-Host ("ProcDump exit code: {0} (non-zero is normal - encodes the dump count written)" -f $LASTEXITCODE)
192+ }
193+ }
194+ # ProcDump uses non-zero exit codes to encode the number of dumps written.
195+ # Force a clean PowerShell exit so the step doesn't show as a warning.
196+ exit 0
197+ displayName: Capture dumps of surviving test processes
198+ condition: and(failed(), eq(variables.StartedFabricTests, 'true'))
199+ continueOnError: true
200+
201+ # Collect any in-process minidumps the app's UEF wrote to
202+ # %ProgramData%\RNW-E2E-Dumps, plus any dumps WER may have written
203+ # to its standard fallback locations, and stage them into
204+ # subfolders of $(CrashDumpRootPath) so they ride the crash-dumps
205+ # artifact. Dumps in subfolders survive the post-failure
206+ # `Update snapshots` step (see comment on the Capture step above).
207+ - pwsh : |
208+ # In-process minidumps (primary mechanism for actual crashes).
209+ $inProc = Join-Path $env:ProgramData 'RNW-E2E-Dumps'
210+ if (Test-Path $inProc) {
211+ $dest = Join-Path "$(CrashDumpRootPath)" 'in-process'
212+ New-Item -ItemType Directory -Path $dest -Force | Out-Null
213+ Copy-Item -Path "$inProc\*" -Destination $dest -Recurse -Force -ErrorAction SilentlyContinue
214+ Get-ChildItem -Path $dest -Recurse -Force -ErrorAction SilentlyContinue |
215+ Select-Object FullName, Length | Format-Table -AutoSize | Out-String | Write-Host
216+ }
217+
218+ # Fallback search: if the agent image ever changes back to a
219+ # working WER LocalDumps configuration, dumps may land here.
220+ $searchRoots = @(
221+ "$env:LOCALAPPDATA\CrashDumps",
222+ "$env:ProgramData\Microsoft\Windows\WER\ReportQueue",
223+ "$env:ProgramData\Microsoft\Windows\WER\ReportArchive",
224+ "$env:ProgramData\Microsoft\Windows\WER\Temp"
225+ )
226+ $found = @()
227+ $cutoff = (Get-Date).AddHours(-2)  # one recency cutoff shared by every root and file
228+ foreach ($root in $searchRoots) {
229+ if (-not (Test-Path $root)) { continue }
230+ $found += @(Get-ChildItem -Path $root -Recurse -Include *.dmp,*.mdmp -File -Force -ErrorAction SilentlyContinue | Where-Object { $_.LastWriteTime -gt $cutoff })
231+ }
232+ if ($found.Count -gt 0) {
233+ $dest = Join-Path "$(CrashDumpRootPath)" 'recovered'
234+ New-Item -ItemType Directory -Path $dest -Force | Out-Null
235+ foreach ($h in $found) {
236+ $target = Join-Path $dest ($h.FullName -replace '[:\\/]', '_')
237+ Copy-Item -LiteralPath $h.FullName -Destination $target -Force -ErrorAction SilentlyContinue
238+ Write-Host "Recovered $($h.FullName) ($($h.Length) bytes) -> $target"
239+ }
240+ }
241+ displayName: Collect in-process and fallback crash dumps
242+ condition: and(failed(), eq(variables.StartedFabricTests, 'true'))
243+ continueOnError: true
244+
245+ # Bundle matching PDBs and a debugging README into the Crash dumps
246+ # artifact so the dump is self-contained for an offline developer.
247+ # Skipped if no .dmp/.mdmp files exist — $(CrashDumpRootPath) also
248+ # holds MSBuild failure logs (MSBUILDDEBUGPATH points here), and
249+ # those don't need symbols or this README.
250+ - pwsh : |
251+ $dumps = Get-ChildItem -Path "$(CrashDumpRootPath)" -Recurse -Include *.dmp,*.mdmp -File -ErrorAction SilentlyContinue
252+ if (-not $dumps -or $dumps.Count -eq 0) {
253+ Write-Host "No .dmp/.mdmp files in $(CrashDumpRootPath); skipping symbols + README bundling."
254+ exit 0
255+ }
256+ Write-Host "Found $($dumps.Count) dump file(s); bundling matching PDBs and README."
257+
258+ $symbolsDir = Join-Path "$(CrashDumpRootPath)" 'symbols'
259+ $releaseRoot = "$(Build.SourcesDirectory)\packages\e2e-test-app-fabric\windows\${{ matrix.BuildPlatform }}\Release"
260+ if (Test-Path $releaseRoot) {
261+ $pdbs = Get-ChildItem -Path $releaseRoot -Recurse -Filter *.pdb -File -ErrorAction SilentlyContinue
262+ foreach ($pdb in $pdbs) {
263+ $rel = $pdb.FullName.Substring($releaseRoot.Length).TrimStart('\','/')
264+ $target = Join-Path $symbolsDir $rel
265+ New-Item -ItemType Directory -Path (Split-Path -Parent $target) -Force | Out-Null
266+ Copy-Item -LiteralPath $pdb.FullName -Destination $target -Force -ErrorAction SilentlyContinue
267+ }
268+ Write-Host "Staged $($pdbs.Count) PDB(s) under $symbolsDir"
269+ } else {
270+ Write-Host "Release root not found at $releaseRoot; skipping PDB stage."
271+ }
272+
273+ $readme = @'
274+ # Reading these crash dumps
275+
276+ This artifact contains crash and/or hang dumps from a failed React
277+ Native Windows E2E test run, plus matching debug symbols.
278+
279+ ## What is in here
280+
281+ - `hang/` -- full-memory dumps captured by procdump64 from
282+ RNTesterApp-Fabric / node processes that were still alive after
283+ the test step failed or timed out.
284+ - `in-process/` -- full-memory minidumps written by
285+ RNTesterApp-Fabric's own unhandled-exception filter when the app
286+ actually crashed.
287+ - `recovered/` -- dumps recovered from common WER fallback
288+ locations on the agent. Usually empty.
289+ - `symbols/` -- PDBs that match the binaries deployed to the test
290+ agent. Folder layout mirrors the test app's Release deploy tree.
291+
292+ ## Opening in WinDbg
293+
294+ 1. Download and extract this artifact. Note the absolute path of
295+ the extracted `symbols/` folder.
296+ 2. Open a dump:
297+
298+ windbg -z hang\hang_RNTesterApp-Fabric_<pid>.dmp
299+
300+ 3. Set the symbol path (this artifact's symbols + Microsoft public
301+ symbol server) and reload:
302+
303+ .sympath srv*C:\symbols*https://msdl.microsoft.com/download/symbols;<extracted-path>\symbols
304+ .reload /f
305+
306+ 4. Useful first commands:
307+ - `~* k` -- call stack of every thread (most useful for hangs)
308+ - `!analyze -v` -- automatic crash analysis (most useful for crashes)
309+
310+ ## If you need the binaries too
311+
312+ The PDBs alone are enough for stack walks and type info. If you
313+ need module bytes (e.g. to disassemble), download the matching
314+ `RNTesterApp-Fabric-<plat>-<attempt>` artifact from the same
315+ pipeline run; its layout matches `symbols/` here.
316+ '@
317+ Set-Content -LiteralPath "$(CrashDumpRootPath)\README.md" -Value $readme -Encoding utf8
318+ Write-Host "Wrote $(CrashDumpRootPath)\README.md"
319+ displayName: Bundle symbols and README with crash dumps
320+ condition: and(failed(), eq(variables.StartedFabricTests, 'true'))
321+ continueOnError: true
76322
77323 - powershell : |
78324 if (Test-Path "packages/e2e-test-app-fabric/test/__image_snapshots__/__diff_output__") {
0 commit comments