Skip to content

Commit 7176bdd

Browse files
committed
ci(ios): simulator boot / availability fixes
Poll simctl bootstatus with migration logging before app install, raise pre-boot and job timeouts, start simulator artifacts after boot, and document iOS simulator reliability in OKF.
1 parent 21c7618 commit 7176bdd

7 files changed

Lines changed: 274 additions & 33 deletions

File tree

Lines changed: 146 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,158 @@
11
#!/bin/bash
22

3-
# Any command here that exits non-zero is an error
4-
set -e
3+
# Boot the Detox iOS simulator, wait until it is fully ready for testing (including
4+
# first-boot data migration on fresh simulators), then install the test app.
5+
# Uses the device *name* from tests/.detoxrc.js — no pinned UDID in the workflow.
6+
set -euo pipefail
7+
8+
BOOT_POLL_INTERVAL_SECONDS="${BOOT_POLL_INTERVAL_SECONDS:-20}"
9+
BOOT_PROBE_TIMEOUT_SECONDS="${BOOT_PROBE_TIMEOUT_SECONDS:-12}"
10+
BOOT_MAX_WAIT_SECONDS="${BOOT_MAX_WAIT_SECONDS:-660}"
11+
12+
#######################################
# Run a command in the background with a wall-clock limit.
# Arguments: $1  - limit in whole seconds (non-negative integer)
#            $2… - the command to run and its arguments
# Outputs:   whatever the command itself writes (it shares this shell's
#            stdout/stderr); usage errors go to stderr
# Returns:   the command's own exit status if it finished in time,
#            124 if it was still running at the limit (it is sent SIGTERM
#            and reaped), 2 on invalid usage
#######################################
run_with_timeout() {
  local max="${1:-}"
  if [[ ! "$max" =~ ^[0-9]+$ ]]; then
    echo "run_with_timeout: timeout must be a non-negative integer (got '${max}')" >&2
    return 2
  fi
  shift
  if (( $# == 0 )); then
    echo "run_with_timeout: no command given" >&2
    return 2
  fi

  "$@" &
  local cmd_pid=$!

  # Poll in 0.1s ticks so a command that finishes immediately is noticed
  # right away instead of costing a full second per call.
  # 10#… forces base 10 so a value such as "08" is not parsed as octal.
  local ticks=0
  local limit=$(( 10#$max * 10 ))
  while kill -0 "$cmd_pid" 2>/dev/null && (( ticks < limit )); do
    sleep 0.1
    ticks=$(( ticks + 1 ))
  done

  if kill -0 "$cmd_pid" 2>/dev/null; then
    # The child may exit between the liveness check and the kill; a failed
    # kill must not abort a caller running under `set -e`.
    kill "$cmd_pid" 2>/dev/null || true
    wait "$cmd_pid" 2>/dev/null || true
    return 124
  fi

  # Capture the status explicitly so the function always returns normally.
  local rc=0
  wait "$cmd_pid" || rc=$?
  return "$rc"
}
29+
30+
# Write one progress line to stdout, tagged with the "[boot-status]" prefix.
# All arguments are joined into a single string (via "$*") after the prefix.
log_boot_status() {
  printf '%s\n' "[boot-status] $*"
}
33+
34+
#######################################
# Print the first line of `xcrun simctl list devices booted` that mentions
# the given device name, or nothing if there is no such line.
# Arguments: $1 - simulator device name (e.g. "iPhone 17")
# Outputs:   the matching line, unmodified, on stdout
# Returns:   always 0 (absence of a match is reported as empty output)
#######################################
describe_booted_device() {
  local device="$1"
  # -F: match the name literally, so characters such as "." in a device
  #     name are not interpreted as regex metacharacters.
  # The trailing " (" requires the name to be followed directly by the
  # parenthesised field in the listing.
  # `|| true` keeps a no-match (grep exit 1) from failing under pipefail.
  xcrun simctl list devices booted 2>/dev/null \
    | grep -i -F -- "${device} (" \
    | grep -v -e 'Phone:' -e 'unavailable' -e 'CoreSimulator' \
    | head -1 \
    || true
}
44+
45+
#######################################
# Probe `xcrun simctl bootstatus <device> -d` for a bounded time and log
# whatever it printed, one [boot-status] line per non-empty output line.
# Globals:   BOOT_PROBE_TIMEOUT_SECONDS (read)
# Arguments: $1 - simulator device name
# Outputs:   progress lines via log_boot_status
# Returns:   1 if the probe hit the timeout (boot still in progress),
#            0 otherwise
#######################################
log_migration_status() {
  local device="$1"
  local migration_output line
  local probe_rc=0

  log_boot_status "probing data migration (bootstatus -d, up to ${BOOT_PROBE_TIMEOUT_SECONDS}s)..."
  # Capture the status with `||` instead of toggling `set +e`/`set -e`, so
  # the caller's errexit setting is left exactly as it was.
  migration_output="$(run_with_timeout "$BOOT_PROBE_TIMEOUT_SECONDS" xcrun simctl bootstatus "$device" -d 2>&1)" || probe_rc=$?

  if [[ "$probe_rc" -eq 124 ]]; then
    log_boot_status " data migration / system bring-up still in progress"
  fi

  # Log captured output even when the probe timed out: a timed-out probe is
  # the case where the boot is still in progress, so anything it printed
  # before being stopped is the progress detail worth seeing.
  if [[ -n "$migration_output" ]]; then
    while IFS= read -r line; do
      [[ -n "$line" ]] || continue
      log_boot_status " ${line}"
    done <<<"$migration_output"
  elif [[ "$probe_rc" -ne 124 ]]; then
    log_boot_status " no migration details reported"
  fi

  if [[ "$probe_rc" -eq 124 ]]; then
    return 1
  fi
  return 0
}
70+
71+
#######################################
# Poll until `xcrun simctl bootstatus <device>` succeeds or the overall
# wait budget is used up, logging progress on every poll.
# Globals:   BOOT_MAX_WAIT_SECONDS, BOOT_PROBE_TIMEOUT_SECONDS,
#            BOOT_POLL_INTERVAL_SECONDS (all read)
# Arguments: $1 - simulator device name
# Outputs:   progress lines via log_boot_status
# Returns:   0 once bootstatus exits 0, 1 if the wait budget expires first
#######################################
wait_for_simulator_ready() {
  local device="$1"
  local start=$SECONDS
  local elapsed booted_line ready_rc remaining

  while (( SECONDS - start < BOOT_MAX_WAIT_SECONDS )); do
    elapsed=$(( SECONDS - start ))
    log_boot_status "elapsed=${elapsed}s phase=wait_for_full_boot device=\"${device}\""

    booted_line="$(describe_booted_device "$device")"
    if [[ -z "$booted_line" ]]; then
      log_boot_status " simctl list: not in Booted state yet"
    else
      log_boot_status " simctl list: ${booted_line}"
      # Informational only; a timed-out migration probe is not a failure.
      log_migration_status "$device" || true
    fi

    # Capture the probe status with `||` rather than toggling errexit.
    ready_rc=0
    run_with_timeout "$BOOT_PROBE_TIMEOUT_SECONDS" xcrun simctl bootstatus "$device" >/dev/null 2>&1 || ready_rc=$?

    if [[ "$ready_rc" -eq 0 ]]; then
      # Recompute the elapsed time here: the probes above can each take up
      # to BOOT_PROBE_TIMEOUT_SECONDS, so the value from the top of the
      # loop would under-report how long the boot really took.
      log_boot_status "bootstatus: simulator ready after $(( SECONDS - start ))s"
      log_migration_status "$device" || true
      return 0
    fi

    if [[ "$ready_rc" -eq 124 ]]; then
      log_boot_status "bootstatus: still booting (probe timed out after ${BOOT_PROBE_TIMEOUT_SECONDS}s)"
    else
      log_boot_status "bootstatus: probe exited with status ${ready_rc}"
    fi

    # Never sleep past the overall deadline.
    remaining=$(( BOOT_MAX_WAIT_SECONDS - (SECONDS - start) ))
    if (( remaining <= 0 )); then
      break
    fi
    if (( remaining > BOOT_POLL_INTERVAL_SECONDS )); then
      remaining=$BOOT_POLL_INTERVAL_SECONDS
    fi
    sleep "$remaining"
  done

  log_boot_status "ERROR: timed out after ${BOOT_MAX_WAIT_SECONDS}s waiting for simulator to become ready"
  return 1
}
5112

6113
# Get our simulator name from our test Detox config
7-
pushd "$(dirname "$0")/../../../tests" || exit 1
8-
SIM="$(cat .detoxrc.js | grep iPhone | cut -d"'" -f2)"
9-
echo "Attempting to boot iOS Simulator $SIM..."
114+
pushd "$(dirname "$0")/../../../tests" >/dev/null || exit 1
115+
SIM="$(grep iPhone .detoxrc.js | head -1 | cut -d"'" -f2)"
116+
popd >/dev/null || exit 1
117+
118+
log_boot_status "phase=resolve_device name=\"${SIM}\" (from tests/.detoxrc.js)"
10119

11120
# Clear up any existing attempts in case we are re-trying
12-
echo "...killing any existing Simulator processes..."
13-
killall Simulator || true
121+
log_boot_status "phase=shutdown_existing killing Simulator.app if running..."
122+
killall Simulator 2>/dev/null || true
123+
xcrun simctl shutdown "$SIM" 2>/dev/null || true
124+
125+
log_boot_status "phase=boot_command starting simctl boot..."
126+
set +e
127+
boot_output="$(xcrun simctl boot "$SIM" 2>&1)"
128+
boot_rc=$?
129+
set -e
130+
if [[ "$boot_rc" -ne 0 ]]; then
131+
log_boot_status "simctl boot exited ${boot_rc}: ${boot_output}"
132+
else
133+
log_boot_status "simctl boot command returned (device may still be migrating data)"
134+
fi
135+
136+
log_boot_status "phase=foreground_simulator opening Simulator.app..."
137+
open -a Simulator.app
14138

15-
# Boot the simulator if not booted, make sure it is in the foreground
16-
echo "...booting $SIM and foregrounding Simulator..."
17-
(xcrun simctl boot "$SIM" || true) && open -a Simulator.app
139+
if ! wait_for_simulator_ready "$SIM"; then
140+
exit 1
141+
fi
18142

19-
# Is it booted?
20-
echo "...waiting to make sure $SIM is booted..."
21-
xcrun simctl list |grep -i "$SIM ("|grep -v 'Phone:'|grep -v 'unavailable'|grep -v CoreSimulator|grep Booted
143+
pushd "$(dirname "$0")/../../../tests" >/dev/null || exit 1
144+
BUILDDIR="$(find ios/build/Build/Products -type d -name 'testing.app' 2>/dev/null | head -1)"
22145

23-
# Are we a Debug or Release build?
24-
BUILDDIR="$( find ios/build/Build/Products -type d |grep 'testing.app$' | head -1)"
146+
if [[ -z "$BUILDDIR" || ! -d "$BUILDDIR" ]]; then
147+
log_boot_status "ERROR: could not find tests/ios/build/.../testing.app"
148+
popd >/dev/null || exit 1
149+
exit 1
150+
fi
25151

26-
# Install our app (glob so Release or Debug works)
27-
echo "...installing the Test app build on $SIM..."
152+
log_boot_status "phase=install_app bundle=\"${BUILDDIR}\""
153+
install_start=$SECONDS
28154
xcrun simctl install "$SIM" "$BUILDDIR"
155+
log_boot_status "install complete in $((SECONDS - install_start))s"
156+
popd >/dev/null || exit 1
29157

30-
echo "Successfully booted $SIM and installed test app."
158+
log_boot_status "phase=complete device=\"${SIM}\" ready with test app installed"

.github/workflows/tests_e2e_ios.yml

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ jobs:
9090
runs-on: macos-26
9191
needs: matrix_prep
9292
# TODO matrix across APIs, at least 11 and 15 (lowest to highest)
93-
timeout-minutes: 80
93+
timeout-minutes: 87
9494
env:
9595
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
9696
CCACHE_SLOPPINESS: clang_index_store,file_stat_matches,include_file_ctime,include_file_mtime,ivfsoverlay,pch_defines,modules,system_headers,time_macros
@@ -322,34 +322,33 @@ jobs:
322322
curl --output /dev/null --silent --head --fail "http://localhost:8081/index.bundle?platform=ios&dev=true&minify=false&inlineSourceMap=true"
323323
echo "...javascript bundle ready"
324324
325-
- name: Start Screen and Simulator Recordings and System Logging
326-
# With a little delay so the detox test below has time to spawn it, missing the first part of boot is fine
325+
- name: Start Screen and System Logging
327326
continue-on-error: true
328327
run: |
329328
nohup sh -c "sleep 314159265 | screencapture -v -C -k -T0 -g screenrecording.mov > screenrecording.log 2>&1 &"
330329
nohup sh -c "log stream --backtrace --color none --style syslog > syslog.log 2>&1 &"
331-
nohup sh -c "sleep 110 && xcrun simctl io booted recordVideo --codec=h264 -f simulator.mp4 2>&1 &"
332-
333-
- name: Create Simulator Log
334-
# With a little delay so the detox test below has time to spawn it, missing the first part of boot is fine
335-
# If you boot the simulator separately from detox, some other race fails and detox testee never sends ready to proxy
336-
continue-on-error: true
337-
run: nohup sh -c "sleep 110 && xcrun simctl spawn booted log stream --level debug --style compact > simulator.log 2>&1 &"
338330
339331
- name: Pre-Boot Simulator
340-
# The goal here is to separate Simulator boot from Detox run,
341-
# So that Simulator boot issues we seem to have may be handled separately
332+
# Separate Simulator boot from Detox run. boot-simulator.sh polls bootstatus and logs
333+
# migration progress so long first-boot waits are visible in the step log.
342334
# https://github.com/nick-fields/retry/releases
343335
uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
344336
with:
345-
timeout_minutes: 5
337+
timeout_minutes: 12
346338
retry_wait_seconds: 60
347339
max_attempts: 3
348340
command: ./.github/workflows/scripts/boot-simulator.sh
349341

342+
- name: Start Simulator Recordings and Log
343+
# Start after Pre-Boot so booted exists and logging covers the Detox run (not a fixed delay).
344+
continue-on-error: true
345+
run: |
346+
nohup sh -c "xcrun simctl io booted recordVideo --codec=h264 -f simulator.mp4 2>&1 &"
347+
nohup sh -c "xcrun simctl spawn booted log stream --level debug --style compact > simulator.log 2>&1 &"
348+
350349
- name: Detox Test Debug
351350
if: contains(matrix.buildmode, 'debug')
352-
timeout-minutes: 55
351+
timeout-minutes: 62
353352
run: yarn tests:ios:test-cover
354353

355354
- name: Process iOS native coverage
@@ -359,7 +358,7 @@ jobs:
359358

360359
- name: Detox Test Release
361360
if: contains(matrix.buildmode, 'release')
362-
timeout-minutes: 55
361+
timeout-minutes: 62
363362
run: yarn tests:ios:test:release
364363

365364
- name: Stop Screen and App Video and System Logging

okf-bundle/ci-workflows/android.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Android CI workflows
2+
3+
TBD — Gradle/Detox emulator reliability, `coverage.ec` pull behavior, and artifact troubleshooting.

okf-bundle/ci-workflows/index.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# CI workflows
2+
3+
Knowledge for GitHub Actions workflows in this repository: how jobs are structured, platform-specific reliability concerns, and how to debug failures from CI artifacts.
4+
5+
## Platforms
6+
7+
* [iOS](ios.md) — simulator boot reliability, logging, and troubleshooting
8+
* [Android](android.md) — TBD
9+
* [Other](other.md) — macOS Detox (non-iOS), Windows, and shared workflow concerns — TBD
10+
11+
## Related
12+
13+
* [Testing / coverage design](../testing/coverage-design.md) — e2e coverage collection that runs inside the iOS workflow

okf-bundle/ci-workflows/ios.md

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# iOS CI workflows
2+
3+
This document covers the **Testing E2E iOS** workflow (`.github/workflows/tests_e2e_ios.yml`) and scripts it uses under `.github/workflows/scripts/`.
4+
5+
## Simulator reliability
6+
7+
### Problem
8+
9+
On GitHub Actions macOS runners (currently `macos-26` with `XCODE_VERSION: latest-stable`), booting an iOS Simulator for Detox is not instantaneous. A simulator can report `Booted` in `simctl list` while it is still unusable:
10+
11+
1. **First-boot data migration** — `com.apple.datamigrator` can run for several minutes on a fresh simulator (observed ~4+ minutes on iOS 26.5). SpringBoard and app install are not reliable until migration finishes.
12+
2. **Ambiguous device names** — runners often have multiple simulators with the same marketing name (e.g. several `iPhone 17` entries across iOS runtimes). We intentionally use the **device name** from `tests/.detoxrc.js`, not a pinned UDID, so we do not churn workflow YAML when runner images change.
13+
3. **`Booted` ≠ ready for testing** — installing or launching the app during migration can block or fail; Detox may time out while the simulator is still migrating.
14+
15+
### What we do
16+
17+
**Pre-boot step** (`.github/workflows/scripts/boot-simulator.sh`), run via `nick-fields/retry` before Detox:
18+
19+
| Phase | What happens |
20+
|--------|----------------|
21+
| `resolve_device` | Read simulator name from `tests/.detoxrc.js` (e.g. `iPhone 17`) |
22+
| `shutdown_existing` | Kill `Simulator.app` and `simctl shutdown` the target |
23+
| `boot_command` | `xcrun simctl boot <name>` |
24+
| `wait_for_full_boot` | Poll every 20s (up to 11 min) until `simctl bootstatus` reports ready |
25+
| `install_app` | `simctl install` the built `testing.app` **only after** bootstatus succeeds |
26+
27+
During `wait_for_full_boot`, the script logs to the **GitHub Actions step log** with the `[boot-status]` prefix:
28+
29+
- Whether `simctl list` shows the device as `Booted`
30+
- **Data migration** snippets from `xcrun simctl bootstatus <name> -d` (probed with a short timeout so the step keeps printing progress instead of looking hung)
31+
- Elapsed time per poll
32+
33+
We wait for **`simctl bootstatus`** (full boot completion), not merely the `Booted` line in `simctl list`.
34+
35+
**Timeouts** (tuned for first-boot migration on latest iOS):
36+
37+
| Setting | Value | Notes |
38+
|---------|-------|--------|
39+
| Pre-Boot retry step | 12 min × 3 attempts | was 5 min |
40+
| Job `timeout-minutes` | 87 | +7 min vs previous 80 |
41+
| Detox test step | 62 min | +7 min vs previous 55 |
42+
43+
Simulator **caching** of device data is intentionally deferred — caching a bad migration state would require cache invalidation policy.
44+
45+
### Simulator logging and video (troubleshooting)
46+
47+
Artifacts are uploaded on every run (`if: always()`), even when tests fail.
48+
49+
| Artifact | Source | Use when |
50+
|----------|--------|----------|
51+
| `simulator-<buildmode>-<iteration>_log` | `xcrun simctl spawn booted log stream` → `simulator.log` | In-simulator system/app logs during Detox |
52+
| `simulator-<buildmode>-<iteration>_video` | `xcrun simctl io booted recordVideo` → `simulator.mp4` | Visual confirmation of UI state |
53+
| `screenrecording-<buildmode>-<iteration>` | `screencapture` of the Mac desktop | Includes Simulator.app window |
54+
| `screenrecording-setup-<buildmode>-<iteration>.mov` | Guidepup setup recording | Very early environment setup |
55+
| `emulator-scripts-logs-<buildmode>-<iteration>` | `.github/workflows/scripts/*.log` | Script output if redirected |
56+
57+
**When to use which log**
58+
59+
- **Boot / migration / “simulator won’t start”** — read the **Pre-Boot Simulator** step log in GitHub Actions first. Look for `[boot-status]` lines and `bootstatus -d` migration output. That captures first-boot migration even though `simulator.log` starts only after pre-boot succeeds.
60+
- **Detox / app / test failures** — download `simulator-*_log` and search for `com.invertase.testing`, `SpringBoard`, `xctest`, or `Detox`.
61+
- **UI regressions** — `simulator-*_video` or `screenrecording-*`.
62+
63+
**Downloading artifacts**
64+
65+
From the workflow run page: **Artifacts** section at the bottom, or:
66+
67+
```bash
68+
gh run download <run-id> -n simulator-debug-0_log
69+
```
70+
71+
**Analyzing `simulator.log`**
72+
73+
The file is unified logging from the booted simulator (compact style). Useful patterns:
74+
75+
```bash
76+
rg -i "datamigrator|Telemetry: duration|systemShellWillBootstrap" simulator.log
77+
rg -i "com\.invertase\.testing|installcoordination" simulator.log
78+
rg -i "test daemon not ready|xctest" simulator.log
79+
```
80+
81+
A long gap with only `com.apple.datamigrator` activity and no `com.invertase.testing` usually means the simulator was still in first-boot migration or pre-boot had not finished installing the app yet.
82+
83+
### Detox configuration
84+
85+
Device type is defined in `tests/.detoxrc.js` (`devices.simulator.device.type`). The boot script and Detox both use this name. CI does not hard-code a UDID.
86+
87+
### Operational notes
88+
89+
- **Release vs debug** — matrix runs both; each has separate artifacts (`debug` / `release` in the artifact name).
90+
- **Retry** — Pre-Boot retries up to 3 times with 60s between attempts (clean shutdown + boot each time).
91+
- **Do not boot the simulator only inside Detox** — historical races where the testee never sent “ready” to the Detox proxy; pre-boot remains mandatory.

okf-bundle/ci-workflows/other.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Other CI workflows
2+
3+
TBD — macOS Detox (`tests_e2e_other.yml`), Windows, documentation workflows, and shared actions (caches, Codecov, etc.).

okf-bundle/index.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ okf_version: "0.1"
66

77
Knowledge documents for react-native-firebase development, testing, and maintenance.
88

9+
# CI workflows
10+
11+
* [CI workflows](/ci-workflows/index.md) — GitHub Actions reliability, logging, and troubleshooting (iOS simulator boot documented; Android/other TBD)
12+
913
# Testing
1014

1115
* [Coverage design](/testing/coverage-design.md) - unit and e2e coverage goals, pipelines, and Codecov uploads

0 commit comments

Comments
 (0)