eggjs
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 18 additions & 13 deletions
diff --git a/benchmark/ci-test/README.md b/benchmark/ci-test/README.md
Lines changed: 15 additions & 6 deletions
@@ -2,6 +2,7 @@
 name: CI
 
 on:
+  workflow_dispatch:
   push:
     paths-ignore:
       - '**/*.md'
@@ -229,17 +230,13 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ['ubuntu-latest', 'windows-latest']
-        node: ['24']
-        coverage: [false]
-        exclude:
-          - os: 'ubuntu-latest'
-            node: '24'
-            coverage: false
         include:
           - os: 'ubuntu-latest'
             node: '24'
             coverage: true
+          - os: 'windows-latest'
+            node: '24'
+            coverage: false
 
     name: Test bin (${{ matrix.os }}, ${{ matrix.node }})
     runs-on: ${{ matrix.os }}
@@ -273,17 +270,25 @@ jobs:
           UTOO_CACHE_DIR: ${{ runner.temp }}/utoo-store
         run: ut install --from pnpm
 
+      - name: Build egg-bin
+        run: ut run build -- --workspace ./tools/egg-bin
+
+      - name: Report egg-bin runner diagnostics
+        if: always()
+        run: node scripts/ci-runner-diagnostics.js --name "Egg-bin runner diagnostics (${{ matrix.os }}, Node ${{ matrix.node }})" --samples 5 -- node tools/egg-bin/bin/run.js --version
+
       - name: Run tests (with coverage)
         if: ${{ matrix.coverage }}
-        run: |
-          ut run build -- --workspace ./tools/egg-bin
-          ut run ci --workspace @eggjs/bin
+        run: ut run ci --workspace @eggjs/bin
 
       - name: Run tests
         if: ${{ !matrix.coverage }}
-        run: |
-          ut run build -- --workspace ./tools/egg-bin
-          ut run test --workspace @eggjs/bin
+        run: ut run test --workspace @eggjs/bin
+
+      - name: Report bin parallelism metrics
+        if: always()
+        working-directory: tools/egg-bin
+        run: node ../../scripts/ci-test-benchmark.js --report-only --vitest-json benchmark/ci-test/ci-run/vitest-results.json --output-dir benchmark/ci-test/ci-run --name "Egg-bin test metrics (${{ matrix.os }}, Node ${{ matrix.node }})" -- vitest run ${{ matrix.coverage && '--coverage' || '' }}
 
       - name: Code Coverage
         if: ${{ matrix.coverage }}
 
@@ -21,9 +21,14 @@ ut run benchmark:ci-test -- --output-dir .tmp/ci-benchmark -- ut execute vitest
 
 # Summarize an existing Vitest JSON without running tests (the CI path)
 ut run benchmark:ci-test -- --report-only --vitest-json benchmark/ci-test/ci-run/vitest-results.json
+
+# From tools/egg-bin, summarize a package-local run with command metadata
+cd tools/egg-bin
+node ../../scripts/ci-test-benchmark.js --report-only --vitest-json benchmark/ci-test/ci-run/vitest-results.json -- vitest run
 ```
 
 The default output directory is `benchmark/ci-test/<timestamp>`. Use `--output-dir` for a deterministic path when collecting artifacts.
+In report-only mode, arguments after `--` describe the original Vitest command for the report's parameter table; they are not executed.
 
 ## Parallelism metrics
 
@@ -36,20 +41,24 @@ Tests run with `isolate: false` (full parallelism; the tegg `TeggScope` per-app
 - **Parallel efficiency** — `avg concurrency ÷ worker ceiling` (the ceiling mirrors `vitest.config.ts`: Windows CI caps workers, otherwise the machine's available parallelism).
 - **Critical path** — the longest single-file span (the wall-clock floor with unlimited workers).
 
+The report also includes long-tail project, file, and test tables. The test table is built from Vitest `assertionResults[*].duration`, so it pinpoints the specific slow test cases inside fork-heavy test files, such as those in `tools/egg-bin`.
+
 > **Interval caveat:** the Vitest 4 JSON reporter derives a file's `startTime`/`endTime` from test-level timings only, so the span covers test bodies and per-test `beforeEach`/`afterEach` but **excludes suite-level `beforeAll`/`afterAll` (where egg boots its apps — often the dominant per-file cost) and module transform/import**. That excluded time still occupies the worker threads, so **avg concurrency and parallel efficiency are lower bounds** on real worker utilization; for `beforeAll`-heavy suites avg can read below 1 while peak is high. Use **peak concurrency** as the primary signal. Fully-skipped files (no test timings) are dropped from the calculation.
 
 ## CI integration
 
-The `test` gating job in `.github/workflows/ci.yml` is instrumented without changing gate semantics:
+The `test` and `test-egg-bin` gating jobs in `.github/workflows/ci.yml` are instrumented without changing gate semantics:
 
-1. `vitest.config.ts` adds a `json` reporter when `CI` is set, writing `benchmark/ci-test/ci-run/vitest-results.json` during the gating `ut run ci` run.
-2. A `Report parallelism metrics` step (`if: always()`) runs the harness in `--report-only` mode against that JSON.
-3. When `GITHUB_STEP_SUMMARY` is set, the Markdown report (including the parallelism table) is appended to the GitHub Actions job summary, so the metrics are visible on the run page per OS/Node matrix entry.
+1. The relevant `vitest.config.ts` adds a `json` reporter when `CI` is set, writing `benchmark/ci-test/ci-run/vitest-results.json` during the gating test run.
+2. The `test-egg-bin` job splits build and test into separate steps, and runs `scripts/ci-runner-diagnostics.js` after build to log runner hardware, Node process startup, egg-bin command startup, and temp-file IO baselines.
+3. The `test-egg-bin` package harness enables `EGG_BIN_TIMING=1` when `CI` is set, propagates that flag into each spawned `egg-bin` child, and forces coffee debug output, so CI logs show per-child startup, oclif import/config/manifest/command-load phases, command init, glob/config, `startVitest`, and `vitest.close` timings. On Windows the entrypoint also presets `SHELL` when it is missing, avoiding oclif's synchronous shell probe in every child.
+4. A `Report parallelism metrics` step (named `Report bin parallelism metrics` in the `test-egg-bin` job; both use `if: always()`) runs the harness in `--report-only` mode against that JSON.
+5. When `GITHUB_STEP_SUMMARY` is set, the Markdown reports (including the parallelism and long-tail test tables) are appended to the GitHub Actions job summary, so the metrics are visible on the run page per OS/Node matrix entry.
 
-Gating still comes entirely from `ut run ci`; the metrics step is informational and exits `0` even when the JSON is missing.
+Gating still comes entirely from the preceding test command (`ut run ci` or `ut run test`); the metrics step is informational and exits `0` even when the JSON is missing.
 
 ## Outputs
 
 - `report.md`: human-readable benchmark report (run, environment, parameters, parallelism, long-tail tables).
-- `report.json`: structured report containing environment, command, wall time, Vitest summary, parallelism metrics, long-tail file/project durations, and coverage/worker/isolate parameters.
+- `report.json`: structured report containing environment, command, wall time, Vitest summary, parallelism metrics, long-tail project/file/test durations, and coverage/worker/isolate parameters.
 - `vitest-results.json`: raw Vitest JSON reporter output.