From 498aab2c5ca478f2b60abd6eab4e56ab5e83d8a1 Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Wed, 3 Jun 2026 16:25:51 -0400
Subject: [PATCH 1/2] =?UTF-8?q?Instrument=20the=20realm-server=20search=20?=
 =?UTF-8?q?path=20with=20a=20request=E2=86=92response=20timeline=20(CS-113?=
 =?UTF-8?q?63)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During a from-scratch index, prerendered cards block for tens of seconds on
in-render `_search` round-trips while the SQL behind them runs in
milliseconds. We could see the client wait (host-side `boxel_index.diagnostics`)
but had no server-side view of where those seconds went. This adds that view:

- A client-minted `x-boxel-request-id` correlation id, stamped on the
  prerendered host's `_federated-search` fetch (prerender-gated; live SPA
  traffic is untouched). Join key only — no new client measurement.
- A `realm:search-timing` line keyed by that id with the request→response
  stage breakdown: parse, resolveRealms, sql, loadLinks, populate
  (relationship/query-field assembly), per-instance cache read/write +
  hit/miss, stringify, and result count. Emitted by `handle-search` for the
  full handler picture, and standalone by `searchRealms` on the host-test
  path. Gated on a correlation id, so normal traffic allocates nothing.
- Request duration + the correlation id on the `realm:requests` log lines
  (the outermost request→response bound, incl. body read + send).
- A `realm:health` sampler: event-loop lag (mean/p99/max) alongside the
  in-flight `_search` count and heap, logged only during saturation windows
  — the signal that confirms or refutes single-thread saturation directly.

The timing collector is threaded through the shared `SearchOpts` and is a
no-op without a correlation id; `RealmIndexQueryEngine` only ever runs in the
realm-server, so the host stays unaffected.

A host integration test exercises the whole path end-to-end: the in-realm
browser mints the id, issues a real `store.search`, and the test asserts the
id lands in the server's `realm:search-timing` line (and that live,
non-prerender traffic stamps nothing and emits nothing).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../host/app/lib/prerender-fetch-headers.ts   |  31 +++
 packages/host/app/services/store.ts           |   2 +
 .../tests/helpers/realm-server-mock/routes.ts |  10 +
 .../search-correlation-id-test.gts            | 195 ++++++++++++++++++
 .../realm-server/handlers/handle-search.ts    |  74 ++++++-
 packages/realm-server/health-sampler.ts       |  65 ++++++
 packages/realm-server/main.ts                 |   7 +
 packages/realm-server/middleware/index.ts     |  56 ++++-
 packages/realm-server/search-inflight.ts      |  21 ++
 packages/runtime-common/index.ts              |   1 +
 packages/runtime-common/prerender-headers.ts  |  24 +++
 .../realm-index-query-engine.ts               |  95 ++++++---
 packages/runtime-common/realm.ts              |   1 +
 packages/runtime-common/request-timings.ts    |  64 ++++++
 packages/runtime-common/search-utils.ts       |  67 +++++-
 15 files changed, 668 insertions(+), 45 deletions(-)
 create mode 100644 packages/host/tests/integration/search-correlation-id-test.gts
 create mode 100644 packages/realm-server/health-sampler.ts
 create mode 100644 packages/realm-server/search-inflight.ts
 create mode 100644 packages/runtime-common/request-timings.ts

diff --git a/packages/host/app/lib/prerender-fetch-headers.ts b/packages/host/app/lib/prerender-fetch-headers.ts
index a17823bcf68..a4b810acc31 100644
--- a/packages/host/app/lib/prerender-fetch-headers.ts
+++ b/packages/host/app/lib/prerender-fetch-headers.ts
@@ -2,6 +2,7 @@ import {
   DURING_PRERENDER_HEADER,
   X_BOXEL_CONSUMING_REALM_HEADER,
   X_BOXEL_JOB_ID_HEADER,
+  X_BOXEL_REQUEST_ID_HEADER,
 } from '@cardstack/runtime-common';
 
 // Set by the prerender server's `evaluateOnNewDocument` before the
@@ -46,3 +47,33 @@ export function jobIdHeader(): Record<string, string> {
   let j = (globalThis as unknown as { __boxelJobId?: string }).__boxelJobId;
   return j ? { [X_BOXEL_JOB_ID_HEADER]: j } : {};
 }
+
+// Per-search correlation id. Minted fresh for each `_federated-search`
+// fetch the SPA issues while rendering inside a prerender tab, and stamped
+// as `x-boxel-request-id`. The realm-server reads it back out and keys its
+// `realm:search-timing` line on it, so a search the prerender observes as
+// slow (surfaced in its `queryLoadsInFlight` diagnostics) can be joined to
+// the realm-server's stage-by-stage view of the same request. Gated on the
+// prerender context — exactly like the job-id / consuming-realm headers —
+// so live SPA traffic is unaffected and emits no server-side timing line.
+export function requestIdHeader(): Record<string, string> {
+  let flag = (globalThis as unknown as { __boxelRenderContext?: boolean })
+    .__boxelRenderContext;
+  if (flag !== true) {
+    return {};
+  }
+  return { [X_BOXEL_REQUEST_ID_HEADER]: newCorrelationId() };
+}
+
+function newCorrelationId(): string {
+  let c = (globalThis as { crypto?: { randomUUID?: () => string } }).crypto;
+  if (c?.randomUUID) {
+    return c.randomUUID();
+  }
+  // Fallback for the rare environment without `crypto.randomUUID` — the id
+  // only needs to disambiguate concurrent searches in a log line, not be
+  // cryptographically strong.
+  return `r-${Date.now().toString(36)}-${Math.floor(
+    Math.random() * 1e9,
+  ).toString(36)}`;
+}
diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts
index cabb1df446b..1b105f79960 100644
--- a/packages/host/app/services/store.ts
+++ b/packages/host/app/services/store.ts
@@ -89,6 +89,7 @@ import {
   consumingRealmHeader,
   duringPrerenderHeaders,
   jobIdHeader,
+  requestIdHeader,
 } from '../lib/prerender-fetch-headers';
 import { searchCacheKey } from '../lib/search-cache-key';
 import { searchInFlightKey } from '../lib/search-in-flight-key';
@@ -1153,6 +1154,7 @@ export default class StoreService extends Service implements StoreInterface {
           ...consumingRealmHeader(),
           ...jobIdHeader(),
           ...jobPriorityHeader(),
+          ...requestIdHeader(),
         },
         body: JSON.stringify({ ...query, realms }),
       },
diff --git a/packages/host/tests/helpers/realm-server-mock/routes.ts b/packages/host/tests/helpers/realm-server-mock/routes.ts
index 83005da512c..266e36318ed 100644
--- a/packages/host/tests/helpers/realm-server-mock/routes.ts
+++ b/packages/host/tests/helpers/realm-server-mock/routes.ts
@@ -7,9 +7,11 @@ import {
   parseSearchQueryFromPayload,
   parseSearchRequestPayload,
   SearchRequestError,
+  sanitizeRequestId,
   searchPrerenderedRealms,
   searchRealms,
   SupportedMimeType,
+  X_BOXEL_REQUEST_ID_HEADER,
   type RealmInfo,
   type Query,
 } from '@cardstack/runtime-common';
@@ -118,9 +120,17 @@ function registerSearchRoutes() {
         throw e;
       }
 
+      // Mirror the realm-server's `handle-search`: read the client's
+      // correlation id off the request and thread it into searchRealms, so
+      // the real `realm:search-timing` line is emitted (and observable by
+      // host integration tests) keyed by the id the client minted.
+      let requestId = sanitizeRequestId(
+        req.headers.get(X_BOXEL_REQUEST_ID_HEADER),
+      );
       let combined = await searchRealms(
         realmList.map((realmURL) => getSearchableRealmForURL(realmURL)),
         cardsQuery,
+        requestId ? { requestId } : undefined,
       );
 
       return new Response(JSON.stringify(combined), {
diff --git a/packages/host/tests/integration/search-correlation-id-test.gts b/packages/host/tests/integration/search-correlation-id-test.gts
new file mode 100644
index 00000000000..e215722cdc6
--- /dev/null
+++ b/packages/host/tests/integration/search-correlation-id-test.gts
@@ -0,0 +1,195 @@
+import type { RenderingTestContext } from '@ember/test-helpers';
+import { settled } from '@ember/test-helpers';
+
+import { getService } from '@universal-ember/test-support';
+import { module, test } from 'qunit';
+
+import {
+  baseRealm,
+  rri,
+  setSearchTimingSinkForTests,
+  X_BOXEL_REQUEST_ID_HEADER,
+} from '@cardstack/runtime-common';
+import type { Loader } from '@cardstack/runtime-common/loader';
+import type { Query } from '@cardstack/runtime-common/query';
+
+import type NetworkService from '@cardstack/host/services/network';
+import type StoreService from '@cardstack/host/services/store';
+
+import {
+  testRealmURL,
+  setupCardLogs,
+  setupLocalIndexing,
+  setupIntegrationTestRealm,
+} from '../helpers';
+import { setupMockMatrix } from '../helpers/mock-matrix';
+import { setupRenderingTest } from '../helpers/setup';
+
+// End-to-end coverage for the search correlation id: the in-realm browser
+// (the prerendered host SPA) mints `x-boxel-request-id` on its
+// `_federated-search` fetch, and the realm-server's search path emits a
+// `realm:search-timing` line keyed by that same id. This proves the id
+// threads all the way from the client that originated it through to the
+// server log a triage would join against.
+//
+// The host test exercises the *real* code on both ends: the SPA's
+// `requestIdHeader()` stamps the header, and the realm-server-mock hands it
+// to the real `searchRealms`, which emits the line. Only the prerender
+// context flag is simulated (the host normally sets it inside a prerender
+// tab).
+
+const personModule = `
+  import { contains, field, CardDef } from 'https://cardstack.com/base/card-api';
+  import StringField from 'https://cardstack.com/base/string';
+
+  export class Person extends CardDef {
+    static displayName = 'Person';
+    @field name = contains(StringField);
+  }
+`;
+
+let loader: Loader;
+
+module('Integration | search correlation id', function (hooks) {
+  setupRenderingTest(hooks);
+  setupLocalIndexing(hooks);
+  let mockMatrixUtils = setupMockMatrix(hooks);
+
+  hooks.beforeEach(function (this: RenderingTestContext) {
+    loader = getService('loader-service').loader;
+  });
+
+  setupCardLogs(
+    hooks,
+    async () => await loader.import(`${baseRealm.url}card-api`),
+  );
+
+  hooks.beforeEach(async function () {
+    await setupIntegrationTestRealm({
+      mockMatrixUtils,
+      contents: {
+        'person.gts': personModule,
+        'person-1.json': {
+          data: {
+            attributes: { name: 'Alice' },
+            meta: { adoptsFrom: { module: './person', name: 'Person' } },
+          },
+        },
+        'person-2.json': {
+          data: {
+            attributes: { name: 'Bob' },
+            meta: { adoptsFrom: { module: './person', name: 'Person' } },
+          },
+        },
+      },
+    });
+  });
+
+  // Restore globals + sink between tests so a failure can't leak into the
+  // next test or the rest of the suite.
+  hooks.afterEach(function () {
+    delete (globalThis as Record<string, unknown>).__boxelRenderContext;
+    setSearchTimingSinkForTests(undefined);
+  });
+
+  const personQuery: Query = {
+    filter: { type: { module: rri(`${testRealmURL}person`), name: 'Person' } },
+  };
+
+  test('a client-issued search threads its correlation id into the server timing log', async function (assert) {
+    let store = getService('store') as StoreService;
+    let network = getService('network') as NetworkService;
+
+    // Capture the realm-server's `realm:search-timing` emissions.
+    let timingLines: string[] = [];
+    setSearchTimingSinkForTests((line) => timingLines.push(line));
+
+    // Capture the correlation id the client actually puts on the wire.
+    let sentRequestIds: string[] = [];
+    let spy = async (request: Request) => {
+      if (new URL(request.url).pathname.endsWith('/_federated-search')) {
+        let id = request.headers.get(X_BOXEL_REQUEST_ID_HEADER);
+        if (id) {
+          sentRequestIds.push(id);
+        }
+      }
+      // Return null to fall through to the realm-server-mock route.
+      return null;
+    };
+    network.virtualNetwork.mount(spy, { prepend: true });
+
+    // Simulate the prerender context, which is what gates the host's
+    // correlation-id stamping (mirrors a card rendering inside a prerender
+    // tab issuing a query-backed search).
+    (globalThis as Record<string, unknown>).__boxelRenderContext = true;
+
+    let results = await store.search(personQuery, [testRealmURL]);
+    await settled();
+
+    assert.strictEqual(results.length, 2, 'the search returned both people');
+
+    assert.strictEqual(
+      sentRequestIds.length,
+      1,
+      'the client stamped exactly one correlation id on its _federated-search fetch',
+    );
+    let sentId = sentRequestIds[0];
+    assert.ok(
+      /^[A-Za-z0-9._:-]{8,}$/.test(sentId),
+      `client-minted correlation id looks well-formed (${sentId})`,
+    );
+
+    let matching = timingLines.filter((line) => line.includes(`req=${sentId}`));
+    assert.strictEqual(
+      matching.length,
+      1,
+      `the server emitted exactly one realm:search-timing line keyed by the client's id (lines: ${JSON.stringify(
+        timingLines,
+      )})`,
+    );
+    assert.ok(
+      /\bsql=\d+\b/.test(matching[0]),
+      `the timing line carries the sql stage (${matching[0]})`,
+    );
+    assert.ok(
+      /\bloadLinks=\d+\b/.test(matching[0]),
+      `the timing line carries the loadLinks stage (${matching[0]})`,
+    );
+  });
+
+  test('a non-prerender search stamps no id and emits no timing line', async function (assert) {
+    let store = getService('store') as StoreService;
+    let network = getService('network') as NetworkService;
+
+    let timingLines: string[] = [];
+    setSearchTimingSinkForTests((line) => timingLines.push(line));
+
+    let sawHeader = false;
+    let spy = async (request: Request) => {
+      if (
+        new URL(request.url).pathname.endsWith('/_federated-search') &&
+        request.headers.get(X_BOXEL_REQUEST_ID_HEADER)
+      ) {
+        sawHeader = true;
+      }
+      return null;
+    };
+    network.virtualNetwork.mount(spy, { prepend: true });
+
+    // No __boxelRenderContext: live SPA traffic must not stamp the header
+    // (so it pays nothing and the server emits no timing line).
+    let results = await store.search(personQuery, [testRealmURL]);
+    await settled();
+
+    assert.strictEqual(results.length, 2, 'the search still returns results');
+    assert.false(
+      sawHeader,
+      'live (non-prerender) traffic sends no x-boxel-request-id header',
+    );
+    assert.strictEqual(
+      timingLines.length,
+      0,
+      'no realm:search-timing line is emitted without a correlation id',
+    );
+  });
+});
diff --git a/packages/realm-server/handlers/handle-search.ts b/packages/realm-server/handlers/handle-search.ts
index 1ec88e752f2..8a5a7579c63 100644
--- a/packages/realm-server/handlers/handle-search.ts
+++ b/packages/realm-server/handlers/handle-search.ts
@@ -10,6 +10,10 @@ import {
   sanitizeConsumingRealmHeader,
   SearchRequestError,
   searchRealms,
+  sanitizeRequestId,
+  X_BOXEL_REQUEST_ID_HEADER,
+  RequestTimings,
+  emitSearchTiming,
 } from '@cardstack/runtime-common';
 import {
   fetchRequestFromContext,
@@ -36,16 +40,29 @@ export default function handleSearch(opts: {
 }): (ctxt: Koa.Context) => Promise<void> {
   let { reconciler, searchCache } = opts;
   return async function (ctxt: Koa.Context) {
+    // Hoof-to-snout server-side timing for one search: from the moment the
+    // handler is entered through to the response being assembled. Stamped
+    // only when the client supplied a correlation id (prerender traffic);
+    // live / external callers allocate nothing and emit no line. The
+    // outermost request→response bound (incl. body read + send) is the
+    // `realm:requests` middleware's `dur=`, keyed by the same id.
+    let handlerStart = Date.now();
+    let requestId = sanitizeRequestId(ctxt.get(X_BOXEL_REQUEST_ID_HEADER));
+    let timings = requestId !== null ? new RequestTimings() : undefined;
+
     let { realmList } = getMultiRealmAuthorization(ctxt);
 
     let cardsQuery;
     let request = await fetchRequestFromContext(ctxt);
     try {
       let payload = getSearchRequestPayload(ctxt);
-      cardsQuery =
+      let parseQuery = async () =>
         payload !== undefined
           ? parseSearchQueryFromPayload(payload)
           : await parseSearchQueryFromRequest(request);
+      cardsQuery = timings
+        ? await timings.time('parse', parseQuery)
+        : await parseQuery();
     } catch (e) {
       if (e instanceof SearchRequestError) {
         if (e.code === 'invalid-query') {
@@ -106,6 +123,21 @@ export default function handleSearch(opts: {
     if (prerenderJobId) searchOpts.jobIdentity = prerenderJobId;
     let normalizedSearchOpts =
       Object.keys(searchOpts).length > 0 ? searchOpts : undefined;
+    // `requestId` / `timings` are deliberately kept OUT of `searchOpts`:
+    // that object is the job-scoped search cache's key material (see
+    // `computeETag` / `getOrPopulate` below), and per-request values would
+    // make every key unique and defeat the cache. They only need to reach
+    // `searchRealms` (which stamps the SQL + loadLinks stages onto the same
+    // collector this handler emits), so they ride on the run-time opts and
+    // never touch the cache key.
+    let runSearchOpts =
+      requestId !== null
+        ? {
+            ...(normalizedSearchOpts ?? {}),
+            requestId,
+            ...(timings ? { timings } : {}),
+          }
+        : normalizedSearchOpts;
     // `consumingRealm` is read unconditionally — even when the
     // job-scoped search cache is disabled, `resolveRealmsForFederatedRequest`
     // uses it to scope CS-11259's self-mount fast-path. The cache gate
@@ -116,15 +148,34 @@ export default function handleSearch(opts: {
     // Lazy-mount inside runSearch so cache hits (304 / cached body)
     // skip the lazy-mount work entirely.
     let runSearch = async () => {
-      let realmInstances = await resolveRealmsForFederatedRequest(
-        reconciler,
-        realmList,
-        { consumingRealm },
-      );
-      return JSON.stringify(
-        await searchRealms(realmInstances, cardsQuery, normalizedSearchOpts),
-        null,
-        2,
+      let resolveRealms = () =>
+        resolveRealmsForFederatedRequest(reconciler, realmList, {
+          consumingRealm,
+        });
+      let realmInstances = timings
+        ? await timings.time('resolveRealms', resolveRealms)
+        : await resolveRealms();
+      // `searchRealms` stamps `sql` / `loadLinks` / `populate` / cache stages
+      // onto `runSearchOpts.timings`; because the handler passed a collector
+      // it won't emit its own line — this handler emits the complete one.
+      let doc = await searchRealms(realmInstances, cardsQuery, runSearchOpts);
+      let stringify = async () => JSON.stringify(doc, null, 2);
+      return timings ? await timings.time('stringify', stringify) : stringify();
+    };
+
+    // Emit the complete request→response stage breakdown. Called on every
+    // terminal path that produced a response (skipped only on the parse-
+    // error early returns above, which never reach here). No-op without a
+    // correlation id.
+    let emitTimeline = () => {
+      if (!timings || requestId === null) {
+        return;
+      }
+      emitSearchTiming(
+        `req=${requestId}` +
+          (prerenderJobId ? ` job=${prerenderJobId}` : '') +
+          ` handler=${Date.now() - handlerStart}ms ` +
+          timings.toLogFragment(),
       );
     };
 
@@ -181,6 +232,7 @@ export default function handleSearch(opts: {
         if (cached !== undefined) {
           ctxt.status = 304;
           ctxt.set('ETag', expectedEtag);
+          emitTimeline();
           return;
         }
       }
@@ -200,6 +252,7 @@ export default function handleSearch(opts: {
           },
         }),
       );
+      emitTimeline();
       return;
     }
 
@@ -210,5 +263,6 @@ export default function handleSearch(opts: {
         headers: { 'content-type': SupportedMimeType.CardJson },
       }),
     );
+    emitTimeline();
   };
 }
diff --git a/packages/realm-server/health-sampler.ts b/packages/realm-server/health-sampler.ts
new file mode 100644
index 00000000000..ca9deb1ccc5
--- /dev/null
+++ b/packages/realm-server/health-sampler.ts
@@ -0,0 +1,65 @@
+import { monitorEventLoopDelay, type IntervalHistogram } from 'node:perf_hooks';
+import { logger } from '@cardstack/runtime-common';
+import { getSearchInFlight } from './search-inflight';
+
+// Periodically samples the realm-server process's event-loop health and
+// in-flight search count, logging a `realm:health` line whenever there's a
+// saturation signal worth capturing.
+//
+// Why: during a from-scratch index, prerendered cards block in
+// `waiting-stability` on `_search` round-trips that the realm-server is slow
+// to answer — yet the SQL behind them runs in milliseconds. The missing
+// piece is whether the single-threaded realm-server's event loop is starved
+// (by the synchronous, CPU-bound post-SQL JSON serialization across many
+// concurrent searches) so requests sit unserviced. Event-loop lag rising in
+// lockstep with `inFlightSearch` is the fingerprint of exactly that.
+//
+// `monitorEventLoopDelay` measures the delay between when a timer was
+// scheduled and when it actually fired — i.e. how long synchronous work kept
+// the loop from turning. Values are nanoseconds.
+export interface HealthSamplerOptions {
+  // How often to sample + maybe log. Defaults to 5s.
+  intervalMs?: number;
+  // Only log when peak loop lag in the window exceeds this (or a search is
+  // in flight). Keeps the line quiet on an idle/healthy server. Defaults to
+  // 200ms.
+  lagThresholdMs?: number;
+}
+
+export function startHealthSampler(
+  opts: HealthSamplerOptions = {},
+): () => void {
+  let intervalMs = opts.intervalMs ?? 5000;
+  let lagThresholdMs = opts.lagThresholdMs ?? 200;
+  // Created here (not at module load) to avoid racing the circular import
+  // that installs the logger factory; startup calls this well after boot.
+  let log = logger('realm:health');
+  let histogram: IntervalHistogram = monitorEventLoopDelay({ resolution: 20 });
+  histogram.enable();
+
+  let timer = setInterval(() => {
+    let toMs = (ns: number) => (Number.isFinite(ns) ? ns / 1e6 : 0);
+    let maxLagMs = toMs(histogram.max);
+    let meanLagMs = toMs(histogram.mean);
+    let p99LagMs = toMs(histogram.percentile(99));
+    histogram.reset();
+    let inFlightSearch = getSearchInFlight();
+    // Stay silent when the loop is healthy and nothing is in flight — only
+    // the saturation windows are interesting.
+    if (maxLagMs < lagThresholdMs && inFlightSearch === 0) {
+      return;
+    }
+    let heapMB = Math.round(process.memoryUsage().heapUsed / (1024 * 1024));
+    log.info(
+      `eventLoopLagMs(mean/p99/max)=${meanLagMs.toFixed(0)}/${p99LagMs.toFixed(0)}/${maxLagMs.toFixed(0)} ` +
+        `inFlightSearch=${inFlightSearch} heapMB=${heapMB}`,
+    );
+  }, intervalMs);
+  // Don't keep the process alive solely for sampling.
+  timer.unref?.();
+
+  return () => {
+    clearInterval(timer);
+    histogram.disable();
+  };
+}
diff --git a/packages/realm-server/main.ts b/packages/realm-server/main.ts
index abab0bca538..8d8d327d634 100644
--- a/packages/realm-server/main.ts
+++ b/packages/realm-server/main.ts
@@ -43,6 +43,7 @@ import { ModuleCacheCoordinator } from './lib/module-cache-coordination';
 import { JobsFinishedListener } from './lib/jobs-finished-listener';
 import { JobScopedSearchCache } from './job-scoped-search-cache';
 import { JobScopedInstanceCache } from './job-scoped-instance-cache';
+import { startHealthSampler } from './health-sampler';
 import { resolveFullIndexOnStartup } from './lib/full-index-on-startup';
 import { PUBLISHED_DIRECTORY_NAME } from '@cardstack/runtime-common';
 
@@ -391,6 +392,11 @@ const smokeTestHostApp = async () => {
   // JobsFinishedListener below) and the age-based janitor backstop.
   let instanceCache = new JobScopedInstanceCache(dbAdapter);
   instanceCache.startJanitor();
+  // Periodic event-loop-lag + in-flight-search sampler. Emits a
+  // `realm:health` line only during saturation windows, so a stalled
+  // `_search` can be checked against whether the process's event loop was
+  // starved at the time.
+  let stopHealthSampler = startHealthSampler();
   let reconciler: RealmRegistryReconciler | undefined;
   let fileChangesListener: RealmFileChangesListener | undefined;
   let indexUpdatedListener: RealmIndexUpdatedListener | undefined;
@@ -642,6 +648,7 @@ const smokeTestHostApp = async () => {
     }
     searchCache.stopJanitor();
     instanceCache.stopJanitor();
+    stopHealthSampler();
     httpServer.close(() => {
       (async () => {
         await Promise.all([
diff --git a/packages/realm-server/middleware/index.ts b/packages/realm-server/middleware/index.ts
index 49a01db2304..f850aa803cb 100644
--- a/packages/realm-server/middleware/index.ts
+++ b/packages/realm-server/middleware/index.ts
@@ -4,6 +4,8 @@ import type { ResponseWithNodeStream } from '@cardstack/runtime-common';
 import {
   logger as getLogger,
   webStreamToText,
+  sanitizeRequestId,
+  X_BOXEL_REQUEST_ID_HEADER,
 } from '@cardstack/runtime-common';
 import type Koa from 'koa';
 import mime from 'mime-types';
@@ -18,6 +20,16 @@ import {
   PRERENDER_JOB_ID_HEADER,
   sanitizePrerenderJobId,
 } from '../prerender/prerender-constants';
+import {
+  incrementSearchInFlight,
+  decrementSearchInFlight,
+} from '../search-inflight';
+
+// Matches the realm-server's search endpoints (`/_search`,
+// `/_search-prerendered`, `/_federated-search`,
+// `/_federated-search-prerendered`) so the request middleware can track how
+// many searches are in flight for the health sampler.
+const SEARCH_PATH_PATTERN = /(^|\/)_(federated-)?search(-prerendered)?$/;
 
 const REQUEST_BODY_STATE = 'requestBody';
 
@@ -156,20 +168,58 @@ export function httpLogging(ctxt: Koa.Context, next: Koa.Next) {
   // realm-server lines for an indexing job alongside worker lines.
   let jobId = sanitizePrerenderJobId(ctxt.get(PRERENDER_JOB_ID_HEADER));
   let jobTag = jobId ? ` [job: ${jobId}]` : '';
+  // Correlation id minted by the client; echoed onto both request log
+  // lines (and into the response header) so a client-observed slow search
+  // joins to the realm-server's view of the same request. The matching
+  // `realm:search-timing` line (emitted by `searchRealms`) is keyed by the
+  // same value.
+  let requestId = sanitizeRequestId(ctxt.get(X_BOXEL_REQUEST_ID_HEADER));
+  let reqTag = requestId ? ` req=${requestId}` : '';
+  if (requestId) {
+    ctxt.set(X_BOXEL_REQUEST_ID_HEADER, requestId);
+  }
+  let startedAt = Date.now();
+
+  // Track in-flight search load for the health sampler across the request's
+  // full lifecycle (queue → parse → SQL → serialize → send), which is the
+  // window during which a saturated event loop would leave it unserviced.
+  let isSearch = SEARCH_PATH_PATTERN.test(ctxt.path);
+  let releasedInFlight = false;
+  let releaseInFlight = () => {
+    if (releasedInFlight) {
+      return;
+    }
+    releasedInFlight = true;
+    decrementSearchInFlight();
+  };
+  if (isSearch) {
+    incrementSearchInFlight();
+  }
 
   logger.info(
     `<-- ${ctxt.method} ${ctxt.req.headers.accept} ${
       fullRequestURL(ctxt).href
-    }${jobTag}`,
+    }${jobTag}${reqTag}`,
   );
 
-  ctxt.res.on('finish', () => {
+  let onSettled = () => {
+    if (isSearch) {
+      releaseInFlight();
+    }
     logger.info(
       `--> ${ctxt.method} ${ctxt.req.headers.accept} ${
         fullRequestURL(ctxt).href
-      }: ${ctxt.status}${jobTag}`,
+      }: ${ctxt.status}${jobTag}${reqTag} dur=${Date.now() - startedAt}ms`,
     );
     logger.debug(JSON.stringify(ctxt.req.headers));
+  };
+  // `finish` fires on a fully-sent response; `close` covers a connection
+  // torn down before that (so the in-flight count can't leak on aborts).
+  ctxt.res.on('finish', onSettled);
+  ctxt.res.on('close', () => {
+    if (isSearch) {
+      releaseInFlight();
+    }
   });
   return next();
 }
diff --git a/packages/realm-server/search-inflight.ts b/packages/realm-server/search-inflight.ts
new file mode 100644
index 00000000000..79549e5d277
--- /dev/null
+++ b/packages/realm-server/search-inflight.ts
@@ -0,0 +1,21 @@
+// Count of `_federated-search` requests the realm-server is currently
+// handling. Incremented at the search handler's entry and decremented when
+// it settles. Read by the health sampler so a spike in concurrent searches
+// can be correlated with event-loop lag — the signature of the realm-server
+// process being saturated while prerenders wait on in-render `_search`
+// round-trips. A plain module-level counter (the realm-server is a single
+// process) kept separate from the sampler so the search handler doesn't pull
+// in `perf_hooks`.
+let inFlight = 0;
+
+export function incrementSearchInFlight(): void {
+  inFlight++;
+}
+
+export function decrementSearchInFlight(): void {
+  inFlight = inFlight > 0 ? inFlight - 1 : 0;
+}
+
+export function getSearchInFlight(): number {
+  return inFlight;
+}
diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts
index 23b27c8ce4d..d18b15ae3d2 100644
--- a/packages/runtime-common/index.ts
+++ b/packages/runtime-common/index.ts
@@ -705,6 +705,7 @@ export * from './resource-types';
 export * from './prerender-headers';
 export * from './query';
 export * from './search-utils';
+export * from './request-timings';
 export * from './prerendered-html-format';
 export * from './query-field-utils';
 export * from './relationship-utils';
diff --git a/packages/runtime-common/prerender-headers.ts b/packages/runtime-common/prerender-headers.ts
index e1e8b7dc19d..b72eae1b409 100644
--- a/packages/runtime-common/prerender-headers.ts
+++ b/packages/runtime-common/prerender-headers.ts
@@ -93,3 +93,27 @@ export function sanitizeJobPriorityHeader(
   if (!Number.isSafeInteger(n) || n < 0 || n > JOB_PRIORITY_MAX) return null;
   return n;
 }
+
+// Per-search correlation id minted by the host SPA on each
+// `_federated-search` fetch it issues while rendering inside a prerender
+// tab. The realm-server's search path reads it back out and emits a
+// `realm:search-timing` line keyed by it, so a client-observed slow
+// search (visible in the prerender's `queryLoadsInFlight` diagnostics)
+// can be joined to where the realm-server actually spent the time. Lives
+// here so the host SPA can import the header name without depending on
+// the realm-server package.
+export const X_BOXEL_REQUEST_ID_HEADER = 'x-boxel-request-id';
+
+// Sanitize the inbound request-id header. It's echoed into log lines, so
+// admit only a bounded run of URL-safe id characters (covers a UUID and
+// then some) and reject anything with whitespace or control characters
+// that could forge a log line.
+const REQUEST_ID_PATTERN = /^[A-Za-z0-9._:-]{1,128}$/;
+export function sanitizeRequestId(
+  raw: string | null | undefined,
+): string | null {
+  if (typeof raw !== 'string') return null;
+  let trimmed = raw.trim();
+  if (!trimmed) return null;
+  return REQUEST_ID_PATTERN.test(trimmed) ? trimmed : null;
+}
diff --git a/packages/runtime-common/realm-index-query-engine.ts b/packages/runtime-common/realm-index-query-engine.ts
index 336683c9ec1..66db0ae7297 100644
--- a/packages/runtime-common/realm-index-query-engine.ts
+++ b/packages/runtime-common/realm-index-query-engine.ts
@@ -35,6 +35,7 @@ import type { Realm } from './realm';
 import type { VirtualNetwork } from './virtual-network';
 import { FILE_META_RESERVED_KEYS } from './realm';
 import { RealmPaths } from './paths';
+import type { RequestTimings } from './request-timings';
 import type {
   RealmResourceIdentifier,
   RealmIdentifier,
@@ -168,6 +169,13 @@ type Options = {
   // for live / external callers, which therefore never read or write the
   // cache.
   jobIdentity?: string;
+  // Per-request wall-clock collector, threaded from `searchRealms` when a
+  // request carries a correlation id. The post-SQL stages here — the SQL
+  // query, the `loadLinks` relationship assembly, and the per-instance
+  // cache reads/writes — stamp their elapsed time on it so the handler can
+  // attribute the request's server-side time across stages. Absent (and so
+  // a no-op) for everything except instrumented `_federated-search` calls.
+  timings?: RequestTimings;
 } & QueryOptions;
 
 type SearchResult = SearchResultDoc | SearchResultError;
@@ -376,11 +384,15 @@ export class RealmIndexQueryEngine {
         meta,
       };
     } else {
-      let { cards, meta } = await this.#indexQueryEngine.searchCards(
-        new URL(this.#realm.url),
-        query,
-        opts,
-      );
+      let runCardSql = () =>
+        this.#indexQueryEngine.searchCards(
+          new URL(this.#realm.url),
+          query,
+          opts,
+        );
+      let { cards, meta } = opts?.timings
+        ? await opts.timings.time('sql', runCardSql)
+        : await runCardSql();
       let cardResources = cards.map((resource) => ({
         ...resource,
         ...{ links: { self: resource.id } },
@@ -408,14 +420,18 @@ export class RealmIndexQueryEngine {
       // Process all root resources together so a single batched DB query
       // resolves their first-level links (1+1 instead of N+M sequential
       // round-trips). See CS-11038.
-      let included = await this.loadLinks(
-        {
-          realmURL: this.realmURL,
-          rootResources: doc.data,
-          omit,
-        },
-        linkOpts,
-      );
+      let runLoadLinks = () =>
+        this.loadLinks(
+          {
+            realmURL: this.realmURL,
+            rootResources: doc.data,
+            omit,
+          },
+          linkOpts,
+        );
+      let included = opts?.timings
+        ? await opts.timings.time('loadLinks', runLoadLinks)
+        : await runLoadLinks();
       if (included.length > 0) {
         doc.included = included;
       }
@@ -1373,14 +1389,18 @@ export class RealmIndexQueryEngine {
               : opts?.linkFields
                 ? { ...opts, linkFields: undefined }
                 : opts;
+            let timings = popOpts?.timings;
             let cacheKey = this.#instanceCacheKey(resource, popOpts);
             if (cacheKey) {
-              let cached = await this.#readCachedRelationships(
-                cacheKey.jobId,
-                cacheKey.url,
-              );
+              let ck = cacheKey;
+              let cached = timings
+                ? await timings.time('cacheRead', () =>
+                    this.#readCachedRelationships(ck.jobId, ck.url),
+                  )
+                : await this.#readCachedRelationships(ck.jobId, ck.url);
               if (cached !== undefined) {
                 recordInstanceCacheEvent(cacheKey.jobId, true);
+                timings?.incr('cacheHit');
                 // Hit: reuse this instance's assembled query-field
                 // relationships from an earlier occurrence in the same job,
                 // skipping the definition lookup + field-tree walk. The
@@ -1396,28 +1416,43 @@ export class RealmIndexQueryEngine {
                 return;
               }
               recordInstanceCacheEvent(cacheKey.jobId, false);
+              timings?.incr('cacheMiss');
             }
             let storedDefs = (
               resource.meta as {
                 queryFieldDefs?: Record<string, QueryFieldMeta>;
               }
             )?.queryFieldDefs;
-            if (popOpts?.cacheOnlyDefinitions && storedDefs) {
-              await this.populateQueryFieldsFromMeta(
-                resource,
-                realmURL,
-                storedDefs,
-                popOpts,
-              );
+            // The relationship/query-field assembly — the definition lookup +
+            // field-tree walk — is the post-SQL "wire-format prep" the timeline
+            // attributes under `populate`.
+            let runPopulate = () =>
+              popOpts?.cacheOnlyDefinitions && storedDefs
+                ? this.populateQueryFieldsFromMeta(
+                    resource,
+                    realmURL,
+                    storedDefs,
+                    popOpts,
+                  )
+                : this.populateQueryFields(resource, realmURL, popOpts);
+            if (timings) {
+              await timings.time('populate', runPopulate);
             } else {
-              await this.populateQueryFields(resource, realmURL, popOpts);
+              await runPopulate();
             }
             if (cacheKey) {
-              await this.#writeCachedRelationships(
-                cacheKey.jobId,
-                cacheKey.url,
-                (resource as { relationships?: unknown }).relationships,
-              );
+              let ck = cacheKey;
+              let writeCache = () =>
+                this.#writeCachedRelationships(
+                  ck.jobId,
+                  ck.url,
+                  (resource as { relationships?: unknown }).relationships,
+                );
+              if (timings) {
+                await timings.time('cacheWrite', writeCache);
+              } else {
+                await writeCache();
+              }
             }
           }),
         );
diff --git a/packages/runtime-common/realm.ts b/packages/runtime-common/realm.ts
index c1c5331e1b5..4ed0168c18b 100644
--- a/packages/runtime-common/realm.ts
+++ b/packages/runtime-common/realm.ts
@@ -5175,6 +5175,7 @@ export class Realm {
       // `!== undefined` so an explicit priority 0 (system-initiated) survives.
       ...(opts?.priority !== undefined ? { priority: opts.priority } : {}),
       ...(opts?.jobIdentity ? { jobIdentity: opts.jobIdentity } : {}),
+      ...(opts?.timings ? { timings: opts.timings } : {}),
     });
   }
 
diff --git a/packages/runtime-common/request-timings.ts b/packages/runtime-common/request-timings.ts
new file mode 100644
index 00000000000..431d9a4e80d
--- /dev/null
+++ b/packages/runtime-common/request-timings.ts
@@ -0,0 +1,64 @@
+// A request-scoped wall-clock collector for the realm-server search path.
+//
+// One instance is created per instrumented `_search` request in
+// `handle-search` and threaded into search opts. Each stage of the
+// server-side pipeline that runs *after* the request is received —
+// the SQL query, the post-SQL `loadLinks` relationship assembly (the
+// per-instance cache reads/writes), and the JSON wire-format
+// serialization — stamps its elapsed time here, so the handler can emit
+// a single line attributing the request's wall-clock across stages.
+//
+// It is deliberately plain (no Node-only APIs) so it can live in
+// runtime-common alongside the search pipeline it instruments. In
+// practice it is only ever instantiated by the realm-server: the host's
+// store fetches search results over HTTP and never runs
+// `RealmIndexQueryEngine`, so in the browser the threaded collector is
+// always `undefined` and every `opts?.timings?.…` call is a no-op.
+export class RequestTimings {
+  #stages = new Map<string, number>();
+  #counters = new Map<string, number>();
+
+  // Time an async stage and accumulate its elapsed ms under `stage`.
+  // Repeated stages (e.g. one `loadLinks` per realm in a federated
+  // search) sum, so the line reports total time in each stage.
+  async time<T>(stage: string, fn: () => Promise<T>): Promise<T> {
+    let start = Date.now();
+    try {
+      return await fn();
+    } finally {
+      this.add(stage, Date.now() - start);
+    }
+  }
+
+  add(stage: string, ms: number): void {
+    this.#stages.set(stage, (this.#stages.get(stage) ?? 0) + ms);
+  }
+
+  // Integer tallies that aren't durations — result counts, per-instance
+  // cache hit/miss counts.
+  incr(counter: string, n = 1): void {
+    this.#counters.set(counter, (this.#counters.get(counter) ?? 0) + n);
+  }
+
+  stages(): Record<string, number> {
+    let out: Record<string, number> = {};
+    for (let [k, v] of this.#stages) {
+      out[k] = Math.round(v);
+    }
+    return out;
+  }
+
+  counters(): Record<string, number> {
+    return Object.fromEntries(this.#counters);
+  }
+
+  // Compact fragment for a single log line, e.g.
+  //   `sql=41 loadLinks=120 stringify=210 | results=312 cacheHit=18 cacheMiss=3`
+  toLogFragment(): string {
+    let stages = [...this.#stages]
+      .map(([k, v]) => `${k}=${Math.round(v)}`)
+      .join(' ');
+    let counters = [...this.#counters].map(([k, v]) => `${k}=${v}`).join(' ');
+    return counters ? `${stages} | ${counters}` : stages;
+  }
+}
diff --git a/packages/runtime-common/search-utils.ts b/packages/runtime-common/search-utils.ts
index b7e17b6d764..30181055c94 100644
--- a/packages/runtime-common/search-utils.ts
+++ b/packages/runtime-common/search-utils.ts
@@ -1,6 +1,8 @@
 import type { RealmResourceIdentifier } from './card-reference-resolver';
+import { logger } from './log';
 import { ensureTrailingSlash } from './paths';
 import { assertQuery, InvalidQueryError, type Query } from './query';
+import { RequestTimings } from './request-timings';
 import {
   isValidPrerenderedHtmlFormat,
   PRERENDERED_HTML_FORMATS,
@@ -326,6 +328,18 @@ export type SearchOpts = {
   omitIncluded?: boolean;
   priority?: number;
   jobIdentity?: string;
+  // Correlation id minted by the client (the prerendered host stamps
+  // `x-boxel-request-id` on its `_federated-search` fetch) and read back
+  // out by the request handler into opts. When present, `searchRealms`
+  // instruments the server-side search pipeline and emits one
+  // `realm:search-timing` line keyed by this id, so a client-observed
+  // slow search can be joined to where the realm-server spent the time.
+  requestId?: string;
+  // Per-request wall-clock collector. `searchRealms` creates it when a
+  // `requestId` is present and threads it down through `Realm.search` →
+  // `searchCards` → `loadLinks` so each post-SQL stage stamps its
+  // elapsed time. Callers never supply this directly.
+  timings?: RequestTimings;
 };
 
 type SearchableRealm = {
@@ -336,11 +350,47 @@ type SearchableRealm = {
   url?: string;
 };
 
+// Indirection so a host integration test can deterministically capture
+// the emitted timing line: loglevel rebinds a logger's methods on every
+// `setLevel`, so a test that monkeypatches a direct logger handle would
+// race the next `logger('realm:search-timing')` call. A settable sink
+// sidesteps that. Defaults to the `realm:search-timing` logger.
+let searchTimingSink: ((line: string) => void) | undefined;
+let searchTimingLog: ReturnType<typeof logger> | undefined;
+export function setSearchTimingSinkForTests(
+  sink: ((line: string) => void) | undefined,
+): void {
+  searchTimingSink = sink;
+}
+export function emitSearchTiming(line: string): void {
+  if (searchTimingSink) {
+    searchTimingSink(line);
+    return;
+  }
+  // Lazy: a module-load `logger()` call races the circular import that
+  // installs the logger factory. First emission happens well after boot.
+  (searchTimingLog ??= logger('realm:search-timing')).info(line);
+}
+
 export async function searchRealms(
   realms: Array<SearchableRealm | null | undefined>,
   query: Query,
   opts?: SearchOpts,
 ): Promise<LinkableCollectionDocument> {
+  // Instrument only when the caller threaded a correlation id. The
+  // prerendered host stamps one; live SPA / API traffic does not — so
+  // normal traffic allocates no collector and emits no line.
+  //
+  // Two callers: the realm-server's `handle-search` threads a collector it
+  // owns (so it can emit one complete request→response line itself —
+  // `opts.timings` is set, `ownsTimings` is false, we don't emit), and the
+  // host-test realm-server mock calls us with just a `requestId` (we create
+  // the collector and emit the line ourselves, which the host test observes).
+  let ownsTimings = Boolean(opts?.requestId) && !opts?.timings;
+  let timings =
+    opts?.timings ?? (ownsTimings ? new RequestTimings() : undefined);
+  let perRealmOpts = ownsTimings && opts ? { ...opts, timings } : opts;
+  let startedAt = ownsTimings ? Date.now() : 0;
   let realmEntries = realms
     .filter((realm): realm is SearchableRealm => Boolean(realm))
     .map((realm) => ({
@@ -348,7 +398,7 @@ export async function searchRealms(
       label: realm.url ? String(realm.url) : undefined,
     }));
   let searchPromises = realmEntries.map(({ realm }) =>
-    Promise.resolve().then(() => realm.search(query, opts)),
+    Promise.resolve().then(() => realm.search(query, perRealmOpts)),
   );
   let results = await Promise.allSettled(searchPromises);
   let queryLabel = '[unserializable query]';
@@ -369,7 +419,20 @@ export async function searchRealms(
   let docs = results.flatMap((result) =>
     result.status === 'fulfilled' ? [result.value] : [],
   );
-  return combineSearchResults(docs);
+  let combined = combineSearchResults(docs);
+  if (timings) {
+    timings.incr('results', combined.data?.length ?? 0);
+  }
+  if (ownsTimings && timings) {
+    emitSearchTiming(
+      `req=${opts!.requestId}` +
+        (opts!.jobIdentity ? ` job=${opts!.jobIdentity}` : '') +
+        ` realms=${realmEntries.length}` +
+        ` total=${Date.now() - startedAt}ms ` +
+        timings.toLogFragment(),
+    );
+  }
+  return combined;
 }
 
 type PrerenderedSearchableRealm = {

From 706ed8b2bde5d7e7330f84c4d05c5b7f80f60e74 Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Wed, 3 Jun 2026 16:38:36 -0400
Subject: [PATCH 2/2] Whitelist the correlation header for CORS, rename it, and
 fix coalesced-search timing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- CORS: add the correlation header to the realm-server's `Access-Control-Allow-Headers`
  allowlist. Without it the cross-origin preflight blocks the prerender's
  `_federated-search` once the host stamps the header.
- Rename `x-boxel-request-id` → `x-boxel-logging-correlation-id` (and the
  internal field/vars to `loggingCorrelationId`, the log token to `corr=`):
  the old name read too much like the unrelated `x-boxel-client-request-id`,
  which is realm-event/write correlation, a different concern.
- searchCards in-flight coalescing: exclude the per-request `timings` collector
  from `searchInFlightKey` so it can't perturb the dedup key, and when a follower
  awaits an already-running identical search, record that wait as `coalescedWait`
  on the follower's own collector — otherwise its timing line would show no
  sql/loadLinks and look misleadingly instant under concurrent search load.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../host/app/lib/prerender-fetch-headers.ts   |  8 +++----
 packages/host/app/services/store.ts           |  4 ++--
 .../tests/helpers/realm-server-mock/routes.ts | 10 ++++-----
 .../search-correlation-id-test.gts            | 16 +++++++-------
 .../realm-server/handlers/handle-search.ts    | 21 +++++++++++--------
 packages/realm-server/middleware/index.ts     | 18 +++++++++-------
 packages/realm-server/server.ts               |  2 +-
 packages/runtime-common/prerender-headers.ts  |  5 +++--
 .../realm-index-query-engine.ts               | 21 +++++++++++++++++--
 packages/runtime-common/search-utils.ts       | 12 +++++------
 10 files changed, 71 insertions(+), 46 deletions(-)

diff --git a/packages/host/app/lib/prerender-fetch-headers.ts b/packages/host/app/lib/prerender-fetch-headers.ts
index a4b810acc31..f6e7e9061a3 100644
--- a/packages/host/app/lib/prerender-fetch-headers.ts
+++ b/packages/host/app/lib/prerender-fetch-headers.ts
@@ -2,7 +2,7 @@ import {
   DURING_PRERENDER_HEADER,
   X_BOXEL_CONSUMING_REALM_HEADER,
   X_BOXEL_JOB_ID_HEADER,
-  X_BOXEL_REQUEST_ID_HEADER,
+  X_BOXEL_LOGGING_CORRELATION_ID_HEADER,
 } from '@cardstack/runtime-common';
 
 // Set by the prerender server's `evaluateOnNewDocument` before the
@@ -50,19 +50,19 @@ export function jobIdHeader(): Record<string, string> {
 
 // Per-search correlation id. Minted fresh for each `_federated-search`
 // fetch the SPA issues while rendering inside a prerender tab, and stamped
-// as `x-boxel-request-id`. The realm-server reads it back out and keys its
+// as `x-boxel-logging-correlation-id`. The realm-server reads it back out and keys its
 // `realm:search-timing` line on it, so a search the prerender observes as
 // slow (surfaced in its `queryLoadsInFlight` diagnostics) can be joined to
 // the realm-server's stage-by-stage view of the same request. Gated on the
 // prerender context — exactly like the job-id / consuming-realm headers —
 // so live SPA traffic is unaffected and emits no server-side timing line.
-export function requestIdHeader(): Record<string, string> {
+export function loggingCorrelationIdHeader(): Record<string, string> {
   let flag = (globalThis as unknown as { __boxelRenderContext?: boolean })
     .__boxelRenderContext;
   if (flag !== true) {
     return {};
   }
-  return { [X_BOXEL_REQUEST_ID_HEADER]: newCorrelationId() };
+  return { [X_BOXEL_LOGGING_CORRELATION_ID_HEADER]: newCorrelationId() };
 }
 
 function newCorrelationId(): string {
diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts
index 1b105f79960..5aa0fbad1e3 100644
--- a/packages/host/app/services/store.ts
+++ b/packages/host/app/services/store.ts
@@ -89,7 +89,7 @@ import {
   consumingRealmHeader,
   duringPrerenderHeaders,
   jobIdHeader,
-  requestIdHeader,
+  loggingCorrelationIdHeader,
 } from '../lib/prerender-fetch-headers';
 import { searchCacheKey } from '../lib/search-cache-key';
 import { searchInFlightKey } from '../lib/search-in-flight-key';
@@ -1154,7 +1154,7 @@ export default class StoreService extends Service implements StoreInterface {
           ...consumingRealmHeader(),
           ...jobIdHeader(),
           ...jobPriorityHeader(),
-          ...requestIdHeader(),
+          ...loggingCorrelationIdHeader(),
         },
         body: JSON.stringify({ ...query, realms }),
       },
diff --git a/packages/host/tests/helpers/realm-server-mock/routes.ts b/packages/host/tests/helpers/realm-server-mock/routes.ts
index 266e36318ed..0d6445c60d4 100644
--- a/packages/host/tests/helpers/realm-server-mock/routes.ts
+++ b/packages/host/tests/helpers/realm-server-mock/routes.ts
@@ -7,11 +7,11 @@ import {
   parseSearchQueryFromPayload,
   parseSearchRequestPayload,
   SearchRequestError,
-  sanitizeRequestId,
+  sanitizeLoggingCorrelationId,
   searchPrerenderedRealms,
   searchRealms,
   SupportedMimeType,
-  X_BOXEL_REQUEST_ID_HEADER,
+  X_BOXEL_LOGGING_CORRELATION_ID_HEADER,
   type RealmInfo,
   type Query,
 } from '@cardstack/runtime-common';
@@ -124,13 +124,13 @@ function registerSearchRoutes() {
       // correlation id off the request and thread it into searchRealms, so
       // the real `realm:search-timing` line is emitted (and observable by
       // host integration tests) keyed by the id the client minted.
-      let requestId = sanitizeRequestId(
-        req.headers.get(X_BOXEL_REQUEST_ID_HEADER),
+      let loggingCorrelationId = sanitizeLoggingCorrelationId(
+        req.headers.get(X_BOXEL_LOGGING_CORRELATION_ID_HEADER),
       );
       let combined = await searchRealms(
         realmList.map((realmURL) => getSearchableRealmForURL(realmURL)),
         cardsQuery,
-        requestId ? { requestId } : undefined,
+        loggingCorrelationId ? { loggingCorrelationId } : undefined,
       );
 
       return new Response(JSON.stringify(combined), {
diff --git a/packages/host/tests/integration/search-correlation-id-test.gts b/packages/host/tests/integration/search-correlation-id-test.gts
index e215722cdc6..ec378783ae6 100644
--- a/packages/host/tests/integration/search-correlation-id-test.gts
+++ b/packages/host/tests/integration/search-correlation-id-test.gts
@@ -8,7 +8,7 @@ import {
   baseRealm,
   rri,
   setSearchTimingSinkForTests,
-  X_BOXEL_REQUEST_ID_HEADER,
+  X_BOXEL_LOGGING_CORRELATION_ID_HEADER,
 } from '@cardstack/runtime-common';
 import type { Loader } from '@cardstack/runtime-common/loader';
 import type { Query } from '@cardstack/runtime-common/query';
@@ -26,14 +26,14 @@ import { setupMockMatrix } from '../helpers/mock-matrix';
 import { setupRenderingTest } from '../helpers/setup';
 
 // End-to-end coverage for the search correlation id: the in-realm browser
-// (the prerendered host SPA) mints `x-boxel-request-id` on its
+// (the prerendered host SPA) mints `x-boxel-logging-correlation-id` on its
 // `_federated-search` fetch, and the realm-server's search path emits a
 // `realm:search-timing` line keyed by that same id. This proves the id
 // threads all the way from the client that originated it through to the
 // server log a triage would join against.
 //
 // The host test exercises the *real* code on both ends: the SPA's
-// `requestIdHeader()` stamps the header, and the realm-server-mock hands it
+// `loggingCorrelationIdHeader()` stamps the header, and the realm-server-mock hands it
 // to the real `searchRealms`, which emits the line. Only the prerender
 // context flag is simulated (the host normally sets it inside a prerender
 // tab).
@@ -108,7 +108,7 @@ module('Integration | search correlation id', function (hooks) {
     let sentRequestIds: string[] = [];
     let spy = async (request: Request) => {
       if (new URL(request.url).pathname.endsWith('/_federated-search')) {
-        let id = request.headers.get(X_BOXEL_REQUEST_ID_HEADER);
+        let id = request.headers.get(X_BOXEL_LOGGING_CORRELATION_ID_HEADER);
         if (id) {
           sentRequestIds.push(id);
         }
@@ -139,7 +139,9 @@ module('Integration | search correlation id', function (hooks) {
       `client-minted correlation id looks well-formed (${sentId})`,
     );
 
-    let matching = timingLines.filter((line) => line.includes(`req=${sentId}`));
+    let matching = timingLines.filter((line) =>
+      line.includes(`corr=${sentId}`),
+    );
     assert.strictEqual(
       matching.length,
       1,
@@ -168,7 +170,7 @@ module('Integration | search correlation id', function (hooks) {
     let spy = async (request: Request) => {
       if (
         new URL(request.url).pathname.endsWith('/_federated-search') &&
-        request.headers.get(X_BOXEL_REQUEST_ID_HEADER)
+        request.headers.get(X_BOXEL_LOGGING_CORRELATION_ID_HEADER)
       ) {
         sawHeader = true;
       }
@@ -184,7 +186,7 @@ module('Integration | search correlation id', function (hooks) {
     assert.strictEqual(results.length, 2, 'the search still returns results');
     assert.false(
       sawHeader,
-      'live (non-prerender) traffic sends no x-boxel-request-id header',
+      'live (non-prerender) traffic sends no x-boxel-logging-correlation-id header',
     );
     assert.strictEqual(
       timingLines.length,
diff --git a/packages/realm-server/handlers/handle-search.ts b/packages/realm-server/handlers/handle-search.ts
index 8a5a7579c63..300f28b6ed6 100644
--- a/packages/realm-server/handlers/handle-search.ts
+++ b/packages/realm-server/handlers/handle-search.ts
@@ -10,8 +10,8 @@ import {
   sanitizeConsumingRealmHeader,
   SearchRequestError,
   searchRealms,
-  sanitizeRequestId,
-  X_BOXEL_REQUEST_ID_HEADER,
+  sanitizeLoggingCorrelationId,
+  X_BOXEL_LOGGING_CORRELATION_ID_HEADER,
   RequestTimings,
   emitSearchTiming,
 } from '@cardstack/runtime-common';
@@ -47,8 +47,11 @@ export default function handleSearch(opts: {
     // outermost request→response bound (incl. body read + send) is the
     // `realm:requests` middleware's `dur=`, keyed by the same id.
     let handlerStart = Date.now();
-    let requestId = sanitizeRequestId(ctxt.get(X_BOXEL_REQUEST_ID_HEADER));
-    let timings = requestId !== null ? new RequestTimings() : undefined;
+    let loggingCorrelationId = sanitizeLoggingCorrelationId(
+      ctxt.get(X_BOXEL_LOGGING_CORRELATION_ID_HEADER),
+    );
+    let timings =
+      loggingCorrelationId !== null ? new RequestTimings() : undefined;
 
     let { realmList } = getMultiRealmAuthorization(ctxt);
 
@@ -123,7 +126,7 @@ export default function handleSearch(opts: {
     if (prerenderJobId) searchOpts.jobIdentity = prerenderJobId;
     let normalizedSearchOpts =
       Object.keys(searchOpts).length > 0 ? searchOpts : undefined;
-    // `requestId` / `timings` are deliberately kept OUT of `searchOpts`:
+    // `loggingCorrelationId` / `timings` are deliberately kept OUT of `searchOpts`:
     // that object is the job-scoped search cache's key material (see
     // `computeETag` / `getOrPopulate` below), and per-request values would
     // make every key unique and defeat the cache. They only need to reach
@@ -131,10 +134,10 @@ export default function handleSearch(opts: {
     // collector this handler emits), so they ride on the run-time opts and
     // never touch the cache key.
     let runSearchOpts =
-      requestId !== null
+      loggingCorrelationId !== null
         ? {
             ...(normalizedSearchOpts ?? {}),
-            requestId,
+            loggingCorrelationId,
             ...(timings ? { timings } : {}),
           }
         : normalizedSearchOpts;
@@ -168,11 +171,11 @@ export default function handleSearch(opts: {
     // error early returns above, which never reach here). No-op without a
     // correlation id.
     let emitTimeline = () => {
-      if (!timings || requestId === null) {
+      if (!timings || loggingCorrelationId === null) {
         return;
       }
       emitSearchTiming(
-        `req=${requestId}` +
+        `corr=${loggingCorrelationId}` +
           (prerenderJobId ? ` job=${prerenderJobId}` : '') +
           ` handler=${Date.now() - handlerStart}ms ` +
           timings.toLogFragment(),
diff --git a/packages/realm-server/middleware/index.ts b/packages/realm-server/middleware/index.ts
index f850aa803cb..afa72c36eaa 100644
--- a/packages/realm-server/middleware/index.ts
+++ b/packages/realm-server/middleware/index.ts
@@ -4,8 +4,8 @@ import type { ResponseWithNodeStream } from '@cardstack/runtime-common';
 import {
   logger as getLogger,
   webStreamToText,
-  sanitizeRequestId,
-  X_BOXEL_REQUEST_ID_HEADER,
+  sanitizeLoggingCorrelationId,
+  X_BOXEL_LOGGING_CORRELATION_ID_HEADER,
 } from '@cardstack/runtime-common';
 import type Koa from 'koa';
 import mime from 'mime-types';
@@ -173,10 +173,12 @@ export function httpLogging(ctxt: Koa.Context, next: Koa.Next) {
   // joins to the realm-server's view of the same request. The matching
   // `realm:search-timing` line (emitted by `searchRealms`) is keyed by the
   // same value.
-  let requestId = sanitizeRequestId(ctxt.get(X_BOXEL_REQUEST_ID_HEADER));
-  let reqTag = requestId ? ` req=${requestId}` : '';
-  if (requestId) {
-    ctxt.set(X_BOXEL_REQUEST_ID_HEADER, requestId);
+  let loggingCorrelationId = sanitizeLoggingCorrelationId(
+    ctxt.get(X_BOXEL_LOGGING_CORRELATION_ID_HEADER),
+  );
+  let corrTag = loggingCorrelationId ? ` corr=${loggingCorrelationId}` : '';
+  if (loggingCorrelationId) {
+    ctxt.set(X_BOXEL_LOGGING_CORRELATION_ID_HEADER, loggingCorrelationId);
   }
   let startedAt = Date.now();
 
@@ -199,7 +201,7 @@ export function httpLogging(ctxt: Koa.Context, next: Koa.Next) {
   logger.info(
     `<-- ${ctxt.method} ${ctxt.req.headers.accept} ${
       fullRequestURL(ctxt).href
-    }${jobTag}${reqTag}`,
+    }${jobTag}${corrTag}`,
   );
 
   let onSettled = () => {
@@ -209,7 +211,7 @@ export function httpLogging(ctxt: Koa.Context, next: Koa.Next) {
     logger.info(
       `--> ${ctxt.method} ${ctxt.req.headers.accept} ${
         fullRequestURL(ctxt).href
-      }: ${ctxt.status}${jobTag}${reqTag} dur=${Date.now() - startedAt}ms`,
+      }: ${ctxt.status}${jobTag}${corrTag} dur=${Date.now() - startedAt}ms`,
     );
     logger.debug(JSON.stringify(ctxt.req.headers));
   };
diff --git a/packages/realm-server/server.ts b/packages/realm-server/server.ts
index 43f60196ae3..c9fc2aace00 100644
--- a/packages/realm-server/server.ts
+++ b/packages/realm-server/server.ts
@@ -728,7 +728,7 @@ export class RealmServer {
         cors({
           origin: '*',
           allowHeaders:
-            'Authorization, Content-Type, If-Match, If-None-Match, X-Requested-With, X-Boxel-Client-Request-Id, X-Boxel-Assume-User, X-HTTP-Method-Override, X-Boxel-Disable-Module-Cache, X-Filename, X-Boxel-During-Prerender, X-Boxel-Consuming-Realm, X-Boxel-Job-Id, X-Boxel-Job-Priority, X-Grafana-Device-Id, X-Grafana-Action',
+            'Authorization, Content-Type, If-Match, If-None-Match, X-Requested-With, X-Boxel-Client-Request-Id, X-Boxel-Assume-User, X-HTTP-Method-Override, X-Boxel-Disable-Module-Cache, X-Filename, X-Boxel-During-Prerender, X-Boxel-Consuming-Realm, X-Boxel-Job-Id, X-Boxel-Job-Priority, X-Boxel-Logging-Correlation-Id, X-Grafana-Device-Id, X-Grafana-Action',
           // Without an explicit expose list, @koa/cors only emits the
           // CORS-safelisted response headers (cache-control, content-*,
           // expires, last-modified, pragma). ETag is not on that list,
diff --git a/packages/runtime-common/prerender-headers.ts b/packages/runtime-common/prerender-headers.ts
index b72eae1b409..dacedb8427f 100644
--- a/packages/runtime-common/prerender-headers.ts
+++ b/packages/runtime-common/prerender-headers.ts
@@ -102,14 +102,15 @@ export function sanitizeJobPriorityHeader(
 // can be joined to where the realm-server actually spent the time. Lives
 // here so the host SPA can import the header name without depending on
 // the realm-server package.
-export const X_BOXEL_REQUEST_ID_HEADER = 'x-boxel-request-id';
+export const X_BOXEL_LOGGING_CORRELATION_ID_HEADER =
+  'x-boxel-logging-correlation-id';
 
 // Sanitize the inbound request-id header. It's echoed into log lines, so
 // admit only a bounded run of URL-safe id characters (covers a UUID and
 // then some) and reject anything with whitespace or control characters
 // that could forge a log line.
 const REQUEST_ID_PATTERN = /^[A-Za-z0-9._:-]{1,128}$/;
-export function sanitizeRequestId(
+export function sanitizeLoggingCorrelationId(
   raw: string | null | undefined,
 ): string | null {
   if (typeof raw !== 'string') return null;
diff --git a/packages/runtime-common/realm-index-query-engine.ts b/packages/runtime-common/realm-index-query-engine.ts
index 66db0ae7297..3354c924e16 100644
--- a/packages/runtime-common/realm-index-query-engine.ts
+++ b/packages/runtime-common/realm-index-query-engine.ts
@@ -227,10 +227,19 @@ export function searchInFlightKey(
   // values inside query/opts — e.g. a `matches: 'a|b'` string — can never
   // collide with the delimiter and cause unrelated searches to coalesce.
   try {
+    // `timings` is a per-request diagnostic collector, not part of the
+    // result-shaping opts. Exclude it from the key so it can't perturb the
+    // in-flight coalescing — two otherwise-identical searches must still
+    // dedupe even though each carries its own collector.
+    let keyOpts = opts;
+    if (opts && 'timings' in opts) {
+      let { timings: _omitTimings, ...rest } = opts;
+      keyOpts = rest;
+    }
     return JSON.stringify([
       realmURL,
       normalizeQueryForSignature(query),
-      opts ? sortKeysDeep(opts) : null,
+      keyOpts ? sortKeysDeep(keyOpts) : null,
     ]);
   } catch {
     return undefined;
@@ -341,7 +350,15 @@ export class RealmIndexQueryEngine {
     if (key !== undefined) {
       let existing = this.#inFlightSearch.get(key);
       if (existing) {
-        return await existing;
+        // A concurrent identical search is already running; this follower
+        // awaits its result instead of re-running the work. Record that wait
+        // as `coalescedWait` on the follower's own collector so its
+        // `realm:search-timing` line reflects the time spent — otherwise the
+        // follower would show no `sql`/`loadLinks` and look misleadingly
+        // instant exactly under the concurrent search load we're diagnosing.
+        return opts?.timings
+          ? await opts.timings.time('coalescedWait', () => existing)
+          : await existing;
       }
       let pending = this.searchCardsUncoalesced(query, opts).finally(() => {
         // Identity-check before deletion: a concurrent invalidation path
diff --git a/packages/runtime-common/search-utils.ts b/packages/runtime-common/search-utils.ts
index 30181055c94..515d15f3a02 100644
--- a/packages/runtime-common/search-utils.ts
+++ b/packages/runtime-common/search-utils.ts
@@ -329,14 +329,14 @@ export type SearchOpts = {
   priority?: number;
   jobIdentity?: string;
   // Correlation id minted by the client (the prerendered host stamps
-  // `x-boxel-request-id` on its `_federated-search` fetch) and read back
+  // `x-boxel-logging-correlation-id` on its `_federated-search` fetch) and read back
   // out by the request handler into opts. When present, `searchRealms`
   // instruments the server-side search pipeline and emits one
   // `realm:search-timing` line keyed by this id, so a client-observed
   // slow search can be joined to where the realm-server spent the time.
-  requestId?: string;
+  loggingCorrelationId?: string;
   // Per-request wall-clock collector. `searchRealms` creates it when a
-  // `requestId` is present and threads it down through `Realm.search` →
+  // `loggingCorrelationId` is present and threads it down through `Realm.search` →
   // `searchCards` → `loadLinks` so each post-SQL stage stamps its
   // elapsed time. Callers never supply this directly.
   timings?: RequestTimings;
@@ -384,9 +384,9 @@ export async function searchRealms(
   // Two callers: the realm-server's `handle-search` threads a collector it
   // owns (so it can emit one complete request→response line itself —
   // `opts.timings` is set, `ownsTimings` is false, we don't emit), and the
-  // host-test realm-server mock calls us with just a `requestId` (we create
+  // host-test realm-server mock calls us with just a `loggingCorrelationId` (we create
   // the collector and emit the line ourselves, which the host test observes).
-  let ownsTimings = Boolean(opts?.requestId) && !opts?.timings;
+  let ownsTimings = Boolean(opts?.loggingCorrelationId) && !opts?.timings;
   let timings =
     opts?.timings ?? (ownsTimings ? new RequestTimings() : undefined);
   let perRealmOpts = ownsTimings && opts ? { ...opts, timings } : opts;
@@ -425,7 +425,7 @@ export async function searchRealms(
   }
   if (ownsTimings && timings) {
     emitSearchTiming(
-      `req=${opts!.requestId}` +
+      `corr=${opts!.loggingCorrelationId}` +
         (opts!.jobIdentity ? ` job=${opts!.jobIdentity}` : '') +
         ` realms=${realmEntries.length}` +
         ` total=${Date.now() - startedAt}ms ` +