AztecProtocol · mverzilli · May 4, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/.test_patterns.yml b/.test_patterns.yml
@@ -146,14 +146,19 @@ tests:
       - *palla
 
   # yarn-project tests
+  # Attempt to catch all kv-store browser test failures (consider them quarantined for now)
+  - regex: "yarn-project/kv-store"
+    error_regex: "vitest"
+    owners:
+      - *martin
   - regex: "yarn-project/kv-store"
     error_regex: "Could not import your test module"
     owners:
       - *grego
   - regex: "yarn-project/kv-store"
     error_regex: "timeout: sending signal TERM to command"
     owners:
-      - *alex
+      - *martin
   - regex: "yarn-project/kv-store"
     error_regex: "Failed to fetch dynamically imported module"
     owners:

diff --git a/yarn-project/bootstrap.sh b/yarn-project/bootstrap.sh
@@ -170,7 +170,7 @@ function test_cmds {
 
   # Exclusions:
   # end-to-end: e2e tests handled separately with end-to-end/bootstrap.sh.
-  # kv-store: Uses mocha so will need different treatment.
+  # kv-store: per-file fan-out handled by kv-store/bootstrap.sh test_cmds.
   for test in !(end-to-end|kv-store|aztec)/src/**/*.test.ts; do
     # Skip benchmarks here.
     [[ "$test" =~ \.bench\.test\.ts$ ]] && continue
@@ -218,8 +218,8 @@ function test_cmds {
     echo "${prefix}${cmd_env} yarn-project/scripts/run_test.sh $test"
   done
 
-  # Uses mocha for browser tests, so we have to treat it differently.
-  echo "$hash:ISOLATE=1 cd yarn-project/kv-store && yarn test"
+  # kv-store: per-file fan-out (mocha for node tests, vitest for browser tests).
+  kv-store/bootstrap.sh test_cmds
 
   # Aztec CLI tests
   aztec/bootstrap.sh test_cmds

diff --git a/yarn-project/kv-store/bootstrap.sh b/yarn-project/kv-store/bootstrap.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+source $(git rev-parse --show-toplevel)/ci3/source_bootstrap
+
+hash=$(../bootstrap.sh hash)
+
+function test_cmds {
+  # Node tests (mocha): files outside the browser-test and bench dirs.
+  # Mirrors .mocharc.json's spec.
+  for test in src/**/!(indexeddb|sqlite-opfs|bench)/*.test.ts; do
+    echo "$hash yarn-project/kv-store/scripts/run_test.sh $test"
+  done
+
+  # Browser tests (vitest + chromium). Each file runs in its own ISOLATE
+  # container — running multiple files in a single vitest invocation
+  # triggers a CDP teardown deadlock on the 2-CPU CI executor. See
+  # scripts/run-browser-tests.sh for the root-cause analysis.
+  for test in src/indexeddb/*.test.ts src/sqlite-opfs/*.test.ts; do
+    echo "$hash:ISOLATE=1 yarn-project/kv-store/scripts/run_test.sh $test"
+  done
+}
+
+case "$cmd" in
+  "")
+    ;;
+  *)
+    default_cmd_handler "$@"
+    ;;
+esac
diff --git a/yarn-project/kv-store/package.json b/yarn-project/kv-store/package.json
@@ -16,9 +16,9 @@
     "build:dev": "../scripts/tsc.sh --watch",
     "clean": "rm -rf ./dest .tsbuildinfo",
     "test:node": "NODE_NO_WARNINGS=1 mocha --config ./.mocharc.json",
-    "test:browser": "vitest run --config ./vitest.config.ts",
+    "test:browser": "bash scripts/run-browser-tests.sh",
     "bench:browser": "VITE_BENCH=1 vitest run --config ./vitest.config.ts src/bench",
-    "test": "yarn test:node",
+    "test": "yarn test:node && yarn test:browser",
     "test:jest": "NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --passWithNoTests --maxWorkers=${JEST_MAX_WORKERS:-8}"
   },
   "inherits": [

diff --git a/yarn-project/kv-store/package.local.json b/yarn-project/kv-store/package.local.json
@@ -1,5 +1,6 @@
 {
   "scripts": {
-    "test": "yarn test:node"
+    "test": "yarn test:node && yarn test:browser",
+    "test:browser": "bash scripts/run-browser-tests.sh"
   }
 }
diff --git a/yarn-project/kv-store/scripts/probe-test-browser.sh b/yarn-project/kv-store/scripts/probe-test-browser.sh
@@ -0,0 +1,207 @@
+#!/usr/bin/env bash
+# Deep diagnostic wrapper around `yarn test:browser` for investigating
+# browser-test hangs and silent failures.
+#
+# Wraps a single vitest run with a hybrid 1s-steady / 0.1s-burst sampler
+# that captures /proc state (wchan, stack, syscall, fd), cgroup CPU/memory
+# counters, /proc/net socket state counts, and a CPU-availability
+# microbench. When vitest goes silent for >3s the sampler enters dense
+# burst mode and emits a stacks_snapshot every 1s of burst. Outer 90s
+# timeout converts a hang into a clean exit with the full diagnostic dump
+# emitted to stderr by the EXIT trap.
+#
+# Single-tenant invocation:
+#   bash yarn-project/kv-store/scripts/probe-test-browser.sh
+#
+# Parallel invocation (avoids tmp-path collisions across slots):
+#   DIAG_DIR=/tmp/diag.$$ bash yarn-project/kv-store/scripts/probe-test-browser.sh
+#
+# Was used to root-cause the kv-store CDP teardown deadlock that
+# scripts/run-browser-tests.sh now works around; kept for future regression
+# debugging. scripts/repro-browser-hang.sh runs this in a parallel grind
+# under docker_isolate constraints.
+
+cd "$(dirname "$0")/.."
+
+DIAG_DIR=${DIAG_DIR:-/tmp/diag}
+PROBE_LOG=$DIAG_DIR/probe.log
+STACKS_LOG=$DIAG_DIR/stacks.log
+VITEST_LOG=$DIAG_DIR/vitest.log
+mkdir -p "$DIAG_DIR"
+: > "$PROBE_LOG"
+: > "$STACKS_LOG"
+: > "$VITEST_LOG"
+
+echo "probe-test-browser v2: starting at $(date +%T) (pid $$)" >&2
+
+pids_of_interest() {
+  pgrep -f 'chrom|node|vitest|esbuild|yarn' 2>/dev/null
+}
+
+# Force fixed CPU demand and measure what fraction we actually got.
+# ratio = (user+sys)/real. ~1.0 = full CPU; <0.5 = heavily preempted by host.
+# Disambiguates "processes not asking for CPU" (scenario A) from "host not
+# scheduling us" (scenario B) when cgroup counters are flat.
+microbench() {
+  local TIMEFORMAT='microbench: real=%R user=%U sys=%S'
+  { time awk 'BEGIN{x=0; for(i=0;i<100000;i++) x+=i*i; print x > "/dev/null"}' \
+    </dev/null 2>/dev/null; } 2>&1
+}
+
+snapshot() {
+  local tag="$1"
+  echo "=== $(date +%T.%N | cut -c1-12) $tag ==="
+  echo "-- /tmp --"
+  df -h /tmp 2>/dev/null | tail -1
+  du -sh /tmp 2>/dev/null
+  echo "-- cgroup mem --"
+  if [ -r /sys/fs/cgroup/memory.current ]; then
+    awk '{printf "current %d bytes (%.0f MB)\n", $1, $1/1048576}' /sys/fs/cgroup/memory.current
+    cat /sys/fs/cgroup/memory.events 2>/dev/null
+  fi
+  echo "-- cgroup cpu --"
+  cat /sys/fs/cgroup/cpu.stat 2>/dev/null
+  echo "-- top by rss --"
+  ps -eo pid,ppid,stat,rss,pcpu,wchan:20,comm --sort=-rss --no-headers 2>/dev/null | head -15
+  echo "-- procs of interest --"
+  for pid in $(pids_of_interest); do
+    [ -d "/proc/$pid" ] || continue
+    local state threads rss_kb wchan cmd
+    state=$(awk '{print $3}' "/proc/$pid/stat" 2>/dev/null)
+    wchan=$(cat "/proc/$pid/wchan" 2>/dev/null)
+    rss_kb=$(awk '/^VmRSS:/{print $2}' "/proc/$pid/status" 2>/dev/null)
+    threads=$(awk '/^Threads:/{print $2}' "/proc/$pid/status" 2>/dev/null)
+    cmd=$({ tr '\0' ' ' < "/proc/$pid/cmdline"; } 2>/dev/null | cut -c1-120)
+    echo "pid=$pid state=$state threads=$threads rss=${rss_kb}kB wchan=$wchan cmd=$cmd"
+  done
+  # TCP state hex (col 4): 01=ESTABLISHED 04=FIN_WAIT1 05=FIN_WAIT2 06=TIME_WAIT
+  # 07=CLOSE 08=CLOSE_WAIT 09=LAST_ACK 0A=LISTEN 0B=CLOSING
+  echo "-- tcp4 sockets (state counts) --"
+  awk 'NR>1{print $4}' /proc/net/tcp 2>/dev/null | sort | uniq -c
+  echo "-- tcp6 sockets (state counts) --"
+  awk 'NR>1{print $4}' /proc/net/tcp6 2>/dev/null | sort | uniq -c
+  # Unix socket state (col 6): 01=SS_UNCONNECTED 02=SS_CONNECTING 03=SS_CONNECTED
+  # 04=SS_DISCONNECTING — CDP/vitest IPC is typically over SS_CONNECTED unix sockets.
+  echo "-- unix sockets (state counts) --"
+  awk 'NR>1{print $6}' /proc/net/unix 2>/dev/null | sort | uniq -c
+}
+
+stacks_snapshot() {
+  local tag="$1"
+  echo "=== $(date +%T.%N | cut -c1-12) $tag ==="
+  echo "-- lsof tcp/unix (-n -P, head -40) --"
+  lsof -n -P -i TCP 2>/dev/null | head -25
+  lsof -n -P -U 2>/dev/null | head -15
+  for pid in $(pids_of_interest); do
+    [ -d "/proc/$pid" ] || continue
+    echo "--- pid=$pid ---"
+    { tr '\0' ' ' < "/proc/$pid/cmdline"; } 2>/dev/null | cut -c1-140
+    echo ""
+    # /proc/$pid/syscall: syscall_nr arg0..arg5 sp pc (readable by process owner)
+    echo "syscall:"
+    cat "/proc/$pid/syscall" 2>/dev/null | head -1
+    # /proc/$pid/stack: may require CAP_SYS_ADMIN; print only if non-empty
+    local stack_content
+    stack_content=$(cat "/proc/$pid/stack" 2>/dev/null | head -15)
+    if [ -n "$stack_content" ]; then
+      echo "kernel stack:"
+      echo "$stack_content"
+    fi
+    # Per-thread wchan — tells us if threads are stuck on different things
+    echo "thread wchans (sorted | uniq -c):"
+    for t in /proc/$pid/task/*/wchan; do
+      cat "$t" 2>/dev/null
+      echo ""
+    done | sort | uniq -c | sort -rn | head -10
+    echo "socket/pipe fds:"
+    for fd in /proc/$pid/fd/*; do
+      local target
+      target=$(readlink "$fd" 2>/dev/null) || continue
+      case "$target" in
+        socket:*|pipe:*|anon_inode:*) echo "  $(basename "$fd") -> $target" ;;
+      esac
+    done
+  done
+}
+
+probe_loop() {
+  local last_vitest_size=0
+  local silent_since
+  silent_since=$(date +%s)
+  local burst_count=0
+  local burst_cap=200   # ~20s dense sampling at 0.1s
+  local steady_count=0  # increments once per steady iteration
+  while true; do
+    local now_s cur_size silent_for
+    now_s=$(date +%s)
+    cur_size=$(stat -c %s "$VITEST_LOG" 2>/dev/null || echo 0)
+    if [ "$cur_size" != "$last_vitest_size" ]; then
+      last_vitest_size=$cur_size
+      silent_since=$now_s
+      burst_count=0
+    fi
+    silent_for=$((now_s - silent_since))
+
+    if [ "$silent_for" -ge 3 ] && [ "$burst_count" -lt "$burst_cap" ]; then
+      {
+        snapshot "burst(${silent_for}s silent)"
+        # During a hang, microbench every burst snapshot — it's the core
+        # signal for "is the host giving us CPU?" Overhead is irrelevant
+        # since tests aren't running.
+        microbench
+      } >> "$PROBE_LOG" 2>&1
+      # Stacks every 10th burst tick (~every 1s of burst) to keep log size sane
+      if [ $((burst_count % 10)) -eq 0 ]; then
+        stacks_snapshot "burst(${silent_for}s silent)" >> "$STACKS_LOG" 2>&1
+      fi
+      burst_count=$((burst_count + 1))
+      sleep 0.1
+    else
+      {
+        snapshot "steady"
+        # Microbench every 5th steady iteration to bound CPU overhead
+        # while tests are actually running (~0.4% vs tests' CPU budget).
+        if [ $((steady_count % 5)) -eq 0 ]; then
+          microbench
+        fi
+      } >> "$PROBE_LOG" 2>&1
+      # Periodic in-flight stacks baseline — gives us a "healthy waiting"
+      # reference to diff against burst-triggered dumps during a hang.
+      if [ $((steady_count % 10)) -eq 0 ]; then
+        stacks_snapshot "steady(tick=${steady_count})" >> "$STACKS_LOG" 2>&1
+      fi
+      steady_count=$((steady_count + 1))
+      sleep 1
+    fi
+  done
+}
+
+probe_loop &
+PROBE_PID=$!
+
+cleanup() {
+  local rc=$?
+  kill "$PROBE_PID" 2>/dev/null
+  wait "$PROBE_PID" 2>/dev/null
+  # One last stacks snapshot at the moment of failure
+  stacks_snapshot "cleanup" >> "$STACKS_LOG" 2>&1
+  {
+    echo ""
+    echo "=== VITEST LOG (tail -300) ==="
+    tail -300 "$VITEST_LOG"
+    echo ""
+    echo "=== FULL PROBE LOG ($(wc -l < "$PROBE_LOG") lines) ==="
+    cat "$PROBE_LOG"
+    echo ""
+    echo "=== FULL STACKS LOG ($(wc -l < "$STACKS_LOG") lines, test exit=$rc) ==="
+    cat "$STACKS_LOG"
+    echo "=== END DIAGNOSTICS ==="
+  } >&2
+  exit "$rc"
+}
+trap cleanup EXIT
+
+# Tee vitest output to a file so the probe loop can detect silence.
+# pipefail ensures we surface timeout's exit code, not tee's.
+set -o pipefail
+timeout -v 90s yarn test:browser 2>&1 | tee "$VITEST_LOG"
diff --git a/yarn-project/kv-store/scripts/repro-browser-hang.sh b/yarn-project/kv-store/scripts/repro-browser-hang.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Reproduces the kv-store browser-test hang under CI3 ISOLATE constraints.
+#
+# Failure mode: CDP CLOSE_WAIT deadlock between vitest and chromium's network
+# service at test-file transitions. Vitest closes 6 of 10 CDP TCP connections
+# on file-boundary teardown; chromium's network-service event loop, starved of
+# CPU under --cpus=2, never drains the closed sockets. Vitest's teardown hangs
+# waiting for the close-handshake. Both processes end up with zero on-CPU
+# threads. Outer `timeout -v 90s` in probe-test-browser.sh fires SIGTERM after
+# 90s of silence.
+#
+# Crucially, this does NOT reproduce on unconstrained hardware — chromium has
+# enough cores to drain sockets immediately. Must run inside docker_isolate
+# (--cpus=2 --memory=8g --tmpfs /tmp:exec,size=1g) to surface the hang.
+#
+# Usage:
+#   bash yarn-project/kv-store/scripts/repro-browser-hang.sh [JOBS]
+#
+#   JOBS: parallel container count (default 8). Higher = faster repro,
+#   higher RAM/CPU ceiling on host. 8 reliably catches a hang in ~3min on
+#   a multi-core box.
+#
+# On hang: parallel halts (rc=124) and the failing job's full probe diagnostic
+# (vitest tail + probe.log + stacks.log) is dumped to this script's stderr by
+# dump_fail. Capture stderr to a file if you want to keep it.
+#
+# Example: capture both streams for later analysis
+#   bash yarn-project/kv-store/scripts/repro-browser-hang.sh 8 > /tmp/repro.log 2>&1
+
+set -uo pipefail
+
+cd "$(git rev-parse --show-toplevel)"
+
+JOBS=${1:-8}
+
+echo "=== repro-browser-hang start: $(date -Is), jobs=$JOBS ==="
+while true; do
+  echo './ci3/dump_fail "CPUS=2 MEM=8g TMPFS_SIZE=1g ./ci3/docker_isolate \"cd yarn-project/kv-store && bash scripts/probe-test-browser.sh\"" >/dev/null'
+done | parallel -j"$JOBS" --halt now,fail=1
+rc=$?
+echo "=== repro-browser-hang end (parallel rc=$rc): $(date -Is) ==="
+exit $rc
diff --git a/yarn-project/kv-store/scripts/run-browser-tests.sh b/yarn-project/kv-store/scripts/run-browser-tests.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Local-dev entrypoint for `yarn test:browser`: runs every browser test file
+# in its own vitest process, sequentially. Delegates per-file dispatch to
+# scripts/run_test.sh (which CI also uses for per-file fan-out).
+#
+# Why not a single `vitest run` over all files: vitest+chromium have a CDP
+# teardown deadlock at test-file transitions under CPU-constrained
+# environments (CI3 ISOLATE: --cpus=2). Vitest closes a cohort of CDP TCP
+# connections when switching files; chromium's network service can't drain
+# them fast enough under contention; vitest's teardown blocks indefinitely
+# on the close-handshake. By running each file in a separate vitest process
+# the close-handshake only happens at process exit, avoiding the cross-file
+# teardown path entirely. See scripts/repro-browser-hang.sh for the repro.
+set -euo pipefail
+
+cd "$(dirname "$0")/.."
+
+files=$(find src/indexeddb src/sqlite-opfs -name '*.test.ts' 2>/dev/null | sort)
+
+if [ -z "$files" ]; then
+  echo "No test files found in src/indexeddb or src/sqlite-opfs"
+  exit 0
+fi
+
+count=$(echo "$files" | wc -l)
+echo "Running $count browser test files (one vitest process per file)"
+
+i=0
+for f in $files; do
+  i=$((i + 1))
+  echo "==> [$i/$count] $f"
+  bash scripts/run_test.sh "$f"
+done
diff --git a/yarn-project/kv-store/scripts/run_test.sh b/yarn-project/kv-store/scripts/run_test.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Runs a single kv-store test file. Dispatches to vitest+chromium for
+# browser tests (under src/indexeddb or src/sqlite-opfs) and to mocha for
+# everything else. Emitted by yarn-project/kv-store/bootstrap.sh test_cmds
+# for CI per-file fan-out and runnable directly for local reproduction:
+#
+#   yarn-project/kv-store/scripts/run_test.sh src/lmdb-v2/store.test.ts
+source $(git rev-parse --show-toplevel)/ci3/source
+
+test=${1:?"Usage: $0 <test-file relative to kv-store/>"}
+cd ..
+
+case "$test" in
+  src/indexeddb/*|src/sqlite-opfs/*)
+    exec yarn vitest run --config ./vitest.config.ts "$test"
+    ;;
+  *)
+    NODE_NO_WARNINGS=1 exec yarn mocha --config ./.mocharc.json "$test"
+    ;;
+esac