diff --git a/.gitignore b/.gitignore
index ecf5a6e07d2..4fe556dd5b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -59,6 +59,7 @@ race.*
core/services/job/testdata/wasm/testmodule.wasm
core/services/job/testdata/wasm/testmodule.br
temp-repo
+diagnose-*/
# DB state
./db/
diff --git a/GNUmakefile b/GNUmakefile
index 01021ec5fb8..7514aeb1d40 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -17,6 +17,8 @@ LOOPINSTALL_PUBLIC_ARGS := $(if $(strip $(CL_LOOPINSTALL_OUTPUT_DIR)),--output-
LOOPINSTALL_PRIVATE_ARGS := $(if $(strip $(CL_LOOPINSTALL_OUTPUT_DIR)),--output-installation-artifacts $(CL_LOOPINSTALL_OUTPUT_DIR)/private.json)
LOOPINSTALL_TESTING_ARGS := $(if $(strip $(CL_LOOPINSTALL_OUTPUT_DIR)),--output-installation-artifacts $(CL_LOOPINSTALL_OUTPUT_DIR)/testing.json)
GOLANGCI_LINT_VERSION = "v2.11.4"
+# Pin path so `make generate` does not pick up a different mockery (e.g. v3) from PATH.
+MOCKERY_BIN ?= $(shell GOBIN="$$(go env GOBIN)"; if [ -n "$$GOBIN" ]; then echo "$$GOBIN/mockery"; else echo "$$(go env GOPATH)/bin/mockery"; fi)
.PHONY: install
install: install-chainlink-autoinstall ## Install chainlink and all its dependencies.
@@ -175,7 +177,7 @@ operator-ui: ## Fetch the frontend
generate: codecgen mockery protoc gomods modgraph ## Execute all go:generate commands.
## Updating PATH makes sure that go:generate uses the version of protoc installed by the protoc make command.
export PATH="$(HOME)/.local/bin:$(PATH)"; gomods -w go generate -x ./...
- find . -type f -name .mockery.yaml -execdir mockery \; ## Execute mockery for all .mockery.yaml files
+ find . -type f -name .mockery.yaml -execdir $(MOCKERY_BIN) \; ## Execute mockery for all .mockery.yaml files (see mockery target: v2)
.PHONY: rm-mocked
rm-mocked:
@@ -270,6 +272,22 @@ modgraph:
test-short: ## Run 'go test -short' and suppress uninteresting output
go test -short ./... | grep -v "\[no test files\]" | grep -v "\(cached\)"
+# Chainlink tools/test harness (Postgres setup + optional diagnose). Uses the
+# nested module directly so its dependencies stay out of the root module. Pass
+# flags and packages via ARGS (quoted), e.g. make new_test ARGS="-v -p 4 ./core/..."
+# Note: do not use "make target -p 4 ..." — -p is a make flag; use ARGS= instead.
+.PHONY: new_test
+new_test: ## tools/test: passthrough go test. Usage: make new_test ARGS="-v -p 4 ./core/..."
+ go -C tools/test run . run $(ARGS)
+
+.PHONY: new_gotestsum
+new_gotestsum: ## tools/test: gotestsum. Usage: make new_gotestsum ARGS="--format=dots -- -count=1 ./core/..."
+ go -C tools/test run . gotestsum $(ARGS)
+
+.PHONY: new_test_diagnose
+new_test_diagnose: ## tools/test: diagnose (flakes/slow). Usage: make new_test_diagnose ARGS="--iterations 5 -- --timeout 9m ./core/..."
+ go -C tools/test run . diagnose $(ARGS)
+
.PHONY: gocs
gocs: ## Run gocs to generate changeset markdown files.
go run github.com/smartcontractkit/gocs/cmd/gocs@v0.2.0
diff --git a/tools/README.md b/tools/README.md
index cbbb356064f..9873bf25c36 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -3,3 +3,7 @@
## [Docker](./docker)
Manage Docker for development and testing
+
+## [test](./test/)
+
+A harness for running /chainlink tests. From the repo root use **`make new_test`** / **`make new_gotestsum`** / **`make new_test_diagnose`**; details in [tools/test/README.md](./test/README.md).
diff --git a/tools/test/AGENTS.md b/tools/test/AGENTS.md
new file mode 100644
index 00000000000..56afb272f03
--- /dev/null
+++ b/tools/test/AGENTS.md
@@ -0,0 +1,34 @@
+A test runner harness for the /chainlink repo.
+
+
+- Provide a single, easy command to set up and run tests in the /chainlink repo, eliminating `make` command chaining.
+- Enable automatically re-running tests and analyzing results to catch and diagnose flakes and slow tests
+- Provide an AI skill for the process in `.agents/skills/diagnose-tests/SKILL.md`
+
+
+
+- From /chainlink root, document `make new_test`, `make new_gotestsum`, and `make new_test_diagnose`. When working only inside this module, `go run . …` is fine.
+- Each output should offer both a pretty, human-readable terminal experience and a minimal version meant for AI ingestion
+
+
+
+
+Run tests using vanilla `go test` command and arguments
+
+
+Run tests using gotestsum for those that prefer its output and tools
+
+
+Opinionated flow to re-run tests and identify flakes, races, timeouts, and test runtimes.
+
+
+
+
+Run these commands to validate any changes you make
+```sh
+golangci-lint run ./... --fix
+go test ./...
+```
+
+DO NOT use other commands like `goimports`, `gofmt`, or `go vet` for formatting and lint checks.
+
diff --git a/tools/test/README.md b/tools/test/README.md
new file mode 100644
index 00000000000..eda364e4d33
--- /dev/null
+++ b/tools/test/README.md
@@ -0,0 +1,41 @@
+# /chainlink Test Runner
+
+A Go harness to run unit tests in /chainlink with a simpler flow and control scheme. Plus a mode to help you hunt down flakes, races, and timeouts.
+
+## Run
+
+You can run using `go -C tools/test run .` or through make targets.
+
+```sh
+go -C tools/test run . -h # Help menu
+
+# Use plain go test
+go -C tools/test run . run -count=1 ./core/...
+make new_test ARGS="-count=1 ./core/..."
+
+# Use gotestsum
+go -C tools/test run . gotestsum --format=testname -- -count=1 ./core/...
+make new_gotestsum ARGS="--format=testname -- -count=1 ./core/..."
+
+# Diagnose and fix flaky tests
+go -C tools/test run . diagnose --iterations 5 -- --timeout=9m ./core/...
+make new_test_diagnose ARGS="--iterations 5 -- --timeout=9m ./core/..."
+```
+
+When **developing only inside this directory** (nested module), use `go run .` instead of `go -C tools/test`:
+
+```sh
+go run . -h
+go run . run -count=1 ./core/...
+go run . diagnose --iterations 5 -- ./core/...
+```
+
+### AI Skill
+
+Use the [/diagnose-tests](/.agents/skills/diagnose-tests/SKILL.md) AI skill with your favorite agent to run a `diagnose` loop.
+
+## Why not just `go test`?
+
+There is no way to tell `go test` about some universal, one-time setup step (like creating a Postgres DB), so we need a light wrapper to take care of this.
+
+We could make just `go test` work if we have each test package that needs a DB launch their own using [testcontainers-go](https://github.com/testcontainers/testcontainers-go), but performance implications of that are still unknown.
diff --git a/tools/test/fixing-flaky-tests.md b/tools/test/fixing-flaky-tests.md
new file mode 100644
index 00000000000..812e0d8405c
--- /dev/null
+++ b/tools/test/fixing-flaky-tests.md
@@ -0,0 +1,135 @@
+# Finding the Root Cause of Test Flakes in Go
+
+Flaky tests can arise from many sources and can be frustrating to fix. Here's a non-exhaustive guide to help you find and resolve common causes for flakes in Go. But first, to answer a common question...
+
+## The Test Only Flakes 0.xx% of the Time, Why Bother Fixing It?
+
+You bother fixing it because of **MATH!**
+
+Let's imagine a large repo with 10,000 tests, and let's imagine only 100 (1%) of them are flaky. Let's further imagine that each of those flaky tests has a chance of flaking 1% of the time. If you are a responsible dev that requires all of your tests to pass in CI before you merge, flaky tests have now become a massive headache.
+
+$$P(\text{at least one flaky test}) = 1 - (1 - 0.01)^{100}$$
+
+$$P(\text{at least one flaky test}) \approx 63.40\%$$
+
+Even a small percentage of tests with a small chance of flaking can cause massive damage to dev velocity.
+
+
+## Tips on Finding and Fixing Flakes
+
+Ideally, if you're dealing with a flaky test, you'll already have some examples of it flaking in front of you so you can dig through logs and stack traces and figure it out that way. If that's not the case, or you'd like some more evidence, or you're just stumped, try reproducing the flake. How you reproduce the flake is often the best clue as to why it's flaking.
+
+You can also try some more precise configurations below.
+
+### 1. Run the Test in Isolation
+
+As we saw above, flaky tests become issues even when their chance of flaking is tiny. You might be hunting down a flake that only happens 0.5% of the time, so your only real solution is to run the test over and over.
+
+```sh
+# Run just that test 1,000 times, stopping after the first failure
+go test ./package -run TestName -count 1000 -failfast
+```
+
+### 2. Run the Test Package
+
+Tests rarely run in isolation in the real world. If you can't get the flake to happen when isolated, try running the whole package on repeat.
+
+```sh
+# Run all tests in the package over and over.
+go test ./package -count 1000 -failfast
+```
+
+If you get the test to fail here, but not independently, it's likely that it depends on the execution of other tests in the package. Look for global resources your test could be sharing with others, and do your best to isolate all of your unit tests.
+
+### 3. Randomize Test Order
+
+If that's still not doing the job, or you're still scratching your head, try randomizing the test order. Go runs tests in a deterministic order by default.
+
+```sh
+# -shuffle randomizes test order
+go test ./package -shuffle on -count 1000 -failfast
+# You can supply your own int value to shuffle as a seed
+go test ./package -shuffle 15 -count 1000 -failfast
+```
+
+### 4. Check for Races
+
+If your test is failing in a situation like this, it's possible a race condition is causing the failure. Go's `-race` flag isn't guaranteed to catch all races every time. Just like flakes, you sometimes just need to get lucky (unlucky?).
+
+```sh
+# Tests with -race detection take longer to run, and aren't always going to catch issues, especially in large test suites.
+go test ./package -race -shuffle on -count 100 -failfast
+```
+
+### 5. Emulate Your Target System
+
+Tests will often fail in CI, but not locally. You can try re-running the test in CI, but this might take a long time, cost a lot of money, or generally be annoying. There are a few tricks you can do to emulate CI environments locally.
+
+#### 5.1 Play with -cpu and -parallel
+
+You can artificially constrain or expand parallel execution directly in Go. [GOMAXPROCS](https://pkg.go.dev/runtime#hdr-Environment_Variables) is set to the number of CPUs your system has by default, and controls how many OS threads can run Go code at once. You can manipulate this value, or otherwise easily adjust how many tests can run at once. This can help you figure out if resource constraints are hurting your tests.
+
+```sh
+# Use -cpu to change GOMAXPROCS. You can supply a list of values to try out different values at once
+go test ./package -shuffle 15 -count 1000 -failfast -cpu 1,2,4
+# Use -parallel to set the maximum number of tests allowed to run in parallel at once
+go test ./package -shuffle 15 -count 1000 -failfast -parallel 4
+```
+
+#### 5.2 Use Docker
+
+Docker can help you emulate your CI environment a little better. You can look up what type of GitHub Actions runner your CI workflow uses by matching against the lists [here](https://docs.github.com/en/actions/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories) and [here](https://docs.github.com/en/actions/using-github-hosted-runners/using-larger-runners/about-larger-runners#specifications-for-general-larger-runners). You can then package your Go tests in a Docker container, and run them with varying resources.
+
+```sh
+# Run a basic Ubuntu container with resources matching a standard GitHub-hosted runner
+docker run -it --cpus=4 --memory="16g" ubuntu:24.04
+```
+
+You can also try using [dockexec](https://github.com/mvdan/dockexec) for convenience, but I've never personally tried it.
+
+#### 5.3 Use act
+
+[act](https://github.com/nektos/act) is a project that lets you emulate your GitHub Actions workflows locally. It's not perfect, and can be tricky to set up for more complex workflows, but it is a nice option if you suspect issues are further back in the workflow, and don't want to run the full CI process.
+
+### 6. Use Your Target System
+
+Sometimes you can only discover the truth by going directly to the source. Before you do so, please double check what `runs-on` systems your workflows use. If you're only using `ubuntu-latest` runners, these runs should be free, or at least very cheap. `8-core`, `16-core`, and `32-core` workflows can become very expensive, very quickly. Please use caution and discretion when running these workflows repeatedly.
+
+#### 6.1 CI Resource Constraints
+
+It is sometimes the case that tests only fail in CI environments because those environments are underpowered. **This is more rare than you think, be cautious of [System 1 thinking](https://en.wikipedia.org/wiki/Thinking,_Fast_and_Slow) here.** You can diagnose this with [this excellent GitHub workflow telemetry action](https://github.com/catchpoint/workflow-telemetry-action) that can give you detailed stats on how many resources your tests are consuming. (This is also handy if you're looking to optimize your CI runtimes or costs.) If your tests are flaking due to low resources, consider other options before just increasing the power of the CI runners. [Increasing the power of a GitHub Actions workflow by a single tier doubles its cost](https://docs.github.com/en/billing/managing-billing-for-your-products/managing-billing-for-github-actions/about-billing-for-github-actions#per-minute-rates-for-x64-powered-larger-runners). If your workflow runs often, you can burn a lot of cash quickly. You can otherwise try strategies like:
+
+* Splitting the tests into different workflows, each running on `ubuntu-latest`
+* Moving more resource-hungry tests to run only on nightly cadences
+* Try removing `t.Parallel()` from subtests, as too many tests trying to run at once will often hurt stability and runtimes on smaller machines
+
+### 7. Fix It!
+
+Maybe you've found the source of the flake and are now drilling down into the reasons why. Whatever those reasons might be, I urge you to, at least briefly, reframe the problem and ask whether the test is actually working as intended, and whether it is revealing flaky behavior in your application instead. Consider that you might have found a rare bug, rather than a rare flake.
+
+### 8. Give Up
+
+It's not my favorite answer, but sometimes this truly is the solution. It's hard to know exactly when you should abandon hope, but maybe the below steps can help you figure it out.
+
+#### 8.1 Evaluate the Importance of the Test
+
+Ask yourself these questions to help figure out if it's worth working on this flake further, and to help you figure out what to do next.
+
+* What does the test actually check? Is it a critical path?
+* Is the test flaking because it's a bad test? Is it trying to test behavior that shouldn't or can't be tested?
+* Can you write a new test that checks the same behavior, but doesn't fall to the same issues?
+* Can you come back to this later? Maybe in a week or two you'll have new ideas, or maybe the underlying system will change in ways that make this flake no longer an issue?
+
+#### 8.2 Turn it Off
+
+Assuming you're ready to declare defeat, it's time to turn off the test. How you do this depends on the test, your team, and the answers to the questions above. If you've determined the test isn't particularly important and isn't worth running anymore, you should just delete it.
+
+## Chainlink `tools/test` harness
+
+For repeated runs with Postgres setup, `go test -json` capture, and machine-readable reports under `diagnose-*` directories, use the harness from the **repository root** (`go -C tools/test run .`, declared in the root `go.mod`):
+
+```sh
+go -C tools/test run . diagnose --iterations 50 -- --failfast ./path/to/package
+```
+
+See [README.md](./README.md), root `GNUmakefile` targets `new_test` / `new_gotestsum` / `new_test_diagnose`, and the agent playbook [`.agents/skills/diagnose-tests/SKILL.md`](../../.agents/skills/diagnose-tests/SKILL.md).
diff --git a/tools/test/go.mod b/tools/test/go.mod
new file mode 100644
index 00000000000..03d976f3212
--- /dev/null
+++ b/tools/test/go.mod
@@ -0,0 +1,95 @@
+module github.com/smartcontractkit/chainlink/v2/tools/test
+
+go 1.26.2
+
+require (
+ charm.land/fang/v2 v2.0.1
+ charm.land/lipgloss/v2 v2.0.3
+ github.com/charmbracelet/x/term v0.2.2
+ github.com/spf13/cobra v1.10.2
+ github.com/spf13/pflag v1.0.10
+ github.com/spf13/viper v1.21.0
+ github.com/stretchr/testify v1.11.1
+ github.com/testcontainers/testcontainers-go v0.42.0
+ github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0
+)
+
+require (
+ dario.cat/mergo v1.0.2 // indirect
+ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
+ github.com/Microsoft/go-winio v0.6.2 // indirect
+ github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+ github.com/cespare/xxhash/v2 v2.3.0 // indirect
+ github.com/charmbracelet/colorprofile v0.4.3 // indirect
+ github.com/charmbracelet/ultraviolet v0.0.0-20260422141423-a0f1f21775f7 // indirect
+ github.com/charmbracelet/x/ansi v0.11.7 // indirect
+ github.com/charmbracelet/x/exp/charmtone v0.0.0-20260422141420-a6cbdff8a7e2 // indirect
+ github.com/charmbracelet/x/termios v0.1.1 // indirect
+ github.com/charmbracelet/x/windows v0.2.2 // indirect
+ github.com/clipperhouse/displaywidth v0.11.0 // indirect
+ github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
+ github.com/containerd/errdefs v1.0.0 // indirect
+ github.com/containerd/errdefs/pkg v0.3.0 // indirect
+ github.com/containerd/log v0.1.0 // indirect
+ github.com/containerd/platforms v0.2.1 // indirect
+ github.com/cpuguy83/dockercfg v0.3.2 // indirect
+ github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/distribution/reference v0.6.0 // indirect
+ github.com/docker/go-connections v0.7.0 // indirect
+ github.com/docker/go-units v0.5.0 // indirect
+ github.com/ebitengine/purego v0.10.0 // indirect
+ github.com/felixge/httpsnoop v1.0.4 // indirect
+ github.com/fsnotify/fsnotify v1.9.0 // indirect
+ github.com/go-logr/logr v1.4.3 // indirect
+ github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/go-ole/go-ole v1.3.0 // indirect
+ github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
+ github.com/google/uuid v1.6.0 // indirect
+ github.com/inconshreveable/mousetrap v1.1.0 // indirect
+ github.com/klauspost/compress v1.18.5 // indirect
+ github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
+ github.com/lufia/plan9stats v0.0.0-20260330125221-c963978e514e // indirect
+ github.com/magiconair/properties v1.8.10 // indirect
+ github.com/mattn/go-runewidth v0.0.23 // indirect
+ github.com/moby/docker-image-spec v1.3.1 // indirect
+ github.com/moby/go-archive v0.2.0 // indirect
+ github.com/moby/moby/api v1.54.1 // indirect
+ github.com/moby/moby/client v0.4.0 // indirect
+ github.com/moby/patternmatcher v0.6.1 // indirect
+ github.com/moby/sys/sequential v0.6.0 // indirect
+ github.com/moby/sys/user v0.4.0 // indirect
+ github.com/moby/sys/userns v0.1.0 // indirect
+ github.com/moby/term v0.5.2 // indirect
+ github.com/muesli/cancelreader v0.2.2 // indirect
+ github.com/muesli/mango v0.2.0 // indirect
+ github.com/muesli/mango-cobra v1.3.0 // indirect
+ github.com/muesli/mango-pflag v0.2.0 // indirect
+ github.com/muesli/roff v0.1.0 // indirect
+ github.com/opencontainers/go-digest v1.0.0 // indirect
+ github.com/opencontainers/image-spec v1.1.1 // indirect
+ github.com/pelletier/go-toml/v2 v2.3.0 // indirect
+ github.com/pmezard/go-difflib v1.0.0 // indirect
+ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
+ github.com/rivo/uniseg v0.4.7 // indirect
+ github.com/sagikazarmark/locafero v0.12.0 // indirect
+ github.com/shirou/gopsutil/v4 v4.26.3 // indirect
+ github.com/sirupsen/logrus v1.9.4 // indirect
+ github.com/spf13/afero v1.15.0 // indirect
+ github.com/spf13/cast v1.10.0 // indirect
+ github.com/subosito/gotenv v1.6.0 // indirect
+ github.com/tklauser/go-sysconf v0.3.16 // indirect
+ github.com/tklauser/numcpus v0.11.0 // indirect
+ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
+ github.com/yusufpapurcu/wmi v1.2.4 // indirect
+ go.opentelemetry.io/auto/sdk v1.2.1 // indirect
+ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 // indirect
+ go.opentelemetry.io/otel v1.43.0 // indirect
+ go.opentelemetry.io/otel/metric v1.43.0 // indirect
+ go.opentelemetry.io/otel/trace v1.43.0 // indirect
+ go.yaml.in/yaml/v3 v3.0.4 // indirect
+ golang.org/x/crypto v0.50.0 // indirect
+ golang.org/x/sync v0.20.0 // indirect
+ golang.org/x/sys v0.43.0 // indirect
+ golang.org/x/text v0.36.0 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
+)
diff --git a/tools/test/go.sum b/tools/test/go.sum
new file mode 100644
index 00000000000..5d5b3ae6756
--- /dev/null
+++ b/tools/test/go.sum
@@ -0,0 +1,228 @@
+charm.land/fang/v2 v2.0.1 h1:zQCM8JQJ1JnQX/66B5jlCYBUxL2as5JXQZ2KJ6EL0mY=
+charm.land/fang/v2 v2.0.1/go.mod h1:S1GmkpcvK+OB5w9caywUnJcsMew45Ot8FXqoz8ALrII=
+charm.land/lipgloss/v2 v2.0.3 h1:yM2zJ4Cf5Y51b7RHIwioil4ApI/aypFXXVHSwlM6RzU=
+charm.land/lipgloss/v2 v2.0.3/go.mod h1:7myLU9iG/3xluAWzpY/fSxYYHCgoKTie7laxk6ATwXA=
+dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
+dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA=
+github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk=
+github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
+github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg=
+github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
+github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
+github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
+github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o=
+github.com/aymanbagabas/go-udiff v0.4.1/go.mod h1:0L9PGwj20lrtmEMeyw4WKJ/TMyDtvAoK9bf2u/mNo3w=
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q=
+github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q=
+github.com/charmbracelet/ultraviolet v0.0.0-20260422141423-a0f1f21775f7 h1:PeRlqWGEoO0apcS62iEgxQhVnFCTOYyQvi2sUTdf6IE=
+github.com/charmbracelet/ultraviolet v0.0.0-20260422141423-a0f1f21775f7/go.mod h1:3YdTxlnV/L0bQ3VN8WOSw8doF7LZV/xawUQ4MuAPDvo=
+github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI=
+github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ=
+github.com/charmbracelet/x/exp/charmtone v0.0.0-20260422141420-a6cbdff8a7e2 h1:VMYDZEx8U5ZkxH76l1GoElpgUSG9Sj6gnZwuxosimPA=
+github.com/charmbracelet/x/exp/charmtone v0.0.0-20260422141420-a6cbdff8a7e2/go.mod h1:nsExn0DGyX0lh9LwLHTn2Gg+hafdzfSXnC+QmEJTZFY=
+github.com/charmbracelet/x/exp/golden v0.0.0-20250806222409-83e3a29d542f h1:pk6gmGpCE7F3FcjaOEKYriCvpmIN4+6OS/RD0vm4uIA=
+github.com/charmbracelet/x/exp/golden v0.0.0-20250806222409-83e3a29d542f/go.mod h1:IfZAMTHB6XkZSeXUqriemErjAWCCzT0LwjKFYCZyw0I=
+github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
+github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
+github.com/charmbracelet/x/termios v0.1.1 h1:o3Q2bT8eqzGnGPOYheoYS8eEleT5ZVNYNy8JawjaNZY=
+github.com/charmbracelet/x/termios v0.1.1/go.mod h1:rB7fnv1TgOPOyyKRJ9o+AsTU/vK5WHJ2ivHeut/Pcwo=
+github.com/charmbracelet/x/windows v0.2.2 h1:IofanmuvaxnKHuV04sC0eBy/smG6kIKrWG2/jYn2GuM=
+github.com/charmbracelet/x/windows v0.2.2/go.mod h1:/8XtdKZzedat74NQFn0NGlGL4soHB0YQZrETF96h75k=
+github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
+github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
+github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
+github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
+github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
+github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
+github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
+github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
+github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
+github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
+github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
+github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
+github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
+github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
+github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
+github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
+github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
+github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
+github.com/docker/go-connections v0.7.0 h1:6SsRfJddP22WMrCkj19x9WKjEDTB+ahsdiGYf0mN39c=
+github.com/docker/go-connections v0.7.0/go.mod h1:no1qkHdjq7kLMGUXYAduOhYPSJxxvgWBh7ogVvptn3Q=
+github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
+github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU=
+github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
+github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
+github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
+github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
+github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPEgAXnvj1Ro=
+github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
+github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.5.4 h1:Xp2aQS8uXButQdnCMWNmvx6UysWQQC+u1EoizjguY+8=
+github.com/jackc/pgx/v5 v5.5.4/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
+github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
+github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
+github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4=
+github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/lufia/plan9stats v0.0.0-20260330125221-c963978e514e h1:Q6MvJtQK/iRcRtzAscm/zF23XxJlbECiGPyRicsX+Ak=
+github.com/lufia/plan9stats v0.0.0-20260330125221-c963978e514e/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg=
+github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
+github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
+github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw=
+github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
+github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI=
+github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o=
+github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
+github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
+github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8=
+github.com/moby/go-archive v0.2.0/go.mod h1:mNeivT14o8xU+5q1YnNrkQVpK+dnNe/K6fHqnTg4qPU=
+github.com/moby/moby/api v1.54.1 h1:TqVzuJkOLsgLDDwNLmYqACUuTehOHRGKiPhvH8V3Nn4=
+github.com/moby/moby/api v1.54.1/go.mod h1:+RQ6wluLwtYaTd1WnPLykIDPekkuyD/ROWQClE83pzs=
+github.com/moby/moby/client v0.4.0 h1:S+2XegzHQrrvTCvF6s5HFzcrywWQmuVnhOXe2kiWjIw=
+github.com/moby/moby/client v0.4.0/go.mod h1:QWPbvWchQbxBNdaLSpoKpCdf5E+WxFAgNHogCWDoa7g=
+github.com/moby/patternmatcher v0.6.1 h1:qlhtafmr6kgMIJjKJMDmMWq7WLkKIo23hsrpR3x084U=
+github.com/moby/patternmatcher v0.6.1/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
+github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
+github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
+github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
+github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
+github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
+github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
+github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
+github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
+github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
+github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
+github.com/muesli/mango v0.2.0 h1:iNNc0c5VLQ6fsMgAqGQofByNUBH2Q2nEbD6TaI+5yyQ=
+github.com/muesli/mango v0.2.0/go.mod h1:5XFpbC8jY5UUv89YQciiXNlbi+iJgt29VDC5xbzrLL4=
+github.com/muesli/mango-cobra v1.3.0 h1:vQy5GvPg3ndOSpduxutqFoINhWk3vD5K2dXo5E8pqec=
+github.com/muesli/mango-cobra v1.3.0/go.mod h1:Cj1ZrBu3806Qw7UjxnAUgE+7tllUBj1NCLQDwwGx19E=
+github.com/muesli/mango-pflag v0.2.0 h1:QViokgKDZQCzKhYe1zH8D+UlPJzBSGoP9yx0hBG0t5k=
+github.com/muesli/mango-pflag v0.2.0/go.mod h1:X9LT1p/pbGA1wjvEbtwnixujKErkP0jVmrxwrw3fL0Y=
+github.com/muesli/roff v0.1.0 h1:YD0lalCotmYuF5HhZliKWlIx7IEhiXeSfq7hNjFqGF8=
+github.com/muesli/roff v0.1.0/go.mod h1:pjAHQM9hdUUwm/krAfrLGgJkXJ+YuhtsfZ42kieB2Ig=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
+github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
+github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
+github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
+github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
+github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4=
+github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI=
+github.com/shirou/gopsutil/v4 v4.26.3 h1:2ESdQt90yU3oXF/CdOlRCJxrP+Am1aBYubTMTfxJ1qc=
+github.com/shirou/gopsutil/v4 v4.26.3/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ=
+github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
+github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
+github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I=
+github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg=
+github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
+github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
+github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
+github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
+github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
+github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU=
+github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY=
+github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4=
+github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
+github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
+github.com/testcontainers/testcontainers-go v0.42.0 h1:He3IhTzTZOygSXLJPMX7n44XtK+qhjat1nI9cneBbUY=
+github.com/testcontainers/testcontainers-go v0.42.0/go.mod h1:vZjdY1YmUA1qEForxOIOazfsrdyORJAbhi0bp8plN30=
+github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0 h1:GCbb1ndrF7OTDiIvxXyItaDab4qkzTFJ48LKFdM7EIo=
+github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0/go.mod h1:IRPBaI8jXdrNfD0e4Zm7Fbcgaz5shKxOQv4axiL09xs=
+github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
+github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI=
+github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw=
+github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ=
+github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
+github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
+github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
+github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
+go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
+go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo=
+go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
+go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
+go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
+go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
+go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
+go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
+go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
+go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
+go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
+go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
+go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
+golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
+golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
+golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
+golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
+golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY=
+golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY=
+golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
+golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
+gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
+pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk=
+pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04=
diff --git a/tools/test/internal/cmd/diagnose.go b/tools/test/internal/cmd/diagnose.go
new file mode 100644
index 00000000000..8d11b897efe
--- /dev/null
+++ b/tools/test/internal/cmd/diagnose.go
@@ -0,0 +1,57 @@
+package cmd
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "time"
+
+ "github.com/spf13/cobra"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/runner"
+)
+
+var diagnoseCmd = &cobra.Command{
+ Use: "diagnose [--diagnose flags] [-- go test flags]",
+ Short: "Run /chainlink unit tests multiple times to hunt down flakes, races, timeouts, and more",
+ Long: `Runs /chainlink unit tests multiple times to hunt down flakes, races, timeouts, and more.
+
+Pass every flag and package pattern you want forwarded to go test after "--". The harness
+prepends "go test -json" (duplicate -json in your arguments is ignored) and adds "-count=1"
+when you omit -count or use -count=1. Prefer diagnose --iterations for repetition; you may
+use -count>1 to repeat inside one go test (e.g. to reduce DB setup/teardown between diagnose
+iterations). With --shuffle-seed, a per-iteration -shuffle= is appended.`,
+ Example: `# Run the full core test suite 10 times.
+go -C tools/test run . diagnose --iterations 10 -- ./core/...`,
+ Args: cobra.MinimumNArgs(1),
+ RunE: func(cmd *cobra.Command, args []string) error {
+ conf, err := config.Load(cmd)
+ if err != nil {
+ return err
+ }
+
+ defer func() {
+ if err := dbHandle.Cleanup(); err != nil {
+ fmt.Fprintf(os.Stderr, "error tearing down postgres: %v\n", err)
+ }
+ }()
+
+ if conf.Iterations < 1 {
+ return errors.New("--iterations must be >= 1")
+ }
+
+ if err := runner.WarnDiagnoseGoTestCount(os.Stderr, args); err != nil {
+ return err
+ }
+
+ return runner.Diagnose(cmd.Context(), conf, args, dbHandle.Reset, dbHandle.DumpDiagnostics)
+ },
+}
+
+func init() {
+ diagnoseCmd.Flags().Int("iterations", 1, "number of full test runs")
+ diagnoseCmd.Flags().Duration("slow-threshold", 30*time.Second, "tests whose max Elapsed exceeds this are flagged slow")
+ diagnoseCmd.Flags().Bool("fail-fast", false, "stop this diagnose run immediately if any iteration fails")
+ diagnoseCmd.Flags().Bool("shuffle-seed", false, "randomize test order each iteration; a unique seed is generated per iteration and recorded in report.json for reproduction")
+}
diff --git a/tools/test/internal/cmd/gotestsum.go b/tools/test/internal/cmd/gotestsum.go
new file mode 100644
index 00000000000..90169fc872d
--- /dev/null
+++ b/tools/test/internal/cmd/gotestsum.go
@@ -0,0 +1,47 @@
+package cmd
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+
+ "github.com/spf13/cobra"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/runner"
+)
+
+var gotestsumCmd = &cobra.Command{
+ Use: "gotestsum [gotestsum flags] [-- go test flags]",
+ DisableFlagParsing: true,
+ Short: "Run tests with gotestsum",
+ Long: `Runs gotestsum from the Chainlink repo root (with optional ephemeral Postgres).
+
+Because this subcommand does not parse flags, global options (--database-url,
+--postgres-version, --ai-output) must appear on the root command before gotestsum, for example:
+ go -C tools/test run . --database-url=postgres://... gotestsum --format=dots -- -count=1 ./core/...`,
+ Example: `go -C tools/test run . gotestsum --format=dots -- -count=1 ./core/...
+go -C tools/test run . --ai-output gotestsum --format=testname -- -count=1 ./core/...`,
+ Args: cobra.ArbitraryArgs,
+ RunE: func(cmd *cobra.Command, args []string) error {
+ return runGotestsum(cmd, args, exec.LookPath, func() error { return dbHandle.Cleanup() })
+ },
+}
+
+// runGotestsum runs the gotestsum path. lookPath and deferCleanup are injectable for tests.
+// deferCleanup must run on every exit path after PersistentPreRunE may have started Postgres.
+func runGotestsum(cmd *cobra.Command, args []string, lookPath func(string) (string, error), deferCleanup func() error) error {
+ defer func() {
+ if err := deferCleanup(); err != nil {
+ fmt.Fprintf(os.Stderr, "error tearing down postgres: %v\n", err)
+ }
+ }()
+ if _, err := lookPath("gotestsum"); err != nil {
+ return fmt.Errorf("gotestsum not on PATH: install with go install gotest.tools/gotestsum@latest: %w", err)
+ }
+ conf, err := config.Load(cmd)
+ if err != nil {
+ return err
+ }
+ return runner.Gotestsum(cmd.Context(), conf, args)
+}
diff --git a/tools/test/internal/cmd/gotestsum_test.go b/tools/test/internal/cmd/gotestsum_test.go
new file mode 100644
index 00000000000..f4d913835ba
--- /dev/null
+++ b/tools/test/internal/cmd/gotestsum_test.go
@@ -0,0 +1,36 @@
+package cmd
+
+import (
+ "errors"
+ "testing"
+
+ "github.com/spf13/cobra"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestRunGotestsumRunsCleanupWhenLookPathFails(t *testing.T) {
+ var cleanups int
+ err := runGotestsum(
+ &cobra.Command{},
+ nil,
+ func(string) (string, error) { return "", errors.New("missing") },
+ func() error { cleanups++; return nil },
+ )
+ require.Error(t, err)
+ require.ErrorContains(t, err, "gotestsum not on PATH")
+ assert.Equal(t, 1, cleanups, "cleanup must run when gotestsum is missing so ephemeral Postgres from PersistentPreRun is torn down")
+}
+
+func TestRunGotestsumRunsCleanupWhenConfigLoadFails(t *testing.T) {
+ var cleanups int
+ err := runGotestsum(
+ nil,
+ nil,
+ func(string) (string, error) { return "/bin/gotestsum", nil },
+ func() error { cleanups++; return nil },
+ )
+ require.Error(t, err)
+ require.ErrorContains(t, err, "command is required")
+ assert.Equal(t, 1, cleanups, "cleanup must run after LookPath succeeds but later steps fail")
+}
diff --git a/tools/test/internal/cmd/root.go b/tools/test/internal/cmd/root.go
new file mode 100644
index 00000000000..dfa39956d30
--- /dev/null
+++ b/tools/test/internal/cmd/root.go
@@ -0,0 +1,74 @@
+package cmd
+
+import (
+ "context"
+ "os"
+ "os/signal"
+ "syscall"
+
+ "charm.land/fang/v2"
+ "github.com/charmbracelet/x/term"
+ "github.com/spf13/cobra"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/db"
+)
+
+var dbHandle *db.Handle
+
+var rootCmd = &cobra.Command{
+ Use: "test",
+ Short: "Run Chainlink Go tests with a single command",
+ Long: `Run Chainlink Go tests with a single command. Use an ephemeral Postgres database or an existing one.
+
+Modes:
+
+- run: Run tests using vanilla go test command and arguments
+- gotestsum: Run tests using gotestsum for those that prefer its output and tools
+- diagnose: Run tests multiple times to collect statistics, debug logs, and more to help find flakes, races, panics, timeouts, and other issues`,
+ Annotations: map[string]string{
+ cobra.CommandDisplayNameAnnotation: "go -C tools/test run .",
+ },
+ Example: `# Use vanilla go test commands
+go -C tools/test run . run -v -count=1 -p 4 ./core/...
+# Use gotestsum as the runner
+go -C tools/test run . gotestsum --format=dots -- -count=1 ./core/...
+# Run the full core test suite 10 times and collect statistics, debug logs, and more
+go -C tools/test run . diagnose --iterations 10 -- --timeout=15m ./core/...`,
+ PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
+ conf, err := config.Load(cmd)
+ if err != nil {
+ return err
+ }
+
+ dbHandle, err = db.Ensure(cmd.Context(), conf)
+ if err != nil {
+ return err
+ }
+ return nil
+ },
+}
+
+func init() {
+ rootCmd.PersistentFlags().String("database-url", "", "Provide a PostgreSQL connection string to use an existing database instead of an ephemeral one")
+ rootCmd.PersistentFlags().String("postgres-version", config.DefaultPostgresVersion, "PostgreSQL version to run tests against")
+ rootCmd.PersistentFlags().Bool("ai-output", !term.IsTerminal(os.Stdout.Fd()), "Use sparse output for agent tooling (and robotic humans)")
+
+ rootCmd.AddCommand(runCmd)
+ rootCmd.AddCommand(gotestsumCmd)
+ rootCmd.AddCommand(diagnoseCmd)
+}
+
+// Execute runs the root command. A SIGINT or SIGTERM cancels the context so
+// long-running subcommands (notably `diagnose`) can stop cleanly and still write
+// their post-run analysis. A second signal hits the default handler and
+// force-exits.
+func Execute() {
+ ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+ opts := []fang.Option{fang.WithoutCompletions()}
+ if err := fang.Execute(ctx, rootCmd, opts...); err != nil {
+ stop()
+ os.Exit(1)
+ }
+ stop()
+}
diff --git a/tools/test/internal/cmd/root_test.go b/tools/test/internal/cmd/root_test.go
new file mode 100644
index 00000000000..e96e910aa48
--- /dev/null
+++ b/tools/test/internal/cmd/root_test.go
@@ -0,0 +1,40 @@
+package cmd
+
+import (
+ "testing"
+
+ "github.com/spf13/cobra"
+)
+
+func TestRootCommandPathShowsGoCInvocation(t *testing.T) {
+ t.Parallel()
+
+ if got := rootCmd.CommandPath(); got != "go -C tools/test" {
+ t.Fatalf("root CommandPath (help / errors): got %q want %q", got, "go -C tools/test")
+ }
+ if got := rootCmd.DisplayName(); got != "go -C tools/test" {
+ t.Fatalf("DisplayName: got %q want %q", got, "go -C tools/test")
+ }
+ if got := rootCmd.Name(); got != "test" {
+ t.Fatalf("internal Name (subcommand paths use CommandPath + Name): got %q want %q", got, "test")
+ }
+}
+
+func TestSubcommandCommandPaths(t *testing.T) {
+ t.Parallel()
+
+ var gotestsum *cobra.Command
+ for _, c := range rootCmd.Commands() {
+ if c.Name() == "gotestsum" {
+ gotestsum = c
+ break
+ }
+ }
+ if gotestsum == nil {
+ t.Fatal("gotestsum subcommand not found")
+ }
+ want := "go -C tools/test run . gotestsum"
+ if got := gotestsum.CommandPath(); got != want {
+ t.Fatalf("gotestsum CommandPath: got %q want %q", got, want)
+ }
+}
diff --git a/tools/test/internal/cmd/run.go b/tools/test/internal/cmd/run.go
new file mode 100644
index 00000000000..259f64ab911
--- /dev/null
+++ b/tools/test/internal/cmd/run.go
@@ -0,0 +1,37 @@
+package cmd
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/spf13/cobra"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/runner"
+)
+
+var runCmd = &cobra.Command{
+ Use: "run [go test flags]",
+ DisableFlagParsing: true,
+ Short: "Run go test; all flags and args are passed through",
+ Long: `Runs go test from the Chainlink repo root (with optional ephemeral Postgres).
+
+Because this subcommand does not parse flags, global options (--database-url,
+--postgres-version, --ai-output) must appear on the root command before run, for example:
+ go -C tools/test run . --database-url=postgres://... run -v -count=1 ./core/...`,
+ Example: ` go -C tools/test run . run -v -count=1 -p 4 ./core/...
+ go -C tools/test run . --postgres-version=16 run -count=1 ./core/...`,
+ Args: cobra.ArbitraryArgs,
+ RunE: func(cmd *cobra.Command, args []string) error {
+ conf, err := config.Load(cmd)
+ if err != nil {
+ return err
+ }
+ defer func() {
+ if err := dbHandle.Cleanup(); err != nil {
+ fmt.Fprintf(os.Stderr, "error tearing down postgres: %v\n", err)
+ }
+ }()
+ return runner.GoTest(cmd.Context(), conf, args)
+ },
+}
diff --git a/tools/test/internal/config/config.go b/tools/test/internal/config/config.go
new file mode 100644
index 00000000000..d825302e7d5
--- /dev/null
+++ b/tools/test/internal/config/config.go
@@ -0,0 +1,72 @@
+package config
+
+import (
+ "errors"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/charmbracelet/x/term"
+ "github.com/spf13/cobra"
+ "github.com/spf13/pflag"
+ "github.com/spf13/viper"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/repo"
+)
+
+const DefaultPostgresVersion = "16"
+
+type App struct {
+ DatabaseURL string `mapstructure:"database_url"`
+ PostgresVersion string `mapstructure:"postgres_version"`
+ RepoRoot string `mapstructure:"repo_root"`
+ AIOutput bool `mapstructure:"ai_output"`
+ Iterations int `mapstructure:"iterations"`
+ SlowThreshold time.Duration `mapstructure:"slow_threshold"`
+ FailFast bool `mapstructure:"fail_fast"`
+ Shuffle bool `mapstructure:"shuffle_seed"`
+}
+
+// Load binds Viper to the active command's persistent flags and local flags, then unmarshals into App.
+func Load(cmd *cobra.Command) (*App, error) {
+ if cmd == nil {
+ return nil, errors.New("command is required")
+ }
+ v := viper.New()
+
+ v.SetDefault("postgres_version", DefaultPostgresVersion)
+ // Enable sparse output when stdout is not a TTY (e.g. redirected or CI).
+ v.SetDefault("ai_output", !term.IsTerminal(os.Stdout.Fd()))
+ v.SetDefault("iterations", 1)
+ v.SetDefault("slow_threshold", 30*time.Second)
+ v.SetDefault("fail_fast", false)
+ repoRoot, err := repo.RootFromWd()
+ if err != nil {
+ return nil, err
+ }
+ v.SetDefault("repo_root", repoRoot)
+
+ if err := bindPFlags(v, cmd.PersistentFlags()); err != nil {
+ return nil, err
+ }
+ if err := bindPFlags(v, cmd.Flags()); err != nil {
+ return nil, err
+ }
+
+ var conf App
+ if err := v.Unmarshal(&conf); err != nil {
+ return nil, err
+ }
+ return &conf, nil
+}
+
+func bindPFlags(v *viper.Viper, flags *pflag.FlagSet) error {
+ var err error
+ flags.VisitAll(func(f *pflag.Flag) {
+ configName := strings.ReplaceAll(f.Name, "-", "_")
+ if bindErr := v.BindPFlag(configName, f); bindErr != nil {
+ err = bindErr
+ }
+ })
+ return err
+}
diff --git a/tools/test/internal/config/config_test.go b/tools/test/internal/config/config_test.go
new file mode 100644
index 00000000000..3e69c80afcd
--- /dev/null
+++ b/tools/test/internal/config/config_test.go
@@ -0,0 +1,31 @@
+package config
+
+import (
+ "testing"
+
+ "github.com/spf13/cobra"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestLoadBindsPersistentAndLocalFlags(t *testing.T) {
+ t.Parallel()
+
+ root := &cobra.Command{Use: "root"}
+ root.PersistentFlags().String("database-url", "", "")
+ sub := &cobra.Command{
+ Use: "sub",
+ Run: func(*cobra.Command, []string) {},
+ }
+ sub.Flags().Int("iterations", 1, "")
+ root.AddCommand(sub)
+ root.SetArgs([]string{"sub", "--database-url", "postgres://example", "--iterations", "7"})
+
+ cmd, err := root.ExecuteC()
+ require.NoError(t, err)
+
+ conf, err := Load(cmd)
+ require.NoError(t, err)
+ assert.Equal(t, "postgres://example", conf.DatabaseURL)
+ assert.Equal(t, 7, conf.Iterations)
+}
diff --git a/tools/test/internal/db/db.go b/tools/test/internal/db/db.go
new file mode 100644
index 00000000000..d72c308033f
--- /dev/null
+++ b/tools/test/internal/db/db.go
@@ -0,0 +1,255 @@
+package db
+
+import (
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "time"
+
+ "github.com/testcontainers/testcontainers-go"
+ "github.com/testcontainers/testcontainers-go/modules/postgres"
+ "github.com/testcontainers/testcontainers-go/wait"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/termstyle"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+)
+
+// Handle owns the ephemeral Postgres used for a run. When the user supplied
+// CL_DATABASE_URL the container is nil and Reset/Cleanup are no-ops.
+type Handle struct {
+ container *postgres.PostgresContainer
+ conf *config.App
+}
+
+// Ensure configures CL_DATABASE_URL for child test processes. If --database-url
+// is set, that value is exported as CL_DATABASE_URL (failing if CL_DATABASE_URL is
+// already set to something else). Otherwise it starts an ephemeral Postgres
+// container, sets CL_DATABASE_URL to its connection string, runs preparetest
+// --force, and snapshots the prepared state so Reset can restore it between
+// diagnose iterations.
+func Ensure(ctx context.Context, conf *config.App) (h *Handle, err error) {
+ start := time.Now()
+
+ if conf.PostgresVersion == "" {
+ return &Handle{conf: conf}, errors.New("postgres version is required")
+ }
+
+ if conf.DatabaseURL != "" {
+ if existing := os.Getenv("CL_DATABASE_URL"); existing != "" && existing != conf.DatabaseURL {
+ return &Handle{conf: conf}, errors.New("CL_DATABASE_URL is already set to a different value than --database-url (refusing to override)")
+ }
+ if err = os.Setenv("CL_DATABASE_URL", conf.DatabaseURL); err != nil {
+ return &Handle{conf: conf}, fmt.Errorf("set CL_DATABASE_URL: %w", err)
+ }
+ if !conf.AIOutput {
+ fmt.Fprintln(os.Stdout,
+ termstyle.Muted.Render("Skipping database setup, using provided database URL: ")+
+ termstyle.Label.Render(conf.DatabaseURL))
+ }
+ return &Handle{conf: conf}, nil
+ }
+ // Intentional: Ryuk is disabled because this harness always tears down via
+ // Handle.Cleanup(); Ryuk can conflict with that lifecycle in some setups.
+ if err = os.Setenv("TESTCONTAINERS_RYUK_DISABLED", "true"); err != nil {
+ return &Handle{conf: conf}, fmt.Errorf("failed to set TESTCONTAINERS_RYUK_DISABLED environment variable: %w", err)
+ }
+
+ // Progress on stderr, same escape and TTY rules as diagnoseIteration /
+ // renderDiagnoseProgressLine (runner).
+ setupPartial := false
+ if !conf.AIOutput {
+ fmt.Fprint(os.Stderr, termstyle.Label.Render("Setting up Postgres..."))
+ setupPartial = true
+ }
+ abortSetupPartial := func() {
+ if !setupPartial {
+ return
+ }
+ fmt.Fprint(os.Stderr, "\r\033[K\n")
+ setupPartial = false
+ }
+ defer func() {
+ if err != nil {
+ abortSetupPartial()
+ }
+ }()
+
+ c, err := postgres.Run(ctx,
+ fmt.Sprintf("docker.io/postgres:%s-alpine", conf.PostgresVersion),
+ postgres.WithDatabase("chainlink_test"),
+ postgres.WithUsername("postgres"),
+ postgres.WithPassword("postgres"),
+ testcontainers.WithCmdArgs("-c", "max_connections=1000"),
+ testcontainers.WithWaitStrategy(
+ wait.ForLog("database system is ready to accept connections").
+ WithOccurrence(2).
+ WithStartupTimeout(60*time.Second)),
+ )
+ if err != nil {
+ return &Handle{conf: conf}, fmt.Errorf("postgres testcontainer: %w", err)
+ }
+
+ h = &Handle{container: c, conf: conf}
+
+ // Build the connection string for CL tests to use
+ connStr, err := c.ConnectionString(ctx, "sslmode=disable")
+ if err != nil {
+ return h, errors.Join(fmt.Errorf("connection string: %w", err), h.Cleanup())
+ }
+
+ // Set the connection string for CL tests to use
+ if err := os.Setenv("CL_DATABASE_URL", connStr); err != nil {
+ return h, errors.Join(err, h.Cleanup())
+ }
+
+ // Run preparetest --force to set up the database for tests
+ prepareOutput := bytes.NewBuffer(nil)
+ prep := exec.CommandContext(ctx, "go", "run", "./core/store/cmd/preparetest", "--force")
+ prep.Dir = conf.RepoRoot
+ prep.Env = os.Environ()
+ prep.Stdout = prepareOutput
+ prep.Stderr = prepareOutput
+ if err := prep.Run(); err != nil {
+ return h, errors.Join(fmt.Errorf("preparetest --force: %w\n%s", err, prepareOutput.String()), h.Cleanup())
+ }
+
+ // Snapshot the prepared schema so Reset can restore it quickly between iterations.
+ if err := c.Snapshot(ctx); err != nil {
+ return h, errors.Join(fmt.Errorf("snapshot prepared database: %w", err), h.Cleanup())
+ }
+
+ if !conf.AIOutput {
+ fmt.Fprint(os.Stderr, "\r\033[K")
+ fmt.Fprintln(os.Stderr,
+ termstyle.Label.Render("Setup Postgres")+" "+
+ termstyle.OK.Render("✅")+" "+
+ termstyle.Muted.Render(fmt.Sprintf("(%s)", time.Since(start).Round(time.Millisecond))))
+ setupPartial = false
+ }
+
+ return h, nil
+}
+
+// Reset restores the database to its freshly-prepared snapshot. No-op when the
+// user supplied CL_DATABASE_URL (we don't own the database).
+func (h *Handle) Reset(ctx context.Context) error {
+ if h == nil || h.container == nil {
+ return nil
+ }
+ if err := h.container.Restore(ctx); err != nil {
+ return fmt.Errorf("restore snapshot: %w", err)
+ }
+ return nil
+}
+
+// DumpDiagnostics writes postgres-state-<iteration>.md to dir with the container log
+// and key system-view snapshots for that iteration. No-op when the user
+// supplied CL_DATABASE_URL (we don't own that database).
+func (h *Handle) DumpDiagnostics(ctx context.Context, dir string, iteration int) error {
+ if h == nil || h.container == nil {
+ return nil
+ }
+
+ name := fmt.Sprintf("postgres-state-%d.md", iteration)
+ f, err := os.Create(filepath.Join(dir, name))
+ if err != nil {
+ return fmt.Errorf("create %s: %w", name, err)
+ }
+ defer f.Close()
+
+ fmt.Fprintf(f, "# Postgres State\n\nCaptured: %s\n\n", time.Now().UTC().Format(time.RFC3339))
+
+ // Container (server) log.
+ fmt.Fprint(f, "## Server Log\n\n```\n")
+ logs, logErr := h.container.Logs(ctx)
+ if logErr != nil {
+ return fmt.Errorf("fetch logs: %w", logErr)
+ }
+ defer func() {
+ if logCloseErr := logs.Close(); logCloseErr != nil {
+ // Record close errors in the diagnostics file itself rather than failing the dump.
+ fmt.Fprintf(f, "error closing logs: %v\n", logCloseErr)
+ }
+ }()
+ _, err = io.Copy(f, logs)
+ if err != nil {
+ return fmt.Errorf("copy logs: %w", err)
+ }
+ fmt.Fprint(f, "```\n\n")
+
+ type query struct {
+ heading string
+ sql string
+ }
+ queries := []query{
+ {
+ "Active Connections (pg_stat_activity)",
+ `SELECT pid, state, wait_event_type, wait_event, query_start, left(query,120) AS query ` +
+ `FROM pg_stat_activity WHERE datname='chainlink_test' ORDER BY query_start;`,
+ },
+ {
+ "Locks (pg_locks + pg_stat_activity)",
+ `SELECT l.pid, l.locktype, l.relation::regclass, l.mode, l.granted, left(a.query,80) AS query ` +
+ `FROM pg_locks l LEFT JOIN pg_stat_activity a ON a.pid=l.pid ` +
+ `WHERE l.relation IS NOT NULL ORDER BY l.granted, l.pid;`,
+ },
+ {
+ "Table Statistics (pg_stat_user_tables)",
+ `SELECT relname, seq_scan, idx_scan, n_tup_ins, n_tup_upd, n_tup_del, n_live_tup, n_dead_tup ` +
+ `FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 30;`,
+ },
+ {
+ "Database Size",
+ `SELECT pg_size_pretty(pg_database_size('chainlink_test')) AS db_size;`,
+ },
+ }
+
+ for _, q := range queries {
+ fmt.Fprintf(f, "## %s\n\n```\n", q.heading)
+ exitCode, out, execErr := h.container.Exec(ctx,
+ []string{"psql", "-U", "postgres", "-d", "chainlink_test", "-P", "pager=off", "-c", q.sql},
+ )
+ switch {
+ case execErr != nil:
+ fmt.Fprintf(f, "error: %v\n", execErr)
+ case exitCode != 0:
+ fmt.Fprintf(f, "psql exit %d\n", exitCode)
+ }
+ _, err = io.Copy(f, out)
+ if err != nil {
+ return fmt.Errorf("copy output: %w", err)
+ }
+ fmt.Fprint(f, "```\n\n")
+ }
+
+ return nil
+}
+
+// Cleanup terminates the Postgres testcontainer. Safe to call on a nil or
+// no-container Handle.
+func (h *Handle) Cleanup() error {
+ if h == nil || h.container == nil {
+ return nil
+ }
+ if !h.conf.AIOutput {
+ fmt.Fprint(os.Stderr, termstyle.Label.Render("Tearing down postgres..."))
+ }
+ termCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+ defer cancel()
+ if err := h.container.Terminate(termCtx); err != nil {
+ if !h.conf.AIOutput {
+ fmt.Fprintln(os.Stderr, " "+termstyle.Bad.Render("❌"))
+ }
+ return fmt.Errorf("error terminating postgres container, you need to terminate it manually: %w", err)
+ }
+ if !h.conf.AIOutput {
+ fmt.Fprintln(os.Stderr, " "+termstyle.OK.Render("✅"))
+ }
+ return nil
+}
diff --git a/tools/test/internal/db/db_test.go b/tools/test/internal/db/db_test.go
new file mode 100644
index 00000000000..74cddb350ec
--- /dev/null
+++ b/tools/test/internal/db/db_test.go
@@ -0,0 +1,62 @@
+package db
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+)
+
+func TestDumpDiagnosticsNilHandle(t *testing.T) {
+ t.Parallel()
+ var h *Handle
+ require.NoError(t, h.DumpDiagnostics(context.Background(), t.TempDir(), 0))
+}
+
+func TestDumpDiagnosticsNoContainer(t *testing.T) {
+ t.Parallel()
+ h := &Handle{}
+ dir := t.TempDir()
+ require.NoError(t, h.DumpDiagnostics(context.Background(), dir, 0))
+ _, err := os.Stat(filepath.Join(dir, "postgres-state-0.md"))
+ assert.ErrorIs(t, err, os.ErrNotExist)
+}
+
+func TestEnsureDatabaseURLSetsCLDatabaseURL(t *testing.T) {
+	t.Setenv("CL_DATABASE_URL", "") // set-but-empty: Ensure must treat this as unset — NOTE(review): confirm Ensure checks for non-empty, not merely present
+	want := "postgres://user@host/chainlink_test"
+	h, err := Ensure(context.Background(), &config.App{
+		PostgresVersion: "15",
+		DatabaseURL:     want,
+		AIOutput:        true, // suppress styled terminal output during tests
+	})
+	require.NoError(t, err)
+	require.NotNil(t, h)
+	assert.Equal(t, want, os.Getenv("CL_DATABASE_URL")) // Ensure is expected to export the configured URL for downstream tooling
+}
+
+func TestEnsureDatabaseURLConflictsWithEnv(t *testing.T) {
+ t.Setenv("CL_DATABASE_URL", "postgres://already/set")
+ _, err := Ensure(context.Background(), &config.App{
+ PostgresVersion: "15",
+ DatabaseURL: "postgres://other/db",
+ AIOutput: true,
+ })
+ require.Error(t, err)
+ assert.ErrorContains(t, err, "CL_DATABASE_URL")
+}
+
+func TestEnsureRequiresPostgresVersion(t *testing.T) {
+ t.Parallel()
+ _, err := Ensure(context.Background(), &config.App{
+ PostgresVersion: "",
+ AIOutput: true,
+ })
+ require.Error(t, err)
+ assert.ErrorContains(t, err, "postgres version is required")
+}
diff --git a/tools/test/internal/repo/repo.go b/tools/test/internal/repo/repo.go
new file mode 100644
index 00000000000..1381da88187
--- /dev/null
+++ b/tools/test/internal/repo/repo.go
@@ -0,0 +1,58 @@
+package repo
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+const rootModulePath = "github.com/smartcontractkit/chainlink/v2"
+
+// RootFromWd walks parents from the working directory to find the chainlink v2 repo root.
+func RootFromWd() (string, error) {
+ wd, err := os.Getwd()
+ if err != nil {
+ return "", err
+ }
+ return RootFrom(wd)
+}
+
+// RootFrom walks parents from dir until go.mod declares the chainlink v2 module.
+func RootFrom(dir string) (string, error) {
+ dir = filepath.Clean(dir)
+ for {
+ modPath := filepath.Join(dir, "go.mod")
+ data, err := os.ReadFile(modPath)
+ if err == nil {
+ if mod, ok := modulePathFromGoMod(string(data)); ok && mod == rootModulePath {
+ // Exact root module only (not tools/test or other nested modules).
+ return dir, nil
+ }
+ }
+ parent := filepath.Dir(dir)
+ if parent == dir {
+ return "", fmt.Errorf("chainlink repo root not found (no go.mod with module %s) starting from %q", rootModulePath, dir)
+ }
+ dir = parent
+ }
+}
+
+// modulePathFromGoMod returns the module path from the first `module` directive,
+// skipping leading comments and blank lines (go.mod may legally start with either).
+func modulePathFromGoMod(data string) (path string, ok bool) {
+	for _, raw := range strings.Split(data, "\n") {
+		line := strings.TrimSpace(raw)
+		if line == "" || strings.HasPrefix(line, "//") {
+			continue // skip blanks and full-line comments preceding the directive
+		}
+		if i := strings.Index(line, "//"); i >= 0 {
+			line = strings.TrimSpace(line[:i]) // strip a trailing inline comment
+		}
+		fields := strings.Fields(line)
+		if len(fields) >= 2 && fields[0] == "module" {
+			return fields[1], true // NOTE(review): a quoted path (module "x") keeps its quotes — fine for exact-match callers; confirm before reuse
+		}
+	}
+	return "", false
+}
diff --git a/tools/test/internal/repo/repo_test.go b/tools/test/internal/repo/repo_test.go
new file mode 100644
index 00000000000..8a55b4f55f4
--- /dev/null
+++ b/tools/test/internal/repo/repo_test.go
@@ -0,0 +1,73 @@
+package repo
+
+import (
+	"os"
+	"path/filepath"
+
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestRootFrom_chainlinkModule(t *testing.T) {
+	// This test runs from package dir; walk up to module root (tools/test), then repo root.
+	here, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// internal/repo -> internal -> tools/test
+	testMod := filepath.Clean(filepath.Join(here, "..", ".."))
+	root, err := RootFrom(testMod)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The checkout directory name is arbitrary (forks, worktrees, CI scratch
+	// dirs), so assert on the module declared in root's go.mod, not the name.
+	data, err := os.ReadFile(filepath.Join(root, "go.mod"))
+	require.NoError(t, err)
+	if mod, ok := modulePathFromGoMod(string(data)); !ok || mod != rootModulePath {
+		t.Fatalf("root %q declares module %q, want %s", root, mod, rootModulePath)
+	}
+}
+
+func TestRootFrom_skipsLeadingCommentsAndBlankLinesInGoMod(t *testing.T) {
+ t.Parallel()
+ root := t.TempDir()
+ goMod := `// workspace root
+
+` + "\n\n" + `module github.com/smartcontractkit/chainlink/v2
+
+go 1.26
+`
+ require.NoError(t, os.WriteFile(filepath.Join(root, "go.mod"), []byte(goMod), 0600))
+ nested := filepath.Join(root, "a", "b")
+ require.NoError(t, os.MkdirAll(nested, 0700))
+
+ got, err := RootFrom(nested)
+ require.NoError(t, err)
+ require.Equal(t, root, got)
+}
+
+func TestModulePathFromGoMod(t *testing.T) {
+ t.Parallel()
+ tests := []struct {
+ name string
+ data string
+ want string
+ wantOK bool
+ }{
+ {name: "leading comment", data: "// hi\n\nmodule github.com/smartcontractkit/chainlink/v2\n", want: rootModulePath, wantOK: true},
+ {name: "no module", data: "go 1.26\n", wantOK: false},
+ {name: "inline comment", data: "module github.com/smartcontractkit/chainlink/v2 // chainlink\n", want: rootModulePath, wantOK: true},
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+ got, ok := modulePathFromGoMod(tc.data)
+ require.Equal(t, tc.wantOK, ok)
+ if tc.wantOK {
+ require.Equal(t, tc.want, got)
+ }
+ })
+ }
+}
diff --git a/tools/test/internal/runner/analyze.go b/tools/test/internal/runner/analyze.go
new file mode 100644
index 00000000000..4d7f8ba6db0
--- /dev/null
+++ b/tools/test/internal/runner/analyze.go
@@ -0,0 +1,839 @@
+package runner
+
+import (
+ "bufio"
+ "encoding/csv"
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "slices"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "charm.land/lipgloss/v2"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/termstyle"
+)
+
+// timeoutPanic appears in go test -json output when the test binary's
+// -timeout fires. It may be attached to a running test or to the package.
+const timeoutPanic = "panic: test timed out"
+
+// TestEvent mirrors cmd/internal/test2json's TestEvent; only fields we need.
+type TestEvent struct {
+ Action string `json:"Action"`
+ Package string `json:"Package"`
+ Test string `json:"Test"`
+ Elapsed float64 `json:"Elapsed"`
+ Output string `json:"Output"`
+}
+
+type testKey struct {
+ Package string
+ Test string
+}
+
+type aggregate struct {
+ passes int
+ fails int
+ skips int
+ maxElapsed time.Duration
+ timedOut bool
+ iterations map[int]struct{}
+ failedIters map[int]bool
+ timeoutIters map[int]bool
+ skipIters map[int]bool
+ outputs map[int]*strings.Builder
+ elapseds []time.Duration
+}
+
+// TestEntry is a single row in the analysis report.
+type TestEntry struct {
+ Package string `json:"package"`
+ Test string `json:"test,omitempty"`
+ Runs int `json:"runs"`
+ Successes int `json:"successes"`
+ Fails int `json:"fails"`
+ Skips int `json:"skips"`
+ Timeouts int `json:"timeouts"`
+ MinElapsed time.Duration `json:"min_elapsed"`
+ MaxElapsed time.Duration `json:"max_elapsed"`
+ P50Elapsed time.Duration `json:"p50_elapsed"`
+ LogFiles []string `json:"log_files,omitempty"`
+}
+
+// IterationSummary captures high-level stats for a single diagnose iteration.
+// Duration and ShuffleSeed are populated by the runner after analysis.
+type IterationSummary struct {
+ Index int `json:"index"`
+ Duration time.Duration `json:"duration,omitempty"`
+ Result string `json:"result"` // "pass", "fail", "timeout"
+ FailingTests []string `json:"failing_tests,omitempty"`
+ ShuffleSeed int64 `json:"shuffle_seed,omitempty"`
+}
+
+// Report classifies tests across iterations of a diagnose run.
+type Report struct {
+ Iterations int `json:"iterations"`
+ SlowThreshold time.Duration `json:"slow_threshold"`
+ IterationSummaries []IterationSummary `json:"iteration_summaries,omitempty"`
+ Flakes []TestEntry `json:"flakes,omitempty"`
+ Failures []TestEntry `json:"failures,omitempty"`
+ Timeouts []TestEntry `json:"timeouts,omitempty"`
+ Slow []TestEntry `json:"slow,omitempty"`
+}
+
+// LogMap maps (package,test) → iteration → raw interleaved output.
+// Returned alongside Report so callers can write per-test log files without
+// coupling the parser to the filesystem.
+type LogMap map[testKey]map[int]string
+
+// Analyze reads per-iteration test2json streams and classifies tests.
+// Malformed lines are silently skipped (go test can interleave non-JSON).
+func Analyze(iterations []io.Reader, slowThreshold time.Duration) (*Report, LogMap, error) {
+ aggs := map[testKey]*aggregate{}
+ newAgg := func() *aggregate {
+ return &aggregate{
+ iterations: map[int]struct{}{},
+ failedIters: map[int]bool{},
+ timeoutIters: map[int]bool{},
+ skipIters: map[int]bool{},
+ outputs: map[int]*strings.Builder{},
+ }
+ }
+
+ for i, r := range iterations {
+ // Line-based scan + per-line Unmarshal: go test -json can interleave
+ // non-JSON output (stderr warnings, build errors); streaming decoder
+ // can't recover from those. Skip unparsable lines silently.
+ scanner := bufio.NewScanner(r)
+ for scanner.Scan() {
+ line := scanner.Bytes()
+ if len(line) == 0 || line[0] != '{' {
+ continue
+ }
+ var ev TestEvent
+ if err := json.Unmarshal(line, &ev); err != nil {
+ continue
+ }
+ key := testKey{Package: ev.Package, Test: ev.Test}
+ a := aggs[key]
+ if a == nil {
+ a = newAgg()
+ aggs[key] = a
+ }
+ switch ev.Action {
+ case "pass":
+ a.passes++
+ a.iterations[i] = struct{}{}
+ d := seconds(ev.Elapsed)
+ a.elapseds = append(a.elapseds, d)
+ if d > a.maxElapsed {
+ a.maxElapsed = d
+ }
+ case "fail":
+ a.fails++
+ a.iterations[i] = struct{}{}
+ a.failedIters[i] = true
+ d := seconds(ev.Elapsed)
+ a.elapseds = append(a.elapseds, d)
+ if d > a.maxElapsed {
+ a.maxElapsed = d
+ }
+ case "skip":
+ a.skips++
+ a.iterations[i] = struct{}{}
+ a.skipIters[i] = true
+ d := seconds(ev.Elapsed)
+ a.elapseds = append(a.elapseds, d)
+ case "output":
+ if strings.Contains(ev.Output, timeoutPanic) {
+ a.timedOut = true
+ a.iterations[i] = struct{}{}
+ a.timeoutIters[i] = true
+ }
+ buf := a.outputs[i]
+ if buf == nil {
+ buf = &strings.Builder{}
+ a.outputs[i] = buf
+ }
+ buf.WriteString(ev.Output)
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, nil, fmt.Errorf("reading iteration %d: %w", i, err)
+ }
+ }
+
+ reattributeTimeouts(aggs, newAgg)
+
+ rep := &Report{
+ Iterations: len(iterations),
+ SlowThreshold: slowThreshold,
+ }
+
+ for key, a := range aggs {
+ minE, p50 := stats(a.elapseds)
+ base := TestEntry{
+ Package: key.Package,
+ Test: key.Test,
+ Runs: len(a.iterations),
+ Successes: a.passes,
+ Fails: a.fails,
+ Skips: a.skips,
+ Timeouts: len(a.timeoutIters),
+ MinElapsed: minE,
+ MaxElapsed: a.maxElapsed,
+ P50Elapsed: p50,
+ }
+ switch {
+ case a.timedOut:
+ rep.Timeouts = append(rep.Timeouts, base)
+ case key.Test == "" && a.fails == 0:
+ // Package-level pass summary or benign events (no failing tests).
+ case key.Test == "" && a.fails > 0:
+ // Build failures, TestMain/init failures: Test is empty in go test -json.
+ if a.passes > 0 {
+ rep.Flakes = append(rep.Flakes, base)
+ } else {
+ rep.Failures = append(rep.Failures, base)
+ }
+ case a.passes > 0 && a.fails > 0:
+ rep.Flakes = append(rep.Flakes, base)
+ case a.fails > 0 && a.passes == 0:
+ rep.Failures = append(rep.Failures, base)
+ }
+ if !a.timedOut && key.Test != "" && slowThreshold > 0 && a.maxElapsed > slowThreshold {
+ rep.Slow = append(rep.Slow, base)
+ }
+ }
+
+ sortEntries(rep.Flakes)
+ sortEntries(rep.Failures)
+ sortEntries(rep.Timeouts)
+ sortEntries(rep.Slow)
+
+ // Build per-iteration summaries from aggregated failure/timeout data.
+ iterFails := make(map[int][]string, len(iterations))
+ iterTimedOut := make(map[int]bool, len(iterations))
+ for key, a := range aggs {
+ for i := range a.timeoutIters {
+ iterTimedOut[i] = true
+ }
+ failName := key.Test
+ if failName == "" {
+ failName = key.Package
+ }
+ for i := range a.failedIters {
+ iterFails[i] = append(iterFails[i], failName)
+ }
+ }
+ summaries := make([]IterationSummary, len(iterations))
+ for i := range iterations {
+ s := IterationSummary{Index: i}
+ switch {
+ case iterTimedOut[i]:
+ s.Result = "timeout"
+ case len(iterFails[i]) > 0:
+ s.Result = "fail"
+ sort.Strings(iterFails[i])
+ s.FailingTests = iterFails[i]
+ default:
+ s.Result = "pass"
+ }
+ summaries[i] = s
+ }
+ rep.IterationSummaries = summaries
+
+ logs := buildLogMap(aggs)
+ return rep, logs, nil
+}
+
+// AnalyzeResults opens every `iteration-*.log.jsonl` file in resultsDir, in
+// numeric-iteration order, and delegates to Analyze.
+func AnalyzeResults(resultsDir string, slowThreshold time.Duration) (*Report, LogMap, error) {
+ matches, err := filepath.Glob(filepath.Join(resultsDir, "iteration-*.log.jsonl"))
+ if err != nil {
+ return nil, nil, err
+ }
+ sort.Slice(matches, func(i, j int) bool {
+ return iterNumber(matches[i]) < iterNumber(matches[j])
+ })
+ readers := make([]io.Reader, 0, len(matches))
+ files := make([]*os.File, 0, len(matches))
+ defer func() {
+ for _, f := range files {
+ f.Close()
+ }
+ }()
+ for _, p := range matches {
+ f, err := os.Open(p)
+ if err != nil {
+ return nil, nil, err
+ }
+ files = append(files, f)
+ readers = append(readers, f)
+ }
+ return Analyze(readers, slowThreshold)
+}
+
+// WriteReport writes the report as pretty JSON to /report.json.
+func WriteReport(resultsDir string, rep *Report) error {
+ b, err := json.MarshalIndent(rep, "", " ")
+ if err != nil {
+ return err
+ }
+ return os.WriteFile(filepath.Join(resultsDir, "report.json"), b, 0600)
+}
+
+// WriteLogFiles writes per-test per-iteration log files under /logs/
+// for flagged tests and populates each flagged TestEntry's LogFiles slice with
+// paths relative to resultsDir. One file is written for each iteration that has
+// any captured output in logs (including iterations that passed but produced stderr
+// or other output captured into the aggregate).
+func WriteLogFiles(resultsDir string, rep *Report, logs LogMap) error {
+ if rep == nil {
+ return nil
+ }
+ logsDir := filepath.Join(resultsDir, "logs")
+ if err := os.MkdirAll(logsDir, 0700); err != nil {
+ return err
+ }
+ groups := [][]TestEntry{rep.Flakes, rep.Failures, rep.Timeouts, rep.Slow}
+ for gi, group := range groups {
+ for ei, entry := range group {
+ key := testKey{Package: entry.Package, Test: entry.Test}
+ m, ok := logs[key]
+ if !ok || len(m) == 0 {
+ continue
+ }
+ iterations := make([]int, 0, len(m))
+ for it, out := range m {
+ if out != "" {
+ iterations = append(iterations, it)
+ }
+ }
+ sort.Ints(iterations)
+ paths := make([]string, 0, len(iterations))
+ for _, it := range iterations {
+ out := m[it]
+ name := fmt.Sprintf("%s__%s__iter-%d.log",
+ sanitize(shortPackage(entry.Package)), sanitize(entry.Test), it)
+ abs := filepath.Join(logsDir, name)
+ if err := os.WriteFile(abs, []byte(out), 0600); err != nil {
+ return err
+ }
+ paths = append(paths, filepath.Join("logs", name))
+ }
+ if len(paths) > 0 {
+ groups[gi][ei].LogFiles = paths
+ }
+ }
+ }
+ rep.Flakes = groups[0]
+ rep.Failures = groups[1]
+ rep.Timeouts = groups[2]
+ rep.Slow = groups[3]
+ return nil
+}
+
+// WriteCSV writes a human-readable CSV of every flagged test
+// (Flakes ∪ Failures ∪ Timeouts ∪ Slow) to /report.csv.
+// Rows sort worst-first: (timeouts+fails) desc, then package, then test.
+func WriteCSV(resultsDir string, rep *Report) error {
+ if rep == nil {
+ return nil
+ }
+ rows := flaggedRows(rep)
+ f, err := os.Create(filepath.Join(resultsDir, "report.csv"))
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ w := csv.NewWriter(f)
+ if err := w.Write([]string{
+ "package", "test", "category",
+ "runs", "successes", "fails", "skips", "timeouts",
+ "min", "max", "p50",
+ }); err != nil {
+ return err
+ }
+ for _, r := range rows {
+ if err := w.Write(r.record()); err != nil {
+ return err
+ }
+ }
+ w.Flush()
+ return w.Error()
+}
+
+type csvRow struct {
+ Package string
+ Test string
+ Category string
+ Runs int
+ Successes int
+ Fails int
+ Skips int
+ Timeouts int
+ Min time.Duration
+ Max time.Duration
+ P50 time.Duration
+}
+
+func (r csvRow) record() []string {
+ return []string{
+ r.Package, r.Test, r.Category,
+ strconv.Itoa(r.Runs),
+ strconv.Itoa(r.Successes),
+ strconv.Itoa(r.Fails),
+ strconv.Itoa(r.Skips),
+ strconv.Itoa(r.Timeouts),
+ r.Min.Round(time.Millisecond).String(),
+ r.Max.Round(time.Millisecond).String(),
+ r.P50.Round(time.Millisecond).String(),
+ }
+}
+
+// flaggedRows builds the deduped CSV row set. A test in both Flakes and Slow
+// is categorized as "flake" (primary signal wins over "slow").
+func flaggedRows(rep *Report) []csvRow {
+ seen := map[testKey]struct{}{}
+ var rows []csvRow
+ add := func(entries []TestEntry, cat string) {
+ for _, e := range entries {
+ k := testKey{Package: e.Package, Test: e.Test}
+ if _, ok := seen[k]; ok {
+ continue
+ }
+ seen[k] = struct{}{}
+ rows = append(rows, csvRow{
+ Package: e.Package,
+ Test: e.Test,
+ Category: cat,
+ Runs: e.Runs,
+ Successes: e.Successes,
+ Fails: e.Fails,
+ Skips: e.Skips,
+ Timeouts: e.Timeouts,
+ Min: e.MinElapsed,
+ Max: e.MaxElapsed,
+ P50: e.P50Elapsed,
+ })
+ }
+ }
+ // Order matters: first category wins on dup.
+ add(rep.Timeouts, "timeout")
+ add(rep.Failures, "failure")
+ add(rep.Flakes, "flake")
+ add(rep.Slow, "slow")
+
+ sort.SliceStable(rows, func(i, j int) bool {
+ li := rows[i].Timeouts + rows[i].Fails
+ lj := rows[j].Timeouts + rows[j].Fails
+ if li != lj {
+ return li > lj
+ }
+ if rows[i].Package != rows[j].Package {
+ return rows[i].Package < rows[j].Package
+ }
+ return rows[i].Test < rows[j].Test
+ })
+ return rows
+}
+
+// PrintSummary writes a human-readable summary: headings and tests grouped by
+// package under a common path prefix (tree). Broken/Flaky/Slow test lines use
+// red / yellow / grey; package path rows are muted.
+// Broken and Timeout entries are sorted alphabetically by package then test.
+// Flaky entries are sorted by fails/runs (desc), then fails (desc), then name.
+// Slow entries are sorted by max runtime (desc), then name.
+func PrintSummary(w io.Writer, rep *Report) {
+ if rep == nil {
+ return
+ }
+
+ if n := len(rep.Failures); n > 0 {
+ fails := append([]TestEntry(nil), rep.Failures...)
+ sort.Slice(fails, func(i, j int) bool {
+ if fails[i].Package != fails[j].Package {
+ return fails[i].Package < fails[j].Package
+ }
+ return fails[i].Test < fails[j].Test
+ })
+ printSummarySectionTree(w, "Broken", n, fails, termstyle.Bad, termstyle.Bad, formatBrokenTestLine)
+ }
+
+ if n := len(rep.Flakes); n > 0 {
+ flakes := append([]TestEntry(nil), rep.Flakes...)
+ sort.Slice(flakes, func(i, j int) bool {
+ ri := flakeFailRatio(flakes[i])
+ rj := flakeFailRatio(flakes[j])
+ if ri != rj {
+ return ri > rj
+ }
+ if flakes[i].Fails != flakes[j].Fails {
+ return flakes[i].Fails > flakes[j].Fails
+ }
+ return entryFQName(flakes[i]) < entryFQName(flakes[j])
+ })
+ printSummarySectionTree(w, "Flaky", n, flakes, termstyle.Flaky, termstyle.Flaky, formatFlakyTestLine)
+ }
+
+ if n := len(rep.Timeouts); n > 0 {
+ touts := append([]TestEntry(nil), rep.Timeouts...)
+ sort.Slice(touts, func(i, j int) bool {
+ if touts[i].Package != touts[j].Package {
+ return touts[i].Package < touts[j].Package
+ }
+ return touts[i].Test < touts[j].Test
+ })
+ printSummarySectionTree(w, "Timeout", n, touts, termstyle.Accent, termstyle.Accent, formatTimeoutTestLine)
+ }
+
+ if n := len(rep.Slow); n > 0 {
+ slow := append([]TestEntry(nil), rep.Slow...)
+ sort.Slice(slow, func(i, j int) bool {
+ if slow[i].MaxElapsed != slow[j].MaxElapsed {
+ return slow[i].MaxElapsed > slow[j].MaxElapsed
+ }
+ if slow[i].Package != slow[j].Package {
+ return slow[i].Package < slow[j].Package
+ }
+ return slow[i].Test < slow[j].Test
+ })
+ printSummarySectionTree(w, "Slow", n, slow, termstyle.Muted, termstyle.Muted, formatSlowTestLine)
+ }
+}
+
+func formatBrokenTestLine(e TestEntry) string {
+ if e.Test == "" {
+ return e.Package
+ }
+ return e.Test
+}
+
+func formatFlakyTestLine(e TestEntry) string {
+ runs := e.Runs
+ if runs < 1 {
+ runs = e.Successes + e.Fails
+ }
+ if runs < 1 {
+ runs = 1
+ }
+ if e.Test == "" {
+ return fmt.Sprintf("%s (%d/%d)", e.Package, e.Fails, runs)
+ }
+ return fmt.Sprintf("%s (%d/%d)", e.Test, e.Fails, runs)
+}
+
+func formatTimeoutTestLine(e TestEntry) string {
+ if e.Test == "" {
+ return e.Package
+ }
+ return e.Test
+}
+
+func formatSlowTestLine(e TestEntry) string {
+ if e.Test == "" {
+ return fmt.Sprintf("%s %s", e.Package, e.MaxElapsed.Round(time.Millisecond))
+ }
+ return fmt.Sprintf("%s %s", e.Test, e.MaxElapsed.Round(time.Millisecond))
+}
+
+// pipeBranch returns a tree prefix: depth 1 -> "|-- ", depth 2 -> "|---- ", etc.
+func pipeBranch(depth int) string {
+ if depth < 1 {
+ return ""
+ }
+ return "|" + strings.Repeat("-", 2*depth) + " "
+}
+
+// longestCommonPathPrefix returns the longest shared prefix ending at a '/'
+// so grouped packages can share one root line. Empty if no '/' in common.
+func longestCommonPathPrefix(paths []string) string {
+ if len(paths) == 0 {
+ return ""
+ }
+ p := append([]string(nil), paths...)
+ sort.Strings(p)
+ first, last := p[0], p[len(p)-1]
+ cmpLen := min(len(last), len(first))
+ i := 0
+ for i < cmpLen && first[i] == last[i] {
+ i++
+ }
+ common := first[:i]
+ if j := strings.LastIndex(common, "/"); j >= 0 {
+ return common[:j+1]
+ }
+ return ""
+}
+
+func printSummarySectionTree(w io.Writer, title string, n int, entries []TestEntry, headingStyle, testStyle lipgloss.Style, formatTest func(TestEntry) string) {
+ fmt.Fprintln(w, headingStyle.Render(fmt.Sprintf("%s (%d)", title, n)))
+
+ byPkg := make(map[string][]TestEntry)
+ var pkgs []string
+ seen := map[string]struct{}{}
+ for _, e := range entries {
+ if _, ok := seen[e.Package]; !ok {
+ seen[e.Package] = struct{}{}
+ pkgs = append(pkgs, e.Package)
+ }
+ byPkg[e.Package] = append(byPkg[e.Package], e)
+ }
+ sort.Strings(pkgs)
+ for _, pkg := range pkgs {
+ sort.Slice(byPkg[pkg], func(i, j int) bool { return byPkg[pkg][i].Test < byPkg[pkg][j].Test })
+ }
+
+ lcp := longestCommonPathPrefix(pkgs)
+ if lcp == "" && len(pkgs) > 0 {
+ lcp = pkgs[0]
+ if j := strings.LastIndex(lcp, "/"); j >= 0 {
+ lcp = lcp[:j+1]
+ } else {
+ lcp = ""
+ }
+ }
+
+ if lcp != "" {
+ fmt.Fprintln(w, termstyle.Muted.Render("- "+lcp))
+ }
+
+ for _, pkg := range pkgs {
+ suffix := strings.TrimPrefix(pkg, lcp)
+ suffix = strings.TrimPrefix(suffix, "/")
+ segments := strings.Split(suffix, "/")
+ var nonEmpty []string
+ for _, s := range segments {
+ if s != "" {
+ nonEmpty = append(nonEmpty, s)
+ }
+ }
+ depth := 0
+ for _, seg := range nonEmpty {
+ depth++
+ fmt.Fprintln(w, termstyle.Muted.Render(pipeBranch(depth)+seg+"/"))
+ }
+ testDepth := len(nonEmpty) + 1
+ if len(nonEmpty) == 0 {
+ testDepth = 1
+ }
+ for _, e := range byPkg[pkg] {
+ line := pipeBranch(testDepth) + formatTest(e)
+ fmt.Fprintln(w, testStyle.Render(line))
+ }
+ }
+ fmt.Fprintln(w)
+}
+
+func entryFQName(e TestEntry) string {
+ if e.Test == "" {
+ return e.Package
+ }
+ return e.Package + "." + e.Test
+}
+
+func flakeFailRatio(e TestEntry) float64 {
+ runs := e.Runs
+ if runs < 1 {
+ runs = e.Successes + e.Fails
+ }
+ if runs < 1 {
+ return 0
+ }
+ return float64(e.Fails) / float64(runs)
+}
+
+// reattributeTimeouts fixes the go-test-json quirk where a `panic: test timed out`
+// is attached to whichever test most recently emitted events rather than the
+// actually-stuck one. The real culprits are listed in the panic's
+// "running tests:" block — move the timeout mark (and the captured stack
+// trace) onto those tests.
+func reattributeTimeouts(aggs map[testKey]*aggregate, newAgg func() *aggregate) {
+ keys := make([]testKey, 0, len(aggs))
+ for k := range aggs {
+ keys = append(keys, k)
+ }
+ for _, key := range keys {
+ a := aggs[key]
+ if !a.timedOut {
+ continue
+ }
+ for i := range a.timeoutIters {
+ buf := a.outputs[i]
+ if buf == nil {
+ continue
+ }
+ output := buf.String()
+ names := parseRunningTests(output)
+ if len(names) == 0 {
+ continue
+ }
+ if slices.Contains(names, key.Test) {
+ continue
+ }
+ delete(a.timeoutIters, i)
+ if len(a.timeoutIters) == 0 {
+ a.timedOut = false
+ }
+ for _, name := range names {
+ nk := testKey{Package: key.Package, Test: name}
+ na := aggs[nk]
+ if na == nil {
+ na = newAgg()
+ aggs[nk] = na
+ }
+ na.timedOut = true
+ na.timeoutIters[i] = true
+ na.iterations[i] = struct{}{}
+ if na.outputs[i] == nil {
+ na.outputs[i] = &strings.Builder{}
+ }
+ na.outputs[i].WriteString(output)
+ }
+ }
+ }
+}
+
+// parseRunningTests extracts test names from a `panic: test timed out` block:
+//
+// running tests:
+// TestName (5s)
+// TestOther/sub (4s)
+func parseRunningTests(output string) []string {
+ const marker = "running tests:"
+ _, tail, found := strings.Cut(output, marker)
+ if !found {
+ return nil
+ }
+ var names []string
+ for line := range strings.SplitSeq(tail, "\n") {
+ trim := strings.TrimLeft(line, "\t ")
+ if trim == "" {
+ if len(names) == 0 {
+ continue
+ }
+ break
+ }
+ open := strings.LastIndex(trim, " (")
+ if open < 0 || !strings.HasSuffix(trim, ")") {
+ break
+ }
+ name := trim[:open]
+ if name == "" {
+ break
+ }
+ names = append(names, name)
+ }
+ return names
+}
+
+// buildLogMap returns the raw per-iteration output for every (pkg, test) that
+// has any output recorded. Callers use this to write per-test log files.
+func buildLogMap(aggs map[testKey]*aggregate) LogMap {
+ out := LogMap{}
+ for k, a := range aggs {
+ if len(a.outputs) == 0 {
+ continue
+ }
+ m := map[int]string{}
+ for i, buf := range a.outputs {
+ if buf != nil && buf.Len() > 0 {
+ m[i] = buf.String()
+ }
+ }
+ if len(m) > 0 {
+ out[k] = m
+ }
+ }
+ return out
+}
+
+// stats computes min and p50 from a sample of durations.
+// Returns (0, 0) for an empty sample.
+func stats(samples []time.Duration) (minDur, p50 time.Duration) {
+ if len(samples) == 0 {
+ return 0, 0
+ }
+ sorted := append([]time.Duration(nil), samples...)
+ slices.Sort(sorted)
+ minDur = sorted[0]
+ n := len(sorted)
+ if n%2 == 1 {
+ p50 = sorted[n/2]
+ } else {
+ p50 = (sorted[n/2-1] + sorted[n/2]) / 2
+ }
+ return minDur, p50
+}
+
+// shortPackage keeps the last two path segments of a Go import path so log
+// filenames stay under the OS NAME_MAX (255 on most filesystems). Deeply
+// nested packages like github.com/.../core/services/ocr2/plugins/llo collapse
+// to plugins/llo.
+func shortPackage(pkg string) string {
+ if pkg == "" {
+ return ""
+ }
+ parts := strings.Split(pkg, "/")
+ if len(parts) <= 2 {
+ return pkg
+ }
+ return strings.Join(parts[len(parts)-2:], "/")
+}
+
+// sanitize turns a package path or test name into a filename-safe token.
+// Replaces path separators and other hostile characters with '_'.
+func sanitize(s string) string {
+ if s == "" {
+ return "_"
+ }
+ var b strings.Builder
+ b.Grow(len(s))
+ for _, r := range s {
+ switch {
+ case r >= 'a' && r <= 'z',
+ r >= 'A' && r <= 'Z',
+ r >= '0' && r <= '9',
+ r == '-', r == '.':
+ b.WriteRune(r)
+ default:
+ b.WriteRune('_')
+ }
+ }
+ return b.String()
+}
+
+func seconds(f float64) time.Duration {
+	return time.Duration(f * float64(time.Second)) // go test -json Elapsed is float seconds; truncates below 1ns
+}
+
+func sortEntries(entries []TestEntry) {
+ sort.Slice(entries, func(i, j int) bool {
+ if entries[i].Package != entries[j].Package {
+ return entries[i].Package < entries[j].Package
+ }
+ return entries[i].Test < entries[j].Test
+ })
+}
+
+// iterNumber parses N from "iteration-N.log.jsonl". Returns -1 for any
+// malformed or empty index so such files sort first and never collide with
+// real iteration 0 (previously "" parsed as 0; huge values overflowed).
+func iterNumber(path string) int {
+	base := filepath.Base(path)
+	base = strings.TrimPrefix(base, "iteration-")
+	base = strings.TrimSuffix(base, ".log.jsonl")
+	n, err := strconv.Atoi(base)
+	if err != nil || n < 0 {
+		return -1
+	}
+	return n
+}
diff --git a/tools/test/internal/runner/analyze_files_test.go b/tools/test/internal/runner/analyze_files_test.go
new file mode 100644
index 00000000000..032260241ca
--- /dev/null
+++ b/tools/test/internal/runner/analyze_files_test.go
@@ -0,0 +1,181 @@
+package runner
+
+import (
+ "encoding/csv"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestWriteLogFiles(t *testing.T) {
+ t.Parallel()
+
+ iter := `{"Action":"output","Package":"github.com/foo/bar","Test":"TestFail","Output":"boom\n"}
+{"Action":"fail","Package":"github.com/foo/bar","Test":"TestFail","Elapsed":0.1}
+`
+ dir := t.TempDir()
+ rep, logs, err := Analyze(readers(iter), 30*time.Second)
+ require.NoError(t, err)
+ require.Len(t, rep.Failures, 1)
+
+ require.NoError(t, WriteLogFiles(dir, rep, logs))
+
+ // log_files path sanitization + existence
+ require.Len(t, rep.Failures[0].LogFiles, 1)
+ rel := rep.Failures[0].LogFiles[0]
+ assert.Equal(t, "logs/foo_bar__TestFail__iter-0.log", rel)
+ b, err := os.ReadFile(filepath.Join(dir, rel))
+ require.NoError(t, err)
+ assert.Equal(t, "boom\n", string(b))
+}
+
+func TestWriteLogFilesWritesEachIterationWithOutput(t *testing.T) {
+ t.Parallel()
+
+ // Iter 0 fails with output, iter 1 passes with output — both iterations
+ // have captured output, so both get a log file for this flagged flake.
+ iters := []string{
+ `{"Action":"output","Package":"p","Test":"T","Output":"fail-log\n"}
+{"Action":"fail","Package":"p","Test":"T","Elapsed":0.01}
+`,
+ `{"Action":"output","Package":"p","Test":"T","Output":"ok-log\n"}
+{"Action":"pass","Package":"p","Test":"T","Elapsed":0.01}
+`,
+ }
+ dir := t.TempDir()
+ rep, logs, err := Analyze(readers(iters...), 30*time.Second)
+ require.NoError(t, err)
+ require.Len(t, rep.Flakes, 1)
+
+ require.NoError(t, WriteLogFiles(dir, rep, logs))
+
+ require.Len(t, rep.Flakes[0].LogFiles, 2)
+ // Iteration order in report must be stable (sorted by iteration index).
+ assert.Equal(t,
+ []string{"logs/p__T__iter-0.log", "logs/p__T__iter-1.log"},
+ rep.Flakes[0].LogFiles)
+
+ b0, err := os.ReadFile(filepath.Join(dir, "logs/p__T__iter-0.log"))
+ require.NoError(t, err)
+ assert.Equal(t, "fail-log\n", string(b0))
+
+ b1, err := os.ReadFile(filepath.Join(dir, "logs/p__T__iter-1.log"))
+ require.NoError(t, err)
+ assert.Equal(t, "ok-log\n", string(b1))
+}
+
+func TestWriteLogFilesNoLogsForNonFlaggedTests(t *testing.T) {
+ t.Parallel()
+
+ // A clean pass is not flagged → no entry exists → no file written.
+ iter := `{"Action":"output","Package":"p","Test":"T","Output":"hi\n"}
+{"Action":"pass","Package":"p","Test":"T","Elapsed":0.01}
+`
+ dir := t.TempDir()
+ rep, logs, err := Analyze(readers(iter), 30*time.Second)
+ require.NoError(t, err)
+ assert.Empty(t, rep.Flakes)
+ assert.Empty(t, rep.Failures)
+ assert.Empty(t, rep.Timeouts)
+
+ require.NoError(t, WriteLogFiles(dir, rep, logs))
+
+ entries, _ := os.ReadDir(filepath.Join(dir, "logs"))
+ assert.Empty(t, entries, "no log files should be written for a clean-pass test")
+}
+
+func TestWriteCSV(t *testing.T) {
+ t.Parallel()
+
+ // Scenario: one flake, one failure, one timeout, one slow.
+ iters := []string{
+ // iter 0
+ `{"Action":"fail","Package":"pkg/flake","Test":"TestFlake","Elapsed":0.1}
+{"Action":"fail","Package":"pkg/fail","Test":"TestDead","Elapsed":0.2}
+{"Action":"output","Package":"pkg/to","Test":"TestStuck","Output":"panic: test timed out after 5s\n"}
+{"Action":"fail","Package":"pkg/to","Test":"TestStuck","Elapsed":5.0}
+{"Action":"pass","Package":"pkg/slow","Test":"TestSlow","Elapsed":45.0}
+`,
+ // iter 1
+ `{"Action":"pass","Package":"pkg/flake","Test":"TestFlake","Elapsed":0.08}
+{"Action":"fail","Package":"pkg/fail","Test":"TestDead","Elapsed":0.25}
+`,
+ }
+ dir := t.TempDir()
+ rep, _, err := Analyze(readers(iters...), 30*time.Second)
+ require.NoError(t, err)
+ require.NoError(t, WriteCSV(dir, rep))
+
+ f, err := os.Open(filepath.Join(dir, "report.csv"))
+ require.NoError(t, err)
+ defer f.Close()
+ records, err := csv.NewReader(f).ReadAll()
+ require.NoError(t, err)
+
+ require.GreaterOrEqual(t, len(records), 5, "header + 4 rows")
+ assert.Equal(t, []string{
+ "package", "test", "category",
+ "runs", "successes", "fails", "skips", "timeouts",
+ "min", "max", "p50",
+ }, records[0])
+
+ // Worst-first: fails=2 (pkg/fail.TestDead) before timeouts=1 w/ fails=1 (pkg/to.TestStuck)
+ // before fails=1 (pkg/flake.TestFlake) before slow (fails=0).
+ rows := records[1:]
+ categories := make([]string, 0, len(rows))
+ for _, r := range rows {
+ categories = append(categories, r[2])
+ }
+ // failure (fails=2) first
+ assert.Equal(t, "failure", rows[0][2])
+ assert.Equal(t, "pkg/fail", rows[0][0])
+ // slow last
+ assert.Equal(t, "slow", rows[len(rows)-1][2])
+ // all four categories present
+ assert.ElementsMatch(t, []string{"flake", "failure", "timeout", "slow"}, categories)
+}
+
+func TestWriteCSVRenamesSlowWhenAlsoTimeout(t *testing.T) {
+ t.Parallel()
+ // A test that's a timeout is also over the slow threshold. CSV must list
+ // it once, as "timeout" not "slow" (primary signal wins).
+ iter := `{"Action":"output","Package":"p","Test":"T","Output":"panic: test timed out after 10m0s\n"}
+{"Action":"fail","Package":"p","Test":"T","Elapsed":600.0}
+`
+ dir := t.TempDir()
+ rep, _, err := Analyze(readers(iter), 30*time.Second)
+ require.NoError(t, err)
+ require.NoError(t, WriteCSV(dir, rep))
+
+ b, err := os.ReadFile(filepath.Join(dir, "report.csv"))
+ require.NoError(t, err)
+ content := string(b)
+ assert.Contains(t, content, "timeout")
+ // Only one data row beyond the header.
+ assert.Equal(t, 2, strings.Count(content, "\n"), "header + one row")
+}
+
+func TestSanitize(t *testing.T) {
+ t.Parallel()
+ tests := []struct {
+ in, want string
+ }{
+ {"github.com/foo/bar", "github.com_foo_bar"},
+ {"TestFoo/sub case", "TestFoo_sub_case"},
+ {"TestName", "TestName"},
+ {"a:b:c", "a_b_c"},
+ {"", "_"},
+ {"abc-123.go", "abc-123.go"},
+ }
+ for _, tc := range tests {
+ t.Run(tc.in, func(t *testing.T) {
+ t.Parallel()
+ assert.Equal(t, tc.want, sanitize(tc.in))
+ })
+ }
+}
diff --git a/tools/test/internal/runner/analyze_test.go b/tools/test/internal/runner/analyze_test.go
new file mode 100644
index 00000000000..c5aebccbe56
--- /dev/null
+++ b/tools/test/internal/runner/analyze_test.go
@@ -0,0 +1,546 @@
+package runner
+
+import (
+ "bufio"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// readers wraps each iteration's JSONL payload in a strings.Reader so tests
+// can feed literal fixtures straight into Analyze.
+func readers(iters ...string) []io.Reader {
+	out := make([]io.Reader, 0, len(iters))
+	for _, payload := range iters {
+		out = append(out, strings.NewReader(payload))
+	}
+	return out
+}
+
+func TestAnalyzePackageLevelTimeoutIterationSummary(t *testing.T) {
+	t.Parallel()
+	// A package-level timeout panic (no Test field) must classify the whole
+	// iteration as "timeout".
+	input := `{"Action":"output","Package":"pkg/hang","Output":"panic: test timed out after 2m0s\n"}
+{"Action":"fail","Package":"pkg/hang","Elapsed":120.0}
+`
+	rep, _, err := Analyze(readers(input), 30*time.Second)
+	require.NoError(t, err)
+	require.Len(t, rep.IterationSummaries, 1)
+	assert.Equal(t, "timeout", rep.IterationSummaries[0].Result)
+}
+
+func TestAnalyzeLineExceedsDefaultScannerLimit(t *testing.T) {
+	t.Parallel()
+	// bufio.Scanner default max token is bufio.MaxScanTokenSize (64 KiB).
+	over := strings.Repeat("x", bufio.MaxScanTokenSize+1) + "\n"
+	iter := `{"Action":"pass","Package":"p","Test":"T","Elapsed":0.01}` + "\n" + over +
+		`{"Action":"pass","Package":"p","Test":"T2","Elapsed":0.01}` + "\n"
+	_, _, err := Analyze(readers(iter), 30*time.Second)
+	// Analyze must surface the scanner error (not silently truncate the
+	// stream) and say which iteration index failed.
+	require.Error(t, err)
+	require.ErrorContains(t, err, "reading iteration 0")
+	require.ErrorIs(t, err, bufio.ErrTooLong, "want bufio.ErrTooLong wrapped in analyze error")
+}
+
+func TestAnalyzeBuildErrorsInterleavedWithJSONL(t *testing.T) {
+	t.Parallel()
+	// go test -json can interleave compiler lines (non-JSON) with events; package build ends with fail, Test "".
+	iter := `# example.com/badpkg
+badpkg.go:1:2: undefined: MissingType
+` + `{"Action":"output","Package":"example.com/badpkg","Output":"# example.com/badpkg\n"}
+{"Action":"fail","Package":"example.com/badpkg","Elapsed":0.0}
+`
+	rep, _, err := Analyze(readers(iter), 30*time.Second)
+	require.NoError(t, err)
+	// The build break surfaces as a package-level failure (empty Test name)
+	// and the iteration result/fail list names the package.
+	require.Len(t, rep.Failures, 1)
+	assert.Equal(t, "example.com/badpkg", rep.Failures[0].Package)
+	assert.Empty(t, rep.Failures[0].Test)
+	require.Len(t, rep.IterationSummaries, 1)
+	assert.Equal(t, "fail", rep.IterationSummaries[0].Result)
+	assert.Equal(t, []string{"example.com/badpkg"}, rep.IterationSummaries[0].FailingTests)
+}
+
+func TestAnalyzePackageLevelFailureIterationSummary(t *testing.T) {
+	t.Parallel()
+	// go test -json uses Test == "" for build failures, TestMain failures, etc.
+	iterations := []string{
+		`{"Action":"fail","Package":"pkg/build","Elapsed":0.0}` + "\n",
+	}
+	rep, _, err := Analyze(readers(iterations...), 30*time.Second)
+	require.NoError(t, err)
+	require.Len(t, rep.IterationSummaries, 1)
+	assert.Equal(t, "fail", rep.IterationSummaries[0].Result)
+	// The package path stands in for the test name in FailingTests.
+	assert.Equal(t, []string{"pkg/build"}, rep.IterationSummaries[0].FailingTests)
+	require.Len(t, rep.Failures, 1)
+	assert.Equal(t, "pkg/build", rep.Failures[0].Package)
+	assert.Empty(t, rep.Failures[0].Test)
+}
+
+func TestAnalyze(t *testing.T) {
+	t.Parallel()
+
+	// One case per report category (flake, failure, timeout, slow) plus
+	// not-flagged cases. Iterations are raw `go test -json` JSONL streams;
+	// nil want slices assert that a category stays empty.
+	tests := []struct {
+		name          string
+		iterations    []string
+		slowThreshold time.Duration
+		wantFlakes    []TestEntry
+		wantFailures  []TestEntry
+		wantTimeouts  []TestEntry
+		wantSlow      []TestEntry
+	}{
+		{
+			name: "flake: failed once, passed once",
+			iterations: []string{
+				`{"Action":"run","Package":"pkg/foo","Test":"TestX"}
+{"Action":"fail","Package":"pkg/foo","Test":"TestX","Elapsed":0.5}
+`,
+				`{"Action":"run","Package":"pkg/foo","Test":"TestX"}
+{"Action":"pass","Package":"pkg/foo","Test":"TestX","Elapsed":0.4}
+`,
+			},
+			slowThreshold: 30 * time.Second,
+			wantFlakes: []TestEntry{
+				{
+					Package: "pkg/foo", Test: "TestX",
+					Runs: 2, Successes: 1, Fails: 1,
+					MinElapsed: 400 * time.Millisecond,
+					MaxElapsed: 500 * time.Millisecond,
+					P50Elapsed: 450 * time.Millisecond,
+				},
+			},
+		},
+		{
+			name: "deterministic failure: failed in every iteration",
+			iterations: []string{
+				`{"Action":"fail","Package":"pkg/bar","Test":"TestBroken","Elapsed":0.1}` + "\n",
+				`{"Action":"fail","Package":"pkg/bar","Test":"TestBroken","Elapsed":0.1}` + "\n",
+			},
+			slowThreshold: 30 * time.Second,
+			wantFailures: []TestEntry{
+				{
+					Package: "pkg/bar", Test: "TestBroken",
+					Runs: 2, Fails: 2,
+					MinElapsed: 100 * time.Millisecond,
+					MaxElapsed: 100 * time.Millisecond,
+					P50Elapsed: 100 * time.Millisecond,
+				},
+			},
+		},
+		{
+			name: "timeout: panic output attached to a test",
+			iterations: []string{
+				`{"Action":"run","Package":"pkg/qux","Test":"TestHang"}
+{"Action":"output","Package":"pkg/qux","Test":"TestHang","Output":"panic: test timed out after 10m0s\n"}
+{"Action":"fail","Package":"pkg/qux","Test":"TestHang","Elapsed":600.0}
+`,
+			},
+			slowThreshold: 30 * time.Second,
+			wantTimeouts: []TestEntry{
+				{
+					Package: "pkg/qux", Test: "TestHang",
+					Runs: 1, Fails: 1, Timeouts: 1,
+					MinElapsed: 600 * time.Second,
+					MaxElapsed: 600 * time.Second,
+					P50Elapsed: 600 * time.Second,
+				},
+			},
+		},
+		{
+			name: "timeout: package-level panic without test field",
+			iterations: []string{
+				`{"Action":"output","Package":"pkg/hang","Output":"panic: test timed out after 2m0s\n"}
+{"Action":"fail","Package":"pkg/hang","Elapsed":120.0}
+`,
+			},
+			slowThreshold: 30 * time.Second,
+			wantTimeouts: []TestEntry{
+				{
+					// No Test: the timeout is attributed to the package itself.
+					Package: "pkg/hang",
+					Runs: 1, Fails: 1, Timeouts: 1,
+					MinElapsed: 120 * time.Second,
+					MaxElapsed: 120 * time.Second,
+					P50Elapsed: 120 * time.Second,
+				},
+			},
+		},
+		{
+			name: "slow: passing test exceeds threshold",
+			iterations: []string{
+				`{"Action":"run","Package":"pkg/a","Test":"TestSlow"}
+{"Action":"pass","Package":"pkg/a","Test":"TestSlow","Elapsed":45.0}
+`,
+			},
+			slowThreshold: 30 * time.Second,
+			wantSlow: []TestEntry{
+				{
+					Package: "pkg/a", Test: "TestSlow",
+					Runs: 1, Successes: 1,
+					MinElapsed: 45 * time.Second,
+					MaxElapsed: 45 * time.Second,
+					P50Elapsed: 45 * time.Second,
+				},
+			},
+		},
+		{
+			name: "package-level failure without test name (build/TestMain)",
+			iterations: []string{
+				`{"Action":"fail","Package":"pkg/build","Elapsed":0.0}` + "\n",
+			},
+			slowThreshold: 30 * time.Second,
+			wantFailures: []TestEntry{
+				{
+					Package:    "pkg/build",
+					Test:       "",
+					Runs:       1,
+					Fails:      1,
+					MinElapsed: 0,
+					MaxElapsed: 0,
+					P50Elapsed: 0,
+				},
+			},
+		},
+		{
+			name: "clean pass is not reported",
+			iterations: []string{
+				`{"Action":"pass","Package":"pkg/c","Test":"TestOK","Elapsed":0.01}` + "\n",
+			},
+			slowThreshold: 30 * time.Second,
+		},
+		{
+			name: "skips-only test is not flagged",
+			iterations: []string{
+				`{"Action":"skip","Package":"pkg/s","Test":"TestSkipped","Elapsed":0.0}` + "\n",
+			},
+			slowThreshold: 30 * time.Second,
+		},
+		{
+			name: "subtests counted independently of parent",
+			iterations: []string{
+				`{"Action":"fail","Package":"pkg/d","Test":"TestParent/sub1","Elapsed":0.1}
+{"Action":"pass","Package":"pkg/d","Test":"TestParent/sub2","Elapsed":0.1}
+{"Action":"fail","Package":"pkg/d","Test":"TestParent","Elapsed":0.2}
+`,
+				`{"Action":"pass","Package":"pkg/d","Test":"TestParent/sub1","Elapsed":0.1}
+{"Action":"pass","Package":"pkg/d","Test":"TestParent/sub2","Elapsed":0.1}
+{"Action":"pass","Package":"pkg/d","Test":"TestParent","Elapsed":0.2}
+`,
+			},
+			slowThreshold: 30 * time.Second,
+			// Parent and sub1 both flaked; sub2 passed both times and is absent.
+			wantFlakes: []TestEntry{
+				{
+					Package: "pkg/d", Test: "TestParent",
+					Runs: 2, Successes: 1, Fails: 1,
+					MinElapsed: 200 * time.Millisecond,
+					MaxElapsed: 200 * time.Millisecond,
+					P50Elapsed: 200 * time.Millisecond,
+				},
+				{
+					Package: "pkg/d", Test: "TestParent/sub1",
+					Runs: 2, Successes: 1, Fails: 1,
+					MinElapsed: 100 * time.Millisecond,
+					MaxElapsed: 100 * time.Millisecond,
+					P50Elapsed: 100 * time.Millisecond,
+				},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			rep, _, err := Analyze(readers(tc.iterations...), tc.slowThreshold)
+			require.NoError(t, err)
+			assert.Equal(t, len(tc.iterations), rep.Iterations)
+			assert.Equal(t, tc.wantFlakes, rep.Flakes, "flakes")
+			assert.Equal(t, tc.wantFailures, rep.Failures, "failures")
+			assert.Equal(t, tc.wantTimeouts, rep.Timeouts, "timeouts")
+			assert.Equal(t, tc.wantSlow, rep.Slow, "slow")
+		})
+	}
+}
+
+func TestAnalyzeCapturesLogsForFailures(t *testing.T) {
+	t.Parallel()
+
+	// Analyze's second return is a log map keyed by testKey; each flagged test
+	// carries the captured output per iteration index. These cases pin which
+	// iterations contribute output (failing ones only).
+	tests := []struct {
+		name       string
+		iterations []string
+		category   string // "flakes","failures","timeouts"
+		wantKey    testKey
+		wantIter   int
+		wantOutput string
+	}{
+		{
+			name: "failure captures output from failing iteration",
+			iterations: []string{
+				`{"Action":"run","Package":"p","Test":"T"}
+{"Action":"output","Package":"p","Test":"T","Output":" t.go:12: boom\n"}
+{"Action":"output","Package":"p","Test":"T","Output":"--- FAIL: T (0.00s)\n"}
+{"Action":"fail","Package":"p","Test":"T","Elapsed":0.01}
+`,
+			},
+			category:   "failures",
+			wantKey:    testKey{Package: "p", Test: "T"},
+			wantIter:   0,
+			wantOutput: " t.go:12: boom\n--- FAIL: T (0.00s)\n",
+		},
+		{
+			name: "flake captures logs only from failing iterations",
+			iterations: []string{
+				`{"Action":"output","Package":"p","Test":"T","Output":"fail-log\n"}
+{"Action":"fail","Package":"p","Test":"T","Elapsed":0.01}
+`,
+				`{"Action":"output","Package":"p","Test":"T","Output":"ok-log\n"}
+{"Action":"pass","Package":"p","Test":"T","Elapsed":0.01}
+`,
+			},
+			category:   "flakes",
+			wantKey:    testKey{Package: "p", Test: "T"},
+			wantIter:   0,
+			// "ok-log" from the passing iteration must not appear.
+			wantOutput: "fail-log\n",
+		},
+		{
+			name: "timeout captures the panic output",
+			iterations: []string{
+				`{"Action":"output","Package":"p","Test":"T","Output":"panic: test timed out after 10m0s\n"}
+{"Action":"output","Package":"p","Test":"T","Output":"\tstack trace line\n"}
+{"Action":"fail","Package":"p","Test":"T","Elapsed":600.0}
+`,
+			},
+			category:   "timeouts",
+			wantKey:    testKey{Package: "p", Test: "T"},
+			wantIter:   0,
+			wantOutput: "panic: test timed out after 10m0s\n\tstack trace line\n",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			rep, logs, err := Analyze(readers(tc.iterations...), 30*time.Second)
+			require.NoError(t, err)
+			var entries []TestEntry
+			switch tc.category {
+			case "flakes":
+				entries = rep.Flakes
+			case "failures":
+				entries = rep.Failures
+			case "timeouts":
+				entries = rep.Timeouts
+			}
+			require.Len(t, entries, 1, "expected exactly one %s entry", tc.category)
+			require.Contains(t, logs, tc.wantKey, "log map should contain the flagged test")
+			assert.Equal(t, tc.wantOutput, logs[tc.wantKey][tc.wantIter])
+		})
+	}
+}
+
+func TestAnalyzeReattributesTimeoutToRunningTests(t *testing.T) {
+	t.Parallel()
+	// The timeout panic lands on TestFast (which already passed); the
+	// "running tests:" section names the real culprits. Analyze must move
+	// the timeout — and the captured panic log — onto those tests.
+	iter := `{"Action":"run","Package":"p","Test":"TestFast"}
+{"Action":"pass","Package":"p","Test":"TestFast","Elapsed":0.01}
+{"Action":"output","Package":"p","Test":"TestFast","Output":"panic: test timed out after 5s\n"}
+{"Action":"output","Package":"p","Test":"TestFast","Output":"\trunning tests:\n"}
+{"Action":"output","Package":"p","Test":"TestFast","Output":"\t\tTestSlow/sub_case (5s)\n"}
+{"Action":"output","Package":"p","Test":"TestFast","Output":"\t\tTestOther (4s)\n"}
+{"Action":"output","Package":"p","Test":"TestFast","Output":"\n"}
+{"Action":"output","Package":"p","Test":"TestFast","Output":"goroutine 1 [chan receive]:\n"}
+{"Action":"fail","Package":"p","Elapsed":5.01}
+`
+	rep, logs, err := Analyze(readers(iter), 30*time.Second)
+	require.NoError(t, err)
+
+	names := make([]string, 0, len(rep.Timeouts))
+	for _, e := range rep.Timeouts {
+		names = append(names, e.Test)
+	}
+	assert.ElementsMatch(t, []string{"TestSlow/sub_case", "TestOther"}, names)
+	for _, e := range rep.Timeouts {
+		assert.NotEqual(t, "TestFast", e.Test)
+	}
+	// Each reattributed test also gets the panic output in its log entry.
+	for _, e := range rep.Timeouts {
+		k := testKey{Package: e.Package, Test: e.Test}
+		require.Contains(t, logs, k)
+		assert.Contains(t, logs[k][0], "panic: test timed out after 5s")
+	}
+}
+
+func TestAnalyzeKeepsTimeoutOnCulpritWhenItWasTheReportedTest(t *testing.T) {
+	t.Parallel()
+	// The test carrying the panic output is itself listed under
+	// "running tests:"; reattribution must keep the timeout on TestSlow
+	// rather than dropping or duplicating it.
+	iter := `{"Action":"output","Package":"p","Test":"TestSlow","Output":"panic: test timed out after 5s\n"}
+{"Action":"output","Package":"p","Test":"TestSlow","Output":"\trunning tests:\n"}
+{"Action":"output","Package":"p","Test":"TestSlow","Output":"\t\tTestSlow (5s)\n"}
+{"Action":"fail","Package":"p","Elapsed":5.01}
+`
+	rep, _, err := Analyze(readers(iter), 30*time.Second)
+	require.NoError(t, err)
+	require.Len(t, rep.Timeouts, 1)
+	assert.Equal(t, "TestSlow", rep.Timeouts[0].Test)
+}
+
+func TestPrintSummaryTimeoutShowsTestNotPassCounts(t *testing.T) {
+	t.Parallel()
+	// Timeout entries list the test name but no "(Np/Mf)" tally — the
+	// Successes value on a timed-out entry must not leak into the summary.
+	rep := &Report{
+		Iterations:    3,
+		SlowThreshold: 30 * time.Second,
+		Timeouts: []TestEntry{
+			{Package: "p", Test: "TestStuck", Successes: 2},
+		},
+	}
+	var buf strings.Builder
+	PrintSummary(&buf, rep)
+	out := buf.String()
+	assert.Contains(t, out, "Timeout (1)")
+	assert.Contains(t, out, "|-- p/")
+	assert.Contains(t, out, "TestStuck")
+	assert.NotContains(t, out, "(2p/0f)")
+}
+
+func TestAnalyzeResultsRoundtrip(t *testing.T) {
+	t.Parallel()
+	// End-to-end: read iteration-*.log.jsonl from disk, analyze, then write
+	// report.json and spot-check its contents.
+	dir := t.TempDir()
+	must(t, os.WriteFile(filepath.Join(dir, "iteration-0.log.jsonl"),
+		[]byte(`{"Action":"fail","Package":"pkg/z","Test":"TestFlaky","Elapsed":0.2}`+"\n"), 0600))
+	must(t, os.WriteFile(filepath.Join(dir, "iteration-1.log.jsonl"),
+		[]byte(`{"Action":"pass","Package":"pkg/z","Test":"TestFlaky","Elapsed":0.1}`+"\n"), 0600))
+
+	rep, _, err := AnalyzeResults(dir, 30*time.Second)
+	require.NoError(t, err)
+	require.Len(t, rep.Flakes, 1)
+	assert.Equal(t, "TestFlaky", rep.Flakes[0].Test)
+
+	require.NoError(t, WriteReport(dir, rep))
+	b, err := os.ReadFile(filepath.Join(dir, "report.json"))
+	require.NoError(t, err)
+	assert.Contains(t, string(b), `"flakes"`)
+	assert.Contains(t, string(b), `"TestFlaky"`)
+}
+
+// must aborts the test immediately on a non-nil setup error (fixture helper).
+func must(t *testing.T, err error) {
+	t.Helper()
+	require.NoError(t, err)
+}
+
+func TestAnalyzeIterationSummaries(t *testing.T) {
+	t.Parallel()
+	// Pin per-iteration result classification and the failing-test list
+	// (which must come back sorted).
+	tests := []struct {
+		name       string
+		iterations []string
+		want       []IterationSummary
+	}{
+		{
+			name: "all pass",
+			iterations: []string{
+				`{"Action":"pass","Package":"p","Test":"T","Elapsed":0.1}` + "\n",
+				`{"Action":"pass","Package":"p","Test":"T","Elapsed":0.2}` + "\n",
+			},
+			want: []IterationSummary{
+				{Index: 0, Result: "pass"},
+				{Index: 1, Result: "pass"},
+			},
+		},
+		{
+			name: "fail then pass",
+			iterations: []string{
+				`{"Action":"fail","Package":"p","Test":"TestA","Elapsed":0.1}` + "\n",
+				`{"Action":"pass","Package":"p","Test":"TestA","Elapsed":0.2}` + "\n",
+			},
+			want: []IterationSummary{
+				{Index: 0, Result: "fail", FailingTests: []string{"TestA"}},
+				{Index: 1, Result: "pass"},
+			},
+		},
+		{
+			name: "timeout",
+			iterations: []string{
+				`{"Action":"output","Package":"p","Test":"TestHang","Output":"panic: test timed out after 10m0s\n"}` + "\n" +
+					`{"Action":"fail","Package":"p","Test":"TestHang","Elapsed":600.0}` + "\n",
+			},
+			want: []IterationSummary{
+				{Index: 0, Result: "timeout"},
+			},
+		},
+		{
+			name: "multiple failures sorted",
+			iterations: []string{
+				`{"Action":"fail","Package":"p","Test":"TestB","Elapsed":0.1}` + "\n" +
+					`{"Action":"fail","Package":"p","Test":"TestA","Elapsed":0.1}` + "\n",
+			},
+			// Input order is B then A; summary must sort to A, B.
+			want: []IterationSummary{
+				{Index: 0, Result: "fail", FailingTests: []string{"TestA", "TestB"}},
+			},
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			rep, _, err := Analyze(readers(tc.iterations...), 30*time.Second)
+			require.NoError(t, err)
+			require.Len(t, rep.IterationSummaries, len(tc.want))
+			// Strip Duration/ShuffleSeed — set by runner, not Analyze.
+			got := make([]IterationSummary, len(rep.IterationSummaries))
+			for i, s := range rep.IterationSummaries {
+				got[i] = IterationSummary{Index: s.Index, Result: s.Result, FailingTests: s.FailingTests}
+			}
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+func TestAnalyzeSkipsMalformedLines(t *testing.T) {
+	t.Parallel()
+	// Non-JSON lines (stray stdout, compiler noise) are skipped, not errors.
+	input := `not json at all
+{"Action":"pass","Package":"p","Test":"T","Elapsed":0.01}
+`
+	rep, _, err := Analyze(readers(input), 30*time.Second)
+	require.NoError(t, err)
+	assert.Empty(t, rep.Flakes)
+	assert.Empty(t, rep.Failures)
+}
+
+func TestStatsP50(t *testing.T) {
+	t.Parallel()
+	// stats must sort internally: inputs are given unsorted on purpose.
+	tests := []struct {
+		name    string
+		samples []time.Duration
+		wantMin time.Duration
+		wantP50 time.Duration
+	}{
+		{
+			name:    "empty",
+			samples: nil,
+			wantMin: 0,
+			wantP50: 0,
+		},
+		{
+			name:    "single",
+			samples: []time.Duration{5 * time.Second},
+			wantMin: 5 * time.Second,
+			wantP50: 5 * time.Second,
+		},
+		{
+			name:    "odd count",
+			samples: []time.Duration{3, 1, 2},
+			wantMin: 1,
+			wantP50: 2,
+		},
+		{
+			// Median of an even-length set is the mean of the two middle
+			// values: (3s + 5s) / 2 = 4s.
+			name:    "even count averages middle two",
+			samples: []time.Duration{1 * time.Second, 3 * time.Second, 5 * time.Second, 9 * time.Second},
+			wantMin: 1 * time.Second,
+			wantP50: 4 * time.Second,
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			minDur, p50 := stats(tc.samples)
+			assert.Equal(t, tc.wantMin, minDur, "min")
+			assert.Equal(t, tc.wantP50, p50, "p50")
+		})
+	}
+}
diff --git a/tools/test/internal/runner/diagnose_progress.go b/tools/test/internal/runner/diagnose_progress.go
new file mode 100644
index 00000000000..edf61acf7ee
--- /dev/null
+++ b/tools/test/internal/runner/diagnose_progress.go
@@ -0,0 +1,199 @@
+package runner
+
+import (
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/termstyle"
+)
+
+// chainlinkModulePrefix is trimmed from import paths in the diagnose progress line
+// so the status shows repo-relative paths (e.g. core/foo).
+const chainlinkModulePrefix = "github.com/smartcontractkit/chainlink/v2"
+
+// packagePatternsFromEnd returns trailing non-flag arguments. This matches the usual
+// `go test [flags] [packages]` layout (package patterns last).
+func packagePatternsFromEnd(args []string) []string {
+ var pkgs []string
+ for i := len(args) - 1; i >= 0; i-- {
+ if strings.HasPrefix(args[i], "-") {
+ break
+ }
+ pkgs = append(pkgs, args[i])
+ }
+ for i, j := 0, len(pkgs)-1; i < j; i, j = i+1, j-1 {
+ pkgs[i], pkgs[j] = pkgs[j], pkgs[i]
+ }
+ return pkgs
+}
+
+// shortenChainlinkImportPath rewrites a chainlink/v2 import path to its
+// repo-relative form ("." for the module root); other paths pass through.
+func shortenChainlinkImportPath(importPath string) string {
+	switch importPath {
+	case "":
+		return ""
+	case chainlinkModulePrefix:
+		return "."
+	default:
+		return strings.TrimPrefix(importPath, chainlinkModulePrefix+"/")
+	}
+}
+
+// listTestPackageCount runs `go list -test -e` for the trailing package patterns
+// in go test arguments (see packagePatternsFromEnd). On error or no patterns,
+// returns an error or zero packages.
+func listTestPackageCount(ctx context.Context, repoRoot string, goTestArgs []string) (int, error) {
+ pkgs := packagePatternsFromEnd(goTestArgs)
+ if len(pkgs) == 0 {
+ return 0, errors.New("no package patterns in go test arguments (put packages last, after flags)")
+ }
+ //nolint:gosec // it's fine
+ cmd := exec.CommandContext(ctx, "go", append([]string{"list", "-test", "-e", "-f", "{{.ImportPath}}"}, pkgs...)...)
+ cmd.Dir = repoRoot
+ cmd.Env = os.Environ()
+ out, err := cmd.Output()
+ if err != nil {
+ return 0, err
+ }
+ n := 0
+ for line := range strings.SplitSeq(string(out), "\n") {
+ if strings.TrimSpace(line) != "" {
+ n++
+ }
+ }
+ if n == 0 {
+ return 0, errors.New("go list returned no packages")
+ }
+ return n, nil
+}
+
+// diagnoseProgress tracks completed packages from a go test -json stream.
+// All fields are guarded by mu (onTestJSONLine may run while snapshot renders).
+type diagnoseProgress struct {
+	mu         sync.Mutex
+	done       map[string]struct{} // packages with a terminal pass/fail/skip event
+	lastPkg    string              // most recently seen package (any event kind)
+	pkgOutcome map[string]string   // package import path → pass|fail|skip (package-level events only)
+	total      int                 // -1 when denominator is unknown (go list failed or empty)
+}
+
+// newDiagnoseProgress builds a tracker; pass totalPackages = -1 when the
+// denominator is unknown.
+func newDiagnoseProgress(totalPackages int) *diagnoseProgress {
+	p := &diagnoseProgress{total: totalPackages}
+	p.done = make(map[string]struct{})
+	p.pkgOutcome = make(map[string]string)
+	return p
+}
+
+// onTestJSONLine updates state from one JSONL line. Returns true if the number
+// of completed packages increased (for throttled redraws).
+func (p *diagnoseProgress) onTestJSONLine(line []byte) (completedIncreased bool) {
+	// Cheap pre-filter: go test -json events start with '{'; build errors and
+	// other non-JSON lines are skipped without unmarshalling.
+	if len(line) == 0 || line[0] != '{' {
+		return false
+	}
+	var ev TestEvent
+	if err := json.Unmarshal(line, &ev); err != nil {
+		return false
+	}
+	// lastPkg tracks any event with a package, terminal or not, so the status
+	// line can show what is currently running.
+	if ev.Package != "" {
+		p.mu.Lock()
+		p.lastPkg = ev.Package
+		p.mu.Unlock()
+	}
+	if !isPackageTerminalEvent(&ev) {
+		return false
+	}
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.pkgOutcome[ev.Package] = ev.Action
+	// Compare sizes so a repeated terminal event for the same package does
+	// not report a second completion tick.
+	before := len(p.done)
+	p.done[ev.Package] = struct{}{}
+	return len(p.done) > before
+}
+
+// isPackageTerminalEvent reports whether ev is a package-level pass/fail/skip.
+// Per-test events (Test != "") never complete a package.
+func isPackageTerminalEvent(ev *TestEvent) bool {
+	if ev.Package == "" || ev.Test != "" {
+		return false
+	}
+	return ev.Action == "pass" || ev.Action == "fail" || ev.Action == "skip"
+}
+
+// snapshot returns a consistent view under the mutex: completed package count,
+// configured total (-1 when unknown), the most recently seen package, and that
+// package's terminal outcome ("" while it is still running).
+func (p *diagnoseProgress) snapshot() (completed int, total int, lastPkg string, outcome string) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	return len(p.done), p.total, p.lastPkg, p.pkgOutcome[p.lastPkg]
+}
+
+// packageOutcomeMark returns a short suffix after the displayed package path:
+// pass/fail/skip from package-level JSON events, or an hourglass while that path
+// is active but no terminal result is recorded yet, or empty when there is no path.
+func packageOutcomeMark(action, displayPkg string) string {
+	switch action {
+	case "pass":
+		return " ✅"
+	case "fail":
+		return " ❌"
+	case "skip":
+		return " ⏭"
+	case "":
+		if displayPkg != "" {
+			return " ⌛" // path known but still running
+		}
+	}
+	return ""
+}
+
+// renderDiagnoseProgressLine writes one status line to w, redrawn in place via
+// carriage return + clear-to-end-of-line. Layout:
+// "iter i/n  completed/total pct%  <pkg><mark>  <elapsed>".
+func renderDiagnoseProgressLine(w io.Writer, iteration, iterations int, elapsed time.Duration, prog *diagnoseProgress, isTTY bool) {
+	if !isTTY {
+		return // no-op when not a TTY (ai output doesn't need this)
+	}
+	completed, total, lastPkg, outcome := prog.snapshot()
+
+	meta := fmt.Sprintf("iter %d/%d", iteration, iterations)
+	var countStr string
+	if total < 0 {
+		// Unknown denominator (go list failed): count without a percentage.
+		countStr = fmt.Sprintf("%d/?", completed)
+	} else {
+		pct := 0
+		if total > 0 {
+			pct = completed * 100 / total
+		}
+		countStr = fmt.Sprintf("%d/%d %d%%", completed, total, pct)
+	}
+
+	const pkgMaxChars = 42
+	displayPkg := shortenChainlinkImportPath(lastPkg)
+	mark := packageOutcomeMark(outcome, displayPkg)
+	markReserve := 0
+	if displayPkg != "" {
+		markReserve = 8 // room for terminal marks or hourglass (display width approx)
+	}
+	shortPkg := ellipsizeRight(displayPkg, pkgMaxChars-markReserve) + mark
+
+	line := termstyle.Label.Render(meta) + " " + termstyle.Accent.Render(countStr)
+	if shortPkg != "" {
+		line += " " + termstyle.Muted.Render(shortPkg) // path + ⌛ while running, or ✅/❌/⏭ when done
+	}
+	line += " " + termstyle.Muted.Render(elapsed.Round(time.Second).String())
+	// \r returns to column 0; \033[K erases the previous line's remainder.
+	fmt.Fprint(w, "\r\033[K")
+	fmt.Fprint(w, line)
+}
+
+// ellipsizeRight truncates s to at most maxLen bytes, keeping the rightmost
+// part prefixed with "…" (3 bytes) so the tail of a long package path stays
+// visible. Strings that already fit — or maxLen <= 3 — are returned unchanged.
+func ellipsizeRight(s string, maxLen int) string {
+	if maxLen <= 3 || len(s) <= maxLen {
+		return s
+	}
+	tail := s[len(s)-(maxLen-3):]
+	// Don't start the tail mid-rune: drop leading UTF-8 continuation bytes
+	// (0b10xxxxxx) so the result is always valid UTF-8.
+	for len(tail) > 0 && tail[0]&0xC0 == 0x80 {
+		tail = tail[1:]
+	}
+	return "…" + tail
+}
diff --git a/tools/test/internal/runner/diagnose_progress_test.go b/tools/test/internal/runner/diagnose_progress_test.go
new file mode 100644
index 00000000000..ffd88267df2
--- /dev/null
+++ b/tools/test/internal/runner/diagnose_progress_test.go
@@ -0,0 +1,102 @@
+package runner
+
+import (
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestDiagnoseProgress_onTestJSONLine_packageTerminal(t *testing.T) {
+	t.Parallel() // consistent with the suite; each test owns its tracker
+	p := newDiagnoseProgress(2)
+
+	// Non-JSON and per-test events must not count as package completions.
+	require.False(t, p.onTestJSONLine([]byte(`not json`)))
+	require.False(t, p.onTestJSONLine([]byte(`{"Action":"run","Package":"a/b","Test":"TestX"}`)))
+
+	require.True(t, p.onTestJSONLine([]byte(`{"Action":"pass","Package":"a/b"}`)))
+	c, tot, _, _ := p.snapshot()
+	require.Equal(t, 1, c)
+	require.Equal(t, 2, tot)
+
+	// Duplicate package-level pass must not report a second completion tick.
+	require.False(t, p.onTestJSONLine([]byte(`{"Action":"pass","Package":"a/b"}`)))
+	c, _, _, _ = p.snapshot()
+	require.Equal(t, 1, c)
+
+	require.True(t, p.onTestJSONLine([]byte(`{"Action":"fail","Package":"c/d"}`)))
+	c, _, _, _ = p.snapshot()
+	require.Equal(t, 2, c)
+}
+
+func TestDiagnoseProgress_onTestJSONLine_skipFail(t *testing.T) {
+	t.Parallel() // consistent with the suite; each subcase owns its tracker
+	// "skip" and "fail" are terminal package events just like "pass".
+	p := newDiagnoseProgress(1)
+	require.True(t, p.onTestJSONLine([]byte(`{"Action":"skip","Package":"p"}`)))
+	c, _, _, _ := p.snapshot()
+	require.Equal(t, 1, c)
+
+	p2 := newDiagnoseProgress(1)
+	require.True(t, p2.onTestJSONLine([]byte(`{"Action":"fail","Package":"p"}`)))
+	c2, _, _, _ := p2.snapshot()
+	require.Equal(t, 1, c2)
+}
+
+func TestDiagnoseProgress_lastPkgUpdates(t *testing.T) {
+	t.Parallel() // consistent with the suite
+	// Any event carrying a Package (even a per-test "run") updates lastPkg.
+	p := newDiagnoseProgress(10)
+	p.onTestJSONLine([]byte(`{"Action":"run","Package":"x/y","Test":"TestZ"}`))
+	_, _, last, _ := p.snapshot()
+	require.Equal(t, "x/y", last)
+}
+
+func TestDiagnoseProgress_pkgOutcomeOnTerminal(t *testing.T) {
+	t.Parallel() // consistent with the suite
+	p := newDiagnoseProgress(5)
+	// Only a per-test "run" seen so far: no outcome recorded yet.
+	p.onTestJSONLine([]byte(`{"Action":"run","Package":"p/q","Test":"TestZ"}`))
+	_, _, _, out := p.snapshot()
+	require.Empty(t, out)
+	// The package-level terminal event records the outcome.
+	p.onTestJSONLine([]byte(`{"Action":"pass","Package":"p/q"}`))
+	_, _, last, out := p.snapshot()
+	require.Equal(t, "p/q", last)
+	require.Equal(t, "pass", out)
+}
+
+func TestShortenChainlinkImportPath(t *testing.T) {
+	t.Parallel()
+	// Empty, exact module root, module sub-path, and foreign module.
+	for in, want := range map[string]string{
+		"":                                  "",
+		chainlinkModulePrefix:               ".",
+		chainlinkModulePrefix + "/core/foo": "core/foo",
+		"other.com/pkg":                     "other.com/pkg",
+	} {
+		require.Equal(t, want, shortenChainlinkImportPath(in))
+	}
+}
+
+func TestEllipsizeRight(t *testing.T) {
+	t.Parallel() // consistent with the rest of this package's tests
+	require.Equal(t, "short", ellipsizeRight("short", 10))           // fits: unchanged
+	require.Equal(t, "abcdefghij", ellipsizeRight("abcdefghij", 10)) // exact fit: unchanged
+	require.Equal(t, "…hij", ellipsizeRight("abcdefghij", 6))        // "…" (3 bytes) + last 3 bytes
+}
+
+func TestRenderDiagnoseProgressLine_smoke(t *testing.T) {
+	t.Parallel() // consistent with the suite; writes only to a local builder
+	var b strings.Builder
+	p := newDiagnoseProgress(10)
+	p.onTestJSONLine([]byte(`{"Action":"pass","Package":"demo/pkg"}`))
+	renderDiagnoseProgressLine(&b, 1, 3, 2*time.Second, p, true)
+	require.Contains(t, b.String(), "iter 1/3")
+	require.Contains(t, b.String(), "1/10 10%")
+	require.Contains(t, b.String(), "✅")
+	// No progress-bar glyphs: this renderer is a single status line.
+	require.NotContains(t, b.String(), "█")
+}
+
+func TestRenderDiagnoseProgressLine_inProgressShowsHourglass(t *testing.T) {
+	t.Parallel() // consistent with the suite
+	var b strings.Builder
+	p := newDiagnoseProgress(10)
+	p.onTestJSONLine([]byte(`{"Action":"run","Package":"demo/pkg","Test":"TestX"}`))
+	renderDiagnoseProgressLine(&b, 1, 3, 2*time.Second, p, true)
+	// Package seen but not finished: hourglass, never a terminal mark.
+	require.Contains(t, b.String(), "⌛")
+	require.NotContains(t, b.String(), "✅")
+}
+
+func TestRenderDiagnoseProgressLine_notTTY(t *testing.T) {
+	t.Parallel() // consistent with the suite
+	var b strings.Builder
+	p := newDiagnoseProgress(10)
+	p.onTestJSONLine([]byte(`{"Action":"pass","Package":"demo/pkg"}`))
+	// Non-TTY writers must receive nothing (no ANSI control sequences).
+	renderDiagnoseProgressLine(&b, 1, 3, 2*time.Second, p, false)
+	require.Empty(t, b.String())
+}
diff --git a/tools/test/internal/runner/diagnose_results_dir.go b/tools/test/internal/runner/diagnose_results_dir.go
new file mode 100644
index 00000000000..ce278cf8bfb
--- /dev/null
+++ b/tools/test/internal/runner/diagnose_results_dir.go
@@ -0,0 +1,138 @@
+package runner
+
+import (
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "strings"
+ "time"
+ "unicode/utf8"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+)
+
+const (
+	// Prefix of every diagnose results directory (matches .gitignore's diagnose-*/).
+	diagnoseResultsNamePrefix = "diagnose-"
+	// Cap on the directory basename length, in bytes.
+	maxDiagnoseResultsBasename = 220
+	// Slow-test threshold used when config leaves SlowThreshold at zero.
+	defaultSlowThreshold = 30 * time.Second
+)
+
+// diagnoseResultsDirName returns a repo-root-relative directory basename for
+// diagnose output: diagnose-<target>-<config>-<timestamp>. The target slug is
+// truncated and the config part progressively simplified (up to 8 phases) until
+// the basename fits maxDiagnoseResultsBasename.
+func diagnoseResultsDirName(conf *config.App, goTestArgs []string, now time.Time) string {
+	tsPart := now.Format("20060102150405")
+	target := guessPackagePatternForSlug(goTestArgs)
+	for phase := range 8 {
+		cfg := diagnoseConfigDirPartPhase(conf, goTestArgs, phase)
+		tail := "-" + cfg + "-" + tsPart
+		// Always leave at least 8 bytes for the target slug.
+		avail := max(maxDiagnoseResultsBasename-len(diagnoseResultsNamePrefix)-len(tail), 8)
+		slug := truncateUTF8MaxBytes(diagnoseTargetSlug(target), avail)
+		base := diagnoseResultsNamePrefix + slug + tail
+		if len(base) <= maxDiagnoseResultsBasename {
+			return base
+		}
+	}
+	// Last resort: placeholder slug plus timestamp only.
+	return diagnoseResultsNamePrefix + "x" + "-" + tsPart
+}
+
+// diagnoseTargetSlug converts a go package pattern into a filename-safe token,
+// mapping "..." wildcards to "allpkgs" and slashes to underscores.
+func diagnoseTargetSlug(target string) string {
+	t := strings.TrimPrefix(target, "./")
+	if t == "..." {
+		return "allpkgs"
+	}
+	if rest, ok := strings.CutSuffix(t, "/..."); ok {
+		t = rest + "_allpkgs"
+	}
+	return sanitizeDirToken(strings.ReplaceAll(t, "/", "_"))
+}
+
+// sanitizeDirToken replaces every rune outside [A-Za-z0-9._-] with '_'.
+func sanitizeDirToken(s string) string {
+	safe := func(r rune) bool {
+		return r == '_' || r == '-' || r == '.' ||
+			('a' <= r && r <= 'z') ||
+			('A' <= r && r <= 'Z') ||
+			('0' <= r && r <= '9')
+	}
+	var out strings.Builder
+	out.Grow(len(s))
+	for _, r := range s {
+		if safe(r) {
+			out.WriteRune(r)
+		} else {
+			out.WriteByte('_')
+		}
+	}
+	return out.String()
+}
+
+// guessPackagePatternForSlug picks a human-readable slug from go test arguments
+// (trailing package patterns). Falls back to "pkgs" if none found; multiple
+// patterns are joined with "__".
+func guessPackagePatternForSlug(goTestArgs []string) string {
+	pkgs := packagePatternsFromEnd(goTestArgs)
+	if len(pkgs) == 0 {
+		return "pkgs"
+	}
+	if len(pkgs) == 1 {
+		return pkgs[0]
+	}
+	return strings.Join(pkgs, "__")
+}
+
+// durationDirToken renders d for use in a directory name. NOTE(review):
+// time.Duration.String() output ("1m30s", "45.5s") does not currently contain
+// ':'; the replacement is defensive in case that format ever changes.
+func durationDirToken(d time.Duration) string {
+	return strings.ReplaceAll(d.String(), ":", "_")
+}
+
+// diagnoseConfigDirPartPhase renders the config token of the results dir name.
+// Higher phases progressively drop optional parts (slow threshold, shuffle,
+// fail-fast) and shorten the args hash so the basename can fit the length cap.
+func diagnoseConfigDirPartPhase(conf *config.App, goTestArgs []string, phase int) string {
+	// Hash the raw go test args so distinct invocations get distinct dirs.
+	h := sha256.Sum256([]byte(strings.Join(goTestArgs, "\x00")))
+	hash8 := hex.EncodeToString(h[:4])
+
+	dropSlow := phase >= 1
+	dropShuffle := phase >= 2
+	dropFF := phase >= 3
+	shortHash := phase >= 4
+
+	var parts []string
+	if conf.Iterations > 0 {
+		parts = append(parts, fmt.Sprintf("it%d", conf.Iterations))
+	}
+	hStr := hash8
+	if shortHash {
+		hStr = hStr[:4]
+	}
+	parts = append(parts, "h"+hStr)
+	if !dropFF && conf.FailFast {
+		parts = append(parts, "ff")
+	}
+	if !dropShuffle && conf.Shuffle {
+		parts = append(parts, "shuffle")
+	}
+	if !dropSlow {
+		slow := conf.SlowThreshold
+		if slow == 0 {
+			slow = defaultSlowThreshold
+		}
+		if slow != defaultSlowThreshold {
+			// Render the normalized value, not conf.SlowThreshold: the raw
+			// field could be zero if the normalization above ever changes,
+			// which would emit a misleading "slow0s" token.
+			parts = append(parts, "slow"+durationDirToken(slow))
+		}
+	}
+	return strings.Join(parts, "-")
+}
+
+// truncateUTF8MaxBytes cuts s to at most maxBytes bytes, then strips any bytes
+// left over from a rune split by the cut so the result stays valid UTF-8.
+func truncateUTF8MaxBytes(s string, maxBytes int) string {
+	if maxBytes <= 0 {
+		return ""
+	}
+	if len(s) <= maxBytes {
+		return s
+	}
+	out := s[:maxBytes]
+	for len(out) > 0 {
+		r, size := utf8.DecodeLastRuneInString(out)
+		// RuneError with size 1 means the cut landed mid-rune: drop the byte
+		// and retry. (RuneError with a larger size is a literal U+FFFD — keep it.)
+		if r == utf8.RuneError && size == 1 {
+			out = out[:len(out)-1]
+			continue
+		}
+		break
+	}
+	return out
+}
diff --git a/tools/test/internal/runner/runner.go b/tools/test/internal/runner/runner.go
new file mode 100644
index 00000000000..a781d856385
--- /dev/null
+++ b/tools/test/internal/runner/runner.go
@@ -0,0 +1,409 @@
+package runner
+
+import (
+ "bufio"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "math/rand/v2"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/charmbracelet/x/term"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/termstyle"
+)
+
+// GoTest runs `go test` with the given args, using the repo root as the
+// working directory and wiring the child to this process's stdio and env.
+func GoTest(ctx context.Context, conf *config.App, args []string) error {
+	argv := append([]string{"test"}, args...)
+	//nolint:gosec // it's fine
+	cmd := exec.CommandContext(ctx, "go", argv...)
+	cmd.Dir = conf.RepoRoot
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = os.Environ()
+	return cmd.Run()
+}
+
+// Gotestsum runs `gotestsum` with the given args, using the repo root as the
+// working directory. It fails early with an install hint when the binary is
+// not on PATH.
+func Gotestsum(ctx context.Context, conf *config.App, args []string) error {
+	if _, err := exec.LookPath("gotestsum"); err != nil {
+		return fmt.Errorf("gotestsum not on PATH: install with go install gotest.tools/gotestsum@latest: %w", err)
+	}
+	cmd := exec.CommandContext(ctx, "gotestsum", args...)
+	cmd.Dir = conf.RepoRoot
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = os.Environ()
+	return cmd.Run()
+}
+
+// Diagnose runs go test -json once per iteration, writing each stream to
+// iteration-<N>.log.jsonl, then analyzes and writes report.json.
+// Test iteration failures do not stop later runs (unless --fail-fast); they are
+// reflected in report.json. Diagnose returns a non-nil error for setup failures
+// (e.g. mkdir, database reset), analyze/write report failures, or ctx errors
+// bubbling from dependencies — not for failing tests alone.
+// resetDB (optional) runs before each iteration after the first to restore the
+// database to its freshly-prepared state.
+// dumpDB (optional) runs after each iteration to capture database state for
+// per-iteration diagnosis; errors are logged but do not fail the diagnose run.
+func Diagnose(ctx context.Context, conf *config.App, goTestArgs []string, resetDB func(context.Context) error, dumpDB func(context.Context, string, int) error) error {
+	start := time.Now()
+
+	// Results directory name encodes the config/argv; 0700 keeps DB dumps private.
+	resultsDir := filepath.Join(conf.RepoRoot, diagnoseResultsDirName(conf, goTestArgs, start))
+	err := os.MkdirAll(resultsDir, 0700)
+	if err != nil {
+		return err
+	}
+
+	var (
+		completed int
+		failedFast bool
+		iterDurations = make([]time.Duration, 0, conf.Iterations)
+		shuffleSeeds map[int]int64
+	)
+	// shuffleSeeds stays nil unless shuffling, so the summary-merge loop below
+	// can use nil-ness to decide whether to record seeds.
+	if conf.Shuffle {
+		shuffleSeeds = make(map[int]int64)
+	}
+	for i := range conf.Iterations {
+		// Stop starting new iterations once ctx is canceled; partial results
+		// are still analyzed below.
+		if ctx.Err() != nil {
+			break
+		}
+		if i > 0 && resetDB != nil {
+			if err := resetDB(ctx); err != nil {
+				// A reset failure caused by cancellation is not a setup error;
+				// fall through to partial analysis instead of failing.
+				if ctx.Err() != nil {
+					break
+				}
+				return fmt.Errorf("reset database before iteration %d: %w", i, err)
+			}
+		}
+		var seed int64
+		if conf.Shuffle {
+			seed = rand.Int64N(1<<62) + 1 // always nonzero
+			shuffleSeeds[i] = seed
+		}
+		iterStart := time.Now()
+		iterErr := diagnoseIteration(ctx, conf, resultsDir, goTestArgs, i, seed)
+		iterDurations = append(iterDurations, time.Since(iterStart))
+		// Dump DB state even for failed iterations; dump errors are advisory.
+		if dumpDB != nil {
+			if dumpErr := dumpDB(ctx, resultsDir, i); dumpErr != nil && !conf.AIOutput {
+				fmt.Fprintf(os.Stderr, "postgres state dump iteration %d: %v\n", i, dumpErr)
+			}
+		}
+		if iterErr != nil && conf.FailFast {
+			// NOTE: a fail-fast iteration is not counted in `completed`.
+			failedFast = true
+			break
+		}
+		completed = i + 1
+	}
+
+	interrupted := ctx.Err() != nil
+	if interrupted && !conf.AIOutput {
+		fmt.Fprintln(os.Stderr,
+			termstyle.Accent.Render(fmt.Sprintf("interrupted after %d/%d iterations", completed, conf.Iterations))+
+				termstyle.Muted.Render(" — analyzing partial results…"))
+	}
+
+	if failedFast && !conf.AIOutput {
+		fmt.Fprintln(os.Stderr, termstyle.Accent.Render("--fail-fast set, stopping early"))
+	}
+
+	report, logs, analyzeErr := AnalyzeResults(resultsDir, conf.SlowThreshold)
+	if analyzeErr != nil {
+		fmt.Fprintf(os.Stderr, "analyze results: %v\n", analyzeErr)
+		return analyzeErr
+	}
+	// Merge wall-clock durations and shuffle seeds into the analyzed
+	// summaries. Counts can disagree after interruption, so bound by both.
+	if report != nil {
+		for i, d := range iterDurations {
+			if i >= len(report.IterationSummaries) {
+				break
+			}
+			report.IterationSummaries[i].Duration = d
+			if shuffleSeeds != nil {
+				report.IterationSummaries[i].ShuffleSeed = shuffleSeeds[i]
+			}
+		}
+	}
+	if err := WriteLogFiles(resultsDir, report, logs); err != nil {
+		fmt.Fprintf(os.Stderr, "write log files: %v\n", err)
+		return err
+	}
+	if err := WriteReport(resultsDir, report); err != nil {
+		fmt.Fprintf(os.Stderr, "write report: %v\n", err)
+		return err
+	}
+	if err := WriteCSV(resultsDir, report); err != nil {
+		fmt.Fprintf(os.Stderr, "write csv: %v\n", err)
+		return err
+	}
+
+	reportPath := filepath.Join(resultsDir, "report.json")
+	// Machine-readable mode: print only the report path on stdout.
+	if conf.AIOutput {
+		fmt.Fprintln(os.Stdout, reportPath)
+		return nil
+	}
+
+	fmt.Fprintln(os.Stderr,
+		termstyle.Label.Render("diagnose complete")+
+			termstyle.Muted.Render(fmt.Sprintf(" (%s)", time.Since(start).Round(time.Millisecond))))
+	if report != nil {
+		PrintSummary(os.Stderr, report)
+	}
+	fmt.Fprintln(os.Stderr,
+		termstyle.Muted.Render("results in ")+termstyle.Label.Render(resultsDir))
+	return nil
+}
+
+// goTestFlagsBeforeArgs returns the leading portion of argv that `go test`
+// itself consumes: everything before the first -args separator. Flags after
+// -args belong to the test binary, not to go test.
+func goTestFlagsBeforeArgs(args []string) []string {
+	for i, a := range args {
+		if a != "-args" {
+			continue
+		}
+		return args[:i]
+	}
+	return args
+}
+
+// parseDiagnoseGoTestCount returns the last -count in the portion of argv that
+// belongs to `go test` itself (before -args). If no -count appears, set is false.
+func parseDiagnoseGoTestCount(goTestArgs []string) (set bool, n int, err error) {
+ args := goTestFlagsBeforeArgs(goTestArgs)
+ for i := 0; i < len(args); i++ {
+ a := args[i]
+ if after, ok := strings.CutPrefix(a, "-count="); ok {
+ v := after
+ num, e := strconv.Atoi(strings.TrimSpace(v))
+ if e != nil {
+ return false, 0, fmt.Errorf("invalid -count value %q: %w", v, e)
+ }
+ if num < 1 {
+ return false, 0, fmt.Errorf("invalid go test arguments: -count must be a positive integer, got %d", num)
+ }
+ set = true
+ n = num
+ continue
+ }
+ if a == "-count" {
+ if i+1 >= len(args) {
+ return false, 0, errors.New("invalid go test arguments: -count must be followed by a value")
+ }
+ i++
+ num, e := strconv.Atoi(strings.TrimSpace(args[i]))
+ if e != nil {
+ return false, 0, fmt.Errorf("invalid -count value %q: %w", args[i], e)
+ }
+ if num < 1 {
+ return false, 0, fmt.Errorf("invalid go test arguments: -count must be a positive integer, got %d", num)
+ }
+ set = true
+ n = num
+ }
+ }
+ return set, n, nil
+}
+
+// WarnDiagnoseGoTestCount prints hints when the user sets -count on go test, and
+// returns an error if -count values in the go test flag section are malformed.
+func WarnDiagnoseGoTestCount(w io.Writer, goTestArgs []string) error {
+	set, n, err := parseDiagnoseGoTestCount(goTestArgs)
+	switch {
+	case err != nil:
+		return err
+	case !set:
+		// No -count supplied: nothing to hint about.
+		return nil
+	case n == 1:
+		fmt.Fprintln(w, termstyle.Muted.Render(
+			"note: -count=1 is unnecessary; diagnose adds -count=1 when you omit it."))
+	default:
+		fmt.Fprintln(w, termstyle.Muted.Render(
+			"note: prefer diagnose --iterations for repetition; use -count>1 only if you want to avoid overhead between diagnose iterations (e.g. DB setup/teardown)."))
+	}
+	return nil
+}
+
+// filterDiagnoseUserGoTestArgs removes -json/--json from the go test flag
+// section so the harness can inject -json; arguments after -args are unchanged.
+func filterDiagnoseUserGoTestArgs(args []string) []string {
+	// Locate the -args separator; everything at or after it passes through.
+	split := len(args)
+	for i, a := range args {
+		if a == "-args" {
+			split = i
+			break
+		}
+	}
+	var out []string
+	for _, a := range args[:split] {
+		if a != "-json" && a != "--json" {
+			out = append(out, a)
+		}
+	}
+	return append(out, args[split:]...)
+}
+
+// buildDiagnoseArgs constructs the `go test` argv for a single diagnose
+// iteration: -json is injected (user duplicates stripped), a nonzero shuffle
+// seed is appended, and -count=1 is added unless the user asked for more.
+func buildDiagnoseArgs(goTestArgs []string, shuffleSeed int64) ([]string, error) {
+	set, n, err := parseDiagnoseGoTestCount(goTestArgs)
+	if err != nil {
+		return nil, err
+	}
+	args := append([]string{"test", "-json"}, filterDiagnoseUserGoTestArgs(goTestArgs)...)
+	if shuffleSeed != 0 {
+		args = append(args, fmt.Sprintf("-shuffle=%d", shuffleSeed))
+	}
+	if !set || n <= 1 {
+		args = append(args, "-count=1")
+	}
+	return args, nil
+}
+
+// syncedWriter serializes writes to w so stdout and stderr from `go test` can
+// share one JSONL file without interleaved corrupt lines.
+type syncedWriter struct {
+	mu sync.Mutex
+	w io.Writer
+}
+
+// Write forwards p to the underlying writer while holding the mutex.
+func (sw *syncedWriter) Write(p []byte) (int, error) {
+	sw.mu.Lock()
+	n, err := sw.w.Write(p)
+	sw.mu.Unlock()
+	return n, err
+}
+
+// diagnoseIteration runs one `go test -json` invocation, teeing its stdout
+// line-by-line into iteration-<N>.log.jsonl while (in interactive mode)
+// rendering a progress line on stderr. Stderr from go test goes straight to
+// the same JSONL file via a mutex-guarded writer. Returns the go test error
+// (tests failing), a scan error, or a build/start error.
+func diagnoseIteration(ctx context.Context, conf *config.App, resultsDir string, goTestArgs []string, iteration int, shuffleSeed int64) error {
+	start := time.Now()
+	jsonPath := filepath.Join(resultsDir, fmt.Sprintf("iteration-%d.log.jsonl", iteration))
+	resultsFile, err := os.Create(jsonPath)
+	if err != nil {
+		return err
+	}
+	defer resultsFile.Close()
+
+	args, err := buildDiagnoseArgs(goTestArgs, shuffleSeed)
+	if err != nil {
+		return err
+	}
+	cmd := exec.CommandContext(ctx, "go", args...)
+	cmd.Dir = conf.RepoRoot
+	cmd.Stdin = os.Stdin
+	cmd.Env = os.Environ()
+	// Soft-cancel on ctx cancellation so `go test -json` gets a chance to flush
+	// its final events before we escalate to SIGKILL after WaitDelay.
+	cmd.Cancel = func() error { return cmd.Process.Signal(os.Interrupt) }
+	cmd.WaitDelay = 5 * time.Second
+
+	// AI mode: no progress UI, both streams go straight into the JSONL file.
+	if conf.AIOutput {
+		sw := &syncedWriter{w: resultsFile}
+		cmd.Stdout = sw
+		cmd.Stderr = sw
+		return cmd.Run()
+	}
+
+	// Interactive mode: stderr still goes to the file; stdout is piped through
+	// a scanner below so progress can be updated per JSON event.
+	sw := &syncedWriter{w: resultsFile}
+	cmd.Stderr = sw
+
+	// Best-effort package count for the progress denominator; -1 means unknown.
+	totalPkgs := -1
+	if n, listErr := listTestPackageCount(ctx, conf.RepoRoot, goTestArgs); listErr == nil {
+		totalPkgs = n
+	}
+	prog := newDiagnoseProgress(totalPkgs)
+
+	pr, pw := io.Pipe()
+	cmd.Stdout = pw
+
+	isTTY := term.IsTerminal(os.Stderr.Fd())
+	iter, iters := iteration+1, conf.Iterations
+	if !isTTY {
+		fmt.Fprintln(os.Stderr,
+			termstyle.Muted.Render(fmt.Sprintf("iteration %d/%d started", iter, iters)))
+	}
+
+	redraw := func(isTTYLine bool) {
+		renderDiagnoseProgressLine(os.Stderr, iter, iters, time.Since(start), prog, isTTYLine)
+	}
+
+	// Reader goroutine: copy each stdout line into the JSONL file (through the
+	// synced writer so it interleaves safely with stderr) and feed progress.
+	var readWG sync.WaitGroup
+	var scanErr error
+	readWG.Go(func() {
+		sc := bufio.NewScanner(pr)
+		// Large buffer: go test -json lines can carry long test output.
+		sc.Buffer(make([]byte, 0, 64*1024), 4*1024*1024)
+		for sc.Scan() {
+			line := sc.Bytes()
+			// Copy before writing: Scanner reuses its buffer across Scan calls.
+			out := make([]byte, len(line)+1)
+			copy(out, line)
+			out[len(line)] = '\n'
+			if _, werr := sw.Write(out); werr != nil {
+				break
+			}
+			// Non-TTY: redraw only when progress actually changed (one line per
+			// event); TTY redraws are driven by the ticker below instead.
+			if prog.onTestJSONLine(line) && !isTTY {
+				redraw(false)
+			}
+		}
+		scanErr = sc.Err()
+	})
+
+	// TTY only: repaint the in-place progress line on a fixed cadence.
+	tickDone := make(chan struct{})
+	var tickWG sync.WaitGroup
+	if isTTY {
+		tickWG.Go(func() {
+			tick := time.NewTicker(250 * time.Millisecond)
+			defer tick.Stop()
+			for {
+				select {
+				case <-tickDone:
+					return
+				case <-tick.C:
+					redraw(true)
+				}
+			}
+		})
+		redraw(true)
+	}
+
+	runErr := cmd.Start()
+	started := runErr == nil
+	if started {
+		runErr = cmd.Wait()
+		// Close the pipe writer so the reader goroutine sees EOF and exits.
+		_ = pw.Close()
+	} else {
+		_ = pw.CloseWithError(runErr)
+	}
+	readWG.Wait()
+	close(tickDone)
+	tickWG.Wait()
+
+	if isTTY {
+		// Clear the progress line (CR + erase-to-end-of-line) before summary.
+		fmt.Fprint(os.Stderr, "\r\033[K")
+	}
+	if started {
+		status := termstyle.OK.Render("✅")
+		if runErr != nil {
+			status = termstyle.Bad.Render("❌")
+		}
+		fmt.Fprintln(os.Stderr,
+			termstyle.Label.Render(fmt.Sprintf("iteration %d/%d ", iter, iters))+
+				status+" "+
+				termstyle.Muted.Render(fmt.Sprintf("(%s)", time.Since(start).Round(time.Millisecond))))
+	}
+	// A scan error takes precedence: the JSONL file is likely incomplete.
+	if scanErr != nil {
+		return fmt.Errorf("reading go test output: %w", scanErr)
+	}
+	return runErr
+}
diff --git a/tools/test/internal/runner/runner_test.go b/tools/test/internal/runner/runner_test.go
new file mode 100644
index 00000000000..9b15f20ce20
--- /dev/null
+++ b/tools/test/internal/runner/runner_test.go
@@ -0,0 +1,344 @@
+package runner
+
+import (
+ "context"
+ "crypto/sha256"
+ "encoding/hex"
+ "encoding/json"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/config"
+)
+
+// diagnoseResultsDirNameAt is a fixed clock for diagnoseResultsDirName
+// assertions; it renders as the timestamp 20240601123045.
+var diagnoseResultsDirNameAt = time.Date(2024, 6, 1, 12, 30, 45, 0, time.UTC)
+
+// testArgHash8 mirrors the production hash: first 4 bytes of the SHA-256 of
+// the NUL-joined argv, hex-encoded to 8 digits.
+func testArgHash8(goTestArgs []string) string {
+	sum := sha256.Sum256([]byte(strings.Join(goTestArgs, "\x00")))
+	return hex.EncodeToString(sum[:4])
+}
+
+// When ctx is already canceled before Diagnose starts, no iterations run but
+// analysis still produces a report.json — this is the path a user hits after
+// Ctrl+C'ing a long-running diagnose run.
+func TestDiagnoseCanceledCtxRunsNoIterationsButStillWritesReport(t *testing.T) {
+	t.Parallel()
+	repoRoot := t.TempDir()
+	conf := &config.App{
+		RepoRoot: repoRoot,
+		AIOutput: true,
+		Iterations: 3,
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	// nil resetDB/dumpDB: neither hook is reachable when no iteration runs.
+	err := Diagnose(ctx, conf, []string{"./..."}, nil, nil)
+	require.NoError(t, err)
+
+	// Exactly one results directory is created even for a zero-iteration run.
+	matches, err := filepath.Glob(filepath.Join(repoRoot, diagnoseResultsNamePrefix+"*"))
+	require.NoError(t, err)
+	require.Len(t, matches, 1)
+	resultsDir := matches[0]
+
+	// No iteration jsonl files because the loop guard tripped on entry.
+	iterFiles, err := filepath.Glob(filepath.Join(resultsDir, "iteration-*.log.jsonl"))
+	require.NoError(t, err)
+	assert.Empty(t, iterFiles)
+
+	reportBytes, err := os.ReadFile(filepath.Join(resultsDir, "report.json"))
+	require.NoError(t, err)
+	var rep Report
+	require.NoError(t, json.Unmarshal(reportBytes, &rep))
+	assert.Equal(t, 0, rep.Iterations)
+}
+
+// TestParseDiagnoseGoTestCount covers both -count spellings, last-wins
+// semantics, the -args cutoff, and rejection of non-positive/non-numeric values.
+func TestParseDiagnoseGoTestCount(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name string
+		args []string
+		wantSet bool
+		wantN int
+		wantErr bool
+	}{
+		{name: "no count", args: []string{"-v", "./..."}, wantSet: false, wantN: 0, wantErr: false},
+		{name: "count 1", args: []string{"-count=1", "./..."}, wantSet: true, wantN: 1, wantErr: false},
+		{name: "count 1 spaced", args: []string{"-count", "1", "./..."}, wantSet: true, wantN: 1, wantErr: false},
+		{name: "count 2", args: []string{"-count=2", "./..."}, wantSet: true, wantN: 2, wantErr: false},
+		{name: "count 99", args: []string{"-count", "99"}, wantSet: true, wantN: 99, wantErr: false},
+		{name: "last count wins", args: []string{"-count=1", "-count=3"}, wantSet: true, wantN: 3, wantErr: false},
+		{name: "count after -args ignored", args: []string{"-v", "-args", "-count=50"}, wantSet: false, wantN: 0, wantErr: false},
+		{name: "invalid count value", args: []string{"-count=maybe"}, wantErr: true},
+		{name: "-count without value", args: []string{"-count"}, wantErr: true},
+		{name: "count zero", args: []string{"-count=0", "./..."}, wantErr: true},
+		{name: "count negative", args: []string{"-count=-1", "./..."}, wantErr: true},
+		{name: "count zero spaced", args: []string{"-count", "0"}, wantErr: true},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			set, n, err := parseDiagnoseGoTestCount(tc.args)
+			if tc.wantErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			assert.Equal(t, tc.wantSet, set)
+			assert.Equal(t, tc.wantN, n)
+		})
+	}
+}
+
+// TestWarnDiagnoseGoTestCount checks the three hint paths (redundant -count=1,
+// -count>1, no -count) plus error propagation for malformed values.
+func TestWarnDiagnoseGoTestCount(t *testing.T) {
+	t.Parallel()
+
+	t.Run("count 1", func(t *testing.T) {
+		t.Parallel()
+		var buf strings.Builder
+		require.NoError(t, WarnDiagnoseGoTestCount(&buf, []string{"-count=1", "./pkg"}))
+		assert.Contains(t, buf.String(), "unnecessary")
+	})
+
+	t.Run("count greater than 1", func(t *testing.T) {
+		t.Parallel()
+		var buf strings.Builder
+		require.NoError(t, WarnDiagnoseGoTestCount(&buf, []string{"-count=5"}))
+		assert.Contains(t, buf.String(), "prefer")
+		assert.Contains(t, buf.String(), "iterations")
+	})
+
+	t.Run("no count", func(t *testing.T) {
+		t.Parallel()
+		var buf strings.Builder
+		require.NoError(t, WarnDiagnoseGoTestCount(&buf, []string{"./..."}))
+		// TrimSpace tolerates styling escape-free output being an empty line.
+		assert.Empty(t, strings.TrimSpace(buf.String()))
+	})
+
+	t.Run("invalid non positive count", func(t *testing.T) {
+		t.Parallel()
+		var buf strings.Builder
+		err := WarnDiagnoseGoTestCount(&buf, []string{"-count=0", "./..."})
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "positive integer")
+	})
+}
+
+// TestBuildDiagnoseArgs pins the exact argv assembled for one diagnose
+// iteration: -json injection, -json dedup, shuffle-seed handling, and the
+// -count=1 default (appended even when the user already passed -count=1).
+func TestBuildDiagnoseArgs(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name string
+		goTestArgs []string
+		shuffleSeed int64
+		want []string
+		wantErr bool
+	}{
+		{
+			name: "passthrough flags and package",
+			goTestArgs: []string{"-timeout=5m", "./pkg"},
+			want: []string{"test", "-json", "-timeout=5m", "./pkg", "-count=1"},
+		},
+		{
+			name: "shuffle seed appended",
+			goTestArgs: []string{"./pkg"},
+			shuffleSeed: 12345,
+			want: []string{"test", "-json", "./pkg", "-shuffle=12345", "-count=1"},
+		},
+		{
+			name: "zero shuffle seed omitted",
+			goTestArgs: []string{"./pkg"},
+			shuffleSeed: 0,
+			want: []string{"test", "-json", "./pkg", "-count=1"},
+		},
+		{
+			name: "strips duplicate -json; keeps -count greater than 1",
+			goTestArgs: []string{"-json", "-count=3", "-race", "-run=^X$", "./pkg"},
+			want: []string{"test", "-json", "-count=3", "-race", "-run=^X$", "./pkg"},
+		},
+		{
+			name: "passes through -count with separate value when greater than 1",
+			goTestArgs: []string{"-count", "99", "./a"},
+			want: []string{"test", "-json", "-count", "99", "./a"},
+		},
+		{
+			// Duplicate -count=1 is harmless: go test takes the last one.
+			name: "explicit -count=1 gets default appended",
+			goTestArgs: []string{"-count=1", "./pkg"},
+			want: []string{"test", "-json", "-count=1", "./pkg", "-count=1"},
+		},
+		{
+			name: "reject count zero",
+			goTestArgs: []string{"-count=0", "./pkg"},
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			got, err := buildDiagnoseArgs(tc.goTestArgs, tc.shuffleSeed)
+			if tc.wantErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestDiagnoseShuffleSeedsAbsentWhenNoIterationsRun: with Shuffle enabled but
+// a pre-canceled ctx, no iteration summaries exist, so no seeds are recorded.
+func TestDiagnoseShuffleSeedsAbsentWhenNoIterationsRun(t *testing.T) {
+	t.Parallel()
+	repoRoot := t.TempDir()
+	conf := &config.App{
+		RepoRoot: repoRoot,
+		AIOutput: true,
+		Iterations: 3,
+		Shuffle: true,
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	require.NoError(t, Diagnose(ctx, conf, []string{"./..."}, nil, nil))
+
+	matches, err := filepath.Glob(filepath.Join(repoRoot, diagnoseResultsNamePrefix+"*"))
+	require.NoError(t, err)
+	require.Len(t, matches, 1)
+
+	reportBytes, err := os.ReadFile(filepath.Join(matches[0], "report.json"))
+	require.NoError(t, err)
+	var rep Report
+	require.NoError(t, json.Unmarshal(reportBytes, &rep))
+	assert.Empty(t, rep.IterationSummaries)
+}
+
+// TestDiagnoseResultsDirName pins the directory-name format for representative
+// configs: slug, iteration count, argv hash, ff/shuffle/slow markers, and the
+// fixed timestamp; every name must fit maxDiagnoseResultsBasename.
+func TestDiagnoseResultsDirName(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name string
+		conf *config.App
+		goTestArgs []string
+		want string
+	}{
+		{
+			name: "repo root pattern",
+			conf: &config.App{
+				Iterations: 1,
+			},
+			goTestArgs: []string{"./..."},
+			want: diagnoseResultsNamePrefix + "allpkgs-it1-h" + testArgHash8([]string{"./..."}) + "-20240601123045",
+		},
+		{
+			name: "nested package with ellipsis",
+			conf: &config.App{
+				Iterations: 10,
+			},
+			goTestArgs: []string{"./core/..."},
+			want: diagnoseResultsNamePrefix + "core_allpkgs-it10-h" + testArgHash8([]string{"./core/..."}) + "-20240601123045",
+		},
+		{
+			name: "fail-fast and shuffle and non-default slow",
+			conf: &config.App{
+				Iterations: 2,
+				SlowThreshold: 45 * time.Second,
+				FailFast: true,
+				Shuffle: true,
+			},
+			goTestArgs: []string{"-race", "-run=^TestFoo$", "./pkg"},
+			want: diagnoseResultsNamePrefix + "pkg-it2-h" + testArgHash8([]string{"-race", "-run=^TestFoo$", "./pkg"}) +
+				"-ff-shuffle-slow45s-20240601123045",
+		},
+		{
+			// 30s matches the default threshold, so no "slow…" marker appears.
+			name: "default slow threshold omitted",
+			conf: &config.App{
+				Iterations: 3,
+				SlowThreshold: 30 * time.Second,
+			},
+			goTestArgs: []string{"./a"},
+			want: diagnoseResultsNamePrefix + "a-it3-h" + testArgHash8([]string{"./a"}) + "-20240601123045",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			got := diagnoseResultsDirName(tc.conf, tc.goTestArgs, diagnoseResultsDirNameAt)
+			assert.Equal(t, tc.want, got)
+			assert.LessOrEqual(t, len(got), maxDiagnoseResultsBasename)
+		})
+	}
+}
+
+// TestDiagnoseResultsDirNameLongRunAndPath checks the shortening behavior:
+// oversized -run regexes and deeply nested package paths must still yield a
+// basename within maxDiagnoseResultsBasename while keeping the hash and stamp.
+func TestDiagnoseResultsDirNameLongRunAndPath(t *testing.T) {
+	t.Parallel()
+	longRun := strings.Repeat("Xy", 80)
+	goTestArgs := []string{"-run=" + longRun, "./p"}
+	conf := &config.App{
+		Iterations: 1,
+	}
+	got := diagnoseResultsDirName(conf, goTestArgs, diagnoseResultsDirNameAt)
+	assert.LessOrEqual(t, len(got), maxDiagnoseResultsBasename)
+	assert.Contains(t, got, "-it1-h")
+	assert.Contains(t, got, testArgHash8(goTestArgs))
+	assert.Regexp(t, `diagnose-p-it1-h[0-9a-f]{8}-20240601123045`, got)
+
+	// ~240 bytes of path segments forces the slug itself to be truncated.
+	longTarget := "./" + strings.Repeat("seg/", 60) + "z"
+	goTestArgs2 := []string{longTarget}
+	got2 := diagnoseResultsDirName(conf, goTestArgs2, diagnoseResultsDirNameAt)
+	assert.LessOrEqual(t, len(got2), maxDiagnoseResultsBasename)
+	assert.True(t, strings.HasPrefix(got2, diagnoseResultsNamePrefix))
+}
+
+// TestDiagnoseDumpDBCalledWithResultsDir verifies the dumpDB hook is never
+// invoked when a pre-canceled ctx prevents any iteration from running.
+// NOTE(review): despite the name, this only exercises the not-called path; the
+// captured dir/iter values are never asserted — consider a follow-up test that
+// runs a real iteration and checks dumpDB receives resultsDir.
+func TestDiagnoseDumpDBCalledWithResultsDir(t *testing.T) {
+	t.Parallel()
+	repoRoot := t.TempDir()
+	conf := &config.App{
+		RepoRoot: repoRoot,
+		AIOutput: true,
+		Iterations: 2,
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	type call struct {
+		dir string
+		iter int
+	}
+	var calls []call
+	dumpDB := func(_ context.Context, dir string, iteration int) error {
+		calls = append(calls, call{dir, iteration})
+		return nil
+	}
+
+	// pre-canceled ctx → no iterations run → dumpDB never called
+	require.NoError(t, Diagnose(ctx, conf, []string{"./..."}, nil, dumpDB))
+	assert.Empty(t, calls)
+}
+
+// TestTruncateUTF8MaxBytes covers boundary byte counts around multi-byte runes
+// ("é" is 2 bytes) and the distinction between a severed rune and a genuine
+// trailing U+FFFD.
+func TestTruncateUTF8MaxBytes(t *testing.T) {
+	t.Parallel()
+	s := "ééé" // 6 bytes, 3 runes
+	assert.Empty(t, truncateUTF8MaxBytes(s, 0))
+	// maxBytes=1 slices mid-rune; the dangling lead byte is stripped.
+	assert.Empty(t, truncateUTF8MaxBytes(s, 1))
+	assert.Equal(t, "é", truncateUTF8MaxBytes(s, 2))
+	assert.Equal(t, "éé", truncateUTF8MaxBytes(s, 4))
+	assert.Equal(t, "ééé", truncateUTF8MaxBytes(s, 6))
+	assert.Equal(t, "ééé", truncateUTF8MaxBytes(s, 10))
+	// U+FFFD is utf8.RuneError's value; truncation must not strip a valid final replacement character.
+	assert.Equal(t, "abc\uFFFD", truncateUTF8MaxBytes("abc\uFFFD"+"x", 6))
+}
+
+// TestPackagePatternsFromEnd: trailing package patterns are collected in
+// order; an argv ending in flags yields nil.
+func TestPackagePatternsFromEnd(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, []string{"./core/...", "./foo"}, packagePatternsFromEnd([]string{"-race", "-timeout=5m", "./core/...", "./foo"}))
+	assert.Nil(t, packagePatternsFromEnd([]string{"-v", "-race"}))
+}
diff --git a/tools/test/internal/termstyle/termstyle.go b/tools/test/internal/termstyle/termstyle.go
new file mode 100644
index 00000000000..ca44f3509aa
--- /dev/null
+++ b/tools/test/internal/termstyle/termstyle.go
@@ -0,0 +1,18 @@
+// Package termstyle holds lipgloss styles shared by diagnose progress, DB setup,
+// and summary output so the CLI reads as one palette.
+package termstyle
+
+import "charm.land/lipgloss/v2"
+
+// Colors align with runner/diagnose_progress.go (labels, counts, accents).
+// Values are xterm-256 palette indices.
+var (
+	Filled = lipgloss.NewStyle().Foreground(lipgloss.Color("42")) // green — progress-bar fill
+	OK = Filled // checkmarks / success ticks
+	Empty = lipgloss.NewStyle().Foreground(lipgloss.Color("238")) // dark grey — progress-bar gap
+	Label = lipgloss.NewStyle().Foreground(lipgloss.Color("39")).Bold(true) // bold blue — headings
+	Muted = lipgloss.NewStyle().Foreground(lipgloss.Color("245")) // grey — secondary text
+	Accent = lipgloss.NewStyle().Foreground(lipgloss.Color("214")) // orange — notices/warnings
+	Bad = lipgloss.NewStyle().Foreground(lipgloss.Color("196")) // red — failures
+	// Flaky summary (yellow); Slow uses Muted (grey) in the runner.
+	Flaky = lipgloss.NewStyle().Foreground(lipgloss.Color("226"))
+)
diff --git a/tools/test/main.go b/tools/test/main.go
new file mode 100644
index 00000000000..49b1e9d18d0
--- /dev/null
+++ b/tools/test/main.go
@@ -0,0 +1,9 @@
+package main
+
+import (
+ "github.com/smartcontractkit/chainlink/v2/tools/test/internal/cmd"
+)
+
+// main delegates to the internal cmd package, which defines and runs the CLI.
+func main() {
+	cmd.Execute()
+}