shakacode · justin808 · Jun 12, 2026 · Jun 12, 2026 · chatgpt-codex-connector · Jun 12, 2026
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -80,6 +80,16 @@ jobs:
     steps:
       - uses: actions/checkout@v6
         with:
+          # On pull_request events GitHub's default checkout ref is the merge
+          # commit (PR merged into the CURRENT main). Diffing that against the
+          # PR's recorded base.sha — which is main as of PR creation and can lag
+          # several commits — sweeps in every change merged to main in between,
+          # miscategorizing an unrelated runtime/JS change onto a CI-only PR and
+          # firing benchmarks spuriously (#3919: a bin/-only PR ran all four
+          # suites because eight intervening main commits leaked into the diff).
+          # Check out the PR head SHA instead so change detection sees only the
+          # PR's own commits. On push, this falls back to the pushed SHA.
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
-          ref: ${{ github.event.pull_request.head.sha || github.sha }}
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
-          ref: ${{ github.event.pull_request.head.sha || github.sha }}
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
           fetch-depth: 50
           persist-credentials: false
       - name: Read runtime versions

diff --git a/AGENTS.md b/AGENTS.md
@@ -270,9 +270,10 @@ Agents should recommend PR labels based on change complexity and risk. The goal
 - **Default: no CI-expansion label.** For docs-only changes, focused tests, small isolated fixes, and refactors with no cross-package behavior change, rely on the standard path-based CI selection and local verification.
 - **Use `full-ci`** (or ask a maintainer to comment `+ci-run-full`) when the PR is high-risk or broad: CI workflow/detector changes, dependency or lockfile updates, package manager/Ruby/Node version changes, release/build/package publishing logic, generator output, dummy app boot/build behavior, SSR or hydration behavior, cross-cutting core Ruby changes, Pro/core boundary changes, or changes where skipped suites would leave a credible regression path.
 - **Use `benchmark`** for performance-sensitive changes: server rendering paths, Node renderer, caching, bundle generation, asset serving/precompile behavior, concurrency/pooling, or anything expected to affect throughput, latency, memory, or bundle size. `full-ci` does not trigger benchmarks; use both labels when a PR is both high-risk and performance-sensitive.
+- **Use `full-ci-no-benchmarks`** when a PR needs the broad test matrix but cannot move runtime performance — CI plumbing/detector changes, lint/RuboCop config, dependency or tool-version bookkeeping, scaffolder/tooling files. This label hard-suppresses every benchmark suite for the PR, overriding both change detection and the `benchmark` label, so a 30+ minute suite never runs (and never ships meaningless Bencher data) just because an uncategorized or spec-only file was touched. Pair it with `full-ci` for full test coverage without benchmarks. (Benchmarks still run on every push to `main`, which remains the regression backstop.)
 - **Remove `full-ci` when no longer needed** with `+ci-stop-full` if the PR returns to a low-risk state after splitting or reverting broad changes.
 - **Record intentional full-CI waivers** with `+ci-skip-full [optional reason]`. This is especially important for admins: the comment creates a SHA-bound audit trail without forcing docs-only or low-risk PRs to run the full matrix.
-- In PR descriptions and handoffs, state the recommended label decision explicitly: `Labels: none`, `Labels: full-ci`, `Labels: benchmark`, or `Labels: full-ci, benchmark`, with one sentence explaining why.
+- In PR descriptions and handoffs, state the recommended label decision explicitly: `Labels: none`, `Labels: full-ci`, `Labels: benchmark`, `Labels: full-ci, benchmark`, or `Labels: full-ci, full-ci-no-benchmarks`, with one sentence explaining why.
 
 ### For All PRs
 

diff --git a/benchmarks/generate_matrix.rb b/benchmarks/generate_matrix.rb
@@ -75,6 +75,17 @@
 # selecting no suites (which would skip benchmarks while CI stays green).
 VALID_APP_VERSIONS = SUITES.flat_map { |suite| suite.fetch(:app_versions) }.uniq.freeze
 
+# A PR label that forces every benchmark suite OFF for this run, even when change
+# detection, a suite-specific benchmark label, or the broad `benchmark` label
+# would otherwise select suites. This is the "full CI but skip benchmarks"
+# escape hatch, paired with the `full-ci` label (which only governs the TEST
+# matrix, never benchmarks): use it on PRs that need broad test coverage but
+# cannot move runtime performance — CI plumbing, lint/config, or tooling — so
+# Bencher does not record a meaningless run (the #3919 / #3855 class of spurious
+# runs). Honored from forks too: it only ever turns benchmarks off, so there is
+# no fork-safety concern.
+BENCHMARK_SUPPRESS_LABEL = "full-ci-no-benchmarks"
+
 def truthy_env?(name)
   ENV.fetch(name, "false") == "true"
 end
@@ -207,7 +218,7 @@ def empty_matrix_row
 
 def build_matrix
   labels = pull_request_labels
-  rows = if truthy_env?("BENCHMARK_NON_RUNTIME_ONLY")
+  rows = if truthy_env?("BENCHMARK_NON_RUNTIME_ONLY") || labels.include?(BENCHMARK_SUPPRESS_LABEL)
            []
          else
            SUITES.select { |suite| suite_enabled?(suite, labels) }.flat_map { |suite| suite_rows(suite) }

diff --git a/benchmarks/spec/benchmark_matrix_spec.rb b/benchmarks/spec/benchmark_matrix_spec.rb
@@ -81,6 +81,39 @@ def suite_ids_for(env)
     end
   end
 
+  describe "the full-ci-no-benchmarks suppression label" do
+    it "suppresses every suite even when change detection requested them" do
+      expect(suite_ids_for(
+               "BENCHMARK_EVENT_NAME" => "pull_request",
+               "BENCHMARK_PULL_REQUEST_HEAD_REPO" => "shakacode/react_on_rails",
+               "GITHUB_REPOSITORY" => "shakacode/react_on_rails",
+               "RUN_CORE_BENCHMARKS" => "true",
+               "RUN_PRO_BENCHMARKS" => "true",
+               "RUN_PRO_NODE_RENDERER_BENCHMARKS" => "true",
+               "BENCHMARK_PULL_REQUEST_LABELS" => '["full-ci-no-benchmarks"]'
+             )).to eq(["none"])
+    end
+
+    it "beats an explicit benchmark label on the same PR" do
+      expect(suite_ids_for(
+               "BENCHMARK_EVENT_NAME" => "pull_request",
+               "BENCHMARK_PULL_REQUEST_HEAD_REPO" => "shakacode/react_on_rails",
+               "GITHUB_REPOSITORY" => "shakacode/react_on_rails",
+               "BENCHMARK_PULL_REQUEST_LABELS" => '["benchmark","full-ci-no-benchmarks"]'
+             )).to eq(["none"])
+    end
+
+    it "is honored even from a fork (it only turns benchmarks off)" do
+      expect(suite_ids_for(
+               "BENCHMARK_EVENT_NAME" => "pull_request",
+               "BENCHMARK_PULL_REQUEST_HEAD_REPO" => "contributor/react_on_rails",
+               "GITHUB_REPOSITORY" => "shakacode/react_on_rails",
+               "RUN_CORE_BENCHMARKS" => "true",
+               "BENCHMARK_PULL_REQUEST_LABELS" => '["full-ci-no-benchmarks"]'
+             )).to eq(["none"])
+    end
+  end
+
   describe "app_version input filtering" do
     it "restricts a workflow_dispatch to the node-renderer suite for pro_node_renderer_only" do
       expect(suite_ids_for(

diff --git a/script/ci-changes-detector b/script/ci-changes-detector
@@ -324,13 +324,11 @@ while IFS= read -r file; do
         SOURCE_COMMENT_ONLY_CHANGED=true
       else
         RSPEC_CHANGED=true
-        # Mirrors the existing benchmark behavior: a core gem-spec change runs the
-        # core+pro suites (run_ruby_tests fed them). The gem-tests.yml workflow in
-        # this arm is guarded out — editing it can't move performance.
-        if ! is_benchmark_irrelevant_path "$file"; then
-          BENCH_CORE_CHANGED=true
-          BENCH_PRO_CHANGED=true
-        fi
+        # Spec-only changes set no BENCH_* flag: specs are never shipped in the
+        # gem, so they cannot move runtime performance (e.g. #3854 changed only
+        # CI-config specs yet ran the core+pro suites). Benchmarks are driven by
+        # the lib/app source arms above; the `benchmark` label still forces a run
+        # when a spec change is paired with a perf concern.
       fi
       ;;
 
@@ -411,10 +409,9 @@ while IFS= read -r file; do
         PRO_SOURCE_COMMENT_ONLY_CHANGED=true
       else
         PRO_RSPEC_CHANGED=true
-        # pro-gem-tests.yml in this arm is guarded out (a workflow edit can't move perf).
-        if ! is_benchmark_irrelevant_path "$file"; then
-          BENCH_PRO_CHANGED=true
-        fi
+        # Spec-only changes set no BENCH_* flag (see the core gem-spec arm above):
+        # Pro specs are not shipped, so they cannot move runtime performance. The
+        # `benchmark` label still forces a run when needed.
       fi
       ;;
 
@@ -478,16 +475,20 @@ while IFS= read -r file; do
       ;;
 
     # Catch-all: any file not explicitly categorized above
-    # Philosophy: When in doubt, run everything to ensure safety
-    # This prevents new file types or directories from being silently ignored by CI
+    # Philosophy: When in doubt, run every TEST suite to ensure safety. This
+    # prevents new file types or directories from being silently ignored by CI.
+    #
+    # Deliberately does NOT set any BENCH_* flag: an uncategorized file is almost
+    # always tooling/config (e.g. .ci-dependency-versions, .tool-versions,
+    # packages/create-react-on-rails-app/**, knip.ts), not a hot runtime path, so
+    # benchmarking it on a PR just burns a 30+ minute suite and ships meaningless
+    # Bencher data (the #3855 / #3915 / #3785 spurious runs). The `benchmark` PR
+    # label still forces a run for a genuinely new runtime path, and every push to
+    # main benchmarks unconditionally (generate_matrix.rb), so main remains the
+    # regression backstop for anything that slips through here.
     *)
       DOCS_ONLY=false
       UNCATEGORIZED_CHANGED=true
-      # Unknown file: benchmark every suite too (safety, matching the full-suite
-      # test run below). A new runtime path must never silently skip benchmarks.
-      BENCH_CORE_CHANGED=true
-      BENCH_PRO_CHANGED=true
-      BENCH_PRO_NODE_RENDERER_CHANGED=true
       ;;
   esac
 done <<< "$CHANGED_FILES"

diff --git a/script/ci-changes-detector-test.bash b/script/ci-changes-detector-test.bash
@@ -366,6 +366,42 @@ test_node_renderer_source_change_runs_node_renderer_benchmark() {
   assert_contains "$out" '"run_core_benchmarks": false' "node renderer output"
 }
 
+# An uncategorized file (e.g. a new CI/tooling dotfile like .ci-dependency-versions,
+# the #3855 case) runs the full TEST suite for safety, but must NOT benchmark:
+# unknown files are almost always tooling, not a hot runtime path, so a Bencher
+# run here is pure noise. Main pushes benchmark unconditionally, so a genuinely
+# new runtime path is still measured once it lands.
+test_uncategorized_file_runs_tests_but_skips_benchmarks() {
+  setup_repo
+  write_file_change ".ci-dependency-versions" "ruby: 3.2"
+
+  local out
+  out="$(detector_output)"
+  assert_contains "$out" '"non_runtime_only": false' "uncategorized output"
+  # Full test suite still runs (the safety the catch-all is there for).
+  assert_contains "$out" '"run_ruby_tests": true' "uncategorized output"
+  assert_contains "$out" '"run_pro_tests": true' "uncategorized output"
+  # ... but no benchmark suite.
+  assert_contains "$out" '"run_core_benchmarks": false' "uncategorized output"
+  assert_contains "$out" '"run_pro_benchmarks": false' "uncategorized output"
+  assert_contains "$out" '"run_pro_node_renderer_benchmarks": false' "uncategorized output"
+}
+
+# Spec-only changes run the gem tests but never benchmark: specs are not shipped,
+# so they cannot move runtime performance (#3854 changed only CI-config specs yet
+# ran the core+pro suites).
+test_rspec_only_change_skips_benchmarks() {
+  setup_repo
+  write_file_change "react_on_rails/spec/react_on_rails/some_bench_spec.rb"
+
+  local out
-  local out
+  assert_contains "$out" '"run_ruby_tests": true' "rspec-bench output"
+  assert_contains "$out" '"non_runtime_only": true' "rspec-bench output"
+  assert_contains "$out" '"run_core_benchmarks": false' "rspec-bench output"
-  local out
+  assert_contains "$out" '"run_ruby_tests": true' "rspec-bench output"
+  assert_contains "$out" '"non_runtime_only": true' "rspec-bench output"
+  assert_contains "$out" '"run_core_benchmarks": false' "rspec-bench output"
+  out="$(detector_output)"
+  assert_contains "$out" '"run_ruby_tests": true' "rspec-bench output"
+  assert_contains "$out" '"run_core_benchmarks": false' "rspec-bench output"
+  assert_contains "$out" '"run_pro_benchmarks": false' "rspec-bench output"
+  assert_contains "$out" '"run_pro_node_renderer_benchmarks": false' "rspec-bench output"
+}
+
 test_ruby_comment_only_change_skips_heavy_tests_but_keeps_lint() {
   setup_repo
   perl -0pi -e 's/class Example/class Example\n    # Explain the fixture./' \
@@ -798,6 +834,8 @@ run_test test_ci_infrastructure_only_change_runs_tests_but_skips_benchmarks
 run_test test_suite_workflow_file_runs_its_tests_but_no_benchmark
 run_test test_ci_infra_plus_runtime_source_still_benchmarks
 run_test test_node_renderer_source_change_runs_node_renderer_benchmark
+run_test test_uncategorized_file_runs_tests_but_skips_benchmarks
+run_test test_rspec_only_change_skips_benchmarks
 run_test test_ruby_comment_only_change_skips_heavy_tests_but_keeps_lint
 run_test test_ruby_block_comment_only_change_skips_heavy_tests_but_keeps_lint
 run_test test_wrapping_ruby_code_with_block_comment_delimiters_remains_runtime_affecting