shakacode
diff --git a/benchmarks/lib/bencher_runner.rb b/benchmarks/lib/bencher_runner.rb
Lines changed: 171 additions & 0 deletions
diff --git a/benchmarks/lib/github.rb b/benchmarks/lib/github.rb
Lines changed: 4 additions & 0 deletions
diff --git a/benchmarks/lib/pr_report_poster.rb b/benchmarks/lib/pr_report_poster.rb
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+
+require "fileutils"
+require "json"
+require "open3"
+
+require_relative "bencher_report"
+require_relative "github"
+
+# Builds and runs the Bencher CLI invocation for benchmark tracking.
+class BencherRunner
+  # Raised when Bencher's stdout cannot be parsed into a report.
+  class ReportParseError < StandardError; end
+  # Raised when the report file cannot be written, moved into place, or removed.
+  class PersistenceError < StandardError; end
+
+  Result = Struct.new(:stderr, :exit_code, :report, keyword_init: true)
+  private_constant :Result
+
+  # Bencher dashboard project for React on Rails benchmark runs.
+  PROJECT_SLUG = "react-on-rails-t8a9ncxo"
+  private_constant :PROJECT_SLUG
+  MAX_SAMPLE = "64" # String because it is passed verbatim as a CLI argument.
+  private_constant :MAX_SAMPLE
+  # Shell convention for a process terminated by signal N: exit code 128 + N.
+  SIGNAL_EXIT_CODE_BASE = 128
+  private_constant :SIGNAL_EXIT_CODE_BASE
+
+  # Per-measure t-test boundaries (the confidence level Bencher uses for its
+  # prediction interval). Tuned from a sweep of recent main-branch reports so fewer
+  # than 1/20 commits raise a false regression across all benchmarks: rps and p50
+  # individually need ~0.9995 / ~0.9999 to clear that bar. failed_pct stays at 0.95
+  # because healthy runs sit at ~0 with near-zero variance, so its boundary rarely
+  # matters.
+  # Bencher's t-test threshold is a prediction interval, so each one-sided boundary B
+  # gives a per-test false-positive rate of ~(1 - B):
+  # https://bencher.dev/docs/explanation/thresholds/
+  # Direction: :lower for "regression = drop" measures (rps), :upper for
+  # "regression = climb" measures (latency, failure rate).
+  # p90/p99/max are intentionally NOT tracked: their tail noise can't meet the 1/20
+  # target at any usable boundary. p90 stays in the summary table for visibility only.
+  # Each row is frozen as well as the table, so the shared constant cannot be
+  # mutated in place.
+  THRESHOLDS = [
+    ["rps", :lower, "0.9995"],
+    ["p50_latency", :upper, "0.9999"],
+    ["failed_pct", :upper, "0.95"]
+  ].map(&:freeze).freeze
+
+  # benchmark_json: path handed to the Bencher CLI via --file.
+  # report_json: path where Bencher's stdout (the JSON report) is persisted.
+  def initialize(benchmark_json:, report_json:)
+    @benchmark_json = benchmark_json
+    @report_json = report_json
+  end
+
+  # Returns a Result with :stderr, :exit_code, and :report accessors. The
+  # private constant keeps callers from depending on the struct class name.
+  # :exit_code is always an Integer: when the CLI was terminated by a signal it is
+  # 128 + the signal number rather than nil. :report is nil when Bencher wrote
+  # nothing to stdout.
+  # Raises PersistenceError on I/O failure, ReportParseError on malformed JSON output.
+  def run(branch:, start_point_args:)
+    # This Bencher CLI call is not wrapped in Timeout.timeout because that can leak
+    # child processes. In CI it is bounded by the GitHub Actions job timeout for
+    # .github/workflows/benchmark-suite.yml; the benchmark execution step has its
+    # own narrower timeout-minutes before this reporting step runs.
+    stdout, stderr, status = Open3.capture3(*args(branch, start_point_args))
+    emit_stderr(stderr)
+    report = persist_report(stdout)
+    warn_on_missing_perf_link_context(report) if report
+    Result.new(stderr:, exit_code: exit_code_for(status), report:)
+  end
+
+  private
+
+  attr_reader :benchmark_json, :report_json
+
+  # Process::Status#exitstatus is nil when the child did not exit normally (for
+  # example, it was killed by a signal). Map that case to 128 + signal number so
+  # callers never receive a nil exit code.
+  def exit_code_for(status)
+    status.exitstatus || (SIGNAL_EXIT_CODE_BASE + status.termsig.to_i)
+  end
+
+  # Forwards the CLI's stderr to this process's stderr, skipping empty output.
+  def emit_stderr(stderr)
+    return if stderr.empty?
+
+    warn stderr
+  end
+
+  # Builds the --threshold-* arguments for one measure. Only the side named by
+  # direction gets the boundary.
+  def threshold_args(measure, direction, boundary)
+    # "_" is Bencher's sentinel for "no boundary on this side".
+    lower, upper = direction == :lower ? [boundary, "_"] : ["_", boundary]
+    [
+      "--threshold-measure", measure,
+      "--threshold-test", "t_test",
+      "--threshold-max-sample-size", MAX_SAMPLE,
+      "--threshold-lower-boundary", lower,
+      "--threshold-upper-boundary", upper
+    ]
+  end
+
+  # Full argv for `bencher run`, passed to Open3 as separate arguments (no shell).
+  def args(branch, start_point_args)
+    [
+      "bencher", "run",
+      "--project", PROJECT_SLUG,
+      "--branch", branch,
+      *start_point_args,
+      "--testbed", "github-actions",
+      "--adapter", "json",
+      "--file", benchmark_json,
+      "--err",
+      "--quiet",
+      "--format", "json",
+      *THRESHOLDS.flat_map { |measure, direction, boundary| threshold_args(measure, direction, boundary) }
+    ]
+  end
+
+  # Writes Bencher stdout to disk atomically (tmp -> mv), then parses it.
+  # On write/move failure the prior report at report_json is left untouched.
+  # Empty Bencher stdout removes any stale prior report because there is no new output to preserve.
+  # On parse failure the newly-written malformed report is removed so a future
+  # retry starts clean rather than re-posting garbage.
+  # Returns the parsed report, or nil when stdout is empty.
+  def persist_report(stdout)
+    if stdout.empty?
+      begin
+        FileUtils.rm_f(report_json)
+      rescue SystemCallError, IOError => e
+        raise PersistenceError,
+              "#{e.message} (Bencher produced no output; see stderr above for the run failure)"
+      end
+      return nil
+    end
+
+    tmp_report_json = "#{report_json}.tmp"
+    begin
+      File.write(tmp_report_json, stdout)
+      FileUtils.mv(tmp_report_json, report_json)
+    rescue SystemCallError, IOError => e
+      raise PersistenceError, e.message
+    ensure
+      # Always runs, including for exceptions that bypass the rescue block. After a successful mv the tmp
+      # file no longer exists, so rm_f is a no-op; if write or mv raised it performs the cleanup.
+      safe_remove_tmp(tmp_report_json)
+    end
+
+    parse_and_cleanup_report(stdout)
+  end
+
+  # Parses stdout; on ReportParseError removes the just-written report file and
+  # re-raises the same error.
+  def parse_and_cleanup_report(stdout)
+    parse_report(stdout)
+  rescue ReportParseError
+    Github.debug("Malformed Bencher output (first 300 chars): #{stdout.slice(0, 300).inspect}")
+    # Only ReportParseError is cleaned up here. Unexpected parser bugs should propagate unchanged.
+    # Remove malformed output so a future retry starts clean; the raw debugging
+    # artifact is lost, but a bad report file is worse than no report file.
+    begin
+      FileUtils.rm_f(report_json)
+    rescue StandardError => e
+      Github.warning("Could not remove malformed Bencher report #{report_json}: #{e.message}")
+    end
+    raise
+  end
+
+  # Delegates to BencherReport.parse for the tracked measures and converts its
+  # format/JSON errors into ReportParseError.
+  def parse_report(stdout)
+    BencherReport.parse(stdout, tracked_measures: THRESHOLDS.map(&:first))
+  rescue BencherReport::FormatError, JSON::ParserError => e
+    raise ReportParseError,
+          "Bencher JSON report has an unexpected shape — re-verify against " \
+          "benchmarks/spec/bencher_report_spec.rb before bumping the CLI pin. #{e.message}"
+  end
+
+  # Best-effort removal of the temporary report file; failures only produce a warning.
+  def safe_remove_tmp(path)
+    FileUtils.rm_f(path)
+  rescue StandardError => e
+    # Cleanup failures are non-fatal, so keep this broader than the persistence rescue.
+    Github.warning("Could not remove temporary Bencher report #{path}: #{e.message}")
+  end
+
+  # Emits a workflow warning when the report says perf links cannot be built.
+  def warn_on_missing_perf_link_context(report)
+    return unless report.perf_links_unavailable?
+
+    Github.warning(
+      "Bencher report listed benchmarks but no perf-link context " \
+      "(project/branch/testbed uuid); benchmark names will render unlinked. Re-verify the " \
+      "report shape against benchmarks/spec/bencher_perf_url_spec.rb before bumping the CLI pin."
+    )
+  end
+end
@@ -16,6 +16,10 @@ def notice(message)
     $stdout.puts "::notice::#{escape_workflow_command_data(message)}"
   end
 
+  # Prints a GitHub Actions `::debug::` workflow command to stdout. The message is
+  # passed through escape_workflow_command_data before being embedded.
+  def debug(message)
+    escaped_message = escape_workflow_command_data(message)
+    $stdout.puts("::debug::#{escaped_message}")
+  end
+
   def escape_workflow_command_data(value)
     value.to_s
          # Escape percent first so the percent signs introduced below are not double-encoded.
 
@@ -0,0 +1,140 @@
+# frozen_string_literal: true
+
+require "time"
+
+require_relative "github"
+require_relative "github_cli"
+
+# Posts the per-suite Bencher Markdown report to a pull request and cleans up
+# older comments with the same marker.
+class PrReportPoster
+  REPOSITORY_SLUG_PATTERN = %r{\A[A-Za-z0-9][A-Za-z0-9_.-]*/[A-Za-z0-9][A-Za-z0-9_.-]*\z}
+  private_constant :REPOSITORY_SLUG_PATTERN
+  # Shared by PR-number validation and the comment-ID filter.
+  NUMERIC_ID_PATTERN = /\A\d+\z/
+  private_constant :NUMERIC_ID_PATTERN
+
+  # repository: "owner/repo" slug; pr_number: digits only (Integer or String);
+  # suite_name: label used in log and warning messages; marker: text every report
+  # comment starts with, used later to find older comments from the same suite.
+  # Raises ArgumentError when repository, pr_number, or marker is invalid.
+  def initialize(repository:, pr_number:, suite_name:, marker:)
+    normalized_repository = repository.to_s
+    # The regex allows ".." within component names, so reject any embedded path traversal.
+    valid_repository = normalized_repository.match?(REPOSITORY_SLUG_PATTERN) &&
+                       !normalized_repository.include?("..")
+    unless valid_repository
+      raise ArgumentError, "repository must be in owner/repo format, got: #{normalized_repository.inspect}"
+    end
+
+    normalized_pr_number = pr_number.to_s
+    unless normalized_pr_number.match?(NUMERIC_ID_PATTERN)
+      raise ArgumentError, "pr_number must be numeric, got: #{normalized_pr_number.inspect}"
+    end
+
+    # The stale-comment sweep selects comments whose body starts with the marker.
+    # An empty marker is a prefix of every comment, so the sweep would delete
+    # unrelated comments on the pull request; refuse it up front.
+    normalized_marker = marker.to_s
+    raise ArgumentError, "marker must not be blank, got: #{marker.inspect}" if normalized_marker.strip.empty?
+
+    @repository = normalized_repository
+    @pr_number = normalized_pr_number
+    @suite_name = suite_name
+    @marker = normalized_marker
+  end
+
+  # GitHub Actions sets GITHUB_REPOSITORY natively. The workflow step must set
+  # PR_NUMBER from the pull request event.
+  # Raises KeyError when either variable is missing.
+  def self.from_env(suite_name:, marker:)
+    new(
+      repository: required_repository,
+      pr_number: required_pr_number,
+      suite_name:,
+      marker:
+    )
+  end
+
+  def self.required_repository
+    ENV.fetch("GITHUB_REPOSITORY") do
+      raise KeyError, "GITHUB_REPOSITORY env var is required (set by GitHub Actions)"
+    end
+  end
+  private_class_method :required_repository
+
+  def self.required_pr_number
+    ENV.fetch("PR_NUMBER") do
+      raise KeyError, "PR_NUMBER env var is required (set it from the pull_request event in the workflow step)"
+    end
+  end
+  private_class_method :required_pr_number
+
+  # Posts markdown (prefixed with the marker) as a new PR comment, then deletes
+  # older comments carrying the same marker. Does nothing when markdown is nil or
+  # blank. If posting fails, prior comments are kept and a warning is emitted.
+  def replace(markdown)
+    # Guard callers that use the poster without the script-level empty-report check.
+    # nil and whitespace-only input are treated like an empty report.
+    return if markdown.nil? || markdown.strip.empty?
+
+    # Capture cutoff before posting so the stale-comment sweep only hits pre-existing
+    # comments with the same marker, not the one this run is about to create.
+    # NOTE(review): the cutoff uses this machine's clock while created_at comes from
+    # GitHub; a local clock running ahead could make the new comment look stale.
+    # Confirm the expected skew is acceptable.
+    cutoff_ts = Time.now.utc.iso8601
+    if post_comment(markdown)
+      delete_stale_comments(before: cutoff_ts)
+    else
+      Github.warning("Failed to post #{suite_name} benchmark report comment; keeping prior comments in place.")
+    end
+  end
+
+  private
+
+  attr_reader :repository, :pr_number, :suite_name, :marker
+
+  # Deletes each stale comment individually. A falsy result from GithubCli.run is
+  # counted as a failure, and one summary warning is emitted if any delete failed.
+  def delete_stale_comments(before:)
+    failed = 0
+    stale_comment_ids(before:).each do |comment_id|
+      $stdout.puts "Deleting stale #{suite_name} Bencher report comment #{comment_id}"
+      failed += 1 unless GithubCli.run(
+        "gh", "api", "-X", "DELETE", "repos/#{repository}/issues/comments/#{comment_id}",
+        error_message: "Failed to delete stale #{suite_name} Bencher report comment #{comment_id}"
+      )
+    end
+    return if failed.zero?
+
+    Github.warning(
+      "Failed to delete #{failed} stale #{suite_name} Bencher report comment(s); " \
+      "they may remain visible."
+    )
+  end
+
+  # Returns the IDs (numeric strings) of comments on this PR whose body starts
+  # with the marker and whose created_at sorts before the cutoff. Returns [] when
+  # the listing fails or yields no numeric IDs.
+  def stale_comment_ids(before:)
+    # Marker + cutoff are passed via env so the jq filter reads them through `env.X`,
+    # avoiding Ruby/JQ escaping mismatches from interpolated strings.
+    stdout, status = GithubCli.capture(
+      "gh", "api", "repos/#{repository}/issues/#{pr_number}/comments",
+      "--paginate",
+      # gh api --paginate applies --jq to each page independently, then concatenates stdout.
+      # GitHub timestamps are fixed-width ISO-8601 strings, so lexical ordering matches time ordering.
+      "--jq", ".[] | select(.body | startswith(env.MARKER)) | select(.created_at < env.CUTOFF_TS) | .id",
+      env: { "MARKER" => marker, "CUTOFF_TS" => before }
+    )
+    unless status.success?
+      # Cleanup is best-effort: stale comments should not fail an otherwise valid benchmark job.
+      Github.warning("Failed to list stale #{suite_name} Bencher report comments; skipping cleanup.")
+      return []
+    end
+
+    comment_ids = stdout.lines.map(&:strip).reject(&:empty?)
+    # Only purely numeric tokens are ever interpolated into a DELETE URL.
+    numeric_comment_ids, non_numeric_comment_ids = comment_ids.partition { |id| id.match?(NUMERIC_ID_PATTERN) }
+    return numeric_comment_ids if non_numeric_comment_ids.empty?
+
+    if numeric_comment_ids.empty?
+      Github.warning(
+        "Stale #{suite_name} Bencher report comment listing returned no numeric IDs " \
+        "(#{non_numeric_comment_ids.size} non-numeric token(s), " \
+        "e.g. #{non_numeric_comment_ids.first.slice(0, 120).inspect}); skipping cleanup."
+      )
+      return []
+    end
+
+    Github.warning(
+      "Stale #{suite_name} Bencher report comment listing returned " \
+      "#{non_numeric_comment_ids.size} non-numeric ID(s); ignoring those entries."
+    )
+    numeric_comment_ids
+  end
+
+  # Posts the marker line followed by the markdown as a new PR comment. The
+  # result of GithubCli.run is used by replace as a success flag.
+  def post_comment(markdown)
+    # Send the body over stdin (--body-file -) rather than as a CLI argument so a
+    # large report can't hit the OS argument-length limit.
+    GithubCli.run(
+      "gh", "pr", "comment", pr_number, "--repo", repository, "--body-file", "-",
+      error_message: "Failed to post #{suite_name} benchmark report comment",
+      stdin_data: "#{marker}\n#{markdown}"
+    )
+  end
+end