stan-dev · VisruthSK · Apr 14, 2026 · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -24,3 +24,4 @@ vignettes/loo2-non-factorized_cache/*
 ^release-prep\.R$
 ^_pkgdown\.yml$
 ^pkgdown$
+^touchstone$
diff --git a/.github/workflows/touchstone-comment.yaml b/.github/workflows/touchstone-comment.yaml
@@ -0,0 +1,22 @@
+name: Continuous Benchmarks (Comment)
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.workflow_run.head_repository.full_name }}-${{ github.event.workflow_run.head_branch }}
+  cancel-in-progress: true
+
+on:
+  workflow_run:
+    workflows: ["Continuous Benchmarks (Receive)"]
+    types:
+      - completed
+
+jobs:
+  upload:
+    runs-on: ubuntu-latest
+    # Bare expression: `if: >` with `${{ }}` yields a non-empty string (always true).
+    if: github.event.workflow_run.event == 'pull_request'
+    steps:
+      - uses: lorenzwalthert/touchstone/actions/comment@main
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
diff --git a/.github/workflows/touchstone-receive.yaml b/.github/workflows/touchstone-receive.yaml
@@ -0,0 +1,43 @@
+name: Continuous Benchmarks (Receive)
+
+permissions:
+  contents: read  # token is limited to reading repository contents
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the running one
+
+on:
+  pull_request:
+
+jobs:
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      config: ${{ steps.read_touchstone_config.outputs.config }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - id: read_touchstone_config  # exposes touchstone/config.json as one-line JSON
+        run: |
+          echo "config=$(jq -c . ./touchstone/config.json)" >> "$GITHUB_OUTPUT"
+
+  build:
+    needs: prepare
+    runs-on: ${{ matrix.config.os }}  # `os` and `r` come from touchstone/config.json
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - ${{ fromJson(needs.prepare.outputs.config) }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+      - uses: lorenzwalthert/touchstone/actions/receive@main
+        with:
+          r-version: ${{ matrix.config.r }}
diff --git a/data-raw/wine_loglik.R b/data-raw/wine_loglik.R
@@ -0,0 +1,20 @@
+# Regenerates touchstone/wine.rds (the log-likelihood matrix read by
+# touchstone/script.R). Paths are relative, so run from the package root.
+library(brms)
+library(loo)
+options(brms.backend = "cmdstanr", mc.cores = 4)
+
+wine_raw <- read.delim("data-raw/winequality-red.csv", sep = ";")
+wine_std <- as.data.frame(scale(unique(wine_raw)))
+
+wine_fit <- brm(
+  ordered(quality) ~ .,
+  family = cumulative("logit"),
+  prior = prior(R2D2(mean_R2 = 1 / 3, prec_R2 = 3)),
+  data = wine_std,
+  seed = 1,
+  silent = 2,
+  refresh = 0
+)
+
+saveRDS(log_lik(wine_fit), "touchstone/wine.rds")
diff --git a/data-raw/winequality-red.csv b/data-raw/winequality-red.csv
diff --git a/touchstone/.gitignore b/touchstone/.gitignore
@@ -0,0 +1,7 @@
+*
+!script.R
+!config.json
+!.gitignore
+!header.R
+!footer.R
+!wine.rds
diff --git a/touchstone/config.json b/touchstone/config.json
@@ -0,0 +1,8 @@
+{
+  "os": "ubuntu-22.04",
+  "r": "4.4.3",
+  "rspm": "https://packagemanager.posit.co/cran/__linux__/jammy/latest",
+  "benchmarking_repo": "",
+  "benchmarking_ref": "",
+  "benchmarking_path": ""
+}
diff --git a/touchstone/footer.R b/touchstone/footer.R
@@ -0,0 +1,10 @@
+# You can modify the PR comment footer here. You can use GitHub markdown, e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluated within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# See `?touchstone::pr_comment`
+link <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html"
+glue::glue(
+  "\nFurther explanation regarding interpretation and",
+  " methodology can be found in the [documentation]({link})."
+)
diff --git a/touchstone/header.R b/touchstone/header.R
@@ -0,0 +1,13 @@
+# You can modify the PR comment header here. You can use GitHub markdown, e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluated within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# Available variables for glue substitution:
+# * ci: confidence interval
+# * branches: BASE and HEAD branches benchmarked against each other.
+# See `?touchstone::pr_comment`
+glue::glue(
+  "This is how benchmark results would change (along with a",
+  " {100 * ci}% confidence interval in relative change) if ",
+  "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n"
+)
diff --git a/touchstone/script.R b/touchstone/script.R
@@ -0,0 +1,57 @@
+# See `help(run_script, package = 'touchstone')` on how to run this
+# interactively.
+
+# Installs the branches to benchmark.
+touchstone::branch_install()
+
+touchstone::pin_assets("touchstone/wine.rds") # built by data-raw/wine_loglik.R
+
+# The wine log-likelihood matrix is large enough to expose real slowdowns in
+# the core `loo()` paths, but small enough to keep PR feedback reasonably fast.
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    suppressPackageStartupMessages(library(loo))
+    # benchmark_run() evaluates in a callr subprocess, so load pinned assets here.
+    wine_log_lik_matrix <- readRDS(touchstone::path_pinned_asset(
+      "touchstone/wine.rds"
+    ))
+    matrix_r_eff <- rep(1, ncol(wine_log_lik_matrix)) # one r_eff of 1 per column
+  },
+  loo_matrix = {
+    suppressWarnings(
+      loo(
+        wine_log_lik_matrix,
+        r_eff = matrix_r_eff,
+        cores = 1
+      )
+    )
+  },
+  n = 10
+)
+
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    suppressPackageStartupMessages(library(loo))
+    wine_log_lik_matrix <- readRDS(touchstone::path_pinned_asset(
+      "touchstone/wine.rds"
+    ))
+    function_r_eff <- rep(1, ncol(wine_log_lik_matrix))
+    wine_data <- data.frame(obs = seq_len(ncol(wine_log_lik_matrix)))
+    wine_llfun <- function(data_i, draws) draws[, data_i$obs, drop = FALSE]
+  },
+  loo_function = { # wine_llfun picks the stored columns of `draws` via data_i$obs
+    suppressWarnings(
+      loo(
+        wine_llfun,
+        data = wine_data,
+        draws = wine_log_lik_matrix,
+        r_eff = function_r_eff,
+        cores = 1
+      )
+    )
+  },
+  n = 10
+)
+
+# Create the artifacts used downstream in the GitHub Action.
+touchstone::benchmark_analyze()
diff --git a/touchstone/wine.rds b/touchstone/wine.rds