Merge branch 'export-srs-diff-est' of https://github.com/vinniott/loo into export-srs-diff-est

vinniott · vinniott · commit 6863c50a984c · 2026-05-10T15:45:53.000+02:00
diff --git a/.github/workflows/touchstone-comment.yaml b/.github/workflows/touchstone-comment.yaml
@@ -1,46 +1,25 @@
 name: Continuous Benchmarks (Comment)
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.run_id }}
+  group: ${{ github.workflow }}-${{ github.event.workflow_run.head_branch }} # head_ref is empty on workflow_run
   cancel-in-progress: true
 
 on:
   workflow_run:
     workflows: ["Continuous Benchmarks (Receive)"]
-    types: [completed]
+    types:
+      - completed
 
 jobs:
-  comment:
+  upload:
     runs-on: ubuntu-latest
     permissions:
       actions: read
       pull-requests: write
-    if: ${{ github.event.workflow_run.event == 'pull_request' }}
+      statuses: write
+    # Bare expression: `${{ }}` inside a folded `>` scalar yields a non-empty string, which is always truthy.
+    if: github.event.workflow_run.event == 'pull_request'
     steps:
-      - name: Download Touchstone artifact
-        id: download
-        uses: actions/download-artifact@v8
-        with:
-          name: pr
-          github-token: ${{ github.token }}
-          repository: ${{ github.repository }}
-          run-id: ${{ github.event.workflow_run.id }}
-
-      # defensive since issues could cause commenting in random places
-      - name: Read PR number
-        id: pr
-        shell: bash
-        run: |
-          number="$(tr -cd '0-9' < ./NR)"
-          test -n "$number"
-          echo "number=$number" >> "$GITHUB_OUTPUT"
-
-      - name: Create or update sticky PR comment
-        id: comment
-        uses: marocchino/sticky-pull-request-comment@v3
+      - uses: lorenzwalthert/touchstone/actions/comment@main
         with:
           GITHUB_TOKEN: ${{ github.token }}
-          number_force: ${{ steps.pr.outputs.number }}
-          header: touchstone
-          path: ./info.txt
-          skip_unchanged: true
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -37,7 +37,7 @@ Imports:
     checkmate,
     matrixStats (>= 0.52),
     parallel,
-    posterior (>= 1.7.0),
+    posterior (>= 1.5.0),
     stats
 Suggests:
     bayesplot (>= 1.7.0),
diff --git a/R/gpdfit.R b/R/gpdfit.R
@@ -29,10 +29,71 @@
 #' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
 #'
 gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
-  posterior::gpdfit(
-    x = x,
-    wip = wip,
-    min_grid_pts = min_grid_pts,
-    sort_x = sort_x
-  )
+  # Weighted-grid estimate of theta; see section 4 of Zhang and Stephens (2009)
+  if (sort_x) { # callers that pass already-sorted x can skip the sort
+    x <- sort.int(x)
+  }
+  N <- length(x)
+  prior <- 3 # constant dividing the grid offsets in `theta` below
+  M <- min_grid_pts + floor(sqrt(N)) # number of grid points
+  jj <- seq_len(M) # grid indices 1..M
+  xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
+  theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar # M candidate values
+  l_theta <- N * lx(theta, x) # profile log-lik
+  w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize so the weights sum to 1
+  theta_hat <- sum(theta * w_theta) # weighted mean over the grid
+  k <- mean.default(log1p(-theta_hat * x)) # shape estimate
+  sigma <- -k / theta_hat # scale estimate
+
+  if (wip) { # pull k toward 0.5 via the weakly informative prior
+    k <- adjust_k_wip(k, n = N)
+  }
+
+  if (is.na(k)) { # also catches NaN; a failed fit is reported as k = Inf
+    k <- Inf
+  }
+
+  nlist(k, sigma) # presumably a named list; callers read fit$k and fit$sigma
+}
+
+
+# internal ----------------------------------------------------------------
+
+lx <- function(a, x) { # returns one value per element of `a`
+  neg_a <- -a # the sign flip is applied once, up front
+  k <- vapply(neg_a, function(a_i) mean(log1p(a_i * x)), numeric(1))
+  log(neg_a / k) - k - 1
+}
+
+#' Shrink a khat estimate toward 0.5 using a weakly informative Gaussian prior.
+#' This stabilizes estimates for very small Monte Carlo sample sizes and low
+#' neff cases.
+#'
+#' @noRd
+#' @param k Scalar khat estimate.
+#' @param n Integer number of tail samples used to fit GPD.
+#' @return Scalar adjusted khat estimate.
+#'
+adjust_k_wip <- function(k, n) {
+  prior_weight <- 10 # weight on the prior mean 0.5; `k` itself gets weight `n`
+  total <- n + prior_weight
+  k * n / total + prior_weight * 0.5 / total
+}
+
+
+#' Inverse CDF of generalized Pareto distribution
+#' (assuming location parameter is 0)
+#'
+#' @noRd
+#' @param p Vector of probabilities.
+#' @param k Scalar shape parameter; `k == 0` uses the exponential limit.
+#' @param sigma Scalar scale parameter.
+#' @return Vector of quantiles (all `NaN` if `sigma` is `NaN` or `<= 0`).
+#'
+qgpd <- function(p, k, sigma) {
+  if (is.nan(sigma) || sigma <= 0) {
+    return(rep(NaN, length(p)))
+  }
+  # k == 0: the general formula is 0 / 0, so use the limit -sigma * log(1 - p)
+  if (isTRUE(k == 0)) -sigma * log1p(-p) else sigma * expm1(-k * log1p(-p)) / k
 }
diff --git a/R/psis.R b/R/psis.R
@@ -254,12 +254,12 @@ psis_smooth_tail <- function(x, cutoff) {
   exp_cutoff <- exp(cutoff)
 
   # save time not sorting since x already sorted
-  fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
+  fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
   k <- fit$k
   sigma <- fit$sigma
   if (is.finite(k)) {
     p <- (seq_len(len) - 0.5) / len
-    qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff
+    qq <- qgpd(p, k, sigma) + exp_cutoff
     tail <- log(qq)
   } else {
     tail <- x
diff --git a/R/psislw.R b/R/psislw.R
@@ -72,11 +72,11 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4,
       # body and gPd smoothed tail
       tail_ord <- order(x_tail)
       exp_cutoff <- exp(cutoff)
-      fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
+      fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
       k <- fit$k
       sigma <- fit$sigma
       prb <- (seq_len(tail_len) - 0.5) / tail_len
-      qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff
+      qq <- qgpd(prb, k, sigma) + exp_cutoff
       smoothed_tail <- rep.int(0, tail_len)
       smoothed_tail[tail_ord] <- log(qq)
       x_new <- x
diff --git a/tests/testthat/test_gpdfit.R b/tests/testthat/test_gpdfit.R
@@ -11,3 +11,11 @@ test_that("gpdfit returns correct result", {
   expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize")
 })
 
+test_that("qgpd returns the correct result ", {
+  probs <- seq(from = 0, to = 1, by = 0.25)
+  q1 <- qgpd(probs, k = 1, sigma = 1) # with k = sigma = 1 the quantile is p / (1 - p)
+  expect_equal(q1, c(0, 1 / 3, 1, 3, Inf))
+
+  q2 <- qgpd(probs, k = 1, sigma = 0) # sigma <= 0 is rejected: every quantile is NaN
+  expect_true(all(is.nan(q2)))
+})
diff --git a/touchstone/config.json b/touchstone/config.json
@@ -1,6 +1,6 @@
 {
-  "os": "ubuntu-24.04",
-  "r": "4.5.3",
+  "os": "ubuntu-22.04",
+  "r": "4.4.3",
   "rspm": "https://packagemanager.posit.co/cran/__linux__/jammy/latest",
   "benchmarking_repo": "",
   "benchmarking_ref": "",
diff --git a/touchstone/script.R b/touchstone/script.R
@@ -4,12 +4,14 @@
 # installs branches to benchmark
 touchstone::branch_install()
 
-# make log lik available to tests
 touchstone::pin_assets("touchstone/wine.rds")
 
+# These synthetic workloads are large enough to expose real slowdowns in the
+# core `loo()` paths, but still short enough to keep PR feedback reasonably fast.
 touchstone::benchmark_run(
   expr_before_benchmark = {
     suppressPackageStartupMessages(library(loo))
+    # benchmark_run() evaluates in a callr subprocess, so load pinned assets here.
     wine_log_lik_matrix <- readRDS(touchstone::path_pinned_asset(
       "touchstone/wine.rds"
     ))
@@ -24,7 +26,7 @@ touchstone::benchmark_run(
       )
     )
   },
-  n = 60
+  n = 10
 )
 
 touchstone::benchmark_run(
@@ -48,7 +50,7 @@ touchstone::benchmark_run(
       )
     )
   },
-  n = 60
+  n = 10
 )
 
 # create artifacts used downstream in the GitHub Action

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`		`- "os": "ubuntu-24.04",`
`3`		`- "r": "4.5.3",`
	`2`	`+ "os": "ubuntu-22.04",`
	`3`	`+ "r": "4.4.3",`
`4`	`4`	`"rspm": "https://packagemanager.posit.co/cran/__linux__/jammy/latest",`
`5`	`5`	`"benchmarking_repo": "",`
`6`	`6`	`"benchmarking_ref": "",`
Original file line number	Diff line number	Diff line change
`@@ -4,12 +4,14 @@`
`4`	`4`	`# installs branches to benchmark`
`5`	`5`	`touchstone::branch_install()`
`6`	`6`
`7`		`-# make log lik available to tests`
`8`	`7`	`touchstone::pin_assets("touchstone/wine.rds")`
`9`	`8`
	`9`	`+# These synthetic workloads are large enough to expose real slowdowns in the`
	`10`	+# core `loo()` paths, but still short enough to keep PR feedback reasonably fast.
`10`	`11`	`touchstone::benchmark_run(`
`11`	`12`	`expr_before_benchmark = {`
`12`	`13`	`suppressPackageStartupMessages(library(loo))`
	`14`	`+ # benchmark_run() evaluates in a callr subprocess, so load pinned assets here.`
`13`	`15`	`wine_log_lik_matrix <- readRDS(touchstone::path_pinned_asset(`
`14`	`16`	`"touchstone/wine.rds"`
`15`	`17`	`))`
`@@ -24,7 +26,7 @@ touchstone::benchmark_run(`
`24`	`26`	`)`
`25`	`27`	`)`
`26`	`28`	`},`
`27`		`- n = 60`
	`29`	`+ n = 10`
`28`	`30`	`)`
`29`	`31`
`30`	`32`	`touchstone::benchmark_run(`
`@@ -48,7 +50,7 @@ touchstone::benchmark_run(`
`48`	`50`	`)`
`49`	`51`	`)`
`50`	`52`	`},`
`51`		`- n = 60`
	`53`	`+ n = 10`
`52`	`54`	`)`
`53`	`55`
`54`	`56`	`# create artifacts used downstream in the GitHub Action`