Skip to content

Commit 6863c50

Browse files
committed
Merge branch 'export-srs-diff-est' of https://github.com/vinniott/loo into export-srs-diff-est
2 parents 30cc34e + 7a73bf1 commit 6863c50

8 files changed

Lines changed: 95 additions & 45 deletions

File tree

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,25 @@
11
name: Continuous Benchmarks (Comment)
22

33
concurrency:
4-
group: ${{ github.workflow }}-${{ github.run_id }}
4+
group: ${{ github.workflow }}-${{ github.head_ref }}
55
cancel-in-progress: true
66

77
on:
88
workflow_run:
99
workflows: ["Continuous Benchmarks (Receive)"]
10-
types: [completed]
10+
types:
11+
- completed
1112

1213
jobs:
13-
comment:
14+
upload:
1415
runs-on: ubuntu-latest
1516
permissions:
1617
actions: read
1718
pull-requests: write
18-
if: ${{ github.event.workflow_run.event == 'pull_request' }}
19+
statuses: write
20+
if: >
21+
${{ github.event.workflow_run.event == 'pull_request' }}
1922
steps:
20-
- name: Download Touchstone artifact
21-
id: download
22-
uses: actions/download-artifact@v8
23-
with:
24-
name: pr
25-
github-token: ${{ github.token }}
26-
repository: ${{ github.repository }}
27-
run-id: ${{ github.event.workflow_run.id }}
28-
29-
# defensive since issues could cause commenting in random places
30-
- name: Read PR number
31-
id: pr
32-
shell: bash
33-
run: |
34-
number="$(tr -cd '0-9' < ./NR)"
35-
test -n "$number"
36-
echo "number=$number" >> "$GITHUB_OUTPUT"
37-
38-
- name: Create or update sticky PR comment
39-
id: comment
40-
uses: marocchino/sticky-pull-request-comment@v3
23+
- uses: lorenzwalthert/touchstone/actions/comment@main
4124
with:
4225
GITHUB_TOKEN: ${{ github.token }}
43-
number_force: ${{ steps.pr.outputs.number }}
44-
header: touchstone
45-
path: ./info.txt
46-
skip_unchanged: true

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Imports:
3737
checkmate,
3838
matrixStats (>= 0.52),
3939
parallel,
40-
posterior (>= 1.7.0),
40+
posterior (>= 1.5.0),
4141
stats
4242
Suggests:
4343
bayesplot (>= 1.7.0),

R/gpdfit.R

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,71 @@
2929
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
3030
#'
3131
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
32-
posterior::gpdfit(
33-
x = x,
34-
wip = wip,
35-
min_grid_pts = min_grid_pts,
36-
sort_x = sort_x
37-
)
32+
# See section 4 of Zhang and Stephens (2009)
33+
if (sort_x) {
34+
x <- sort.int(x)
35+
}
36+
N <- length(x)
37+
prior <- 3
38+
M <- min_grid_pts + floor(sqrt(N))
39+
jj <- seq_len(M)
40+
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
41+
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
42+
l_theta <- N * lx(theta, x) # profile log-lik
43+
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
44+
theta_hat <- sum(theta * w_theta)
45+
k <- mean.default(log1p(-theta_hat * x))
46+
sigma <- -k / theta_hat
47+
48+
if (wip) {
49+
k <- adjust_k_wip(k, n = N)
50+
}
51+
52+
if (is.na(k)) {
53+
k <- Inf
54+
}
55+
56+
nlist(k, sigma)
57+
}
58+
59+
60+
# internal ----------------------------------------------------------------
61+
62+
lx <- function(a,x) {
63+
a <- -a
64+
k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1))
65+
log(a / k) - k - 1
66+
}
67+
68+
#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
69+
#' will stabilize estimates for very small Monte Carlo sample sizes and low neff
70+
#' cases.
71+
#'
72+
#' @noRd
73+
#' @param k Scalar khat estimate.
74+
#' @param n Integer number of tail samples used to fit GPD.
75+
#' @return Scalar adjusted khat estimate.
76+
#'
77+
adjust_k_wip <- function(k, n) {
78+
a <- 10
79+
n_plus_a <- n + a
80+
k * n / n_plus_a + a * 0.5 / n_plus_a
81+
}
82+
83+
84+
#' Inverse CDF of generalized Pareto distribution
85+
#' (assuming location parameter is 0)
86+
#'
87+
#' @noRd
88+
#' @param p Vector of probabilities.
89+
#' @param k Scalar shape parameter.
90+
#' @param sigma Scalar scale parameter.
91+
#' @return Vector of quantiles.
92+
#'
93+
qgpd <- function(p, k, sigma) {
94+
if (is.nan(sigma) || sigma <= 0) {
95+
return(rep(NaN, length(p)))
96+
}
97+
98+
sigma * expm1(-k * log1p(-p)) / k
3899
}

R/psis.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,12 +254,12 @@ psis_smooth_tail <- function(x, cutoff) {
254254
exp_cutoff <- exp(cutoff)
255255

256256
# save time not sorting since x already sorted
257-
fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
257+
fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
258258
k <- fit$k
259259
sigma <- fit$sigma
260260
if (is.finite(k)) {
261261
p <- (seq_len(len) - 0.5) / len
262-
qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff
262+
qq <- qgpd(p, k, sigma) + exp_cutoff
263263
tail <- log(qq)
264264
} else {
265265
tail <- x

R/psislw.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4,
7272
# body and gPd smoothed tail
7373
tail_ord <- order(x_tail)
7474
exp_cutoff <- exp(cutoff)
75-
fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
75+
fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
7676
k <- fit$k
7777
sigma <- fit$sigma
7878
prb <- (seq_len(tail_len) - 0.5) / tail_len
79-
qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff
79+
qq <- qgpd(prb, k, sigma) + exp_cutoff
8080
smoothed_tail <- rep.int(0, tail_len)
8181
smoothed_tail[tail_ord] <- log(qq)
8282
x_new <- x

tests/testthat/test_gpdfit.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@ test_that("gpdfit returns correct result", {
1111
expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize")
1212
})
1313

14+
test_that("qgpd returns the correct result ", {
15+
probs <- seq(from = 0, to = 1, by = 0.25)
16+
q1 <- qgpd(probs, k = 1, sigma = 1)
17+
expect_equal(q1, c(0, 1 / 3, 1, 3, Inf))
18+
19+
q2 <- qgpd(probs, k = 1, sigma = 0)
20+
expect_true(all(is.nan(q2)))
21+
})

touchstone/config.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"os": "ubuntu-24.04",
3-
"r": "4.5.3",
2+
"os": "ubuntu-22.04",
3+
"r": "4.4.3",
44
"rspm": "https://packagemanager.posit.co/cran/__linux__/jammy/latest",
55
"benchmarking_repo": "",
66
"benchmarking_ref": "",

touchstone/script.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
# installs branches to benchmark
55
touchstone::branch_install()
66

7-
# make log lik available to tests
87
touchstone::pin_assets("touchstone/wine.rds")
98

9+
# These synthetic workloads are large enough to expose real slowdowns in the
10+
# core `loo()` paths, but still short enough to keep PR feedback reasonably fast.
1011
touchstone::benchmark_run(
1112
expr_before_benchmark = {
1213
suppressPackageStartupMessages(library(loo))
14+
# benchmark_run() evaluates in a callr subprocess, so load pinned assets here.
1315
wine_log_lik_matrix <- readRDS(touchstone::path_pinned_asset(
1416
"touchstone/wine.rds"
1517
))
@@ -24,7 +26,7 @@ touchstone::benchmark_run(
2426
)
2527
)
2628
},
27-
n = 60
29+
n = 10
2830
)
2931

3032
touchstone::benchmark_run(
@@ -48,7 +50,7 @@ touchstone::benchmark_run(
4850
)
4951
)
5052
},
51-
n = 60
53+
n = 10
5254
)
5355

5456
# create artifacts used downstream in the GitHub Action

0 commit comments

Comments
 (0)