Skip to content

Commit 9b2859f

Browse files
Merge branch 'master' into fix/kde-correction-empty-vector
2 parents aab6bae + c056b03 commit 9b2859f

7 files changed

Lines changed: 137 additions & 20 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ release-prep.R
1616

1717
# vscode/positron/etc settings
1818
.vscode/*
19+
Rplots.pdf

NEWS.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# bayesplot (development version)
22

33
* Validate user-provided `pit` values in `ppc_loo_pit_data()` using `validate_pit()` to reject invalid inputs (non-numeric, out of range, NAs) at the entry point instead of in internal helpers.
4+
* Added unit tests for `ppc_error_data()` and `ppc_loo_pit_data()` covering output structure, argument handling, and edge cases.
5+
* Added vignette sections demonstrating `*_data()` companion functions for building custom ggplot2 visualizations (#435)
6+
* Extract `drop_singleton_values()` helper in `mcmc_nuts_treedepth()` to remove duplicated filtering logic.
47
* Eliminate redundant data processing in `mcmc_areas_data()` by reusing the prepared MCMC array for both interval and density computation.
5-
* Validate equal chain lengths in `validate_df_with_chain()`, reject missing
6-
chain labels, and renumber data-frame chain labels internally when converting
7-
to arrays.
8+
* Validate equal chain lengths in `validate_df_with_chain()`, reject missing chain labels, and renumber data-frame chain labels internally when converting to arrays.
89
* Added unit tests for previously untested edge cases in `param_range()`, `param_glue()`, and `tidyselect_parameters()` (no-match, partial-match, and negation behavior).
910
* Bumped minimum version for `rstantools` from `>= 1.5.0` to `>= 2.0.0` .
1011
* Use `rlang::warn()` and `rlang::inform()` for selected PPC user messages instead of base `warning()` and `message()`.

R/mcmc-diagnostics-nuts.R

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -369,11 +369,7 @@ mcmc_nuts_treedepth <- function(x, lp, chain = NULL, ...) {
369369
yaxis_ticks(FALSE)
370370

371371
violin_lp_data <- data.frame(treedepth, lp = lp$Value)
372-
373-
# Only keep treedepth values that occur more than once for violin plot
374-
value_counts <- table(violin_lp_data$Value)
375-
keep_values <- names(value_counts[value_counts > 1])
376-
violin_lp_data <- violin_lp_data[violin_lp_data$Value %in% keep_values, ]
372+
violin_lp_data <- drop_singleton_values(violin_lp_data, "Value")
377373

378374
violin_lp <-
379375
ggplot(violin_lp_data, aes(x = factor(.data$Value), y = .data$lp)) +
@@ -382,11 +378,7 @@ mcmc_nuts_treedepth <- function(x, lp, chain = NULL, ...) {
382378
bayesplot_theme_get()
383379

384380
violin_accept_stat_data <- data.frame(treedepth, as = accept_stat$Value)
385-
386-
# Only keep treedepth values that occur more than once for violin plot
387-
value_counts <- table(violin_accept_stat_data$Value)
388-
keep_values <- names(value_counts[value_counts > 1])
389-
violin_accept_stat_data <- violin_accept_stat_data[violin_accept_stat_data$Value %in% keep_values, ]
381+
violin_accept_stat_data <- drop_singleton_values(violin_accept_stat_data, "Value")
390382

391383
violin_accept_stat <-
392384
ggplot(violin_accept_stat_data, aes(x = factor(.data$Value), y = .data$as)) +
@@ -572,3 +564,11 @@ chain_violin <-
572564
alpha = alpha
573565
)
574566
}
567+
568+
# Drop rows whose value in `col` appears only once (singletons cannot
569+
# produce a violin density estimate).
570+
drop_singleton_values <- function(df, col) {
571+
counts <- table(df[[col]])
572+
keep <- names(counts[counts > 1])
573+
df[df[[col]] %in% keep, ]
574+
}

tests/testthat/test-ppc-errors.R

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1+
skip_if_not_installed("rstantools")
12
source(test_path("data-for-ppc-tests.R"))
23

34
test_that("ppc_error_hist and ppc_error_scatter return ggplot object", {
4-
skip_if_not_installed("rstantools")
55
expect_gg(ppc_error_hist(y, yrep[1:5, ], binwidth = 0.1))
66
expect_gg(ppc_error_scatter(y, yrep[1:5, ]))
77

@@ -13,14 +13,12 @@ test_that("ppc_error_hist and ppc_error_scatter return ggplot object", {
1313
})
1414

1515
test_that("ppc_error_hist_grouped returns ggplot object", {
16-
skip_if_not_installed("rstantools")
1716
expect_gg(ppc_error_hist_grouped(y, yrep[1:5, ], group, binwidth = 0.1))
1817
expect_gg(ppc_error_hist_grouped(y, yrep[1,, drop = FALSE], group,
1918
freq = FALSE, binwidth = 1))
2019
})
2120

2221
test_that("ppc_error_scatter_avg returns ggplot2 object", {
23-
skip_if_not_installed("rstantools")
2422
expect_gg(ppc_error_scatter_avg(y, yrep))
2523
expect_gg(ppc_error_scatter_avg(y, yrep[1:5, ]))
2624

@@ -30,7 +28,6 @@ test_that("ppc_error_scatter_avg returns ggplot2 object", {
3028
})
3129

3230
test_that("ppc_error_scatter_avg same as ppc_error_scatter if nrow(yrep) = 1", {
33-
skip_if_not_installed("rstantools")
3431
p1 <- ppc_error_scatter_avg(y2, yrep2)
3532
p2 <- ppc_error_scatter(y2, yrep2)
3633
d1 <- p1$data
@@ -42,8 +39,6 @@ test_that("ppc_error_scatter_avg same as ppc_error_scatter if nrow(yrep) = 1", {
4239
})
4340

4441
test_that("ppc_error_scatter_avg_vs_x returns ggplot2 object", {
45-
skip_if_not_installed("rstantools")
46-
4742
# expect warning
4843
expect_warning(expect_gg(ppc_error_scatter_avg_vs_x(y, yrep, x = rnorm(length(y)))),
4944
"'ppc_error_scatter_avg_vs_x' is deprecated.")
@@ -52,7 +47,6 @@ test_that("ppc_error_scatter_avg_vs_x returns ggplot2 object", {
5247
})
5348

5449
test_that("ppc_error_binned returns ggplot object", {
55-
skip_if_not_installed("rstantools")
5650
load(test_path("data-for-binomial.rda"))
5751
expect_gg(ppc_error_binned(y, Ey))
5852
expect_gg(ppc_error_binned(y[1:5], Ey[, 1:5]))
@@ -73,6 +67,24 @@ test_that("bin_errors works for edge cases", {
7367
expect_equal(ans, val)
7468
})
7569

70+
# ppc_error_data tests -----------------------------------------------------
71+
72+
test_that("ppc_error_data returns exact structure and computed errors", {
73+
d <- ppc_error_data(y, yrep)
74+
expect_named(d, c("y_id", "y_name", "y_obs", "rep_id", "rep_label", "value"))
75+
third_rep <- d[d$rep_id == 3, ]
76+
expected_errors <- y - yrep[3, ]
77+
expect_equal(third_rep$value, expected_errors)
78+
expect_equal(third_rep$y_obs, y)
79+
})
80+
81+
test_that("ppc_error_data with group returns exact structure", {
82+
d <- ppc_error_data(y, yrep, group = group)
83+
expect_named(d, c("group", "y_id", "y_name", "y_obs", "rep_id", "rep_label", "value"))
84+
expect_identical(levels(d$group), levels(group))
85+
expect_equal(d$group[d$rep_id == 1], group)
86+
})
87+
7688

7789
# Visual tests -----------------------------------------------------------------
7890

tests/testthat/test-ppc-loo.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,3 +358,50 @@ test_that("ppc_loo_pit_ecdf renders correctly", {
358358
)
359359
vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (ecdf difference)", p_custom)
360360
})
361+
362+
363+
# ppc_loo_pit_data tests ---------------------------------------------------
364+
365+
test_that("ppc_loo_pit_data returns the expected structure for both boundary modes", {
366+
set.seed(123)
367+
pit_vals <- runif(50)
368+
n_samples <- 10
369+
expect_message(
370+
d_raw <- ppc_loo_pit_data(
371+
pit = pit_vals,
372+
boundary_correction = FALSE,
373+
samples = n_samples
374+
),
375+
"pit"
376+
)
377+
expect_s3_class(d_raw, "data.frame")
378+
expect_named(
379+
d_raw,
380+
c("y_id", "y_name", "rep_id", "rep_label", "is_y", "is_y_label", "value")
381+
)
382+
y_rows <- d_raw[d_raw$is_y, ]
383+
yrep_rows <- d_raw[!d_raw$is_y, ]
384+
expect_equal(nrow(y_rows), length(pit_vals))
385+
expect_equal(nrow(yrep_rows), length(pit_vals) * n_samples)
386+
expect_equal(y_rows$value, pit_vals)
387+
388+
grid_len <- 128
389+
expect_message(
390+
d_bc <- ppc_loo_pit_data(
391+
pit = pit_vals,
392+
boundary_correction = TRUE,
393+
samples = n_samples,
394+
grid_len = grid_len
395+
),
396+
"pit"
397+
)
398+
expect_named(
399+
d_bc,
400+
c("y_id", "y_name", "rep_id", "rep_label", "is_y", "is_y_label", "value", "x")
401+
)
402+
y_rows <- d_bc[d_bc$is_y, ]
403+
yrep_rows <- d_bc[!d_bc$is_y, ]
404+
expect_equal(nrow(y_rows), grid_len)
405+
expect_equal(nrow(yrep_rows), grid_len * n_samples)
406+
expect_false(anyNA(d_bc$x))
407+
})

vignettes/graphical-ppcs.Rmd

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,45 @@ See Figure 8 in [Gabry et al. (2019)](#gabry2019) for another example of using
314314

315315
<br>
316316

317+
## Using `*_data()` functions for custom plots
318+
319+
Many bayesplot plotting functions have a companion `*_data()` function that
320+
returns the pre-processed data as a tidy data frame instead of a plot. This is
321+
useful when you want to build a fully custom ggplot2 visualization using the
322+
same summary statistics that bayesplot computes internally.
323+
324+
For example, `ppc_intervals_data()` returns the quantile summaries that
325+
`ppc_intervals()` uses:
326+
327+
```{r data_intervals, eval=params$EVAL}
328+
d <- ppc_intervals_data(y, yrep_nb, prob = 0.5, prob_outer = 0.9)
329+
head(d)
330+
```
331+
332+
You can then use this data to create your own plot:
333+
334+
```{r data_intervals_custom, eval=params$EVAL}
335+
ggplot(d, aes(x = x, y = m)) +
336+
geom_linerange(aes(ymin = ll, ymax = hh), color = "skyblue", linewidth = 0.6) +
337+
geom_linerange(aes(ymin = l, ymax = h), color = "steelblue", linewidth = 1.2) +
338+
geom_point(aes(y = y_obs), shape = 21, fill = "red", size = 1.5) +
339+
labs(title = "Custom interval plot from ppc_intervals_data()",
340+
x = "Observation", y = "Value") +
341+
theme_minimal()
342+
```
343+
344+
Similarly, `ppc_stat_data()` returns the computed test statistics:
345+
346+
```{r data_stat, eval=params$EVAL, message=FALSE}
347+
stat_d <- ppc_stat_data(y, yrep_nb, stat = "median")
348+
head(stat_d)
349+
```
350+
351+
See `available_ppc(plots_only = FALSE)` and `available_mcmc(plots_only = FALSE)`
352+
for a full list of data-preparation functions.
353+
354+
<br>
355+
317356
## Providing an interface to bayesplot PPCs from another package
318357

319358
The **bayesplot** package provides the S3 generic function `pp_check`. Authors of

vignettes/plotting-mcmc-draws.Rmd

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,23 @@ mcmc_trace_highlight(posterior, pars = "sigma", highlight = 3)
367367
```
368368

369369

370+
<br>
371+
372+
## Using `*_data()` functions for custom plots
373+
374+
As with PPC functions, many MCMC plotting functions have `*_data()` companions
375+
that return the underlying data instead of a plot. For example,
376+
`mcmc_intervals_data()` returns the quantiles used by `mcmc_intervals()`:
377+
378+
```{r data_intervals_mcmc}
379+
d <- mcmc_intervals_data(posterior, pars = c("(Intercept)", "sigma"))
380+
d
381+
```
382+
383+
This can be used to build fully custom ggplot2 visualizations using the same
384+
summary statistics that bayesplot computes internally. See
385+
`available_mcmc(plots_only = FALSE)` for a full list of `*_data()` functions.
386+
370387
<br>
371388

372389
## References

0 commit comments

Comments
 (0)