diff --git a/tests/testthat/_snaps/compare.md b/tests/testthat/_snaps/compare.md index cf27900e..bfc6824c 100644 --- a/tests/testthat/_snaps/compare.md +++ b/tests/testthat/_snaps/compare.md @@ -1,6 +1,24 @@ # loo_compare returns expected results (2 models) - WAoAAAACAAQFAAACAwAAAAMOAAAAEAAAAAAAAAAAwBA6U1+cRe4AAAAAAAAAAD+2ake0LxMB + Code + print(comp1) + Output + elpd_diff se_diff + model1 0.0 0.0 + model2 0.0 0.0 + +--- + + Code + print(comp2) + Output + elpd_diff se_diff + model1 0.0 0.0 + model2 -4.1 0.1 + +--- + + WAoAAAACAAQFAgACAwAAAAMOAAAAEAAAAAAAAAAAwBA6U1+cRe4AAAAAAAAAAD+2ake0LxMB wFTh8N3JQljAVeWWE8MGuUARCD2zEXBfQBEalRIN2T9ACijAYdW5U0AmZ5XrANCKP/H9Zexy 814/8ZtgnG1nx0Bk4fDdyUJYQGXllhPDBrlAIQg9sxFwX0AhGpUSDdk/AAAEAgAAAAEABAAJ AAAAA2RpbQAAAA0AAAACAAAAAgAAAAgAAAQCAAAAAQAEAAkAAAAIZGltbmFtZXMAAAATAAAA @@ -12,8 +30,18 @@ # loo_compare returns expected result (3 models) - WAoAAAACAAQFAAACAwAAAAMOAAAAGAAAAAAAAAAAwBA6U1+cRe7AMA3KkbYEGAAAAAAAAAAA - P7ZqR7QvEwE/y6/t4TTtXsBU4fDdyUJYwFXllhPDBrnAWOVjgjbDYkARCD2zEXBfQBEalRIN + Code + print(comp1) + Output + elpd_diff se_diff + model1 0.0 0.0 + model2 -4.1 0.1 + model3 -16.1 0.2 + +--- + + WAoAAAACAAQFAgACAwAAAAMOAAAAGAAAAAAAAAAAwBA6U1+cRe7AMA3KkbYEGAAAAAAAAAAA + P7ZqR7QvEwE/y6/t4TTtYMBU4fDdyUJYwFXllhPDBrnAWOVjgjbDYkARCD2zEXBfQBEalRIN 2T9AEPIF3GigE0AKKMBh1blTQCZnlesA0IpAQcjYUhrdCj/x/WXscvNeP/GbYJxtZ8c/8YDQ kmfJX0Bk4fDdyUJYQGXllhPDBrlAaOVjgjbDYkAhCD2zEXBfQCEalRIN2T9AIPIF3GigEwAA BAIAAAABAAQACQAAAANkaW0AAAANAAAAAgAAAAMAAAAIAAAEAgAAAAEABAAJAAAACGRpbW5h diff --git a/tests/testthat/_snaps/kfold_helpers.md b/tests/testthat/_snaps/kfold_helpers.md new file mode 100644 index 00000000..cb46bb98 --- /dev/null +++ b/tests/testthat/_snaps/kfold_helpers.md @@ -0,0 +1,7 @@ +# print_dims.kfold works + + Code + print_dims(xx) + Output + Based on 17-fold cross-validation. + diff --git a/tests/testthat/_snaps/loo_subsampling_cases.md b/tests/testthat/_snaps/loo_subsampling_cases.md new file mode 100644 index 00000000..2dcdf0fb --- /dev/null +++ b/tests/testthat/_snaps/loo_subsampling_cases.md @@ -0,0 +1,137 @@ +# Test the vignette + + Code + print(looss_1) + Output + + Computed from 4000 by 100 subsampled log-likelihood + values from 3020 total observations. + + Estimate SE subsampling SE + elpd_loo -1968.5 15.6 0.3 + p_loo 3.1 0.1 0.4 + looic 3936.9 31.2 0.6 + ------ + MCSE of elpd_loo is 0.0. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.7). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(looss_1b) + Output + + Computed from 4000 by 200 subsampled log-likelihood + values from 3020 total observations. + + Estimate SE subsampling SE + elpd_loo -1968.3 15.6 0.2 + p_loo 3.2 0.1 0.4 + looic 3936.7 31.2 0.5 + ------ + MCSE of elpd_loo is 0.0. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.7). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(aploo_1) + Output + + Computed from 2000 by 3020 log-likelihood matrix. + + Estimate SE + elpd_loo -1968.4 15.6 + p_loo 3.2 0.2 + looic 3936.8 31.2 + ------ + Posterior approximation correction used. + MCSE of elpd_loo is 0.0. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.7). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(looapss_1) + Output + + Computed from 2000 by 100 subsampled log-likelihood + values from 3020 total observations. + + Estimate SE subsampling SE + elpd_loo -1968.2 15.6 0.4 + p_loo 2.9 0.1 0.5 + looic 3936.4 31.1 0.8 + ------ + Posterior approximation correction used. + MCSE of elpd_loo is 0.0. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.7). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(looss_2) + Output + + Computed from 4000 by 100 subsampled log-likelihood + values from 3020 total observations. + + Estimate SE subsampling SE + elpd_loo -1952.0 16.2 0.2 + p_loo 2.6 0.1 0.3 + looic 3903.9 32.4 0.4 + ------ + MCSE of elpd_loo is 0.0. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.7). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(comp) + Output + elpd_diff se_diff subsampling_se_diff + model2 0.0 0.0 0.0 + model1 16.5 22.5 0.4 + +--- + + Code + print(comp) + Output + elpd_diff se_diff subsampling_se_diff + model2 0.0 0.0 0.0 + model1 16.1 4.4 0.1 + +--- + + Code + print(comp2) + Output + elpd_diff se_diff subsampling_se_diff + model2 0.0 0.0 0.0 + model1 16.3 4.4 0.1 + +--- + + Code + print(comp3) + Output + elpd_diff se_diff subsampling_se_diff + model2 0.0 0.0 0.0 + model1 16.5 4.4 0.3 + diff --git a/tests/testthat/_snaps/model_weighting.md b/tests/testthat/_snaps/model_weighting.md index 0ed937d0..35a8647a 100644 --- a/tests/testthat/_snaps/model_weighting.md +++ b/tests/testthat/_snaps/model_weighting.md @@ -1,8 +1,44 @@ # loo_model_weights (stacking and pseudo-BMA) gives expected result - WAoAAAACAAQFAAACAwAAAAAOAAAAAz/KEXngFjO6P+l7oXTIUDU+YzIi3AAAAA== + WAoAAAACAAQFAgACAwAAAAAOAAAAAz/KEXnf1DM9P+l7oXTYyi4+YzIpAwAAAA== --- - WAoAAAACAAQFAAACAwAAAAAOAAAAAz+xA6UGtqDFP+3eFS5zKzY/J2MLYAsc4w== + Code + print(w1) + Output + Method: stacking + ------ + weight + model1 0.204 + model2 0.796 + model3 0.000 + +--- + + WAoAAAACAAQFAgACAwAAAAAOAAAAAz+xA6UGtqDDP+3eFS5zKzY/J2MLYAsc4A== + +--- + + Code + print(w2) + Output + Method: pseudo-BMA+ with Bayesian bootstrap + ------ + weight + model1 0.066 + model2 0.933 + model3 0.000 + +--- + + Code + print(w3) + Output + Method: pseudo-BMA + ------ + weight + model1 0.000 + model2 1.000 + model3 0.000 diff --git a/tests/testthat/_snaps/print_plot.md b/tests/testthat/_snaps/print_plot.md index 17742c92..e8d62a10 100644 --- a/tests/testthat/_snaps/print_plot.md +++ b/tests/testthat/_snaps/print_plot.md @@ -1,3 +1,67 @@ +# print.waic output is ok + + Code + print(waic1) + Output + + Computed from 1000 by 32 log-likelihood matrix. + + Estimate SE + elpd_waic -83.5 4.3 + p_waic 3.3 1.1 + waic 167.1 8.5 + + 3 (9.4%) p_waic estimates greater than 0.4. We recommend trying loo instead. + +# print.psis_loo and print.psis output ok + + Code + print(psis1) + Output + Computed from 1000 by 32 log-weights matrix. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.67). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(loo1) + Output + + Computed from 1000 by 32 log-likelihood matrix. + + Estimate SE + elpd_loo -83.6 4.3 + p_loo 3.3 1.2 + looic 167.2 8.6 + ------ + MCSE of elpd_loo is 0.1. + MCSE and ESS estimates assume independent draws (r_eff=1). + + All Pareto k estimates are good (k < 0.67). + See help('pareto-k-diagnostic') for details. + +--- + + Code + print(loo1_r_eff) + Output + + Computed from 1000 by 32 log-likelihood matrix. + + Estimate SE + elpd_loo -83.6 4.3 + p_loo 3.3 1.2 + looic 167.2 8.6 + ------ + MCSE of elpd_loo is 0.1. + MCSE and ESS estimates assume MCMC draws (r_eff in [0.6, 1.0]). + + All Pareto k estimates are good (k < 0.67). + See help('pareto-k-diagnostic') for details. + # mcse_loo extractor gives correct value WAoAAAACAAQFAAACAwAAAAAOAAAAAT+2J8YDcP5s diff --git a/tests/testthat/_snaps/tisis.md b/tests/testthat/_snaps/tisis.md index 57cf7e30..b0e2560d 100644 --- a/tests/testthat/_snaps/tisis.md +++ b/tests/testthat/_snaps/tisis.md @@ -11,3 +11,31 @@ All Pareto k estimates are good (k < 0.67). See help('pareto-k-diagnostic') for details. +# tis_loo and sis_loo are returned + + Code + print(loo_tis) + Output + + Computed from 1000 by 32 log-likelihood matrix using tis_loo . + + Estimate SE + elpd_loo -83.6 4.3 + p_loo 3.3 1.2 + looic 167.2 8.6 + ------ + +--- + + Code + print(loo_sis) + Output + + Computed from 1000 by 32 log-likelihood matrix using sis_loo . + + Estimate SE + elpd_loo -83.6 4.3 + p_loo 3.3 1.2 + looic 167.2 8.6 + ------ + diff --git a/tests/testthat/test_compare.R b/tests/testthat/test_compare.R index d21e0731..3bc79302 100644 --- a/tests/testthat/test_compare.R +++ b/tests/testthat/test_compare.R @@ -89,13 +89,14 @@ test_that("loo_compare returns expected results (2 models)", { expect_s3_class(comp1, "compare.loo") expect_equal(colnames(comp1), comp_colnames) expect_equal(rownames(comp1), c("model1", "model2")) - expect_output(print(comp1), "elpd_diff") + expect_snapshot(print(comp1)) expect_equal(comp1[1:2, 1], c(0, 0), ignore_attr = TRUE) expect_equal(comp1[1:2, 2], c(0, 0), ignore_attr = TRUE) comp2 <- loo_compare(w1, w2) expect_s3_class(comp2, "compare.loo") expect_equal(colnames(comp2), comp_colnames) + expect_snapshot(print(comp2)) expect_snapshot_value(comp2, style = "serialize") @@ -113,6 +114,7 @@ test_that("loo_compare returns expected result (3 models)", { expect_equal(comp1[1, 1], 0) expect_s3_class(comp1, "compare.loo") expect_s3_class(comp1, "matrix") + expect_snapshot(print(comp1)) expect_snapshot_value(comp1, style = "serialize") diff --git a/tests/testthat/test_kfold_helpers.R b/tests/testthat/test_kfold_helpers.R index fd95de3e..430160e9 100644 --- a/tests/testthat/test_kfold_helpers.R +++ b/tests/testthat/test_kfold_helpers.R @@ -138,7 +138,7 @@ test_that("kfold helpers throw correct errors", { test_that("print_dims.kfold works", { xx <- structure(list(), K = 17, class = c("kfold", "loo")) - expect_output(print_dims(xx), "Based on 17-fold cross-validation") + expect_snapshot(print_dims(xx)) attr(xx, "K") <- NULL expect_silent(print_dims(xx)) diff --git a/tests/testthat/test_loo_subsampling_cases.R b/tests/testthat/test_loo_subsampling_cases.R index 1f53192b..6bd244c0 100644 --- a/tests/testthat/test_loo_subsampling_cases.R +++ b/tests/testthat/test_loo_subsampling_cases.R @@ -342,17 +342,7 @@ test_that("Test the vignette", { observations = 100 ) ) - expect_output( - print(looss_1), - "Computed from 4000 by 100 subsampled log-likelihood" - ) - expect_output(print(looss_1), "values from 3020 total observations.") - expect_output( - print(looss_1), - "MCSE and ESS estimates assume independent draws" - ) - expect_output(print(looss_1), "elpd_loo -1968.5 15.6 0.3") - expect_output(print(looss_1), "p_loo 3.1 0.1 0.4") + expect_snapshot(print(looss_1)) expect_s3_class(looss_1, c("psis_loo_ss", "psis_loo", "loo")) set.seed(4711) @@ -364,17 +354,7 @@ test_that("Test the vignette", { observations = 200 ) ) - expect_output( - print(looss_1b), - "Computed from 4000 by 200 subsampled log-likelihood" - ) - expect_output(print(looss_1b), "values from 3020 total observations.") - expect_output( - print(looss_1b), - "MCSE and ESS estimates assume independent draws" - ) - expect_output(print(looss_1b), "elpd_loo -1968.3 15.6 0.2") - expect_output(print(looss_1b), "p_loo 3.2 0.1 0.4") + expect_snapshot(print(looss_1b)) expect_s3_class(looss_1b, c("psis_loo_ss", "psis_loo", "loo")) set.seed(4711) @@ -413,18 +393,7 @@ test_that("Test the vignette", { log_g = log_g ) ) - expect_output( - print(aploo_1), - "Computed from 2000 by 3020 log-likelihood matrix" - ) - expect_output( - print(aploo_1), - "MCSE and ESS estimates assume independent draws" - ) - expect_output(print(aploo_1), "elpd_loo -1968.4 15.6") - expect_output(print(aploo_1), "p_loo 3.2 0.2") - expect_output(print(aploo_1), "Posterior approximation correction used.") - expect_output(print(aploo_1), "All Pareto k estimates are good") + expect_snapshot(print(aploo_1)) expect_equal(length(pareto_k_ids(aploo_1, threshold = 0.5)), 31) expect_s3_class(aploo_1, c("psis_loo_ap", "psis_loo", "loo")) @@ -439,18 +408,7 @@ test_that("Test the vignette", { observations = 100 ) ) - expect_output( - print(looapss_1), - "Computed from 2000 by 100 subsampled log-likelihood" - ) - expect_output( - print(looapss_1), - "MCSE and ESS estimates assume independent draws" - ) - expect_output(print(looapss_1), "values from 3020 total observations.") - expect_output(print(looapss_1), "elpd_loo -1968.2 15.6 0.4") - expect_output(print(looapss_1), "p_loo 2.9 0.1 0.5") - expect_output(print(looapss_1), "All Pareto k estimates are good") + expect_snapshot(print(looapss_1)) expect_equal(length(pareto_k_ids(looapss_1, threshold = 0.5)), 3) # Loo compare @@ -472,23 +430,13 @@ test_that("Test the vignette", { observations = 100 ) ) - expect_output( - print(looss_2), - "Computed from 4000 by 100 subsampled log-likelihood" - ) - expect_output( - print(looss_2), - "MCSE and ESS estimates assume independent draws" - ) - expect_output(print(looss_2), "values from 3020 total observations.") - expect_output(print(looss_2), "elpd_loo -1952.0 16.2 0.2") - expect_output(print(looss_2), "p_loo 2.6 0.1 0.3") + expect_snapshot(print(looss_2)) expect_warning( comp <- loo_compare(looss_1, looss_2), "Different subsamples in 'model2' and 'model1'. Naive diff SE is used." ) - expect_output(print(comp), "model1 16.5 22.5 0.4") + expect_snapshot(print(comp)) set.seed(4712) expect_no_warning( @@ -510,7 +458,7 @@ test_that("Test the vignette", { ) expect_silent(comp <- loo_compare(looss_1, looss_2_m)) - expect_output(print(comp), "model1 16.1 4.4 0.1") + expect_snapshot(print(comp)) set.seed(4712) expect_no_warning( @@ -530,7 +478,7 @@ test_that("Test the vignette", { ) ) expect_silent(comp2 <- loo_compare(looss_1, looss_2_m)) - expect_output(print(comp2), "model1 16.3 4.4 0.1") + expect_snapshot(print(comp2)) expect_no_warning( looss_2_full <- loo( @@ -543,5 +491,5 @@ test_that("Test the vignette", { comp3 <- loo_compare(x = list(looss_1, looss_2_full)), "Estimated elpd_diff using observations included in loo calculations for all models." ) - expect_output(print(comp3), "model1 16.5 4.4 0.3") + expect_snapshot(print(comp3)) }) diff --git a/tests/testthat/test_model_weighting.R b/tests/testthat/test_model_weighting.R index b15730ba..96c075b8 100644 --- a/tests/testthat/test_model_weighting.R +++ b/tests/testthat/test_model_weighting.R @@ -73,7 +73,7 @@ test_that("loo_model_weights (stacking and pseudo-BMA) gives expected result", { expect_length(w1, 3) expect_named(w1, paste0("model", c(1:3))) expect_snapshot_value(as.numeric(w1), style = "serialize") - expect_output(print(w1), "Method: stacking") + expect_snapshot(print(w1)) w1_b <- loo_model_weights(loo_list) expect_identical(w1, w1_b) @@ -89,7 +89,7 @@ test_that("loo_model_weights (stacking and pseudo-BMA) gives expected result", { expect_length(w2, 3) expect_named(w2, paste0("model", c(1:3))) expect_snapshot_value(as.numeric(w2), style = "serialize") - expect_output(print(w2), "Method: pseudo-BMA+") + expect_snapshot(print(w2)) w3 <- loo_model_weights( ll_list, @@ -105,7 +105,7 @@ test_that("loo_model_weights (stacking and pseudo-BMA) gives expected result", { c(5.365279e-05, 9.999436e-01, 2.707028e-06), tolerance = tol ) - expect_output(print(w3), "Method: pseudo-BMA") + expect_snapshot(print(w3)) w3_b <- loo_model_weights(loo_list, method = "pseudobma", BB = FALSE) expect_identical(w3, w3_b) diff --git a/tests/testthat/test_print_plot.R b/tests/testthat/test_print_plot.R index f69ed6f0..7baeb1b3 100644 --- a/tests/testthat/test_print_plot.R +++ b/tests/testthat/test_print_plot.R @@ -38,36 +38,14 @@ test_that("plot methods throw appropriate errors/warnings", { # printing ---------------------------------------------------------------- -lldim_msg <- paste0( - "Computed from ", - prod(dim(LLarr)[1:2]), - " by ", - dim(LLarr)[3], - " log-likelihood matrix" -) -lwdim_msg <- paste0( - "Computed from ", - prod(dim(LLarr)[1:2]), - " by ", - dim(LLarr)[3], - " log-weights matrix" -) - test_that("print.waic output is ok", { - expect_output(print(waic1), lldim_msg) - expect_output( - print(waic1), - "p_waic estimates greater than 0.4. We recommend trying loo instead." - ) + expect_snapshot(print(waic1)) }) test_that("print.psis_loo and print.psis output ok", { - expect_output(print(psis1), lwdim_msg) - expect_output(print(psis1), "Pareto k estimates are good") - expect_output(print(loo1), lldim_msg) - expect_output(print(loo1), "MCSE and ESS estimates assume independent draws") - expect_output(print(loo1_r_eff), "MCSE and ESS estimates assume MCMC draws") - expect_output(print(loo1), "Pareto k estimates are good") + expect_snapshot(print(psis1)) + expect_snapshot(print(loo1)) + expect_snapshot(print(loo1_r_eff)) loo1$diagnostics$pareto_k <- psis1$diagnostics$pareto_k <- runif(32, 0, .49) expect_output(print(loo1), regexp = "Pareto k estimates are good") diff --git a/tests/testthat/test_tisis.R b/tests/testthat/test_tisis.R index 44fcc12d..edfcbd3f 100644 --- a/tests/testthat/test_tisis.R +++ b/tests/testthat/test_tisis.R @@ -221,6 +221,6 @@ test_that("tis_loo and sis_loo are returned", { expect_s3_class(loo_tis, "importance_sampling_loo") expect_s3_class(loo_sis, "importance_sampling_loo") - expect_output(print(loo_tis), regexp = "tis_loo") - expect_output(print(loo_sis), regexp = "sis_loo") + expect_snapshot(print(loo_tis)) + expect_snapshot(print(loo_sis)) })