Skip to content

Commit ebed5b8

Browse files
author
Florence Bockting
committed
style: clean-up code
1 parent 990ab98 commit ebed5b8

4 files changed

Lines changed: 123 additions & 232 deletions

File tree

R/ppc-distributions.R

Lines changed: 81 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -708,104 +708,85 @@ ppc_pit_ecdf <- function(y,
708708
prob = .99,
709709
plot_diff = FALSE,
710710
interpolate_adj = NULL,
711-
method = "independent",
711+
method = NULL,
712712
test = NULL,
713713
gamma = NULL,
714714
linewidth = NULL,
715715
color = NULL,
716716
help_text = NULL,
717717
pareto_pit = NULL
718718
) {
719-
# Expected input combinations for ppc_pit_ecdf() based on method and test choices:
720-
# | yrep | y | pit | method | test | pareto_pit | approach |
721-
# |------|---|-----|-------------|------|------------|--------------------|
722-
# | x | x | | independent | NULL | FALSE | empirical pit |
723-
# | | | x | independent | NULL | FALSE | |
724-
# | x | x | | independent | NULL | TRUE | compute pareto-pit |
725-
# | x | x | | correlated | POT | TRUE | compute pareto-pit |
726-
# | | | x | correlated | POT | FALSE | |
727-
# | x | x | | correlated | PIET | TRUE | compute pareto-pit |
728-
# | | | x | correlated | PIET | FALSE | |
729-
# | x | x | | correlated | PRIT | FALSE | empirical pit |
730-
# | | | x | correlated | PRIT | FALSE | |
731-
732719
check_ignored_arguments(...,
733-
ok_args = c("K", "pareto_pit", "pit", "prob", "plot_diff",
734-
"interpolate_adj", "method", "test", "gamma", "linewidth", "color",
735-
"help_text")
720+
ok_args = c(
721+
"K", "pareto_pit", "pit", "prob", "plot_diff",
722+
"interpolate_adj", "method", "test", "gamma", "linewidth",
723+
"color", "help_text"
724+
)
736725
)
737726

738-
# ---------------------------------------------------------------------------
739-
# Internal helpers
740-
# ---------------------------------------------------------------------------
741-
742727
.warn_ignored <- function(method_name, args) {
743728
inform(paste0(
744729
"As method = ", method_name, " specified; ignoring: ",
745730
paste(args, collapse = ", "), "."
746731
))
747732
}
748733

749-
# ---------------------------------------------------------------------------
750-
# Resolve and validate `method`
751-
# ---------------------------------------------------------------------------
752-
753-
method <- match.arg(method, choices = c("independent", "correlated"))
754-
755-
756-
# ---------------------------------------------------------------------------
757-
# Method-specific defaults, validation, and pareto_pit flag
758-
# ---------------------------------------------------------------------------
734+
if (is.null(method)) {
735+
inform(c(
736+
"i" = paste(
737+
"In the next major release, the default `method`",
738+
"will change to 'correlated'."
739+
),
740+
"*" = paste(
741+
"To silence this message, explicitly set",
742+
"`method = 'independent'` or `method = 'correlated'`."
743+
)
744+
))
745+
method <- "independent"
746+
} else {
747+
method <- match.arg(method, choices = c("independent", "correlated"))
748+
if (method == "independent") {
749+
inform("The 'independent' method is superseded by the 'correlated' method.")
750+
}
751+
}
759752

760753
switch(method,
761754
"correlated" = {
762755
if (!is.null(interpolate_adj)) .warn_ignored("'correlated'", "interpolate_adj")
763756

764-
test <- match.arg(test %||% "POT", choices = c("POT", "PRIT", "PIET"))
765-
alpha <- 1 - prob
766-
gamma <- gamma %||% 0
757+
test <- match.arg(test %||% "POT", choices = c("POT", "PRIT", "PIET"))
758+
alpha <- 1 - prob
759+
gamma <- gamma %||% 0
767760
linewidth <- linewidth %||% 0.3
768-
color <- color %||% c(ecdf = "grey60", highlight = "red")
761+
color <- color %||% c(ecdf = "grey60", highlight = "red")
769762
help_text <- help_text %||% TRUE
770-
771-
# Pareto PIT applies for correlated only when the test is POT or PIET
772-
# (not PRIT, which uses empirical PIT).
773763
pareto_pit <- pareto_pit %||% is.null(pit) && test %in% c("POT", "PIET")
774764
},
775765
"independent" = {
776766
ignored <- c(
777-
if (!is.null(test)) "test",
778-
if (!is.null(gamma)) "gamma",
767+
if (!is.null(test)) "test",
768+
if (!is.null(gamma)) "gamma",
779769
if (!is.null(help_text)) "help_text"
780770
)
781771
if (length(ignored) > 0) .warn_ignored("'independent'", ignored)
782-
783-
# Pareto PIT applies for independent whenever pareto_pit = TRUE.
784772
pareto_pit <- pareto_pit %||% FALSE
785773
}
786774
)
787775

788-
# ---------------------------------------------------------------------------
789-
# Compute PIT values
790-
# ---------------------------------------------------------------------------
791-
792776
if (isTRUE(pareto_pit) && is.null(pit)) {
793777
# --- Pareto-smoothed PIT ---
794778
suggested_package("rstantools")
795779
y <- validate_y(y)
796780
yrep <- validate_predictions(yrep, length(y))
797-
798781
pit <- posterior::pareto_pit(x = yrep, y = y, weights = NULL, log = TRUE)
799782
K <- K %||% length(pit)
800783

801784
} else if (!is.null(pit)) {
802785
# --- Pre-supplied PIT values ---
803786
pit <- validate_pit(pit)
804787
K <- K %||% length(pit)
805-
806-
# Warn about any ignored arguments.
807788
ignored <- c(
808-
if (!missing(y) && !is.null(y)) "y",
789+
if (!missing(y) && !is.null(y)) "y",
809790
if (!missing(yrep) && !is.null(yrep)) "yrep"
810791
)
811792
if (length(ignored) > 0) {
@@ -814,69 +795,47 @@ ppc_pit_ecdf <- function(y,
814795
paste(ignored, collapse = ", "), "."
815796
))
816797
}
817-
818798
} else {
819799
# --- Empirical PIT ---'
820800
y <- validate_y(y)
821801
yrep <- validate_predictions(yrep, length(y))
822-
pit <- ppc_data(y, yrep) %>%
823-
group_by(.data$y_id) %>%
802+
pit <- ppc_data(y, yrep) |>
803+
group_by(.data$y_id) |>
824804
dplyr::group_map(
825805
~ mean(.x$value[.x$is_y] > .x$value[!.x$is_y]) +
826806
runif(1, max = mean(.x$value[.x$is_y] == .x$value[!.x$is_y]))
827-
) %>%
807+
) |>
828808
unlist()
829809
K <- K %||% min(nrow(yrep) + 1, 1000)
830810
}
831811

832-
# ---------------------------------------------------------------------------
833-
# Shared ECDF setup
834-
# ---------------------------------------------------------------------------
835-
836-
n_obs <- length(pit)
812+
n_obs <- length(pit)
837813
unit_interval <- seq(0, 1, length.out = K)
838-
ecdf_pit_fn <- ecdf(pit)
839-
y_label <- if (plot_diff) "ECDF difference" else "ECDF"
840-
841-
# ===========================================================================
842-
# Correlated method
843-
# ===========================================================================
814+
ecdf_pit_fn <- ecdf(pit)
815+
y_label <- if (plot_diff) "ECDF difference" else "ECDF"
844816

845817
if (method == "correlated") {
846-
847-
# Compute the per-observation test statistics (sorted for Shapley values)
848-
# and the combined Cauchy p-value.
849818
test_res <- posterior::uniformity_test(pit = pit, test = test)
850819
p_value_CCT <- test_res$pvalue
851820
pointwise_contrib <- test_res$pointwise
852-
853-
# Validate gamma against computed Shapley values.
854821
max_contrib <- max(pointwise_contrib)
855822
if (gamma < 0 || gamma > max_contrib) {
856823
stop(sprintf(
857824
"gamma must be in [0, %.2f], but gamma = %s was provided.",
858825
max_contrib, gamma
859826
))
860827
}
861-
862-
# Build the main ECDF data frame over a dense grid that includes pit values
863-
# so step discontinuities are rendered exactly.
864828
x_combined <- sort(unique(c(unit_interval, pit)))
865-
866829
df_main <- tibble::tibble(
867-
x = x_combined,
830+
x = x_combined,
868831
ecdf_val = ecdf_pit_fn(x_combined) - plot_diff * x_combined
869832
)
870-
871-
# Sorted pit data frame (used for highlighting suspicious points).
872833
pit_sorted <- sort(pit)
873834
df_pit <- tibble::tibble(
874-
pit = pit_sorted,
835+
pit = pit_sorted,
875836
ecdf_val = ecdf_pit_fn(pit_sorted) - plot_diff * pit_sorted
876837
)
877838

878-
# --- Base plot -----------------------------------------------------------
879-
880839
p <- ggplot() +
881840
geom_step(
882841
data = df_main,
@@ -886,50 +845,45 @@ ppc_pit_ecdf <- function(y,
886845
color = color["ecdf"]
887846
) +
888847
geom_segment(
889-
mapping = aes(x = 0, y = 0, xend = 1, yend = if (plot_diff) 0 else 1),
890-
linetype = "dashed",
891-
color = "darkgrey",
848+
mapping = aes(x = 0, y = 0, xend = 1, yend = if (plot_diff) 0 else 1),
849+
linetype = "dashed",
850+
color = "darkgrey",
892851
linewidth = 0.3
893852
) +
894853
labs(x = "PIT", y = y_label)
895854

896-
# --- Highlight suspicious regions ----------------------------------------
897-
898855
if (p_value_CCT < alpha) {
899856
red_idx <- which(pointwise_contrib > gamma)
900857

901858
if (length(red_idx) > 0) {
902859
df_red <- df_pit[red_idx, ]
903860
df_red$segment <- cumsum(c(1, diff(red_idx) != 1))
904-
905861
seg_sizes <- stats::ave(df_red$pit, df_red$segment, FUN = length)
906862
df_isolated <- df_red[seg_sizes == 1, ]
907863
df_grouped <- df_red[seg_sizes > 1, ]
908-
909-
# Highlight contiguous groups as coloured step segments.
864+
910865
if (nrow(df_grouped) > 0) {
911866
df_segments <- do.call(rbind, lapply(
912867
split(df_grouped, df_grouped$segment),
913868
function(grp) {
914-
pit_idx <- match(grp$pit, x_combined)
869+
pit_idx <- match(grp$pit, x_combined)
915870
idx_range <- seq(min(pit_idx), max(pit_idx))
916871
tibble::tibble(
917-
x = df_main$x[idx_range],
872+
x = df_main$x[idx_range],
918873
ecdf_val = df_main$ecdf_val[idx_range],
919-
segment = grp$segment[1L]
874+
segment = grp$segment[1L]
920875
)
921876
}
922877
))
923-
878+
924879
p <- p + geom_step(
925880
data = df_segments,
926881
mapping = aes(x = .data$x, y = .data$ecdf_val, group = .data$segment),
927882
color = color["highlight"],
928883
linewidth = linewidth + 0.8
929884
)
930885
}
931-
932-
# Highlight isolated suspicious points as dots.
886+
933887
if (nrow(df_isolated) > 0) {
934888
p <- p + geom_point(
935889
data = df_isolated,
@@ -940,8 +894,6 @@ ppc_pit_ecdf <- function(y,
940894
}
941895
}
942896
}
943-
944-
# --- Annotation and axis scaling -----------------------------------------
945897

946898
if (isTRUE(help_text)) {
947899
label_size <- 0.7 * bayesplot_theme_get()$text@size / ggplot2::.pt
@@ -951,8 +903,11 @@ ppc_pit_ecdf <- function(y,
951903
label = sprintf("p[unif]^{%s} == '%s' ~ (alpha == '%.2f')",
952904
test, fmt_p(p_value_CCT), alpha
953905
),
954-
hjust = -0.05, vjust = 1.5,
955-
color = "black", parse = TRUE, size = label_size
906+
hjust = -0.05,
907+
vjust = 1.5,
908+
color = "black",
909+
parse = TRUE,
910+
size = label_size
956911
)
957912
}
958913

@@ -972,36 +927,29 @@ ppc_pit_ecdf <- function(y,
972927
return(p)
973928
}
974929

975-
# ===========================================================================
976-
# Independent method
977-
# ===========================================================================
978-
979-
gamma_indep <- adjust_gamma(N = n_obs, K = K, prob = prob,
980-
interpolate_adj = interpolate_adj)
981-
930+
gamma_indep <- adjust_gamma(
931+
N = n_obs, K = K, prob = prob, interpolate_adj = interpolate_adj
932+
)
982933
lims <- ecdf_intervals(gamma = gamma_indep, N = n_obs, K = K)
983-
984-
# `lims` contains K + 1 elements (including the boundary at 0); drop it so
985-
# lengths match `unit_interval`.
986934
lims_upper <- lims$upper[-1] / n_obs - plot_diff * unit_interval
987935
lims_lower <- lims$lower[-1] / n_obs - plot_diff * unit_interval
988936
ecdf_eval <- ecdf_pit_fn(unit_interval) - plot_diff * unit_interval
989937

990938
p <- ggplot() +
991939
geom_step(
992-
mapping = aes(x = unit_interval, y = lims_upper, color = "yrep"),
993-
linetype = "dashed",
940+
mapping = aes(x = unit_interval, y = lims_upper, color = "yrep"),
941+
linetype = "dashed",
994942
linewidth = 0.3,
995943
show.legend = FALSE
996944
) +
997945
geom_step(
998-
mapping = aes(x = unit_interval, y = lims_lower, color = "yrep"),
999-
linetype = "dashed",
946+
mapping = aes(x = unit_interval, y = lims_lower, color = "yrep"),
947+
linetype = "dashed",
1000948
linewidth = 0.3,
1001949
show.legend = FALSE
1002950
) +
1003951
geom_step(
1004-
mapping = aes(x = unit_interval, y = ecdf_eval, color = "y"),
952+
mapping = aes(x = unit_interval, y = ecdf_eval, color = "y"),
1005953
linewidth = 0.5,
1006954
show.legend = FALSE
1007955
) +
@@ -1026,7 +974,7 @@ ppc_pit_ecdf_grouped <-
1026974
prob = .99,
1027975
plot_diff = FALSE,
1028976
interpolate_adj = NULL,
1029-
method = "independent",
977+
method = NULL,
1030978
test = NULL,
1031979
gamma = NULL,
1032980
linewidth = NULL,
@@ -1046,7 +994,25 @@ ppc_pit_ecdf_grouped <-
1046994
))
1047995
}
1048996

1049-
method <- match.arg(method, choices = c("independent", "correlated"))
997+
# Resolve and validate `method`
998+
if (is.null(method)) {
999+
inform(c(
1000+
"i" = paste(
1001+
"In the next major release, the default `method`",
1002+
"will change to 'correlated'."
1003+
),
1004+
"*" = paste(
1005+
"To silence this message, explicitly set",
1006+
"`method = 'independent'` or `method = 'correlated'`."
1007+
)
1008+
))
1009+
method <- "independent"
1010+
} else {
1011+
method <- match.arg(method, choices = c("independent", "correlated"))
1012+
if (method == "independent") {
1013+
inform("The 'independent' method is superseded by the 'correlated' method.")
1014+
}
1015+
}
10501016

10511017
switch(method,
10521018
"correlated" = {

0 commit comments

Comments
 (0)