@@ -708,104 +708,85 @@ ppc_pit_ecdf <- function(y,
708708 prob = .99 ,
709709 plot_diff = FALSE ,
710710 interpolate_adj = NULL ,
711- method = " independent " ,
711+ method = NULL ,
712712 test = NULL ,
713713 gamma = NULL ,
714714 linewidth = NULL ,
715715 color = NULL ,
716716 help_text = NULL ,
717717 pareto_pit = NULL
718718 ) {
719- # Expected input combinations for ppc_pit_ecdf() based on method and test choices:
720- # | yrep | y | pit | method | test | pareto_pit | approach |
721- # |------|---|-----|-------------|------|------------|--------------------|
722- # | x | x | | independent | NULL | FALSE | empirical pit |
723- # | | | x | independent | NULL | FALSE | |
724- # | x | x | | independent | NULL | TRUE | compute pareto-pit |
725- # | x | x | | correlated | POT | TRUE | compute pareto-pit |
726- # | | | x | correlated | POT | FALSE | |
727- # | x | x | | correlated | PIET | TRUE | compute pareto-pit |
728- # | | | x | correlated | PIET | FALSE | |
729- # | x | x | | correlated | PRIT | FALSE | empirical pit |
730- # | | | x | correlated | PRIT | FALSE | |
731-
732719 check_ignored_arguments(... ,
733- ok_args = c(" K" , " pareto_pit" , " pit" , " prob" , " plot_diff" ,
734- " interpolate_adj" , " method" , " test" , " gamma" , " linewidth" , " color" ,
735- " help_text" )
720+ ok_args = c(
721+ " K" , " pareto_pit" , " pit" , " prob" , " plot_diff" ,
722+ " interpolate_adj" , " method" , " test" , " gamma" , " linewidth" ,
723+ " color" , " help_text"
724+ )
736725 )
737726
738- # ---------------------------------------------------------------------------
739- # Internal helpers
740- # ---------------------------------------------------------------------------
741-
742727 .warn_ignored <- function (method_name , args ) {
743728 inform(paste0(
744729 " As method = " , method_name , " specified; ignoring: " ,
745730 paste(args , collapse = " , " ), " ."
746731 ))
747732 }
748733
749- # ---------------------------------------------------------------------------
750- # Resolve and validate `method`
751- # ---------------------------------------------------------------------------
752-
753- method <- match.arg(method , choices = c(" independent" , " correlated" ))
754-
755-
756- # ---------------------------------------------------------------------------
757- # Method-specific defaults, validation, and pareto_pit flag
758- # ---------------------------------------------------------------------------
734+ if (is.null(method )) {
735+ inform(c(
736+ " i" = paste(
737+ " In the next major release, the default `method`" ,
738+ " will change to 'correlated'."
739+ ),
740+ " *" = paste(
741+ " To silence this message, explicitly set" ,
742+ " `method = 'independent'` or `method = 'correlated'`."
743+ )
744+ ))
745+ method <- " independent"
746+ } else {
747+ method <- match.arg(method , choices = c(" independent" , " correlated" ))
748+ if (method == " independent" ) {
749+ inform(" The 'independent' method is superseded by the 'correlated' method." )
750+ }
751+ }
759752
760753 switch (method ,
761754 " correlated" = {
762755 if (! is.null(interpolate_adj )) .warn_ignored(" 'correlated'" , " interpolate_adj" )
763756
764- test <- match.arg(test %|| % " POT" , choices = c(" POT" , " PRIT" , " PIET" ))
765- alpha <- 1 - prob
766- gamma <- gamma %|| % 0
757+ test <- match.arg(test %|| % " POT" , choices = c(" POT" , " PRIT" , " PIET" ))
758+ alpha <- 1 - prob
759+ gamma <- gamma %|| % 0
767760 linewidth <- linewidth %|| % 0.3
768- color <- color %|| % c(ecdf = " grey60" , highlight = " red" )
761+ color <- color %|| % c(ecdf = " grey60" , highlight = " red" )
769762 help_text <- help_text %|| % TRUE
770-
771- # Pareto PIT applies for correlated only when the test is POT or PIET
772- # (not PRIT, which uses empirical PIT).
773763 pareto_pit <- pareto_pit %|| % is.null(pit ) && test %in% c(" POT" , " PIET" )
774764 },
775765 " independent" = {
776766 ignored <- c(
777- if (! is.null(test )) " test" ,
778- if (! is.null(gamma )) " gamma" ,
767+ if (! is.null(test )) " test" ,
768+ if (! is.null(gamma )) " gamma" ,
779769 if (! is.null(help_text )) " help_text"
780770 )
781771 if (length(ignored ) > 0 ) .warn_ignored(" 'independent'" , ignored )
782-
783- # Pareto PIT applies for independent whenever pareto_pit = TRUE.
784772 pareto_pit <- pareto_pit %|| % FALSE
785773 }
786774 )
787775
788- # ---------------------------------------------------------------------------
789- # Compute PIT values
790- # ---------------------------------------------------------------------------
791-
792776 if (isTRUE(pareto_pit ) && is.null(pit )) {
793777 # --- Pareto-smoothed PIT ---
794778 suggested_package(" rstantools" )
795779 y <- validate_y(y )
796780 yrep <- validate_predictions(yrep , length(y ))
797-
798781 pit <- posterior :: pareto_pit(x = yrep , y = y , weights = NULL , log = TRUE )
799782 K <- K %|| % length(pit )
800783
801784 } else if (! is.null(pit )) {
802785 # --- Pre-supplied PIT values ---
803786 pit <- validate_pit(pit )
804787 K <- K %|| % length(pit )
805-
806- # Warn about any ignored arguments.
807788 ignored <- c(
808- if (! missing(y ) && ! is.null(y )) " y" ,
789+ if (! missing(y ) && ! is.null(y )) " y" ,
809790 if (! missing(yrep ) && ! is.null(yrep )) " yrep"
810791 )
811792 if (length(ignored ) > 0 ) {
@@ -814,69 +795,47 @@ ppc_pit_ecdf <- function(y,
814795 paste(ignored , collapse = " , " ), " ."
815796 ))
816797 }
817-
818798 } else {
819799 # --- Empirical PIT ---'
820800 y <- validate_y(y )
821801 yrep <- validate_predictions(yrep , length(y ))
822- pit <- ppc_data(y , yrep ) % > %
823- group_by(.data $ y_id ) % > %
802+ pit <- ppc_data(y , yrep ) | >
803+ group_by(.data $ y_id ) | >
824804 dplyr :: group_map(
825805 ~ mean(.x $ value [.x $ is_y ] > .x $ value [! .x $ is_y ]) +
826806 runif(1 , max = mean(.x $ value [.x $ is_y ] == .x $ value [! .x $ is_y ]))
827- ) % > %
807+ ) | >
828808 unlist()
829809 K <- K %|| % min(nrow(yrep ) + 1 , 1000 )
830810 }
831811
832- # ---------------------------------------------------------------------------
833- # Shared ECDF setup
834- # ---------------------------------------------------------------------------
835-
836- n_obs <- length(pit )
812+ n_obs <- length(pit )
837813 unit_interval <- seq(0 , 1 , length.out = K )
838- ecdf_pit_fn <- ecdf(pit )
839- y_label <- if (plot_diff ) " ECDF difference" else " ECDF"
840-
841- # ===========================================================================
842- # Correlated method
843- # ===========================================================================
814+ ecdf_pit_fn <- ecdf(pit )
815+ y_label <- if (plot_diff ) " ECDF difference" else " ECDF"
844816
845817 if (method == " correlated" ) {
846-
847- # Compute the per-observation test statistics (sorted for Shapley values)
848- # and the combined Cauchy p-value.
849818 test_res <- posterior :: uniformity_test(pit = pit , test = test )
850819 p_value_CCT <- test_res $ pvalue
851820 pointwise_contrib <- test_res $ pointwise
852-
853- # Validate gamma against computed Shapley values.
854821 max_contrib <- max(pointwise_contrib )
855822 if (gamma < 0 || gamma > max_contrib ) {
856823 stop(sprintf(
857824 " gamma must be in [0, %.2f], but gamma = %s was provided." ,
858825 max_contrib , gamma
859826 ))
860827 }
861-
862- # Build the main ECDF data frame over a dense grid that includes pit values
863- # so step discontinuities are rendered exactly.
864828 x_combined <- sort(unique(c(unit_interval , pit )))
865-
866829 df_main <- tibble :: tibble(
867- x = x_combined ,
830+ x = x_combined ,
868831 ecdf_val = ecdf_pit_fn(x_combined ) - plot_diff * x_combined
869832 )
870-
871- # Sorted pit data frame (used for highlighting suspicious points).
872833 pit_sorted <- sort(pit )
873834 df_pit <- tibble :: tibble(
874- pit = pit_sorted ,
835+ pit = pit_sorted ,
875836 ecdf_val = ecdf_pit_fn(pit_sorted ) - plot_diff * pit_sorted
876837 )
877838
878- # --- Base plot -----------------------------------------------------------
879-
880839 p <- ggplot() +
881840 geom_step(
882841 data = df_main ,
@@ -886,50 +845,45 @@ ppc_pit_ecdf <- function(y,
886845 color = color [" ecdf" ]
887846 ) +
888847 geom_segment(
889- mapping = aes(x = 0 , y = 0 , xend = 1 , yend = if (plot_diff ) 0 else 1 ),
890- linetype = " dashed" ,
891- color = " darkgrey" ,
848+ mapping = aes(x = 0 , y = 0 , xend = 1 , yend = if (plot_diff ) 0 else 1 ),
849+ linetype = " dashed" ,
850+ color = " darkgrey" ,
892851 linewidth = 0.3
893852 ) +
894853 labs(x = " PIT" , y = y_label )
895854
896- # --- Highlight suspicious regions ----------------------------------------
897-
898855 if (p_value_CCT < alpha ) {
899856 red_idx <- which(pointwise_contrib > gamma )
900857
901858 if (length(red_idx ) > 0 ) {
902859 df_red <- df_pit [red_idx , ]
903860 df_red $ segment <- cumsum(c(1 , diff(red_idx ) != 1 ))
904-
905861 seg_sizes <- stats :: ave(df_red $ pit , df_red $ segment , FUN = length )
906862 df_isolated <- df_red [seg_sizes == 1 , ]
907863 df_grouped <- df_red [seg_sizes > 1 , ]
908-
909- # Highlight contiguous groups as coloured step segments.
864+
910865 if (nrow(df_grouped ) > 0 ) {
911866 df_segments <- do.call(rbind , lapply(
912867 split(df_grouped , df_grouped $ segment ),
913868 function (grp ) {
914- pit_idx <- match(grp $ pit , x_combined )
869+ pit_idx <- match(grp $ pit , x_combined )
915870 idx_range <- seq(min(pit_idx ), max(pit_idx ))
916871 tibble :: tibble(
917- x = df_main $ x [idx_range ],
872+ x = df_main $ x [idx_range ],
918873 ecdf_val = df_main $ ecdf_val [idx_range ],
919- segment = grp $ segment [1L ]
874+ segment = grp $ segment [1L ]
920875 )
921876 }
922877 ))
923-
878+
924879 p <- p + geom_step(
925880 data = df_segments ,
926881 mapping = aes(x = .data $ x , y = .data $ ecdf_val , group = .data $ segment ),
927882 color = color [" highlight" ],
928883 linewidth = linewidth + 0.8
929884 )
930885 }
931-
932- # Highlight isolated suspicious points as dots.
886+
933887 if (nrow(df_isolated ) > 0 ) {
934888 p <- p + geom_point(
935889 data = df_isolated ,
@@ -940,8 +894,6 @@ ppc_pit_ecdf <- function(y,
940894 }
941895 }
942896 }
943-
944- # --- Annotation and axis scaling -----------------------------------------
945897
946898 if (isTRUE(help_text )) {
947899 label_size <- 0.7 * bayesplot_theme_get()$ text @ size / ggplot2 :: .pt
@@ -951,8 +903,11 @@ ppc_pit_ecdf <- function(y,
951903 label = sprintf(" p[unif]^{%s} == '%s' ~ (alpha == '%.2f')" ,
952904 test , fmt_p(p_value_CCT ), alpha
953905 ),
954- hjust = - 0.05 , vjust = 1.5 ,
955- color = " black" , parse = TRUE , size = label_size
906+ hjust = - 0.05 ,
907+ vjust = 1.5 ,
908+ color = " black" ,
909+ parse = TRUE ,
910+ size = label_size
956911 )
957912 }
958913
@@ -972,36 +927,29 @@ ppc_pit_ecdf <- function(y,
972927 return (p )
973928 }
974929
975- # ===========================================================================
976- # Independent method
977- # ===========================================================================
978-
979- gamma_indep <- adjust_gamma(N = n_obs , K = K , prob = prob ,
980- interpolate_adj = interpolate_adj )
981-
930+ gamma_indep <- adjust_gamma(
931+ N = n_obs , K = K , prob = prob , interpolate_adj = interpolate_adj
932+ )
982933 lims <- ecdf_intervals(gamma = gamma_indep , N = n_obs , K = K )
983-
984- # `lims` contains K + 1 elements (including the boundary at 0); drop it so
985- # lengths match `unit_interval`.
986934 lims_upper <- lims $ upper [- 1 ] / n_obs - plot_diff * unit_interval
987935 lims_lower <- lims $ lower [- 1 ] / n_obs - plot_diff * unit_interval
988936 ecdf_eval <- ecdf_pit_fn(unit_interval ) - plot_diff * unit_interval
989937
990938 p <- ggplot() +
991939 geom_step(
992- mapping = aes(x = unit_interval , y = lims_upper , color = " yrep" ),
993- linetype = " dashed" ,
940+ mapping = aes(x = unit_interval , y = lims_upper , color = " yrep" ),
941+ linetype = " dashed" ,
994942 linewidth = 0.3 ,
995943 show.legend = FALSE
996944 ) +
997945 geom_step(
998- mapping = aes(x = unit_interval , y = lims_lower , color = " yrep" ),
999- linetype = " dashed" ,
946+ mapping = aes(x = unit_interval , y = lims_lower , color = " yrep" ),
947+ linetype = " dashed" ,
1000948 linewidth = 0.3 ,
1001949 show.legend = FALSE
1002950 ) +
1003951 geom_step(
1004- mapping = aes(x = unit_interval , y = ecdf_eval , color = " y" ),
952+ mapping = aes(x = unit_interval , y = ecdf_eval , color = " y" ),
1005953 linewidth = 0.5 ,
1006954 show.legend = FALSE
1007955 ) +
@@ -1026,7 +974,7 @@ ppc_pit_ecdf_grouped <-
1026974 prob = .99 ,
1027975 plot_diff = FALSE ,
1028976 interpolate_adj = NULL ,
1029- method = " independent " ,
977+ method = NULL ,
1030978 test = NULL ,
1031979 gamma = NULL ,
1032980 linewidth = NULL ,
@@ -1046,7 +994,25 @@ ppc_pit_ecdf_grouped <-
1046994 ))
1047995 }
1048996
1049- method <- match.arg(method , choices = c(" independent" , " correlated" ))
997+ # Resolve and validate `method`
998+ if (is.null(method )) {
999+ inform(c(
1000+ " i" = paste(
1001+ " In the next major release, the default `method`" ,
1002+ " will change to 'correlated'."
1003+ ),
1004+ " *" = paste(
1005+ " To silence this message, explicitly set" ,
1006+ " `method = 'independent'` or `method = 'correlated'`."
1007+ )
1008+ ))
1009+ method <- " independent"
1010+ } else {
1011+ method <- match.arg(method , choices = c(" independent" , " correlated" ))
1012+ if (method == " independent" ) {
1013+ inform(" The 'independent' method is superseded by the 'correlated' method." )
1014+ }
1015+ }
10501016
10511017 switch (method ,
10521018 " correlated" = {
0 commit comments