11# #=============================================================================
#' Split partial dependence data into continuous or categorical datasets
#'
#' Takes the list returned by
#' \code{randomForestSRC::plot.variable(partial = TRUE)} and separates the
#' variables into two data frames: one for continuous predictors and one for
#' categorical (factor-like) predictors. The split is controlled by
#' \code{cat_limit}: variables with more unique x-values than this threshold
#' are treated as continuous; all others are categorical.
#'
#' @param part_dta partial plot data from
#'   \code{randomForestSRC::plot.variable}. Only the \code{plotthis} element
#'   is read; each of its entries must provide \code{x} and \code{yhat}.
#' @param nvars how many of the partial plot variables to calculate. Defaults
#'   to all of them (\code{NULL}). Values outside the available range are
#'   clamped, so \code{0} (or an empty \code{plotthis}) yields two empty data
#'   frames rather than an error.
#' @param cat_limit A variable is treated as categorical when it has at most
#'   \code{cat_limit} unique x values, and as continuous when it has more.
#' @param model a label name applied to all features. Useful when combining
#'   multiple partial plot objects in figures.
#'
#' @return A named list with two elements:
#' \describe{
#'   \item{continuous}{data.frame with columns \code{x}, \code{yhat},
#'     \code{name} (and optionally \code{model}) for continuous variables}
#'   \item{categorical}{data.frame with the same columns but with \code{x}
#'     as a factor, for low-cardinality / categorical variables}
#' }
#'
#' @seealso \code{\link{gg_partial_rfsrc}} \code{\link{gg_partialpro}}
#'
#' @examples
#' ## Build a small regression forest on the airquality dataset
#' set.seed(42)
#' airq <- na.omit(airquality)
#' rf <- randomForestSRC::rfsrc(Ozone ~ ., data = airq, ntree = 50)
#'
#' ## Compute partial dependence via plot.variable (show.plots = FALSE to
#' ## suppress the base-graphics output; we only want the data)
#' pv <- randomForestSRC::plot.variable(rf, partial = TRUE,
#'                                      show.plots = FALSE)
#'
#' ## Split into continuous and categorical data frames
#' result <- gg_partial(pv)
#' head(result$continuous)
#'
#' ## Label this model for later comparison with a second forest
#' result_labelled <- gg_partial(pv, model = "airq_model")
#' unique(result_labelled$continuous$model)
#'
#' @export
gg_partial <- function(part_dta,
                       nvars = NULL,
                       cat_limit = 10,
                       model = NULL) {
  plot_dta <- part_dta$plotthis
  n_avail <- length(plot_dta)

  ## Default: process every variable present in the partial data
  if (is.null(nvars)) {
    nvars <- n_avail
  }
  ## Keep the request inside what is actually available so the loop can never
  ## index past the end of plotthis (or run "backwards" when nothing is there)
  nvars <- max(0, min(nvars, n_avail))

  var_names <- names(plot_dta)

  ## Accumulate per-variable data frames before binding. Slots that are never
  ## filled stay NULL and are skipped by dplyr::bind_rows().
  cont_list <- list()
  cat_list <- list()

  ## seq_len() rather than seq(): seq(0) is c(1, 0), which would iterate twice
  for (feature in seq_len(nvars)) {
    x_vals <- plot_dta[[feature]]$x
    is_continuous <- length(unique(x_vals)) > cat_limit

    ## Low-cardinality variables get a factor x.
    ## VarPro works with logical or continuous only; factors are
    ## one-hot encoded internally in the varPro call.
    plt_df <- dplyr::bind_cols(
      x = if (is_continuous) x_vals else factor(x_vals),
      yhat = plot_dta[[feature]]$yhat
    )
    ## Tag each row with the variable name for downstream faceting
    plt_df$name <- var_names[[feature]]

    if (is_continuous) {
      cont_list[[feature]] <- plt_df
    } else {
      cat_list[[feature]] <- plt_df
    }
  }

  ## Combine the per-variable pieces into single data frames
  continuous <- dplyr::bind_rows(cont_list)
  categorical <- dplyr::bind_rows(cat_list)

  ## Optionally attach a model label (useful when overlaying multiple forests)
  if (!is.null(model)) {
    categorical$model <- model
    continuous$model <- model
  }

  list(continuous = continuous, categorical = categorical)
}
0 commit comments