|
11 | 11 | #' `beta.varpro()` step once and reuse the result. |
12 | 12 | #' |
13 | 13 | #' @section What this is doing: |
14 | | -#' For each rule (a tree-branch pair) in the forest, [varPro::beta.varpro()] |
15 | | -#' fits a one-predictor lasso regression of the response on the released |
16 | | -#' variable's values, restricted to the OOB observations inside the rule's |
17 | | -#' region. The wrapper aggregates those per-rule coefficients into one |
18 | | -#' number per variable. |
| 14 | +#' Think of the varPro release-rule mechanism as asking: "given a region of |
| 15 | +#' the feature space that the forest carved out, what changes when I remove |
| 16 | +#' the constraint on this one variable and let observations leave?" The |
| 17 | +#' standard importance answer (from [gg_varpro()]) measures that change as a |
| 18 | +#' z-scored contrast between local estimators: no synthetic data, no |
| 19 | +#' permutation. `beta.varpro()` asks the same question with a different |
| 20 | +#' ruler: for each rule (a tree-branch pair), it fits a one-predictor lasso |
| 21 | +#' regression of the response on the released variable's values, restricted |
| 22 | +#' to the OOB observations inside the rule's region. The wrapper aggregates |
| 23 | +#' those per-rule coefficients into one number per variable. |
| 24 | +#' |
| 25 | +#' The key distinction from [gg_vimp()], which measures Breiman-Cutler |
| 26 | +#' permutation importance by perturbing a variable's values and watching OOB |
| 27 | +#' error climb, is that neither [gg_varpro()] nor `gg_beta_varpro()` |
| 28 | +#' touches the data synthetically: all contrasts are between real subsets |
| 29 | +#' defined by the forest's rules. |
19 | 30 | #' |
20 | 31 | #' @section What `imp` actually is (pedantic, because the column name is misleading): |
21 | 32 | #' The `imp` column on `beta.varpro()`'s `$results` is **not** a |
|
63 | 74 | #' |
64 | 75 | #' @section What you use this for: |
65 | 76 | #' Picking variables when local effects matter more than aggregate |
66 | | -#' split-strength contribution. Compare side-by-side with [gg_varpro()] — |
| 77 | +#' split-strength contribution. Compare side-by-side with [gg_varpro()]: |
67 | 78 | #' a variable that scores high here but low in `gg_varpro` is one whose |
68 | 79 | #' local linear effect inside many rules is real even though its |
69 | 80 | #' release-rule contrast is modest. |
|
92 | 103 | #' class. |
93 | 104 | #' |
94 | 105 | #' **Binary default**: `which_class = NULL` resolves to the *last* |
95 | | -#' factor level of the response — the positive-class convention used |
| 106 | +#' factor level of the response, the positive-class convention used |
96 | 107 | #' by `glm` and `gg_roc`. For a 30-day-mortality outcome with levels |
97 | 108 | #' `c("no", "yes")`, that means the wrapper shows you `"yes"` (the |
98 | 109 | #' event) by default. |
|
118 | 129 | #' @section Reproducibility: |
119 | 130 | #' Byte-for-byte agreement between cached (`beta_fit = b`) and uncached |
120 | 131 | #' (`beta_fit = NULL`) outputs requires that `b` was computed by |
121 | | -#' `beta.varpro(object, ...)` on the same `object` — `set.seed()` alone is |
| 132 | +#' `beta.varpro(object, ...)` on the same `object`; `set.seed()` alone is |
122 | 133 | #' not sufficient, because `beta.varpro`'s internal `cv.glmnet` fits can |
123 | 134 | #' pick slightly different folds across separate calls. Reuse `beta_fit` |
124 | 135 | #' when reproducibility matters. |
|
132 | 143 | #' @param ... Forwarded to [varPro::beta.varpro()] when `beta_fit = NULL`; |
133 | 144 | #' ignored otherwise (with a warning). Documented forwardables: `use.cv`, |
134 | 145 | #' `use.1se`, `nfolds`, `maxit`, `thresh`, `max.rules.tree`, `max.tree`. |
135 | | -#' @param cutoff Selection threshold on `beta_mean`. `NULL` (default) → |
| 146 | +#' @param cutoff Selection threshold on `beta_mean`. `NULL` (default) uses |
136 | 147 | #' `mean(beta_mean)` across released variables; otherwise a numeric scalar. |
137 | 148 | #' @param beta_fit Optional pre-computed [varPro::beta.varpro()] result for |
138 | | -#' the same `object`. `NULL` (default) → the wrapper runs `beta.varpro()` |
| 149 | +#' the same `object`. `NULL` (default) means the wrapper runs `beta.varpro()` |
139 | 150 | #' itself. When supplied, must be a `varpro`-class object whose `$results` |
140 | 151 | #' has columns `tree / branch / variable / n.oob / imp`. |
141 | 152 | #' @param which_class For a classification fit, name of a single response |
142 | 153 | #' level to subset on. `NULL` (default) returns all classes (binary fits |
143 | | -#' resolve to the *last* factor level — the positive-class convention |
| 154 | +#' resolve to the *last* factor level, the positive-class convention |
144 | 155 | #' used by `glm` and `gg_roc`). Ignored with a warning on regression |
145 | 156 | #' fits. |
146 | 157 | #' |
|
153 | 164 | #' the same row order. `which_class` (or the binary default |
154 | 165 | #' last-factor-level) collapses the output to a single class. |
155 | 166 | #' |
156 | | -#' @seealso [gg_varpro()], [plot.gg_beta_varpro()], [varPro::beta.varpro()]. |
| 167 | +#' @seealso [gg_varpro()], [gg_vimp()], [plot.gg_beta_varpro()], [varPro::beta.varpro()]. |
157 | 168 | #' |
158 | 169 | #' @examples |
159 | 170 | #' \donttest{ |
@@ -208,7 +219,7 @@ gg_beta_varpro.varpro <- function(object, ..., cutoff = NULL, |
208 | 219 | which_class <- NULL |
209 | 220 | } |
210 | 221 |
|
211 | | - # Capture use.cv from `...` here (NOT inside the internals — the dots |
| 222 | + # Capture use.cv from `...` here (NOT inside the internals; the dots |
212 | 223 | # don't pass through to the internal frame). |
213 | 224 | dots_use_cv <- if (is.null(beta_fit)) isTRUE(list(...)$use.cv) else NA |
214 | 225 |
|
@@ -372,7 +383,7 @@ gg_beta_varpro.varpro <- function(object, ..., cutoff = NULL, |
372 | 383 | ord_names <- names(sort(beta_mean_total, decreasing = TRUE)) |
373 | 384 | lvl <- rev(ord_names) |
374 | 385 |
|
375 | | - # Per-class aggregation — long format |
| 386 | + # Per-class aggregation: long format |
376 | 387 | rows <- list() |
377 | 388 | for (k in seq_len(n_classes)) { |
378 | 389 | col <- imp_cols[k] |
@@ -452,8 +463,8 @@ gg_beta_varpro.varpro <- function(object, ..., cutoff = NULL, |
452 | 463 | class(base) <- c("gg_beta_varpro", "data.frame") |
453 | 464 |
|
454 | 465 | # Build provenance with shape-stable cutoff: |
455 | | - # regr → c("regr" = NA_real_) |
456 | | - # class → named NA_real_ vector, one entry per class level |
| 466 | + # regr gives c("regr" = NA_real_) |
| 467 | + # class gives a named NA_real_ vector, one entry per class level |
457 | 468 | if (fam == "class") { |
458 | 469 | class_levels <- .class_levels_from_varpro(object) |
459 | 470 | cutoff_empty <- stats::setNames(rep(NA_real_, length(class_levels)), |
|
0 commit comments