Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ggRandomForests
Type: Package
Title: Visually Exploring Random Forests
Version: 3.3.0
Version: 3.4.0
Date: 2026-06-23
Authors@R: person("John", "Ehrlinger",
role = c("aut", "cre"),
Expand Down
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(autoplot,gg_beta_uvarpro)
S3method(autoplot,gg_beta_varpro)
S3method(autoplot,gg_brier)
S3method(autoplot,gg_error)
Expand All @@ -11,13 +12,16 @@ S3method(autoplot,gg_partial_varpro)
S3method(autoplot,gg_partialpro)
S3method(autoplot,gg_rfsrc)
S3method(autoplot,gg_roc)
S3method(autoplot,gg_sdependent)
S3method(autoplot,gg_survival)
S3method(autoplot,gg_udependent)
S3method(autoplot,gg_variable)
S3method(autoplot,gg_varpro)
S3method(autoplot,gg_vimp)
S3method(calc_roc,randomForest)
S3method(calc_roc,rfsrc)
S3method(gg_beta_uvarpro,default)
S3method(gg_beta_uvarpro,uvarpro)
S3method(gg_beta_varpro,varpro)
S3method(gg_brier,rfsrc)
S3method(gg_error,randomForest)
Expand All @@ -30,12 +34,15 @@ S3method(gg_rfsrc,rfsrc)
S3method(gg_roc,default)
S3method(gg_roc,randomForest)
S3method(gg_roc,rfsrc)
S3method(gg_sdependent,default)
S3method(gg_sdependent,uvarpro)
S3method(gg_survival,default)
S3method(gg_survival,rfsrc)
S3method(gg_variable,randomForest)
S3method(gg_variable,rfsrc)
S3method(gg_vimp,randomForest)
S3method(gg_vimp,rfsrc)
S3method(plot,gg_beta_uvarpro)
S3method(plot,gg_beta_varpro)
S3method(plot,gg_brier)
S3method(plot,gg_error)
Expand All @@ -47,11 +54,13 @@ S3method(plot,gg_partial_varpro)
S3method(plot,gg_partialpro)
S3method(plot,gg_rfsrc)
S3method(plot,gg_roc)
S3method(plot,gg_sdependent)
S3method(plot,gg_survival)
S3method(plot,gg_udependent)
S3method(plot,gg_variable)
S3method(plot,gg_varpro)
S3method(plot,gg_vimp)
S3method(print,gg_beta_uvarpro)
S3method(print,gg_beta_varpro)
S3method(print,gg_brier)
S3method(print,gg_error)
Expand All @@ -63,6 +72,7 @@ S3method(print,gg_partial_varpro)
S3method(print,gg_partialpro)
S3method(print,gg_rfsrc)
S3method(print,gg_roc)
S3method(print,gg_sdependent)
S3method(print,gg_survival)
S3method(print,gg_udependent)
S3method(print,gg_variable)
Expand All @@ -72,6 +82,7 @@ S3method(print,summary.gg)
S3method(print,summary.gg_beta_varpro)
S3method(print,summary.gg_ivarpro)
S3method(print,summary.gg_udependent)
S3method(summary,gg_beta_uvarpro)
S3method(summary,gg_beta_varpro)
S3method(summary,gg_brier)
S3method(summary,gg_error)
Expand All @@ -83,13 +94,15 @@ S3method(summary,gg_partial_varpro)
S3method(summary,gg_partialpro)
S3method(summary,gg_rfsrc)
S3method(summary,gg_roc)
S3method(summary,gg_sdependent)
S3method(summary,gg_survival)
S3method(summary,gg_udependent)
S3method(summary,gg_variable)
S3method(summary,gg_varpro)
S3method(summary,gg_vimp)
export(calc_auc)
export(calc_roc)
export(gg_beta_uvarpro)
export(gg_beta_varpro)
export(gg_brier)
export(gg_error)
Expand All @@ -101,6 +114,7 @@ export(gg_partial_varpro)
export(gg_partialpro)
export(gg_rfsrc)
export(gg_roc)
export(gg_sdependent)
export(gg_survival)
export(gg_udependent)
export(gg_variable)
Expand All @@ -127,6 +141,7 @@ importFrom(ggplot2,geom_jitter)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_ribbon)
importFrom(ggplot2,geom_segment)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
Expand Down
19 changes: 18 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
Package: ggRandomForests
Version: 3.3.0
Version: 3.4.0

ggRandomForests v3.4.0
======================
* `gg_beta_uvarpro()` / `plot.gg_beta_uvarpro()`: tidy wrapper and bar chart
for `varPro::get.beta.entropy()` -- the unsupervised analogue of
`gg_beta_varpro()`. From a `uvarpro()` fit it aggregates the per-region
lasso coefficients into `beta_mean = colMeans(|beta|)` per variable
(most-important first), flags variables above a selection cutoff, and
accepts a precomputed `beta_fit` matrix. `print`/`summary`/`autoplot`
companions follow the `gg_*` conventions.
* `gg_sdependent()` / `plot.gg_sdependent()`: tidy wrapper and ranked
lollipop for `varPro::sdependent()` signal-variable detection. Returns one
row per candidate variable (`imp_score`, graph `degree`, `signal` flag)
ranked by `imp_score`. Complements `gg_udependent()` (the dependency
graph) with the "which variables are signal" ranking; shares the
`beta_fit` entropy matrix. Follows the `get.beta.entropy` + `sdependent`
workflow from the `varPro::uvarpro()` help (iowa-housing example).

ggRandomForests v3.3.0
======================
Expand Down
235 changes: 235 additions & 0 deletions R/gg_beta_uvarpro.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
##=============================================================================
#' Per-variable lasso-beta importance from an unsupervised varPro fit
#'
#' Tidy wrapper around [varPro::get.beta.entropy()] for a `uvarpro` object.
#' Where [gg_beta_varpro()] refines the *supervised* release-rule contrast,
#' `gg_beta_uvarpro()` does the unsupervised analogue: `uvarpro()` builds
#' entropy regions with no response, and `get.beta.entropy()` fits a
#' cross-validated lasso within each region to ask how strongly every other
#' variable explains the released variable. Averaging the absolute lasso
#' coefficients per variable gives one number per variable: an unsupervised,
#' lasso-flavoured importance.
#'
#' @details
#' `get.beta.entropy(o)` returns a (released-variable x variable) numeric
#' matrix of absolute lasso coefficients. The column mean (`na.rm = TRUE`) is
#' the per-variable importance reported here, matching the canonical
#' `sort(colMeans(beta, na.rm = TRUE), decreasing = TRUE)` idiom in the
#' `varPro::uvarpro()` help ("iowa housing - illustrates lasso importance").
#'
#' Because `get.beta.entropy()` is expensive (a cross-validated `glmnet` per
#' region), the `beta_fit` argument accepts a pre-computed matrix so you can
#' iterate on the cutoff without re-fitting. The pairing mirrors the
#' `beta_fit` argument of [gg_beta_varpro()].
#'
#' @param object A `uvarpro` object from [varPro::uvarpro()].
#' @param ... Forwarded to [varPro::get.beta.entropy()] when
#' `beta_fit = NULL` (e.g. `pre.filter`, `second.stage`, `use.cv`).
#' Ignored, with a warning, when `beta_fit` is supplied.
#' @param cutoff Selection threshold on `beta_mean`. `NULL` (default) uses
#' `mean(beta_mean)`; a scalar sets it explicitly. Variables at or above the
#' cutoff are flagged `selected`.
#' @param beta_fit Optional pre-computed [varPro::get.beta.entropy()] matrix
#' for `object`. When supplied, must be a numeric matrix with column names
#' (the variables); `...` is then ignored.
#'
#' @return A `gg_beta_uvarpro` object (a `data.frame`), one row per variable,
#' most-important first, with columns:
#' \describe{
#' \item{`variable`}{factor; levels reversed so the most-important
#' variable lands at the top after `coord_flip()` (the `gg_vimp`
#' convention).}
#' \item{`beta_mean`}{`mean(|lasso beta|)` over the released regions
#' (`colMeans(beta, na.rm = TRUE)`).}
#' \item{`n_released`}{number of regions contributing a non-`NA`
#' coefficient for the variable.}
#' \item{`selected`}{logical; `beta_mean >= cutoff`.}
#' }
#' The `provenance` attribute records `source`, `family` (`"unsupv"`),
#' `cutoff`, `n_var`, `n_released_regions`, and `precomputed`.
#'
#' @seealso [gg_beta_varpro()] (supervised analogue), [gg_udependent()],
#' [varPro::get.beta.entropy()], [varPro::uvarpro()].
#'
#' @examples
#' \donttest{
#' if (requireNamespace("varPro", quietly = TRUE)) {
#' set.seed(1)
#' o <- varPro::uvarpro(mtcars, ntree = 50)
#' gg <- gg_beta_uvarpro(o)
#' plot(gg)
#' }
#' }
#'
#' @export
gg_beta_uvarpro <- function(object, ..., cutoff = NULL, beta_fit = NULL) {
UseMethod("gg_beta_uvarpro", object)
}

#' @export
gg_beta_uvarpro.default <- function(object, ..., cutoff = NULL,
beta_fit = NULL) {
stop("gg_beta_uvarpro: expected a 'uvarpro' object from varPro::uvarpro(); ",
"got an object of class ", paste(class(object), collapse = "/"), ".",
call. = FALSE)
}

#' @export
gg_beta_uvarpro.uvarpro <- function(object, ..., cutoff = NULL,
beta_fit = NULL) {
if (!inherits(object, "uvarpro")) {
stop("gg_beta_uvarpro: expected a 'uvarpro' object from varPro::uvarpro().",
call. = FALSE)
}
.assert_scalar_numeric_or_null(cutoff, "cutoff", "gg_beta_uvarpro")

# Resolve the beta matrix (cache path)
if (is.null(beta_fit)) {
b <- varPro::get.beta.entropy(object, ...)
} else {
.validate_beta_uvarpro(beta_fit)
if (length(list(...)) > 0L) {
warning("gg_beta_uvarpro: arguments in '...' ignored because beta_fit is supplied.",
call. = FALSE)
}
b <- beta_fit
}

# Empty fast-path: no regions / no variables survived
if (.is_empty_beta_matrix(b)) {
return(.gg_beta_uvarpro_empty(object, beta_fit, cutoff))
}

beta_mean_v <- colMeans(b, na.rm = TRUE)
n_released_v <- colSums(!is.na(b))

# Most-important first; reverse the factor levels so coord_flip() puts the
# top variable at the top (matches gg_vimp / gg_beta_varpro).
ord_names <- names(sort(beta_mean_v, decreasing = TRUE))

resolved_cutoff <- if (is.null(cutoff)) {
mean(beta_mean_v, na.rm = TRUE)
} else {
as.numeric(cutoff)
}

out <- data.frame(
variable = factor(ord_names, levels = rev(ord_names)),
beta_mean = unname(beta_mean_v[ord_names]),
n_released = as.integer(unname(n_released_v[ord_names])),
stringsAsFactors = FALSE
)
out$selected <- out$beta_mean >= resolved_cutoff
rownames(out) <- NULL

class(out) <- c("gg_beta_uvarpro", "data.frame")
attr(out, "provenance") <- list(
source = "varPro::get.beta.entropy",
family = "unsupv",
ntree = if (!is.null(object$ntree)) as.integer(object$ntree) else NA_integer_,
cutoff = stats::setNames(resolved_cutoff, "unsupv"),
cutoff_default = is.null(cutoff),
n_var = ncol(b),
n_released_regions = nrow(b),
precomputed = !is.null(beta_fit),
xvar.names = colnames(b)
)
out
}

#' @noRd
.validate_beta_uvarpro <- function(beta_fit, caller = "gg_beta_uvarpro") {
if (!is.matrix(beta_fit) || !is.numeric(beta_fit)) {
stop(caller, ": beta_fit does not look like a ",
"varPro::get.beta.entropy() result. Expected a numeric matrix.",
call. = FALSE)
}
if (ncol(beta_fit) > 0L && is.null(colnames(beta_fit))) {
stop(caller, ": beta_fit must have column names (the variables). ",
"varPro::get.beta.entropy() returns a named matrix.",
call. = FALSE)
}
invisible(NULL)
}

#' @noRd
.is_empty_beta_matrix <- function(m) {
is.null(m) || !is.matrix(m) || nrow(m) == 0L || ncol(m) == 0L
}

#' @noRd
.assert_scalar_numeric_or_null <- function(x, arg, caller) {
if (!is.null(x) &&
(!is.numeric(x) || length(x) != 1L || is.na(x))) {
stop(caller, ": `", arg, "` must be a single non-NA numeric value (or NULL).",
call. = FALSE)
}
invisible(NULL)
}

#' @rdname print.gg
#' @export
print.gg_beta_uvarpro <- function(x, ...) {
prov <- attr(x, "provenance")
precomputed <- isTRUE(if (!is.null(prov)) prov$precomputed else FALSE)
n_regions <- if (!is.null(prov)) prov$n_released_regions %||% NA_integer_ else NA_integer_
n_sel <- sum(x$selected, na.rm = TRUE)
cutoff <- if (!is.null(prov)) prov$cutoff %||% NA_real_ else NA_real_
cutoff_val <- if (length(cutoff) >= 1L) cutoff[[1]] else NA_real_
cutoff_default <- isTRUE(if (!is.null(prov)) prov$cutoff_default else FALSE)
cat(.gg_header(x, "gg_beta_uvarpro"),
sprintf(" | cutoff: %.4g%s", cutoff_val,
if (cutoff_default) " (default)" else ""),
sprintf(" | precomputed: %s", precomputed),
"\n",
sprintf(" %d of %d variables selected over %s released region(s)\n",
n_sel, nrow(x),
if (is.na(n_regions)) "NA" else format(n_regions)),
sep = "")
invisible(x)
}

#' @rdname summary.gg
#' @export
summary.gg_beta_uvarpro <- function(object, ...) {
v <- sort(stats::setNames(object$beta_mean, as.character(object$variable)),
decreasing = TRUE)
top <- utils::head(v, 5L)
body <- c(
sprintf("variables: %d (selected: %d)",
nrow(object), sum(object$selected, na.rm = TRUE)),
"top variables by mean |lasso beta|:",
sprintf(" %-14s %.4g", names(top), unname(top))
)
.summary_skel(object, "gg_beta_uvarpro", body)
}

#' @importFrom ggplot2 autoplot
#' @export
autoplot.gg_beta_uvarpro <- function(object, ...) {
plot.gg_beta_uvarpro(object, ...)
}

#' @noRd
.gg_beta_uvarpro_empty <- function(object, beta_fit, cutoff) {
out <- data.frame(
variable = factor(character(0)),
beta_mean = numeric(0),
n_released = integer(0),
selected = logical(0),
stringsAsFactors = FALSE
)
class(out) <- c("gg_beta_uvarpro", "data.frame")
attr(out, "provenance") <- list(
source = "varPro::get.beta.entropy",
family = "unsupv",
ntree = if (!is.null(object$ntree)) as.integer(object$ntree) else NA_integer_,
cutoff = stats::setNames(if (is.null(cutoff)) NA_real_ else as.numeric(cutoff), "unsupv"),
cutoff_default = is.null(cutoff),
n_var = 0L,
n_released_regions = 0L,
precomputed = !is.null(beta_fit),
xvar.names = character(0)
)
out
}
Loading
Loading