diff --git a/DESCRIPTION b/DESCRIPTION index dd57db74..38c5a3fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggRandomForests Type: Package Title: Visually Exploring Random Forests -Version: 2.7.3.9006 +Version: 2.7.3.9007 Date: 2026-05-21 Authors@R: person("John", "Ehrlinger", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 3854143b..698d1bfd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,24 @@ Package: ggRandomForests -Version: 2.7.3.9006 +Version: 2.7.3.9007 ggRandomForests v2.8.0 (development) — continued ================================================= +* `plot.gg_variable()`: fix render error on the default multi-class + classification plot. The default-xvar selection was treating `yvar` (the + observed-class column) and `outcome` (the multi-class pivot facet) as + predictors; pivoting them into `var` then dropped the column the + downstream `geom_jitter(aes(color = yvar))` referenced, and the patchwork + errored when actually rendered. CI did not catch this because the existing + test only asserted the patchwork class (lazy) and snapshots run with + `VDIFFR_RUN_TESTS = false`. New test exercises a real build of every + sub-plot. +* `plot.gg_variable()`: the same default-xvar selection used substring + `grep("time", ...)` / `grep("event", ...)`, which silently dropped any + predictor whose name contained those substrings -- e.g. the documented + veteran-data survival predictor `diagtime`. Switch to exact matching for + `event` / `time` / `yvar` / `outcome` and an anchored prefix for `yhat` + (`yhat` or `yhat.`). New test exercises `diagtime` on the veteran + survival forest. * `gg_roc()`: per-class one-vs-rest ROC curves (#88, closes #72). - New `per_class` argument, default `FALSE`. With `per_class = TRUE` on a forest of more than two classes, `gg_roc()` returns a long-format diff --git a/R/plot.gg_variable.R b/R/plot.gg_variable.R index b2cb2229..63163f19 100644 --- a/R/plot.gg_variable.R +++ b/R/plot.gg_variable.R @@ -180,11 +180,17 @@ plot.gg_variable <- function(x, # nolint: cyclocomp_linter ## ---- Default xvar: all predictor columns ----------------------------- if (missing(xvar)) { - # Remove response-side columns (yhat, event, time) to isolate predictors + # Remove response-side and structural columns to isolate predictors. + # `event`, `time`, `yvar`, `outcome` are matched by exact name -- a + # substring match (the prior code's `grep("time", ...)`) would silently + # drop a documented predictor like the veteran data's `diagtime`. `yhat` + # matches the bare `yhat` column (regression / single-class survival) and + # also `yhat.` columns produced by classification gg_variable, + # so the regex anchors at the start and either ends or transitions on a + # literal dot. cls <- c( - grep("yhat", colnames(gg_dta)), - grep("event", colnames(gg_dta)), - grep("time", colnames(gg_dta)) + grep("^yhat(\\.|$)", colnames(gg_dta)), + which(colnames(gg_dta) %in% c("event", "time", "yvar", "outcome")) ) xvar <- colnames(gg_dta)[-cls] } diff --git a/tests/testthat/test_gg_variable.R b/tests/testthat/test_gg_variable.R index cbe54087..4251a0d0 100644 --- a/tests/testthat/test_gg_variable.R +++ b/tests/testthat/test_gg_variable.R @@ -411,6 +411,57 @@ test_that("gg_variable.randomForest classification: layer_data works on single-x expect_no_error(ggplot2::layer_data(p, 1L)) }) +test_that("gg_variable.randomForest classification: default plot renders every panel", { + # yvar / outcome must not be treated as predictors by the default-xvar pick. + skip_if_not_installed("randomForest") + # Regression test: the default-xvar selection in plot.gg_variable used to + # include yvar and outcome (the response factor and the multi-class pivot's + # facet column), pivoting them into `var` and dropping them from the + # panel data. The geom_jitter aes then errored at render time with + # "Column `yvar` not found". This test exercises a real build of every + # patchwork sub-plot, which is what catches the regression — checking the + # patchwork class alone does not, because patchwork is lazy. + set.seed(42L) + rf <- randomForest::randomForest(Species ~ ., data = iris, ntree = 50L) + gg <- gg_variable(rf) + p <- plot(gg) + expect_s3_class(p, "patchwork") + # Force every sub-plot to actually build. + for (sub in p$patches$plots) { + expect_no_error(ggplot2::ggplot_build(sub)) + } +}) + +test_that("plot.gg_variable default xvar matches column names exactly, not substring", { + # Regression: the pre-existing default-xvar exclusion used grep("time", ...) + # and grep("event", ...), which silently dropped any predictor whose name + # *contained* those substrings -- e.g. the documented veteran-data survival + # predictor `diagtime`. Switch to exact matching for event/time/yvar/outcome + # and a prefix match for yhat (to catch yhat. columns). + data(veteran, package = "randomForestSRC") + set.seed(42L) + Surv <- survival::Surv # nolint: object_name_linter + rfsrc_veteran <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 25, nsplit = 3) + gg <- gg_variable(rfsrc_veteran, time = 90) + expect_true("diagtime" %in% names(gg)) + # The default xvar list must include diagtime (not dropped by substring match). + p <- plot(gg) + # The plot should be a patchwork (multiple predictors) -- and diagtime should + # appear as one of the panels. Force every sub-plot to build cleanly. + expect_s3_class(p, "patchwork") + for (sub in p$patches$plots) { + expect_no_error(ggplot2::ggplot_build(sub)) + } + # Spot-check: at least one panel's data has `var` sourced from diagtime by + # confirming diagtime is NOT in the residual data columns of every panel + # (it was pivoted into `var` for exactly one panel). + var_panel_count <- sum(vapply(p$patches$plots, function(sub) { + !("diagtime" %in% names(sub$data)) + }, logical(1L))) + expect_gte(var_panel_count, 1L) +}) + test_that("gg_variable.randomForest classification: norm.votes=FALSE still gives [0,1] fractions", { skip_if_not_installed("randomForest") set.seed(42L)