From 0e76ee04e3398d2f856ac3bab3fddb63c1108f94 Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Mon, 27 Apr 2026 13:25:47 -0400 Subject: [PATCH 1/2] Fix empty-figure bugs in gg_partial_rfsrc; release v2.7.1 Survival forests in gg_partial_rfsrc() were calling partial.rfsrc() without partial.type, triggering a zero-length comparison inside randomForestSRC's C-level prediction code that aborted the call -- which is why the survival vignette's partial-dependence chunks rendered no figures. * Pass partial.type = "surv" by default for survival forests; expose it as a new gg_partial_rfsrc() argument accepting "surv" / "chf" / "mort". * Reshape multi-partial.time results to long form: get.partial.plot.data() returns yhat as a [length(partial.values) x length(partial.time)] matrix, but the previous code assumed a vector and crashed on the time-column assignment. * Improve plot.gg_partial_rfsrc() survival layout: predictor on x-axis, one curve per (rounded) time point coloured by Time, faceted by name. The previous default put time on the x-axis with one near-identical line per predictor value. * New tests/testthat/test_plot_layer_data.R uses ggplot2::layer_data() to verify each plot.gg_*() method renders non-empty layers across all forest families (regression / classification / survival, with/without conf.int, by, multi-time partial-dep). Catches empty-figure regressions without visual inspection. Bump DESCRIPTION to 2.7.1 (semver patch -- bug-fix only) and refresh cran-comments.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- DESCRIPTION | 4 +- NEWS.md | 30 ++- R/gg_partial_rfsrc.R | 55 ++++-- R/plot.gg_partial.R | 16 +- cran-comments.md | 29 +-- man/gg_partial_rfsrc.Rd | 8 + tests/testthat/test_plot_layer_data.R | 254 ++++++++++++++++++++++++++ 7 files changed, 364 insertions(+), 32 deletions(-) create mode 100644 tests/testthat/test_plot_layer_data.R diff --git a/DESCRIPTION b/DESCRIPTION index 357d3155..b2ff2edb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ggRandomForests Type: Package Title: Visually Exploring Random Forests -Version: 2.7.0.9001 -Date: 2026-03-27 +Version: 2.7.1 +Date: 2026-04-27 Authors@R: person("John", "Ehrlinger", role = c("aut", "cre"), email = "john.ehrlinger@gmail.com") diff --git a/NEWS.md b/NEWS.md index e261a7db..b3784ea8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,33 @@ Package: ggRandomForests -Version: 2.8.0 +Version: 2.7.1 -ggRandomForests v2.8.0 +ggRandomForests v2.7.1 +===================== +* Fix `gg_partial_rfsrc()` for survival forests: `partial.rfsrc()` was being + called without `partial.type`, causing a zero-length comparison + (`if (partial.type == "rel.freq") ...`) inside the C-level prediction + routine and aborting the call. Survival forests now pass + `partial.type = "surv"` (default; configurable via the new `partial.type` + argument accepting `"surv"`, `"chf"`, or `"mort"`). This unblocks the + `partial-dep` chunk in the survival vignette. +* Fix `gg_partial_rfsrc()` for survival forests with multiple + `partial.time` values: `get.partial.plot.data()` returns yhat as an + `[length(partial.values) x length(partial.time)]` matrix, but the previous + code assumed a vector and crashed on column-mismatch when assigning + `time`. The result is now reshaped to long form so each `(x, time)` pair + is a single row. +* Improve `plot.gg_partial_rfsrc()` survival layout: predictor value is now + on the x-axis with one curve per (rounded) time point coloured by `Time`, + faceted by variable name. The previous default put time on the x-axis + and one curve per predictor value, producing a saturated legend with + dozens of nearly-identical lines. +* Add `tests/testthat/test_plot_layer_data.R`: regression suite that uses + `ggplot2::layer_data()` to verify each `plot.gg_*()` method renders + non-empty layers for every supported forest family. Catches the + empty-figure class of bug (transform/plot column-name mismatch) without + requiring visual inspection. + +ggRandomForests v2.7.0 ===================== * S3 design overhaul: `gg_partial()`, `gg_partialpro()`, and `gg_partial_rfsrc()` now stamp their return values with S3 classes diff --git a/R/gg_partial_rfsrc.R b/R/gg_partial_rfsrc.R index ceda8ec7..d8ca4b5c 100644 --- a/R/gg_partial_rfsrc.R +++ b/R/gg_partial_rfsrc.R @@ -47,6 +47,12 @@ #' snapped to the nearest entry in \code{rf_model$time.interest} — see the #' \strong{Survival forests} section below. When \code{NULL} (default), #' three quartile points of \code{time.interest} are used. +#' @param partial.type Character; type of predicted value for survival +#' forests, passed through to \code{\link[randomForestSRC]{partial.rfsrc}}. +#' One of \code{"surv"} (default), \code{"chf"}, or \code{"mort"}. Ignored +#' for non-survival forests. \code{partial.rfsrc()} requires a non-\code{NULL} +#' value for survival families; supplying it here avoids a cryptic +#' \dQuote{argument is of length zero} error from the underlying C code. #' @param cat_limit Variables with fewer than \code{cat_limit} unique values in #' \code{newx} are treated as categorical; all others are continuous. #' Defaults to 10. @@ -89,6 +95,7 @@ gg_partial_rfsrc <- function(rf_model, xvar2.name = NULL, newx = NULL, partial.time = NULL, + partial.type = c("surv", "chf", "mort"), cat_limit = 10, n_eval = 25) { if (is.null(newx)) { @@ -112,14 +119,21 @@ gg_partial_rfsrc <- function(rf_model, is_surv <- !is.null(rf_model$family) && grepl("surv", rf_model$family) if (is_surv) { partial.time <- snap_partial_time(rf_model, partial.time) + # partial.rfsrc() requires a non-NULL partial.type for survival forests; + # NULL triggers a zero-length comparison inside the C code. + partial.type <- match.arg(partial.type) + } else { + partial.type <- NULL } if (is.null(xvar2.name)) { pdta <- partial_no_group(xvar.names, newx, rf_model, - cat_limit, n_eval, is_surv, partial.time) + cat_limit, n_eval, is_surv, partial.time, + partial.type) } else { pdta <- partial_with_group(xvar.names, xvar2.name, newx, rf_model, - cat_limit, n_eval, is_surv, partial.time) + cat_limit, n_eval, is_surv, partial.time, + partial.type) } split_partial_result(do.call("rbind", pdta)) @@ -184,7 +198,7 @@ make_eval_grid <- function(xname, newx, cat_limit, n_eval) { ## Thin wrapper around partial.rfsrc that builds the argument list. call_partial_rfsrc <- function(rf_model, xname, xval, - is_surv, partial.time, + is_surv, partial.time, partial.type, xvar2.name = NULL, x2val = NULL) { args <- list( object = rf_model, @@ -197,6 +211,7 @@ call_partial_rfsrc <- function(rf_model, xname, xval, } if (is_surv) { args$partial.time <- partial.time + args$partial.type <- partial.type } do.call(randomForestSRC::partial.rfsrc, args) } @@ -204,37 +219,54 @@ call_partial_rfsrc <- function(rf_model, xname, xval, ## Process a single predictor variable and return a tidy data.frame (or NULL). partial_one_var <- function(xname, newx, rf_model, cat_limit, n_eval, is_surv, partial.time, + partial.type, xvar2.name = NULL, x2val = NULL) { eg <- make_eval_grid(xname, newx, cat_limit, n_eval) if (is.null(eg)) return(NULL) xval <- eg$xval gr <- eg$categorical partial.obj <- call_partial_rfsrc(rf_model, xname, xval, - is_surv, partial.time, + is_surv, partial.time, partial.type, xvar2.name, x2val) pout <- randomForestSRC::get.partial.plot.data(partial.obj, granule = gr) - out_dta <- data.frame(x = pout$x, yhat = pout$yhat) + # Survival forests with >1 partial.time return yhat as an + # [length(partial.values) x length(partial.time)] matrix; expand to long form + # so each (x, time) pair is its own row. For non-survival or single-time + # cases yhat is already a vector of length(partial.values). + if (is.matrix(pout$yhat)) { + pt <- if (!is.null(pout$partial.time)) pout$partial.time else seq_len(ncol(pout$yhat)) + out_dta <- data.frame( + x = rep(pout$x, times = length(pt)), + yhat = as.numeric(pout$yhat), + time = rep(pt, each = length(pout$x)) + ) + } else { + out_dta <- data.frame(x = pout$x, yhat = pout$yhat) + if (!is.null(pout$partial.time)) { + out_dta$time <- pout$partial.time + } + } out_dta$name <- xname out_dta$type <- c("continuous", "categorical")[gr + 1L] - if (!is.null(pout$partial.time)) { - out_dta$time <- pout$partial.time - } out_dta } ## Compute partial dependence across xvar.names (no grouping variable). partial_no_group <- function(xvar.names, newx, rf_model, - cat_limit, n_eval, is_surv, partial.time) { + cat_limit, n_eval, is_surv, partial.time, + partial.type) { pdta <- lapply(xvar.names, partial_one_var, newx = newx, rf_model = rf_model, cat_limit = cat_limit, n_eval = n_eval, - is_surv = is_surv, partial.time = partial.time) + is_surv = is_surv, partial.time = partial.time, + partial.type = partial.type) Filter(Negate(is.null), pdta) } ## Compute partial dependence across xvar.names for each level of xvar2.name. partial_with_group <- function(xvar.names, xvar2.name, newx, rf_model, - cat_limit, n_eval, is_surv, partial.time) { + cat_limit, n_eval, is_surv, partial.time, + partial.type) { xv2 <- unique(newx[[xvar2.name]]) xv2 <- xv2[!is.na(xv2)] if (length(xv2) == 0L) { @@ -248,6 +280,7 @@ partial_with_group <- function(xvar.names, xvar2.name, newx, rf_model, newx = newx, rf_model = rf_model, cat_limit = cat_limit, n_eval = n_eval, is_surv = is_surv, partial.time = partial.time, + partial.type = partial.type, xvar2.name = xvar2.name, x2val = x2val) p1dta <- Filter(Negate(is.null), p1dta) if (length(p1dta) == 0L) return(NULL) diff --git a/R/plot.gg_partial.R b/R/plot.gg_partial.R index 1a74ffa2..6121fecf 100644 --- a/R/plot.gg_partial.R +++ b/R/plot.gg_partial.R @@ -109,19 +109,23 @@ plot.gg_partial_rfsrc <- function(x, ...) { cont <- gg_dta$continuous if (!is.null(cont$time)) { - ## Survival forest: predictor value is the grouping variable; x-axis is time + ## Survival forest: predictor value on x-axis, one curve per time point + ## (rounded for a tidy legend). Time is typically a small set (1-3 horizons) + ## while x is the dense evaluation grid. + cont$.time_lbl <- factor(round(cont$time, 2), + levels = sort(unique(round(cont$time, 2)))) gg_cont <- ggplot2::ggplot( cont, ggplot2::aes( - x = .data$time, + x = .data$x, y = .data$yhat, - color = factor(.data$x), - group = factor(.data$x) + color = .data$.time_lbl, + group = .data$.time_lbl ) ) + ggplot2::geom_line() + - ggplot2::facet_wrap(~name, scales = "free") + - ggplot2::labs(x = "Time", y = "Partial Effect", color = "Predictor value") + ggplot2::facet_wrap(~name, scales = "free_x") + + ggplot2::labs(x = NULL, y = "Predicted Survival", color = "Time") } else if (!is.null(cont$grp)) { ## Two-variable surface: group is xvar2; x-axis is the primary predictor diff --git a/cran-comments.md b/cran-comments.md index 7b11cb32..f6d9f1a0 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,21 +1,28 @@ -This is ggRandomForests package submission v2.7.0 +This is ggRandomForests package submission v2.7.1 ------------------------------------------------------------------------- -This is a bug-fix and code-quality release. Key changes: +This is a bug-fix release. Key changes: -* Fix critical visual bug: `aes()` calls throughout `plot.gg_rfsrc` and - `plot.gg_roc` used bare string literals instead of `.data[[col]]`, - causing aesthetics to map to constant strings rather than data columns. -* Fix `bootstrap_survival` CI-band indexing and `gg_rfsrc.randomForest` - incorrect use of non-existent `object$xvar` field. -* Fix `seq_len(nvar)` vs `1:nvar` silent bug in `gg_vimp` and `plot.gg_vimp`. -* Full test suite migration to testthat 3.x API. -* Improved GitHub Actions CI (lintr enforcement, warnings-as-errors). +* Fix `gg_partial_rfsrc()` for survival forests: `partial.rfsrc()` is now + called with `partial.type = "surv"` (default; also accepts `"chf"` / + `"mort"`). Without this, a zero-length comparison inside the underlying + C code aborted the call and left the survival-vignette partial-dep chunks + empty. +* Fix `gg_partial_rfsrc()` for multiple `partial.time` values: yhat is + reshaped from the matrix returned by `get.partial.plot.data()` into + long form so each `(x, time)` pair is one row. +* Improve `plot.gg_partial_rfsrc()` survival layout: predictor on the + x-axis with one curve per time point coloured by `Time`, faceted by + variable name. +* New regression test file `test_plot_layer_data.R` uses + `ggplot2::layer_data()` to verify each `plot.gg_*()` method renders + non-empty layers across all forest families, catching empty-figure + regressions without visual inspection. ## R CMD check results 0 errors | 0 warnings | 0 notes ## Test environments -* local R installation (R 4.4, macOS) +* local R installation (R 4.5, macOS) * GitHub Actions: ubuntu-latest (R devel) * GitHub Actions: ubuntu-latest (R release) * GitHub Actions: ubuntu-latest (R oldrel-1) diff --git a/man/gg_partial_rfsrc.Rd b/man/gg_partial_rfsrc.Rd index 19014406..c4343566 100644 --- a/man/gg_partial_rfsrc.Rd +++ b/man/gg_partial_rfsrc.Rd @@ -10,6 +10,7 @@ gg_partial_rfsrc( xvar2.name = NULL, newx = NULL, partial.time = NULL, + partial.type = c("surv", "chf", "mort"), cat_limit = 10, n_eval = 25 ) @@ -34,6 +35,13 @@ snapped to the nearest entry in \code{rf_model$time.interest} — see the \strong{Survival forests} section below. When \code{NULL} (default), three quartile points of \code{time.interest} are used.} +\item{partial.type}{Character; type of predicted value for survival +forests, passed through to \code{\link[randomForestSRC]{partial.rfsrc}}. +One of \code{"surv"} (default), \code{"chf"}, or \code{"mort"}. Ignored +for non-survival forests. \code{partial.rfsrc()} requires a non-\code{NULL} +value for survival families; supplying it here avoids a cryptic +\dQuote{argument is of length zero} error from the underlying C code.} + \item{cat_limit}{Variables with fewer than \code{cat_limit} unique values in \code{newx} are treated as categorical; all others are continuous. Defaults to 10.} diff --git a/tests/testthat/test_plot_layer_data.R b/tests/testthat/test_plot_layer_data.R new file mode 100644 index 00000000..fd2f3435 --- /dev/null +++ b/tests/testthat/test_plot_layer_data.R @@ -0,0 +1,254 @@ +# Regression tests for the empty-figure bug class. +# +# A `plot.gg_*()` method can return a ggplot whose data frame has zero rows, +# or whose layer data computes to zero rows after aesthetic mapping. That +# produces a figure with axes but no points or lines — which is what users +# saw in the survival vignette when the data transform's column names did +# not match what the plot method expected. +# +# These tests use `ggplot2::layer_data()` to inspect what each plot would +# actually draw. layer_data() runs the full ggplot build pipeline and returns +# the post-mapping data frame, so a zero-row result here means the figure +# would render empty. + +Surv <- survival::Surv # nolint: object_name_linter + +# Helper: number of rows ggplot2 would actually render for a given layer. +expect_layer_nonempty <- function(p, layer = 1L, label = NULL) { + testthat::expect_s3_class(p, "ggplot") + ld <- ggplot2::layer_data(p, layer) + testthat::expect_true( + nrow(ld) > 0, + info = sprintf( + "%slayer %d data should have rows (got %d). cols: %s", + if (is.null(label)) "" else paste0(label, ": "), + layer, nrow(ld), paste(colnames(ld), collapse = ",") + ) + ) + invisible(ld) +} + +# Helper: every aesthetic in `mapping_keys` should resolve to a column with +# more than one unique value somewhere in the layer data. The empty-figure +# bug presented as columns mapped to axis labels because the data frame had +# only one literal value per "variable"/"value"/etc. column. +expect_layer_has_variation <- function(p, layer = 1L, mapping_keys) { + ld <- ggplot2::layer_data(p, layer) + for (k in mapping_keys) { + testthat::expect_true( + k %in% colnames(ld), + info = sprintf("layer %d missing aesthetic '%s'", layer, k) + ) + testthat::expect_gt( + length(unique(ld[[k]])), 1, + label = sprintf("variation in layer %d aes '%s'", layer, k) + ) + } + invisible(ld) +} + +# ---------------------------------------------------------------------------- +# gg_rfsrc — survival +# ---------------------------------------------------------------------------- +test_that("plot.gg_rfsrc survival (no CI) renders many step curves", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + + gg <- gg_rfsrc(rf) + expect_true(all(c("variable", "value", "obs_id", "event") %in% colnames(gg))) + expect_type(gg$variable, "double") + expect_type(gg$value, "double") + + p <- plot(gg) + expect_layer_nonempty(p) + expect_layer_has_variation(p, mapping_keys = c("x", "y")) +}) + +test_that("plot.gg_rfsrc survival (CI) renders ribbon + median curve", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + gg <- gg_rfsrc(rf, conf.int = .95) + expect_true(all(c("value", "lower", "upper", "median", "mean") %in% colnames(gg))) + p <- plot(gg) + # Layer 1 is the ribbon, layer 2 is the median step line. + expect_layer_nonempty(p, layer = 1L) + expect_layer_nonempty(p, layer = 2L) +}) + +test_that("plot.gg_rfsrc survival (by group) renders per-group curves", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + gg <- gg_rfsrc(rf, by = "trt") + expect_true("group" %in% colnames(gg)) + p <- plot(gg) + ld <- expect_layer_nonempty(p) + expect_gt(length(unique(ld$group)), 1) +}) + +# ---------------------------------------------------------------------------- +# gg_rfsrc — regression +# ---------------------------------------------------------------------------- +test_that("plot.gg_rfsrc regression renders jitter + boxplot with data", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Ozone ~ ., data = airquality, + na.action = "na.impute", ntree = 50) + gg <- gg_rfsrc(rf) + expect_true(all(c("yhat", "Ozone") %in% colnames(gg))) + p <- plot(gg) + ld <- expect_layer_nonempty(p) + expect_equal(nrow(ld), nrow(gg)) + expect_gt(length(unique(ld$y)), 1) +}) + +# ---------------------------------------------------------------------------- +# gg_rfsrc — classification +# ---------------------------------------------------------------------------- +test_that("plot.gg_rfsrc multi-class renders one row per (obs, class)", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Species ~ ., data = iris, ntree = 50) + gg <- gg_rfsrc(rf) + # Multi-class: one column per class plus "y" + expect_true(all(levels(iris$Species) %in% colnames(gg))) + expect_true("y" %in% colnames(gg)) + p <- plot(gg) + ld <- expect_layer_nonempty(p) + # Three classes × 150 observations => 450 rows after pivot + expect_equal(nrow(ld), nrow(iris) * nlevels(iris$Species)) +}) + +# ---------------------------------------------------------------------------- +# gg_partial_rfsrc — survival regression test for the partial.type fix +# ---------------------------------------------------------------------------- +test_that("gg_partial_rfsrc survival passes partial.type and produces data", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + ti <- rf$time.interest + t90 <- ti[which.min(abs(ti - 90))] + + # Single time point. + expect_no_error({ + pd <- gg_partial_rfsrc(rf, xvar.names = "age", + partial.time = t90, n_eval = 8) + }) + expect_s3_class(pd, "gg_partial_rfsrc") + expect_true(nrow(pd$continuous) > 0) + expect_true(all(c("x", "yhat", "name", "time") %in% colnames(pd$continuous))) + # Survival plot path expects a "time" column. + p <- plot(pd) + expect_layer_nonempty(p) +}) + +test_that("gg_partial_rfsrc survival multi-time expands to long form", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + ti <- rf$time.interest + t30 <- ti[which.min(abs(ti - 30))] + t90 <- ti[which.min(abs(ti - 90))] + + pd <- gg_partial_rfsrc(rf, xvar.names = "age", + partial.time = c(t30, t90), n_eval = 8) + # Two time points × evaluation grid: long form, each (x, time) one row. + expect_equal(length(unique(pd$continuous$time)), 2L) + per_time <- table(pd$continuous$time) + expect_true(all(per_time == per_time[[1]])) + + p <- plot(pd) + ld <- expect_layer_nonempty(p) + # The fixed plot maps time → colour, so we should see two groups. + expect_gte(length(unique(ld$colour)), 2) +}) + +test_that("gg_partial_rfsrc partial.type rejects bad values", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + expect_error( + gg_partial_rfsrc(rf, xvar.names = "age", partial.type = "bogus"), + regexp = "should be one of" + ) +}) + +# ---------------------------------------------------------------------------- +# gg_partial_rfsrc — regression +# ---------------------------------------------------------------------------- +test_that("plot.gg_partial_rfsrc regression renders a non-empty line plot", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Ozone ~ ., data = na.omit(airquality), + ntree = 50) + pd <- gg_partial_rfsrc(rf, xvar.names = "Wind", n_eval = 8) + p <- plot(pd) + expect_layer_nonempty(p) +}) + +# ---------------------------------------------------------------------------- +# gg_error +# ---------------------------------------------------------------------------- +test_that("plot.gg_error single-outcome renders points or a line", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Ozone ~ ., data = airquality, + na.action = "na.impute", + ntree = 50, tree.err = TRUE, + block.size = 5) + gg <- gg_error(rf) + expect_true(all(c("ntree", "error") %in% colnames(gg))) + p <- plot(gg) + expect_layer_nonempty(p) +}) + +test_that("plot.gg_error multi-class pivots and colours by class", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Species ~ ., data = iris, + ntree = 50, tree.err = TRUE, + block.size = 5) + p <- plot(gg_error(rf)) + ld <- expect_layer_nonempty(p) + expect_gte(length(unique(ld$colour)), 2) +}) + +# ---------------------------------------------------------------------------- +# gg_vimp +# ---------------------------------------------------------------------------- +test_that("plot.gg_vimp renders one bar per variable", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Ozone ~ ., data = airquality, + na.action = "na.impute", ntree = 50, + importance = TRUE) + p <- plot(gg_vimp(rf)) + ld <- expect_layer_nonempty(p) + expect_equal(nrow(ld), length(rf$xvar.names)) +}) + +# ---------------------------------------------------------------------------- +# gg_variable +# ---------------------------------------------------------------------------- +test_that("plot.gg_variable survival single xvar has a point per observation", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + gg <- gg_variable(rf, time = 90) + expect_true(all(c("event", "yhat", "time") %in% colnames(gg))) + p <- plot(gg, xvar = "age") + ld <- expect_layer_nonempty(p) + expect_equal(nrow(ld), nrow(veteran)) +}) + +test_that("plot.gg_variable regression renders a scatter with smooth", { + set.seed(42) + rf <- randomForestSRC::rfsrc(Ozone ~ ., data = airquality, + na.action = "na.impute", ntree = 50) + gg <- gg_variable(rf) + p <- plot(gg, xvar = "Wind", smooth = FALSE) + expect_layer_nonempty(p) +}) From 3216c92605b9daad96df9ea9d84022600570d038 Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Mon, 27 Apr 2026 13:55:43 -0400 Subject: [PATCH 2/2] Address Copilot review on PR #70 * plot.gg_partial_rfsrc(): y-axis label now adapts to partial.type. The attribute is stamped on the gg_partial_rfsrc object and read back via a new partial_surv_y_label() helper that maps "surv" -> "Predicted Survival", "chf" -> "Predicted CHF", "mort" -> "Predicted Mortality" (with a "Predicted Survival" fallback for legacy objects without the attribute). * plot.gg_partial_rfsrc(): group/colour by full-precision time (not the rounded label), so distinct time horizons that round to the same value no longer collapse into a single curve. Rounding is applied only to legend labels via scale_color_discrete(labels = ...). * plot.gg_partial_rfsrc() roxygen: revised section header to match the new layout (one curve per evaluation time over the predictor's value; y-axis adapts to partial.type). * tests: lock in the y-label dispatch for "surv" / "chf" / "mort" and the full-precision time grouping (synthetic two-time object with values that round to the same 2-dp display). Co-Authored-By: Claude Opus 4.7 (1M context) --- R/gg_partial_rfsrc.R | 6 ++- R/plot.gg_partial.R | 40 +++++++++++++----- man/plot.gg_partial_rfsrc.Rd | 7 +++- tests/testthat/test_plot_layer_data.R | 58 +++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 13 deletions(-) diff --git a/R/gg_partial_rfsrc.R b/R/gg_partial_rfsrc.R index d8ca4b5c..606ea1aa 100644 --- a/R/gg_partial_rfsrc.R +++ b/R/gg_partial_rfsrc.R @@ -136,7 +136,11 @@ gg_partial_rfsrc <- function(rf_model, partial.type) } - split_partial_result(do.call("rbind", pdta)) + result <- split_partial_result(do.call("rbind", pdta)) + # Carry partial.type so plot.gg_partial_rfsrc() can pick the correct + # y-axis label (Survival / CHF / Mortality). + attr(result, "partial.type") <- partial.type + result } ## ---- unexported helpers ------------------------------------------------------- diff --git a/R/plot.gg_partial.R b/R/plot.gg_partial.R index 6121fecf..f24b84dc 100644 --- a/R/plot.gg_partial.R +++ b/R/plot.gg_partial.R @@ -12,6 +12,19 @@ #### ####********************************************************************** ####********************************************************************** + +# Map partial.type ("surv" / "chf" / "mort") to a human y-axis label. +# Falls back to "Predicted Survival" when the attribute is absent (e.g. an +# object built before this attribute was introduced). +partial_surv_y_label <- function(partial.type) { + if (is.null(partial.type)) return("Predicted Survival") + switch(partial.type, + surv = "Predicted Survival", + chf = "Predicted CHF", + mort = "Predicted Mortality", + "Predicted Survival") +} + #' Plot a \code{\link{gg_partial}} object #' #' Produces ggplot2 partial dependence curves from the named list returned by @@ -85,8 +98,11 @@ plot.gg_partial <- function(x, ...) { #' For standard (non-survival) forests: continuous predictors are line plots, #' categorical predictors are bar charts, both faceted by variable name. #' -#' For survival forests (when a \code{time} column is present): each predictor -#' value is a separate curve over time, faceted by variable name. +#' For survival forests (when a \code{time} column is present): each evaluation +#' time point is a separate curve over the predictor's value, faceted by +#' variable name. The y-axis label adapts to the \code{partial.type} stored on +#' the object (\dQuote{Predicted Survival}, \dQuote{Predicted CHF}, or +#' \dQuote{Predicted Mortality}). #' #' For two-variable surface plots (when a \code{grp} column is present): #' each group level is a separate line, faceted by primary predictor name. @@ -109,23 +125,27 @@ plot.gg_partial_rfsrc <- function(x, ...) { cont <- gg_dta$continuous if (!is.null(cont$time)) { - ## Survival forest: predictor value on x-axis, one curve per time point - ## (rounded for a tidy legend). Time is typically a small set (1-3 horizons) - ## while x is the dense evaluation grid. - cont$.time_lbl <- factor(round(cont$time, 2), - levels = sort(unique(round(cont$time, 2)))) + ## Survival forest: predictor value on x-axis, one curve per time point. + ## Group/colour by the *full-precision* time so distinct horizons that + ## happen to round to the same value are not silently merged. The legend + ## is relabelled with rounded values for readability. + time_levels <- sort(unique(cont$time)) + cont$.time_factor <- factor(cont$time, levels = time_levels) + legend_labels <- format(round(time_levels, 2), trim = TRUE) + y_lab <- partial_surv_y_label(attr(gg_dta, "partial.type")) gg_cont <- ggplot2::ggplot( cont, ggplot2::aes( x = .data$x, y = .data$yhat, - color = .data$.time_lbl, - group = .data$.time_lbl + color = .data$.time_factor, + group = .data$.time_factor ) ) + ggplot2::geom_line() + ggplot2::facet_wrap(~name, scales = "free_x") + - ggplot2::labs(x = NULL, y = "Predicted Survival", color = "Time") + ggplot2::scale_color_discrete(labels = legend_labels) + + ggplot2::labs(x = NULL, y = y_lab, color = "Time") } else if (!is.null(cont$grp)) { ## Two-variable surface: group is xvar2; x-axis is the primary predictor diff --git a/man/plot.gg_partial_rfsrc.Rd b/man/plot.gg_partial_rfsrc.Rd index 50aaf72f..2d3b3348 100644 --- a/man/plot.gg_partial_rfsrc.Rd +++ b/man/plot.gg_partial_rfsrc.Rd @@ -23,8 +23,11 @@ Produces ggplot2 partial dependence curves from the named list returned by For standard (non-survival) forests: continuous predictors are line plots, categorical predictors are bar charts, both faceted by variable name. -For survival forests (when a \code{time} column is present): each predictor -value is a separate curve over time, faceted by variable name. +For survival forests (when a \code{time} column is present): each evaluation +time point is a separate curve over the predictor's value, faceted by +variable name. The y-axis label adapts to the \code{partial.type} stored on +the object (\dQuote{Predicted Survival}, \dQuote{Predicted CHF}, or +\dQuote{Predicted Mortality}). For two-variable surface plots (when a \code{grp} column is present): each group level is a separate line, faceted by primary predictor name. diff --git a/tests/testthat/test_plot_layer_data.R b/tests/testthat/test_plot_layer_data.R index fd2f3435..a9f9b927 100644 --- a/tests/testthat/test_plot_layer_data.R +++ b/tests/testthat/test_plot_layer_data.R @@ -179,6 +179,64 @@ test_that("gg_partial_rfsrc partial.type rejects bad values", { ) }) +test_that("plot.gg_partial_rfsrc y-axis adapts to partial.type", { + data(veteran, package = "randomForestSRC") + set.seed(42) + rf <- randomForestSRC::rfsrc(Surv(time, status) ~ ., data = veteran, + ntree = 50) + ti <- rf$time.interest + t90 <- ti[which.min(abs(ti - 90))] + + # Default ("surv") => "Predicted Survival" + pd_s <- gg_partial_rfsrc(rf, xvar.names = "age", + partial.time = t90, n_eval = 6) + expect_equal(attr(pd_s, "partial.type"), "surv") + p_s <- plot(pd_s) + expect_equal(p_s$labels$y, "Predicted Survival") + + # "chf" => "Predicted CHF" + pd_c <- gg_partial_rfsrc(rf, xvar.names = "age", + partial.time = t90, partial.type = "chf", + n_eval = 6) + expect_equal(attr(pd_c, "partial.type"), "chf") + p_c <- plot(pd_c) + expect_equal(p_c$labels$y, "Predicted CHF") + + # "mort" => "Predicted Mortality"; mort returns one value per x (no time dim), + # so the survival "time" branch will not engage — we only assert the + # attribute round-trips. + pd_m <- gg_partial_rfsrc(rf, xvar.names = "age", + partial.type = "mort", + n_eval = 6) + expect_equal(attr(pd_m, "partial.type"), "mort") +}) + +test_that("plot.gg_partial_rfsrc preserves full-precision time grouping", { + # Distinct times that round to the same 2-dp value must not collapse into a + # single line. Build a synthetic gg_partial_rfsrc object with two such times. + cont <- data.frame( + x = rep(c(1, 2, 3), times = 2), + yhat = c(0.9, 0.8, 0.7, 0.5, 0.4, 0.3), + name = "x", + time = rep(c(1.001, 1.002), each = 3) + ) + obj <- structure( + list( + continuous = cont, + categorical = data.frame(x = character(0), yhat = numeric(0), + name = character(0), time = integer(0)) + ), + class = "gg_partial_rfsrc", + partial.type = "surv" + ) + p <- plot(obj) + ld <- ggplot2::layer_data(p, 1L) + # Two distinct full-precision time horizons => two distinct groups, even + # though both round to "1" at 2-dp precision. + expect_equal(length(unique(ld$group)), 2L) + expect_equal(length(unique(ld$colour)), 2L) +}) + # ---------------------------------------------------------------------------- # gg_partial_rfsrc — regression # ----------------------------------------------------------------------------