From 4f9dc48c5be88deb2ca2ee4a1e2c7d2df8aeda5f Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Thu, 21 May 2026 11:37:54 -0400 Subject: [PATCH 1/2] docs/test: smooth=TRUE is intentionally a no-op for factor predictors in plot.gg_variable geom_smooth requires a continuous x-axis; for factor predictors the boxplot IQR already serves as the spread summary. Add inline comments to all three factor-predictor branches (binary classification, multi-class classification, regression) and three regression-test guard tests to lock in the no-op behaviour. --- R/plot.gg_variable.R | 15 +++++++-- tests/testthat/test_gg_variable.R | 55 +++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/R/plot.gg_variable.R b/R/plot.gg_variable.R index e6d7ae38..4e0cf715 100644 --- a/R/plot.gg_variable.R +++ b/R/plot.gg_variable.R @@ -526,7 +526,11 @@ plot.gg_variable <- function(x, # nolint: cyclocomp_linter ) } } else { - # Factor predictor: jitter + boxplot coloured by observed class + # Factor predictor: jitter + boxplot coloured by observed class. + # smooth=TRUE is intentionally a no-op here: geom_smooth requires + # a continuous x-axis and has no meaningful interpretation for + # discrete factor levels. The boxplot IQR serves as the spread + # summary. gg_plt[[ind]] <- gg_plt[[ind]] + ggplot2::geom_jitter( ggplot2::aes( @@ -565,6 +569,10 @@ plot.gg_variable <- function(x, # nolint: cyclocomp_linter ) } } else { + # Factor predictor (multi-class): boxplot + jitter per facet. + # smooth=TRUE is intentionally a no-op here for the same reason + # as the binary factor path above — geom_smooth requires a + # continuous x-axis. gg_plt[[ind]] <- gg_plt[[ind]] + ggplot2::geom_boxplot( ggplot2::aes(x = .data$var, y = .data$yhat), @@ -605,7 +613,10 @@ plot.gg_variable <- function(x, # nolint: cyclocomp_linter ggplot2::geom_smooth(ggplot2::aes(x = .data$var, y = .data$yhat), ...) } } else { - # Factor predictor: boxplot + jitter + # Factor predictor (regression): boxplot + jitter. + # smooth=TRUE is intentionally a no-op here: geom_smooth requires a + # continuous x-axis and has no meaningful interpretation for discrete + # factor levels. The boxplot IQR serves as the spread summary. gg_plt[[ind]] <- gg_plt[[ind]] + ggplot2::geom_boxplot( ggplot2::aes(x = .data$var, y = .data$yhat), diff --git a/tests/testthat/test_gg_variable.R b/tests/testthat/test_gg_variable.R index e4bd9e40..4ab10fdb 100644 --- a/tests/testthat/test_gg_variable.R +++ b/tests/testthat/test_gg_variable.R @@ -492,3 +492,58 @@ test_that("gg_variable.randomForest: oob=FALSE triggers a warning", { expect_s3_class(gg, "gg_variable") expect_true("yhat.setosa" %in% names(gg)) }) + +## ── smooth=TRUE is a no-op for factor predictors (all families) ────────────── + +test_that("plot.gg_variable binary classification + factor predictor: smooth=TRUE is no-op (no error, 2 layers)", { + skip_if_not_installed("randomForest") + set.seed(42L) + bin_data <- iris[iris$Species != "virginica", ] + bin_data$Species <- droplevels(bin_data$Species) + bin_data$size <- cut(bin_data$Petal.Length, 2L, labels = c("small", "large")) + rf <- randomForest::randomForest(Species ~ size + Sepal.Width, data = bin_data, + ntree = 50L) + gg <- gg_variable(rf) + # smooth=TRUE with a factor predictor must not error + expect_no_error({ + p <- plot(gg, xvar = "size", smooth = TRUE) + }) + p <- plot(gg, xvar = "size", smooth = TRUE) + expect_s3_class(p, "ggplot") + # Boxplot + jitter — no smooth layer added + expect_equal(length(p$layers), 2L) +}) + +test_that("plot.gg_variable multi-class classification + factor predictor: smooth=TRUE is no-op (no error)", { + skip_if_not_installed("randomForest") + set.seed(42L) + iris2 <- iris + iris2$size <- cut(iris2$Petal.Length, 3L, labels = c("small", "medium", "large")) + rf <- randomForest::randomForest(Species ~ size + Sepal.Width, data = iris2, + ntree = 50L) + gg <- gg_variable(rf) + # smooth=TRUE with a factor predictor must not error + expect_no_error({ + p <- plot(gg, xvar = "size", smooth = TRUE) + }) + p <- plot(gg, xvar = "size", smooth = TRUE) + expect_s3_class(p, "ggplot") +}) + +test_that("plot.gg_variable regression + factor predictor: smooth=TRUE is no-op (no error, 2 layers)", { + skip_if_not_installed("randomForest") + set.seed(42L) + iris2 <- iris + iris2$size <- cut(iris2$Petal.Length, 3L, labels = c("small", "medium", "large")) + rf <- randomForest::randomForest(Sepal.Length ~ size + Sepal.Width, data = iris2, + ntree = 50L) + gg <- gg_variable(rf) + # smooth=TRUE with a factor predictor must not error + expect_no_error({ + p <- plot(gg, xvar = "size", smooth = TRUE) + }) + p <- plot(gg, xvar = "size", smooth = TRUE) + expect_s3_class(p, "ggplot") + # Boxplot + jitter — no smooth layer added + expect_equal(length(p$layers), 2L) +}) From 24b46655f24fcb37b0ff13d4ea00e72cc7aa3245 Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Thu, 21 May 2026 17:02:56 -0400 Subject: [PATCH 2/2] =?UTF-8?q?test:=20address=20Copilot=20review=20on=20P?= =?UTF-8?q?R=20#89=20=E2=80=94=20robustness=20+=20speed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace brittle `length(p$layers) == 2` with a direct GeomSmooth-absence check (has_smooth_layer helper) — the actual contract is "no smooth layer for a factor predictor", insensitive to benign layer-composition changes. - Multi-class test now asserts no smooth layer too (was missing the check). - Call plot() once per test via `expect_no_error(p <- plot(...))` instead of twice with identical args. - Drop ntree 50 -> 25; model quality is irrelevant to these tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/testthat/test_gg_variable.R | 42 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/tests/testthat/test_gg_variable.R b/tests/testthat/test_gg_variable.R index 4ab10fdb..cbe54087 100644 --- a/tests/testthat/test_gg_variable.R +++ b/tests/testthat/test_gg_variable.R @@ -495,55 +495,53 @@ test_that("gg_variable.randomForest: oob=FALSE triggers a warning", { ## ── smooth=TRUE is a no-op for factor predictors (all families) ────────────── -test_that("plot.gg_variable binary classification + factor predictor: smooth=TRUE is no-op (no error, 2 layers)", { +# geom_smooth requires a continuous x-axis, so smooth=TRUE has no meaning for a +# factor predictor. The contract these tests lock in: no GeomSmooth layer is +# added for a factor x. Assert that directly (rather than a brittle layer +# count) so benign plot-composition changes do not break the suite. +has_smooth_layer <- function(p) { + any(vapply(p$layers, function(l) inherits(l$geom, "GeomSmooth"), logical(1))) +} + +test_that("plot.gg_variable binary classification + factor predictor: smooth=TRUE adds no smooth layer", { skip_if_not_installed("randomForest") set.seed(42L) bin_data <- iris[iris$Species != "virginica", ] bin_data$Species <- droplevels(bin_data$Species) bin_data$size <- cut(bin_data$Petal.Length, 2L, labels = c("small", "large")) rf <- randomForest::randomForest(Species ~ size + Sepal.Width, data = bin_data, - ntree = 50L) + ntree = 25L) gg <- gg_variable(rf) # smooth=TRUE with a factor predictor must not error - expect_no_error({ - p <- plot(gg, xvar = "size", smooth = TRUE) - }) - p <- plot(gg, xvar = "size", smooth = TRUE) + expect_no_error(p <- plot(gg, xvar = "size", smooth = TRUE)) expect_s3_class(p, "ggplot") - # Boxplot + jitter — no smooth layer added - expect_equal(length(p$layers), 2L) + expect_false(has_smooth_layer(p)) }) -test_that("plot.gg_variable multi-class classification + factor predictor: smooth=TRUE is no-op (no error)", { +test_that("plot.gg_variable multi-class classification + factor predictor: smooth=TRUE adds no smooth layer", { skip_if_not_installed("randomForest") set.seed(42L) iris2 <- iris iris2$size <- cut(iris2$Petal.Length, 3L, labels = c("small", "medium", "large")) rf <- randomForest::randomForest(Species ~ size + Sepal.Width, data = iris2, - ntree = 50L) + ntree = 25L) gg <- gg_variable(rf) # smooth=TRUE with a factor predictor must not error - expect_no_error({ - p <- plot(gg, xvar = "size", smooth = TRUE) - }) - p <- plot(gg, xvar = "size", smooth = TRUE) + expect_no_error(p <- plot(gg, xvar = "size", smooth = TRUE)) expect_s3_class(p, "ggplot") + expect_false(has_smooth_layer(p)) }) -test_that("plot.gg_variable regression + factor predictor: smooth=TRUE is no-op (no error, 2 layers)", { +test_that("plot.gg_variable regression + factor predictor: smooth=TRUE adds no smooth layer", { skip_if_not_installed("randomForest") set.seed(42L) iris2 <- iris iris2$size <- cut(iris2$Petal.Length, 3L, labels = c("small", "medium", "large")) rf <- randomForest::randomForest(Sepal.Length ~ size + Sepal.Width, data = iris2, - ntree = 50L) + ntree = 25L) gg <- gg_variable(rf) # smooth=TRUE with a factor predictor must not error - expect_no_error({ - p <- plot(gg, xvar = "size", smooth = TRUE) - }) - p <- plot(gg, xvar = "size", smooth = TRUE) + expect_no_error(p <- plot(gg, xvar = "size", smooth = TRUE)) expect_s3_class(p, "ggplot") - # Boxplot + jitter — no smooth layer added - expect_equal(length(p$layers), 2L) + expect_false(has_smooth_layer(p)) })