ehrlinger
diff --git a/‎.Rbuildignore‎
Lines changed: 4 additions & 0 deletions b/‎.Rbuildignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.claude/settings.local.json‎
Lines changed: 15 additions & 0 deletions b/‎.claude/settings.local.json‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎.claude/worktrees/lucid-herschel‎
Lines changed: 1 addition & 0 deletions b/‎.claude/worktrees/lucid-herschel‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.vscode/settings.json‎
Lines changed: 3 additions & 0 deletions b/‎.vscode/settings.json‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 6 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎R/calc_roc.R‎
Lines changed: 16 additions & 11 deletions b/‎R/calc_roc.R‎
Lines changed: 16 additions & 11 deletions
diff --git a/‎R/gg_error.R‎
Lines changed: 15 additions & 6 deletions b/‎R/gg_error.R‎
Lines changed: 15 additions & 6 deletions
diff --git a/‎R/gg_partial.R‎
Lines changed: 76 additions & 32 deletions b/‎R/gg_partial.R‎
Lines changed: 76 additions & 32 deletions
@@ -28,9 +28,13 @@ framed.sty
 ^CRAN-RELEASE$
 ^CRAN-SUBMISSION$
 ^\.github$
+^\.claude$
+^\.git$
+^\.vscode$
 ^doc$
 ^Meta$
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
 ^LICENSE\.md$
+^memory$
@@ -0,0 +1,15 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(Rscript -e \"devtools::test\\(\\)\")",
+      "Bash(Rscript -e \"devtools::test\\(reporter = testthat::SummaryReporter$new\\(\\)\\)\")",
+      "Bash(Rscript -e \"testthat::set_max_fails\\(Inf\\); devtools::test\\(reporter = testthat::SummaryReporter$new\\(\\)\\)\")",
+      "Bash(Rscript -e \"options\\(testthat.max_fails = Inf\\); devtools::test\\(reporter = testthat::SummaryReporter$new\\(\\)\\)\")",
+      "Bash(Rscript -e \":*)",
+      "Bash(Rscript --vanilla -e \":*)",
+      "Bash(Rscript -e \"library\\(covr\\); cov <- package_coverage\\(''/Users/ehrlinj/Documents/GitHub/ggRandomForests/.claude/worktrees/lucid-herschel''\\); print\\(cov\\)\")",
+      "Bash(Rscript -e \"rcmdcheck::rcmdcheck\\(args=''--no-manual'', error_on=''warning''\\)\")",
+      "Bash(git add .Rbuildignore tests/testthat/test_gg_rfsrc.R tests/testthat/test_gg_variable.R tests/testthat/test_gg_partial.R tests/testthat/test_gg_partialpro.R tests/testthat/test_ggrandomforests_news.R tests/testthat/test_surv_partial.R tests/testthat/test_varpro_feature_names.R)"
+    ]
+  }
+}
@@ -0,0 +1 @@
+Subproject commit 9b2abd8d14bd5c4d563cdc460ec2cf9ad4c99645
@@ -0,0 +1,3 @@
+{
+    "snyk.advanced.autoSelectOrganization": true
+}
@@ -1,8 +1,8 @@
 Package: ggRandomForests
 Type: Package
 Title: Visually Exploring Random Forests
-Version: 2.5.0
-Date: 2026-02-19
+Version: 2.6.0
+Date: 2026-03-04
 Authors@R: person("John", "Ehrlinger",  
   role = c("aut", "cre"), 
   email = "john.ehrlinger@gmail.com")
 
@@ -36,6 +36,12 @@ export(gg_variable)
 export(gg_vimp)
 export(kaplan)
 export(nelson)
+export(plot.gg_error)
+export(plot.gg_rfsrc)
+export(plot.gg_roc)
+export(plot.gg_survival)
+export(plot.gg_variable)
+export(plot.gg_vimp)
 export(quantile_pts)
 export(surv_partial.rfsrc)
 export(varpro_feature_names)
 
@@ -66,45 +66,50 @@ calc_roc.rfsrc <-
            which_outcome = "all",
            oob = TRUE,
            ...) {
+    # Ensure response is a factor so levels() is well-defined
     if (!is.factor(dta)) {
       dta <- factor(dta)
     }
 
+    # NOTE(review): `oob` is a formal, never in `...`; it is reset to FALSE below
     arg_list <- as.list(substitute(list(...)))
 
     oob <- FALSE
     if (!is.null(arg_list$oob) && is.logical(arg_list$oob)) {
       oob <- as.logical(arg_list$oob)
     }
 
+    # "all" outcomes not yet supported; fall back to the first class
     if (which_outcome == "all") {
       warning("Must specify which_outcome for now.")
       which_outcome <- 1
     }
+    # Build (binary indicator, full-forest prediction, OOB prediction) triplet
     dta_roc <-
       data.frame(cbind(
         res = (dta == levels(dta)[which_outcome]),
         prd = object$predicted[, which_outcome],
         oob_prd = object$predicted.oob[, which_outcome]
       ))
 
-    # Get the list of unique prob
+    # Collect the unique predicted probability thresholds for the ROC sweep
     if (oob) {
       pct <- sort(unique(object$predicted.oob[, which_outcome]))
     } else {
       pct <- sort(unique(object$predicted[, which_outcome]))
     }
 
     last <- length(pct)
+    # Remove the maximum threshold (the cutpoint where nothing is classified
+    # as positive), which produces the (sens=0, spec=1) anchor point
     pct <- pct[-last]
 
-    # Make sure we don't have to many points... if the training set was large,
-    # This may break plotting all ROC curves in multiclass settings.
-    # Arbitrarily reduce this to only include 200 points along the curve
+    # Cap at 200 threshold points to keep multi-class ROC plots manageable
     if (last > 200) {
       pct <- pct[seq(1, length(pct), length.out = 200)]
     }
 
+    # For each threshold, build the 2×2 confusion table and extract TPR/TNR
     gg_dta <- parallel::mclapply(pct, function(crit) {
       if (oob) {
         tbl <- xtabs(~ res + (oob_prd > crit), dta_roc)
@@ -118,6 +123,7 @@ calc_roc.rfsrc <-
     })
 
     gg_dta <- do.call(rbind, gg_dta)
+    # Anchor curve at perfect specificity (0, 1) and perfect sensitivity (1, 0)
     gg_dta <- rbind(c(0, 1), gg_dta, c(1, 0))
 
     gg_dta <- data.frame(gg_dta, row.names = seq_len(nrow(gg_dta)))
@@ -222,16 +228,15 @@ calc_roc.randomForest <-
 #' @aliases calc_auc calc_auc.gg_roc
 #' @export
 calc_auc <- function(x) {
-  ## Use the trapeziod rule, basically calc
-  ##
-  ## auc = dx/2(f(x_{i+1}) - f(x_i))
-  ##
-  ## f(x) is sensitivity, x is 1-specificity
+  ## Trapezoidal rule:  AUC = Σ dx/2 * (f(x_{i+1}) + f(x_i))
+  ## Here f(x) is sensitivity (TPR) and x is 1 − specificity (FPR).
+  ## The shift() helper provides the lead value x_{i+1}.
 
-  # Since we are leading vectors (x_{i+1} - x_{i}), we need to
-  # ensure we are in decreasing order of specificity (x var = 1-spec)
+  # Sort in decreasing specificity so FPR increases left-to-right along the curve
   x <- x[order(x$spec, decreasing = TRUE), ]
 
+  # NOTE(review): the weight below is (3 * lead - sens) / 2, not the trapezoid
+  # mean (lead + sens) / 2; it multiplies the FPR increment (change in 1 - spec)
   auc <- (3 * shift(x$sens) - x$sens) / 2 * (x$spec - shift(x$spec))
   sum(auc, na.rm = TRUE)
 }
 
@@ -203,7 +203,7 @@ gg_error <- function(object, ...) {
 }
 #' @export
 gg_error.rfsrc <- function(object, ...) {
-  ## Check that the input obect is of the correct type.
+  ## Check that the input object is of the correct type.
   if (!inherits(object, "rfsrc")) {
     stop(
       paste(
@@ -212,20 +212,27 @@ gg_error.rfsrc <- function(object, ...) {
       )
     )
   }
+  # The forest must have been grown with tree.err = TRUE so that per-tree
+  # OOB error rates are recorded in $err.rate.
   if (is.null(object$err.rate)) {
     stop("Performance values are not available for this forest.")
   }
 
+  # Convert the err.rate matrix (ntree × n_outcomes) to a data frame.
   gg_dta <- data.frame(object$err.rate)
 
-  # If there is only one column in the error rate... name it reasonably.
+  # When err.rate has no column names, data.frame() derives the column name
+  # from the argument expression ("object.err.rate"); rename it to "error".
   if ("object.err.rate" %in% colnames(gg_dta)) {
     colnames(gg_dta)[which(colnames(gg_dta) == "object.err.rate")] <-
       "error"
   }
 
+  # Add a sequential tree counter required by the x-axis of plot.gg_error.
   gg_dta$ntree <- seq_len(dim(gg_dta)[1])
 
+  # Optional in-bag training error: re-predict on the full training set using
+  # the stored forest and record the resulting per-tree error trajectory.
   arg_list <- as.list(substitute(list(...)))
   training <- FALSE
   if (!is.null(arg_list$training)) {
@@ -249,7 +256,7 @@ gg_error.rfsrc <- function(object, ...) {
 
 #' @export
 gg_error.randomForest <- function(object, ...) {
-  ## Check that the input obect is of the correct type.
+  ## Check that the input object is of the correct type.
   if (!inherits(object, "randomForest")) {
     stop(
       paste(
@@ -260,10 +267,10 @@ gg_error.randomForest <- function(object, ...) {
   }
 
   if (!is.null(object$mse)) {
-    # For regression
+    # Regression forests store the cumulative OOB mean squared error in $mse.
     gg_dta <- data.frame(object$mse)
 
-    # If there is only one column in the error rate... name it reasonably.
+    # Normalise the auto-generated column name to "error".
     if ("object.mse" %in% colnames(gg_dta)) {
       colnames(gg_dta)[which(colnames(gg_dta) == "object.mse")] <-
         "error"
@@ -277,14 +284,16 @@ gg_error.randomForest <- function(object, ...) {
       training <- arg_list$training
     }
 
+    # Optionally compute and append the per-tree in-bag training error curve.
     if (training) {
       train_curve <- .rf_training_curve(object)
       if (!is.null(train_curve)) {
         gg_dta$train <- train_curve
       }
     }
   } else if (!is.null(object$err.rate)) {
-    # For classification
+    # Classification forests store the cumulative OOB error matrix in
+    # $err.rate (rows = trees, columns = overall + per-class error rates).
     gg_dta <- data.frame(object$err.rate)
 
     gg_dta$ntree <- seq_len(nrow(gg_dta))
 
@@ -1,54 +1,98 @@
 ##=============================================================================
-#' Split partial lots into continuous or categorical datasets
+#' Split partial dependence data into continuous or categorical datasets
+#'
+#' Takes the list returned by
+#' \code{randomForestSRC::plot.variable(partial = TRUE)} and separates the
+#' variables into two data frames: one for continuous predictors and one for
+#' categorical (factor-like) predictors.  \code{cat_limit} controls the split:
+#' more unique x-values than it means continuous; otherwise categorical.
+#'
 #' @param part_dta partial plot data from \code{rfsrc::plot.variable}
 #' @param nvars how many of the partial plot variables to calculate
-#' @param cat_limit Categorical features are build when there are fewer than
-#'  cat_limit unique features.
+#' @param cat_limit Categorical features are built when there are at most
+#'  \code{cat_limit} unique feature values.
 #' @param model a label name applied to all features. Useful when combining
 #'  multiple partial plot objects in figures.
 #'
+#' @return A named list with two elements:
+#'   \describe{
+#'     \item{continuous}{data.frame with columns \code{x}, \code{yhat},
+#'       \code{name} (and optionally \code{model}) for continuous variables}
+#'     \item{categorical}{data.frame with the same columns but with \code{x}
+#'       as a factor, for low-cardinality / categorical variables}
+#'   }
+#'
+#' @seealso \code{\link{gg_partial_rfsrc}} \code{\link{gg_partialpro}}
+#'
+#' @examples
+#' ## Build a small regression forest on the airquality dataset
+#' set.seed(42)
+#' airq <- na.omit(airquality)
+#' rf <- rfsrc(Ozone ~ ., data = airq, ntree = 50)
+#'
+#' ## Compute partial dependence via plot.variable (show.plots = FALSE to
+#' ## suppress the base-graphics output — we only want the data)
+#' pv <- randomForestSRC::plot.variable(rf, partial = TRUE,
+#'                                       show.plots = FALSE)
+#'
+#' ## Split into continuous and categorical data frames
+#' result <- gg_partial(pv)
+#' head(result$continuous)
+#'
+#' ## Label this model for later comparison with a second forest
+#' result_labelled <- gg_partial(pv, model = "airq_model")
+#' unique(result_labelled$continuous$model)
+#'
 #' @export
-gg_partial = function(part_dta,
-                      nvars = NULL,
-                      cat_limit = 10,
-                      model = NULL) {
-  ## Prepare the partial dependencies data for panel plots
+gg_partial <- function(part_dta,
+                       nvars = NULL,
+                       cat_limit = 10,
+                       model = NULL) {
+  ## Default: process all variables returned by plot.variable
   if (is.null(nvars)) {
-    nvars = length(part_dta$plotthis)
+    nvars <- length(part_dta$plotthis)
   }
-  
-  cont_list = list()
-  cat_list = list()
+
+  # Accumulate per-variable data frames before binding
+  cont_list <- list()
+  cat_list <- list()
+
   for (feature in seq(nvars)) {
-    ## Format any continuous features (those with fewer than cat_limit unique values)
-    if (length(unique(part_dta$plotthis[[feature]]$x)) > cat_limit) {
-      plt.df = dplyr::bind_cols(
-        x = part_dta$plotthis[[feature]]$x,
+    x_vals <- part_dta$plotthis[[feature]]$x
+
+    ## ---- Continuous variable: more unique x values than cat_limit -------
+    if (length(unique(x_vals)) > cat_limit) {
+      plt.df <- dplyr::bind_cols(
+        x    = x_vals,
         yhat = part_dta$plotthis[[feature]]$yhat
       )
-      plt.df$name = names(part_dta$plotthis)[[feature]]
-      
+      # Tag each row with the variable name for downstream faceting
+      plt.df$name <- names(part_dta$plotthis)[[feature]]
+
       cont_list[[feature]] <- plt.df
-    } else{
-      ## Categorical features
-      
-      ## Though VarPro works with logical or continuous only. Factors are
-      ## one hot encoded internal to the varPro call.
-      plt.df = dplyr::bind_cols(
-        x = factor(part_dta$plotthis[[feature]]$x),
+
+    } else {
+      ## ---- Categorical variable: few unique x values -------------------
+      ## VarPro works with logical or continuous only; factors are
+      ## one-hot encoded internally in the varPro call.
+      plt.df <- dplyr::bind_cols(
+        x    = factor(x_vals),
         yhat = part_dta$plotthis[[feature]]$yhat
       )
-      plt.df$name = names(part_dta$plotthis)[[feature]]
-      
+      plt.df$name <- names(part_dta$plotthis)[[feature]]
+
       cat_list[[feature]] <- plt.df
     }
   }
-  continuous = dplyr::bind_rows(cont_list)
-  categorical = dplyr::bind_rows(cat_list)
-  
+
+  # Combine per-variable lists into single data frames (NULL entries dropped)
+  continuous  <- dplyr::bind_rows(cont_list)
+  categorical <- dplyr::bind_rows(cat_list)
+
+  ## Optionally attach a model label (useful when overlaying multiple forests)
   if (!is.null(model)) {
     continuous$model <- categorical$model <- model
   }
-  
+
   return(list(continuous = continuous, categorical = categorical))
-}
+}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Subproject commit 9b2abd8d14bd5c4d563cdc460ec2cf9ad4c99645`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{`
	`2`	`+ "snyk.advanced.autoSelectOrganization": true`
	`3`	`+}`