From 76d176cba0107161329a4ffddf4e1b018a566e5c Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 2 Mar 2026 10:34:42 +0100 Subject: [PATCH 1/2] Support for nestedLogit --- DESCRIPTION | 2 +- NEWS.md | 6 ++ R/estimate_predicted.R | 159 ++++++++++++++++++++++++++--------------- 3 files changed, 108 insertions(+), 59 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 60325b84b..f58b40961 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: modelbased Title: Estimation of Model-Based Predictions, Contrasts and Means -Version: 0.14.0 +Version: 0.14.0.1 Authors@R: c(person(given = "Dominique", family = "Makowski", diff --git a/NEWS.md b/NEWS.md index 9233c81c1..1e9a2eece 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# modelbased (devel) + +## Changes + +* Support for models of class `nestedLogit`. + # modelbased 0.14.0 ## Changes diff --git a/R/estimate_predicted.R b/R/estimate_predicted.R index 92df10c30..64c93f4bf 100644 --- a/R/estimate_predicted.R +++ b/R/estimate_predicted.R @@ -255,15 +255,17 @@ #' estimate_relation(model) #' } #' @export -estimate_expectation <- function(model, - data = NULL, - by = NULL, - predict = "expectation", - ci = 0.95, - transform = NULL, - iterations = NULL, - keep_iterations = FALSE, - ...) { +estimate_expectation <- function( + model, + data = NULL, + by = NULL, + predict = "expectation", + ci = 0.95, + transform = NULL, + iterations = NULL, + keep_iterations = FALSE, + ... +) { .estimate_predicted( model, data = data, @@ -280,15 +282,17 @@ estimate_expectation <- function(model, #' @rdname estimate_expectation #' @export -estimate_link <- function(model, - data = "grid", - by = NULL, - predict = "link", - ci = 0.95, - transform = NULL, - iterations = NULL, - keep_iterations = FALSE, - ...) { +estimate_link <- function( + model, + data = "grid", + by = NULL, + predict = "link", + ci = 0.95, + transform = NULL, + iterations = NULL, + keep_iterations = FALSE, + ... +) { # reset to NULL if only "by" was specified if (missing(data) && !missing(by)) { data <- NULL @@ -309,15 +313,17 @@ estimate_link <- function(model, #' @rdname estimate_expectation #' @export -estimate_prediction <- function(model, - data = NULL, - by = NULL, - predict = "prediction", - ci = 0.95, - transform = NULL, - iterations = NULL, - keep_iterations = FALSE, - ...) { +estimate_prediction <- function( + model, + data = NULL, + by = NULL, + predict = "prediction", + ci = 0.95, + transform = NULL, + iterations = NULL, + keep_iterations = FALSE, + ... +) { .estimate_predicted( model, data = data, @@ -333,15 +339,17 @@ estimate_prediction <- function(model, #' @rdname estimate_expectation #' @export -estimate_relation <- function(model, - data = "grid", - by = NULL, - predict = "expectation", - ci = 0.95, - transform = NULL, - iterations = NULL, - keep_iterations = FALSE, - ...) { +estimate_relation <- function( + model, + data = "grid", + by = NULL, + predict = "expectation", + ci = 0.95, + transform = NULL, + iterations = NULL, + keep_iterations = FALSE, + ... +) { # reset to NULL if only "by" was specified if (missing(data) && !missing(by)) { data <- NULL @@ -364,15 +372,17 @@ estimate_relation <- function(model, # Internal ---------------------------------------------------------------- #' @keywords internal -.estimate_predicted <- function(model, - data = "grid", - by = NULL, - predict = "expectation", - ci = 0.95, - transform = NULL, - iterations = NULL, - keep_iterations = FALSE, - ...) { +.estimate_predicted <- function( + model, + data = "grid", + by = NULL, + predict = "expectation", + ci = 0.95, + transform = NULL, + iterations = NULL, + keep_iterations = FALSE, + ... +) { # return early for htest if (inherits(model, "htest")) { return(insight::get_predicted(model, ...)) @@ -384,7 +394,14 @@ estimate_relation <- function(model, } # keep_iterations cannot be larger than interations - if (!is.null(keep_iterations) && !is.null(iterations) && is.numeric(keep_iterations) && is.numeric(iterations) && keep_iterations > iterations) { # nolint + if ( + !is.null(keep_iterations) && + !is.null(iterations) && + is.numeric(keep_iterations) && + is.numeric(iterations) && + keep_iterations > iterations + ) { + # nolint insight::format_error("`keep_iterations` cannot be larger than `iterations`.") } @@ -450,7 +467,12 @@ estimate_relation <- function(model, data <- model_data } else if (!is.data.frame(data)) { if (is_grid) { - data <- insight::get_datagrid(model, reference = model_data, include_response = is_nullmodel, ...) + data <- insight::get_datagrid( + model, + reference = model_data, + include_response = is_nullmodel, + ... + ) } else { insight::format_error( "The `data` argument must either NULL, \"grid\" or another data frame." @@ -462,7 +484,12 @@ estimate_relation <- function(model, grid_specs <- attributes(data) # Get response for later residuals ------------- - if (!is.null(model_response) && length(model_response) == 1 && model_response %in% names(data)) { # nolint + if ( + !is.null(model_response) && + length(model_response) == 1 && + model_response %in% names(data) + ) { + # nolint response <- data[[model_response]] } else { response <- NULL @@ -492,7 +519,10 @@ estimate_relation <- function(model, ) # for predicting grouplevel random effects, add "allow.new.levels" - if (!is.null(grouplevel_effects) && any(grouplevel_effects %in% grid_specs$at_spec$varname)) { + if ( + !is.null(grouplevel_effects) && + any(grouplevel_effects %in% grid_specs$at_spec$varname) + ) { prediction_args$allow.new.levels <- TRUE dots$allow.new.levels <- NULL } @@ -511,13 +541,22 @@ estimate_relation <- function(model, } # remove response variable from data frame, as this variable is predicted - if (!is.null(model_response) && length(model_response) == 1 && model_response %in% colnames(out)) { # nolint + if ( + !is.null(model_response) && + length(model_response) == 1 && + model_response %in% colnames(out) + ) { + # nolint out[[model_response]] <- NULL } # keep row-column, but make sure it's integer if ("Row" %in% colnames(out)) { - out[["Row"]] <- insight::format_value(out[["Row"]], protect_integers = TRUE) + if (inherits(model, "nestedLogit")) { + out[["Row"]] <- NULL + } else { + out[["Row"]] <- insight::format_value(out[["Row"]], protect_integers = TRUE) + } } # Add residuals @@ -557,17 +596,21 @@ estimate_relation <- function(model, by = grid_specs$at, type = "predictions", model = model, - info = c( - grid_specs, - list(predict = predict), - transform = !is.null(transform) - ) + info = c(grid_specs, list(predict = predict), transform = !is.null(transform)) ) - attributes(out) <- c(attributes(out), grid_specs[!names(grid_specs) %in% names(attributes(out))]) + attributes(out) <- c( + attributes(out), + grid_specs[!names(grid_specs) %in% names(attributes(out))] + ) # Class - class(out) <- c(paste0("estimate_", predict), "estimate_predicted", "see_estimate_predicted", class(out)) + class(out) <- c( + paste0("estimate_", predict), + "estimate_predicted", + "see_estimate_predicted", + class(out) + ) out } From 601e25bb08ed679be95cab6995ccf4e9ac512d02 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 2 Mar 2026 14:23:13 +0100 Subject: [PATCH 2/2] add test --- DESCRIPTION | 4 + R/estimate_predicted.R | 3 + man/estimate_expectation.Rd | 5 +- tests/testthat/test-estimate_predicted.R | 150 +++++++++++++++++------ 4 files changed, 124 insertions(+), 38 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f58b40961..c9d8c7f2c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,6 +55,9 @@ Suggests: bootES, brglm2, brms, + broom, + car, + carData, coda, collapse, correlation, @@ -82,6 +85,7 @@ Suggests: mgcv, mvtnorm, nanoparquet, + nestedLogit, nnet, ordinal, palmerpenguins, diff --git a/R/estimate_predicted.R b/R/estimate_predicted.R index 64c93f4bf..2e677e599 100644 --- a/R/estimate_predicted.R +++ b/R/estimate_predicted.R @@ -200,6 +200,9 @@ #' [insight::get_datagrid()] (used when `data = "grid"`) and #' [insight::get_predicted()]. Furthermore, for count regression models that use #' an offset term, use `offset = ` to fix the offset at a specific value. +#' For models of class `nestedLogit`, use the `submodel` argument to specify +#' the component for which predictions should be returned (see +#' `?insight::get_predicted` for details). #' #' @return A data frame of predicted values and uncertainty intervals, with #' class `"estimate_predicted"`. Methods for [`visualisation_recipe()`][visualisation_recipe.estimate_predicted] diff --git a/man/estimate_expectation.Rd b/man/estimate_expectation.Rd index 1e71c23d8..69fc1514b 100644 --- a/man/estimate_expectation.Rd +++ b/man/estimate_expectation.Rd @@ -109,7 +109,10 @@ to the output. You can reshape them to a long format by running \item{...}{You can add all the additional control arguments from \code{\link[insight:get_datagrid]{insight::get_datagrid()}} (used when \code{data = "grid"}) and \code{\link[insight:get_predicted]{insight::get_predicted()}}. Furthermore, for count regression models that use -an offset term, use \verb{offset = } to fix the offset at a specific value.} +an offset term, use \verb{offset = } to fix the offset at a specific value. +For models of class \code{nestedLogit}, use the \code{submodel} argument to specify +the component for which predictions should be returned (see +\code{?insight::get_predicted} for details).} } \value{ A data frame of predicted values and uncertainty intervals, with diff --git a/tests/testthat/test-estimate_predicted.R b/tests/testthat/test-estimate_predicted.R index d01d802ef..d67950ca2 100644 --- a/tests/testthat/test-estimate_predicted.R +++ b/tests/testthat/test-estimate_predicted.R @@ -33,32 +33,54 @@ test_that("estimate_link", { # LMER4 model <- lme4::lmer(Petal.Length ~ Petal.Width + (1 | Species), data = iris) expect_equal(nrow(estimate_link(model, length = 5, verbose = FALSE)), 5) - expect_equal(nrow(estimate_link(model, include_random = TRUE, preserve_range = FALSE, length = 5)), 15) + expect_equal( + nrow(estimate_link(model, include_random = TRUE, preserve_range = FALSE, length = 5)), + 15 + ) # GLMMTMB - model <- suppressWarnings(glmmTMB::glmmTMB(Petal.Length ~ Petal.Width + (1 | Species), data = iris)) + model <- suppressWarnings(glmmTMB::glmmTMB( + Petal.Length ~ Petal.Width + (1 | Species), + data = iris + )) expect_equal(nrow(estimate_link(model, length = 5, verbose = FALSE)), 5) - expect_equal(nrow(estimate_link(model, include_random = TRUE, preserve_range = FALSE, length = 5)), 15) + expect_equal( + nrow(estimate_link(model, include_random = TRUE, preserve_range = FALSE, length = 5)), + 15 + ) # MGCV model <- mgcv::gam(Petal.Length ~ Petal.Width + s(Sepal.Length), data = iris) expect_equal(dim(estimate_link(model, length = 3)), c(9, 6)) expect_equal(dim(estimate_link(model, include_smooth = FALSE, length = 3)), c(3, 5)) - model <- mgcv::gamm(Petal.Length ~ Petal.Width + s(Sepal.Length), random = list(Species = ~1), data = iris) + model <- mgcv::gamm( + Petal.Length ~ Petal.Width + s(Sepal.Length), + random = list(Species = ~1), + data = iris + ) # GAMM4 - model <- gamm4::gamm4(Petal.Length ~ Petal.Width + s(Sepal.Length), - random = ~ (1 | Species), data = iris + model <- gamm4::gamm4( + Petal.Length ~ Petal.Width + s(Sepal.Length), + random = ~ (1 | Species), + data = iris ) expect_equal(nrow(estimate_link(model, length = 3, verbose = FALSE)), 9) - expect_equal(dim(estimate_link(model, include_smooth = FALSE, length = 3, verbose = FALSE)), c(3, 5)) + expect_equal( + dim(estimate_link(model, include_smooth = FALSE, length = 3, verbose = FALSE)), + c(3, 5) + ) # STAN_GAMM4 skip_if_not(.Platform$OS.type == "windows") - model <- suppressWarnings(rstanarm::stan_gamm4(Petal.Length ~ Petal.Width + s(Sepal.Length), - random = ~ (1 | Species), data = iris, - iter = 100, chains = 2, refresh = 0 + model <- suppressWarnings(rstanarm::stan_gamm4( + Petal.Length ~ Petal.Width + s(Sepal.Length), + random = ~ (1 | Species), + data = iris, + iter = 100, + chains = 2, + refresh = 0 )) expect_equal(nrow(estimate_relation(model, length = 3)), 9) expect_equal(dim(estimate_link(model, include_smooth = FALSE, length = 3)), c(3, 5)) @@ -87,11 +109,7 @@ test_that("estimate_expectation - Bayesian", { iter = 200, chains = 2 )) - estim <- estimate_prediction(model, - data = "grid", - seed = 333, - preserve_range = FALSE - ) + estim <- estimate_prediction(model, data = "grid", seed = 333, preserve_range = FALSE) expect_equal(dim(estim), c(30, 6)) model <- suppressWarnings(rstanarm::stan_glm( @@ -104,21 +122,24 @@ test_that("estimate_expectation - Bayesian", { estim <- estimate_prediction(model) expect_equal(dim(estim), c(32, 7)) - model <- suppressWarnings( - rstanarm::stan_glm( - Sepal.Width ~ Petal.Width, - data = iris, - refresh = 0, - iter = 200, - chains = 2 - ) - ) + model <- suppressWarnings(rstanarm::stan_glm( + Sepal.Width ~ Petal.Width, + data = iris, + refresh = 0, + iter = 200, + chains = 2 + )) estim <- estimate_link(model, keep_iterations = TRUE) draws <- bayestestR::reshape_iterations(estim) expect_equal(c(nrow(draws), ncol(draws)), c(2000, 8)) # Non-sampling algorithms - model <- rstanarm::stan_glm(mpg ~ disp, data = mtcars, algorithm = "meanfield", refresh = 0) + model <- rstanarm::stan_glm( + mpg ~ disp, + data = mtcars, + algorithm = "meanfield", + refresh = 0 + ) estim <- estimate_link(model, keep_iterations = TRUE) expect_equal(dim(estim), c(10, 1005)) }) @@ -142,7 +163,6 @@ test_that("estimate_expectation - Frequentist", { estim <- estimate_link(model, by = "wt") expect_equal(dim(estim), c(10, 6)) - data <- mtcars data$gear <- as.factor(data$gear) @@ -171,7 +191,10 @@ test_that("estimate_expectation - VisMatrix", { vm <- insight::get_datagrid(m, by = c("Petal.Length", "Petal.Width = seq(-3, 3)")) estim <- estimate_relation(vm) expect_identical(dim(estim), c(70L, 6L)) - expect_named(estim, c("Petal.Length", "Petal.Width", "Predicted", "SE", "CI_low", "CI_high")) + expect_named( + estim, + c("Petal.Length", "Petal.Width", "Predicted", "SE", "CI_low", "CI_high") + ) }) @@ -194,10 +217,29 @@ test_that("estimate_expectation - predicting RE works", { expect_equal( out$Predicted, c( - 0.23843, 0.23843, 0.18498, 0.35626, 0.24159, 0.29875, 0.14771, - 0.20692, 0.13234, 0.17874, 0.1171, 0.16318, 0.21392, 0.78665, - 0.0828, 0.1716, 0.11488, 0.11488, 0.35552, 0.45806, 0.19238, - 0.09819, 0.25902 + 0.23843, + 0.23843, + 0.18498, + 0.35626, + 0.24159, + 0.29875, + 0.14771, + 0.20692, + 0.13234, + 0.17874, + 0.1171, + 0.16318, + 0.21392, + 0.78665, + 0.0828, + 0.1716, + 0.11488, + 0.11488, + 0.35552, + 0.45806, + 0.19238, + 0.09819, + 0.25902 ), tolerance = 1e-4 ) @@ -205,10 +247,7 @@ test_that("estimate_expectation - predicting RE works", { out <- estimate_relation(m2, by = "e15relat") expect_equal( out$Predicted, - c( - 12.20636, 12.06311, 11.2071, 11.62862, 11.2327, 10.58387, 11.20853, - 11.12288 - ), + c(12.20636, 12.06311, 11.2071, 11.62862, 11.2327, 10.58387, 11.20853, 11.12288), tolerance = 1e-4 ) }) @@ -237,7 +276,44 @@ test_that("estimate_relation - shape", { estimate_prediction(m, by = "cyl", iterations = 5, keep_iterations = 10), regex = "cannot be larger" ) - expect_silent( - estimate_prediction(m, by = "cyl", iterations = 15, keep_iterations = 10) + expect_silent(estimate_prediction(m, by = "cyl", iterations = 15, keep_iterations = 10)) +}) + + +test_that("estimate_relation - nestedLogit", { + skip_if_not_installed("nestedLogit") + skip_if_not_installed("broom") + skip_if_not_installed("car") + skip_if_not_installed("carData") + + data(Womenlf, package = "carData") + + comparisons <- nestedLogit::logits( + work = nestedLogit::dichotomy("not.work", working = c("parttime", "fulltime")), + full = nestedLogit::dichotomy("parttime", "fulltime") + ) + mnl1 <- nestedLogit::nestedLogit( + partic ~ hincome + children, + dichotomies = comparisons, + data = Womenlf + ) + + out <- estimate_relation(mnl1, by = "children", verbose = FALSE) + expect_identical(dim(out), c(6L, 7L)) + expect_named( + out, + c("children", "hincome", "Response", "Predictions", "SE", "CI_low", "CI_high") + ) + + out <- estimate_relation( + mnl1, + by = "children", + submodel = "dichotomies", + verbose = FALSE + ) + expect_identical(dim(out), c(4L, 7L)) + expect_named( + out, + c("children", "hincome", "Response", "Predictions", "SE", "CI_low", "CI_high") ) })