diff --git a/DESCRIPTION b/DESCRIPTION index 03dad963e..0ac3fd222 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: modelbased Title: Estimation of Model-Based Predictions, Contrasts and Means -Version: 0.13.0.9 +Version: 0.13.0.10 Authors@R: c(person(given = "Dominique", family = "Makowski", diff --git a/NEWS.md b/NEWS.md index 899225c04..12f759680 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,13 +2,19 @@ ## Changes +* The `type` argument in `estimate_grouplevel()` gains a `"marginal"` option, + to return marginal group-levels estimates. + * Added an `as.data.frame()` method for *modelbased* objects. * Better formatting of the output for equivalence-tests, when the `equivalence` argument was used. Related docs were added. It is also possible to use `parameters::equivalence_test()` on *modelbased* objects. -* Functions calls are now saved as `call` attribute in *modelbased* objects. +* Function calls are now saved as `call` attribute in *modelbased* objects. + +* More informative warnings and error messages were added to `estimate_contrasts()` + when computing effect sizes. # modelbased 0.13.0 diff --git a/R/estimate_grouplevel.R b/R/estimate_grouplevel.R index 6b505c650..553708720 100644 --- a/R/estimate_grouplevel.R +++ b/R/estimate_grouplevel.R @@ -6,19 +6,32 @@ #' which can be useful to add the random effects to the original data. #' #' @param model A mixed model with random effects. -#' @param type `"random"` or `"total"`. If `"random"` (default), the -#' coefficients correspond to the conditional estimates of the random effects -#' (as they are returned by `lme4::ranef()`). They typically correspond to the -#' deviation of each individual group from their fixed effect (assuming the -#' random effect is also included as a fixed effect). As such, a coefficient -#' close to 0 means that the participants' effect is the same as the -#' population-level effect (in other words, it is "in the norm"). If -#' `"total"`, it will return the sum of the random effect and its +#' @param type String, describing the type of estimates that should be returned. +#' Can be `"random"`, `"total"`, or `"marginal"` (experimental). +#' - If `"random"` (default), the coefficients correspond to the conditional +#' estimates of the random effects (as they are returned by `lme4::ranef()`). +#' They typically correspond to the deviation of each individual group from +#' their fixed effect (assuming the random effect is also included as a fixed +#' effect). As such, a coefficient close to 0 means that the participants' +#' effect is the same as the population-level effect (in other words, it is +#' "in the norm"). +#' - If `"total"`, it will return the sum of the random effect and its #' corresponding fixed effects, which internally relies on the `coef()` method #' (see `?coef.merMod`). Note that `type = "total"` yet does not return #' uncertainty indices (such as SE and CI) for models from *lme4* or #' *glmmTMB*, as the necessary information to compute them is not yet #' available. However, for Bayesian models, it is possible to compute them. +#' - If `"marginal"` (experimental), it returns marginal group-levels +#' estimates. The random intercepts are computed using marginal means (see +#' [estimate_means()]), and the random slopes using marginal effects (see +#' [estimate_slopes()]). This method does not directly extract the parameters +#' estimated by the model, but recomputes them using model predictions. While +#' this is more computationally intensive, one of the benefits include +#' interpretability: the random intercepts correspond to the "mean" value of +#' the outcome for each group, and the random slopes correspond to the direct +#' average "effect" of the predictor for each random group. Note that in this +#' case, the group-level estimates are not technically "intercepts" or model +#' parameters, but marginal average levels and effects. #' @param dispersion,test,diagnostic Arguments passed to #' [parameters::model_parameters()] for Bayesian models. By default, it won't #' return significance or diagnostic indices (as it is not typically very @@ -59,11 +72,9 @@ estimate_grouplevel <- function(model, ...) { #' @rdname estimate_grouplevel #' @export -estimate_grouplevel.default <- function(model, - type = "random", - ...) { +estimate_grouplevel.default <- function(model, type = "random", ...) { # validate argument - type <- insight::validate_argument(type, c("random", "total")) + type <- insight::validate_argument(type, c("random", "total", "marginal")) # sanity check if (is.null(insight::find_random(model))) { @@ -77,23 +88,28 @@ estimate_grouplevel.default <- function(model, ... ) - # get cleaned parameter names with additional information - clean_parameters <- attributes(params)$clean_parameters - - # Re-add info - if (!"Group" %in% names(params) && !is.null(clean_parameters)) { - params$Group <- clean_parameters$Group - } - if (!"Level" %in% names(params) && !is.null(clean_parameters)) { - params$Level <- clean_parameters$Cleaned_Parameter + if (type %in% c("random", "total")) { + # get cleaned parameter names with additional information + clean_parameters <- attributes(params)$clean_parameters + + # Re-add info + if (!"Group" %in% names(params) && !is.null(clean_parameters)) { + params$Group <- clean_parameters$Group + } + if (!"Level" %in% names(params) && !is.null(clean_parameters)) { + params$Level <- clean_parameters$Cleaned_Parameter + } + + # TODO: improve / add new printing that groups by group/level? + random <- as.data.frame(params) + + # Remove columns with only NaNs (as these are probably those of fixed effects) + random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL + } else if (type == "marginal") { + # EXPERIMENTAL + random <- .grouplevel_marginal(model) } - # TODO: improve / add new printing that groups by group/level? - random <- as.data.frame(params) - - # Remove columns with only NaNs (as these are probably those of fixed effects) - random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL - # Clean and Reorganize columns random <- .clean_grouplevel(random) @@ -107,12 +123,14 @@ estimate_grouplevel.default <- function(model, #' @rdname estimate_grouplevel #' @export -estimate_grouplevel.brmsfit <- function(model, - type = "random", - dispersion = TRUE, - test = NULL, - diagnostic = NULL, - ...) { +estimate_grouplevel.brmsfit <- function( + model, + type = "random", + dispersion = TRUE, + test = NULL, + diagnostic = NULL, + ... +) { # validate argument type <- insight::validate_argument(type, c("random", "total")) @@ -136,7 +154,8 @@ estimate_grouplevel.brmsfit <- function(model, # match parameters if (!is.null(clean_parameters)) { - clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] + # fmt: skip + clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] # nolint } # Re-add info @@ -163,19 +182,17 @@ estimate_grouplevel.brmsfit <- function(model, # Filter out non-random effects random <- random[startsWith(random$Parameter, "r_"), ] # Remove Group from Level - random$Level <- sapply( - 1:nrow(random), - function(i) gsub(paste0("^", random$Group[i], "\\."), "", random$Level[i]) - ) + random$Level <- sapply(seq_len(nrow(random)), function(i) { + gsub(paste0("^", random$Group[i], "\\."), "", random$Level[i]) + }) # Find the group name (what follows "r_" and before the first "[" or "__") random$Group <- gsub("^r_(.*?)(\\[.*|__.*)", "\\1", random$Name) # Keep Parameter what's in between [ and ] random$Parameter <- gsub("^r_.*?\\[(.*?)\\].*", "\\1", random$Name) # Remove Level from it - random$Parameter <- sapply( - 1:nrow(random), - function(i) gsub(paste0("^", random$Level[i], "\\,"), "", random$Parameter[i]) - ) + random$Parameter <- sapply(seq_len(nrow(random)), function(i) { + gsub(paste0("^", random$Level[i], "\\,"), "", random$Parameter[i]) + }) # remove temporary name column random$Name <- NULL } @@ -189,12 +206,14 @@ estimate_grouplevel.brmsfit <- function(model, #' @export -estimate_grouplevel.stanreg <- function(model, - type = "random", - dispersion = TRUE, - test = NULL, - diagnostic = NULL, - ...) { +estimate_grouplevel.stanreg <- function( + model, + type = "random", + dispersion = TRUE, + test = NULL, + diagnostic = NULL, + ... +) { # validate argument type <- insight::validate_argument(type, c("random", "total")) @@ -222,14 +241,10 @@ estimate_grouplevel.stanreg <- function(model, ## insight::clean_parameter()) if (!is.null(clean_parameters)) { # fix for rstanarm, which contains a sigma columns - clean_parameters <- clean_parameters[ - clean_parameters$Component != "sigma" & !startsWith(clean_parameters$Parameter, "Sigma["), , - drop = FALSE # nolint - ] - clean_parameters <- clean_parameters[ - clean_parameters$Parameter %in% params$Parameter, , - drop = FALSE - ] + # fmt: skip + clean_parameters <- clean_parameters[clean_parameters$Component != "sigma" & !startsWith(clean_parameters$Parameter, "Sigma["), , drop = FALSE] # nolint + # fmt: skip + clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] # nolint params$Parameter <- insight::trim_ws(sub(":.*", "", clean_parameters$Group)) params$Group <- insight::trim_ws(sub("^[^:]*:", "", clean_parameters$Group)) @@ -302,7 +317,84 @@ estimate_grouplevel.stanreg <- function(model, ), ] } else { - random <- random[order(random$Group, datawizard::coerce_to_numeric(random$Level), random$Parameter), ] # nolint + random <- random[ + order(random$Group, datawizard::coerce_to_numeric(random$Level), random$Parameter), + ] } random } + + +# Experimental ------------------------------------------------------------ + +# Quick tests: +# model <- lme4::lmer(mpg ~ hp + (1 | carb), data = mtcars) +# model <- lme4::lmer(mpg ~ hp + (1 + hp | carb), data = mtcars) +# model <- lme4::lmer(mpg ~ hp + (1 + hp | carb) + (1 | gear), data = mtcars) + +# m1 <- estimate_grouplevel(model, type = "random") +# m2 <- estimate_grouplevel(model, type = "total") +# m3 <- estimate_grouplevel(model, type = "marginal") +# cor.test(m1$Coefficient, m3$Coefficient) # r = 1 +# cor.test(m2$Coefficient, m3$Coefficient) # r = 1 +.grouplevel_marginal <- function(model) { + insight::check_if_installed("lme4") + + out <- list() + + # TODO: currently only takes random effects for main component into account + # we could also look for random effects in zero-inflated, dispersion, etc. + + # Analyze random effect structure + randomgroups <- insight::find_random(model, split_nested = TRUE)$random + # extract random slopes and their related grouping variable + randomslopes <- list() + p <- lme4::ranef(model) + for (g in names(p)) { + s <- names(p[[g]])[names(p[[g]]) != "(Intercept)"] + + # Only pick non-factor random slopes for now + # TODO: correctly deal with the case when the random slope is a factor + pred <- insight::find_predictors(model, effects = "all", flatten = TRUE) + s <- s[s %in% pred] + + if (length(s) > 0) { + randomslopes[[g]] <- s + } + } + + # Extract coefs + for (g in randomgroups) { + intercepts <- estimate_means(model, by = g, include_random = TRUE, estimate = "average") + out[[g]] <- data.frame( + Group = g, + Level = intercepts[[g]], + Parameter = "(Intercept)", + Coefficient = intercepts$Mean, + CI_low = intercepts$CI_low, + CI_high = intercepts$CI_high, + stringsAsFactors = FALSE + ) + + if (g %in% names(randomslopes)) { + for (s in randomslopes[[g]]) { + slopes <- estimate_slopes(model, by = g, trend = s, include_random = TRUE) + out[[paste(g, s, sep = "_")]] <- data.frame( + Group = g, + Level = slopes[[g]], + Parameter = s, + Coefficient = slopes$Slope, + CI_low = slopes$CI_low, + CI_high = slopes$CI_high, + stringsAsFactors = FALSE + ) + } + } + } + params <- do.call(rbind, out) + row.names(params) <- NULL + + # TODO: robustify this function and integrate into the above + + params +} diff --git a/inst/WORDLIST b/inst/WORDLIST index c1057ba81..cc594ce5a 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -114,6 +114,7 @@ https individuals’ intersectional intersectionality +interpretability intra jmr joss diff --git a/man/estimate_grouplevel.Rd b/man/estimate_grouplevel.Rd index aebb2b060..0985c984d 100644 --- a/man/estimate_grouplevel.Rd +++ b/man/estimate_grouplevel.Rd @@ -30,19 +30,34 @@ reshape_grouplevel(x, ...) \item{...}{Other arguments passed to \code{\link[parameters:model_parameters]{parameters::model_parameters()}}.} -\item{type}{\code{"random"} or \code{"total"}. If \code{"random"} (default), the -coefficients correspond to the conditional estimates of the random effects -(as they are returned by \code{lme4::ranef()}). They typically correspond to the -deviation of each individual group from their fixed effect (assuming the -random effect is also included as a fixed effect). As such, a coefficient -close to 0 means that the participants' effect is the same as the -population-level effect (in other words, it is "in the norm"). If -\code{"total"}, it will return the sum of the random effect and its +\item{type}{String, describing the type of estimates that should be returned. +Can be \code{"random"}, \code{"total"}, or \code{"marginal"} (experimental). +\itemize{ +\item If \code{"random"} (default), the coefficients correspond to the conditional +estimates of the random effects (as they are returned by \code{lme4::ranef()}). +They typically correspond to the deviation of each individual group from +their fixed effect (assuming the random effect is also included as a fixed +effect). As such, a coefficient close to 0 means that the participants' +effect is the same as the population-level effect (in other words, it is +"in the norm"). +\item If \code{"total"}, it will return the sum of the random effect and its corresponding fixed effects, which internally relies on the \code{coef()} method (see \code{?coef.merMod}). Note that \code{type = "total"} yet does not return uncertainty indices (such as SE and CI) for models from \emph{lme4} or \emph{glmmTMB}, as the necessary information to compute them is not yet -available. However, for Bayesian models, it is possible to compute them.} +available. However, for Bayesian models, it is possible to compute them. +\item If \code{"marginal"} (experimental), it returns marginal group-levels +estimates. The random intercepts are computed using marginal means (see +\code{\link[=estimate_means]{estimate_means()}}), and the random slopes using marginal effects (see +\code{\link[=estimate_slopes]{estimate_slopes()}}). This method does not directly extract the parameters +estimated by the model, but recomputes them using model predictions. While +this is more computationally intensive, one of the benefits include +interpretability: the random intercepts correspond to the "mean" value of +the outcome for each group, and the random slopes correspond to the direct +average "effect" of the predictor for each random group. Note that in this +case, the group-level estimates are not technically "intercepts" or model +parameters, but marginal average levels and effects. +}} \item{dispersion, test, diagnostic}{Arguments passed to \code{\link[parameters:model_parameters]{parameters::model_parameters()}} for Bayesian models. By default, it won't diff --git a/tests/testthat/test-estimate_grouplevel.R b/tests/testthat/test-estimate_grouplevel.R index baecb7476..dc1ab5f84 100644 --- a/tests/testthat/test-estimate_grouplevel.R +++ b/tests/testthat/test-estimate_grouplevel.R @@ -175,3 +175,148 @@ withr::with_environment( expect_identical(dim(out), c(203L, 5L)) }) ) + +skip_if_not_installed("marginaleffects") + +test_that("estimate_grouplevel type='marginal'", { + skip_on_cran() + skip_if_not_installed("lme4") + data(mtcars) + + model <- lme4::lmer(mpg ~ hp + (1 | carb), data = mtcars) + gl1 <- estimate_grouplevel(model, type = "random") + gl3 <- estimate_grouplevel(model, type = "marginal") + expect_s3_class(gl3, "estimate_grouplevel") + expect_equal(dim(gl3), c(6, 6)) + expect_equal( + colnames(gl3), + c("Group", "Level", "Parameter", "Coefficient", "CI_low", "CI_high") + ) + expect_equal( + as.numeric(cor.test(gl1$Coefficient, gl3$Coefficient)$estimate), + -0.04057546, + tolerance = 1e-4, + ignore_attr = TRUE + ) + + # include random slope + model <- lme4::lmer(mpg ~ hp + (1 + hp | carb), data = mtcars) + gl1 <- estimate_grouplevel(model, type = "random") + gl3 <- estimate_grouplevel(model, type = "marginal") + expect_s3_class(gl3, "estimate_grouplevel") + expect_identical(dim(gl3), c(12L, 6L)) + expect_equal( + colnames(gl3), + c("Group", "Level", "Parameter", "Coefficient", "CI_low", "CI_high") + ) + r <- cor.test( + gl1[gl1$Parameter == "(Intercept)", "Coefficient"], + gl3[gl3$Parameter == "(Intercept)", "Coefficient"] + ) + expect_gt(r$estimate, 0.95) + r <- cor.test( + gl1[gl1$Parameter == "hp", "Coefficient"], + gl3[gl3$Parameter == "hp", "Coefficient"] + ) + expect_gt(r$estimate, 0.99) + + # check with cross classified design + model <- lme4::lmer(mpg ~ hp + (1 + hp | carb) + (1 | gear), data = mtcars) + gl1 <- estimate_grouplevel(model, type = "random") + gl3 <- estimate_grouplevel(model, type = "marginal") + expect_s3_class(gl3, "estimate_grouplevel") + expect_identical(dim(gl3), c(15L, 6L)) + expect_equal( + colnames(gl3), + c("Group", "Level", "Parameter", "Coefficient", "CI_low", "CI_high") + ) + r <- cor.test( + gl1[gl1$Parameter == "(Intercept)" & gl1$Group == "carb", "Coefficient"], + gl3[gl3$Parameter == "(Intercept)" & gl3$Group == "carb", "Coefficient"] + ) + expect_equal(r$estimate, 0.6626235, tolerance = 1e-4, ignore_attr = TRUE) + r <- cor.test( + gl1[gl1$Parameter == "(Intercept)" & gl1$Group == "gear", "Coefficient"], + gl3[gl3$Parameter == "(Intercept)" & gl3$Group == "gear", "Coefficient"] + ) + r <- cor.test( + gl1[gl1$Parameter == "hp", "Coefficient"], + gl3[gl3$Parameter == "hp", "Coefficient"] + ) + expect_gt(r$estimate, 0.99) + + # check with suppressed intercept + model <- lme4::lmer(mpg ~ hp + (0 + hp | carb), data = mtcars) + gl1 <- estimate_grouplevel(model, type = "random") + gl3 <- estimate_grouplevel(model, type = "marginal") + expect_identical(dim(gl3), c(12L, 6L)) + r <- cor.test( + gl1[gl1$Parameter == "hp", "Coefficient"], + gl3[gl3$Parameter == "hp", "Coefficient"] + ) + expect_gt(r$estimate, 0.99) +}) + +test_that("estimate_grouplevel type='marginal' correlations", { + skip_on_cran() + skip_if_not_installed("lme4") + data(mtcars) + + model <- lme4::lmer(mpg ~ hp + (1 + hp | carb) + (1 | gear), data = mtcars) + m1 <- estimate_grouplevel(model, type = "random") + m2 <- estimate_grouplevel(model, type = "total") + m3 <- estimate_grouplevel(model, type = "marginal") + + # merge m1 and m3 to compare + m1_intercept <- m1[m1$Parameter == "(Intercept)", ] + m3_intercept <- m3[m3$Parameter == "(Intercept)", ] + merged_intercepts <- merge(m1_intercept, m3_intercept, by = c("Group", "Level")) + expect_equal( + cor(merged_intercepts$Coefficient.x, merged_intercepts$Coefficient.y), + 0.6493804, + tolerance = 1e-4, + ignore_attr = TRUE + ) + + m1_hp <- m1[m1$Parameter == "hp", ] + m3_hp <- m3[m3$Parameter == "hp", ] + merged_hp <- merge(m1_hp, m3_hp, by = c("Group", "Level")) + expect_gt(cor(merged_hp$Coefficient.x, merged_hp$Coefficient.y), 0.99) + + # merge m2 and m3 to compare + m2_intercept <- m2[m2$Parameter == "(Intercept)", ] + m3_intercept <- m3[m3$Parameter == "(Intercept)", ] + m2_intercept$Level <- as.character(m2_intercept$Level) + m3_intercept$Level <- as.character(m3_intercept$Level) + merged_intercepts <- merge(m2_intercept, m3_intercept, by = c("Group", "Level")) + expect_equal( + cor(merged_intercepts$Coefficient.x, merged_intercepts$Coefficient.y), + 0.6493804, + tolerance = 1e-4, + ignore_attr = TRUE + ) + + m2_hp <- m2[m2$Parameter == "hp", ] + m3_hp <- m3[m3$Parameter == "hp", ] + m2_hp$Level <- as.character(m2_hp$Level) + m3_hp$Level <- as.character(m3_hp$Level) + merged_hp <- merge(m2_hp, m3_hp, by = c("Group", "Level")) + expect_gt(cor(merged_hp$Coefficient.x, merged_hp$Coefficient.y), 0.99) +}) + +test_that("estimate_grouplevel type='marginal' nested design", { + skip_on_cran() + skip_if_not_installed("lme4") + data(sleepstudy, package = "lme4") + set.seed(12345) + sleepstudy$grp <- sample(1:5, size = 180, replace = TRUE) + sleepstudy$subgrp <- NA + for (i in 1:5) { + filter_group <- sleepstudy$grp == i + sleepstudy$subgrp[filter_group] <- sample(1:30, size = sum(filter_group), replace = TRUE) + } + model <- lme4::lmer(Reaction ~ Days + (1 | grp / subgrp) + (1 | Subject), data = sleepstudy) + out <- estimate_grouplevel(model, type = "marginal") + expect_identical(dim(out), c(53L, 6L)) + expect_true(all(unique(out$Group) %in% c("grp", "subgrp", "Subject"))) +})