|
| 1 | +#' Model comparison (deprecated, old version) |
| 2 | +#' |
| 3 | +#' **This function is deprecated**. Please use the new [loo_compare()] function |
| 4 | +#' instead. |
| 5 | +#' |
| 6 | +#' @export |
| 7 | +#' @param ... At least two objects returned by [loo()] (or [waic()]). |
| 8 | +#' @param x A list of at least two objects returned by [loo()] (or |
| 9 | +#' [waic()]). This argument can be used as an alternative to |
| 10 | +#' specifying the objects in `...`. |
| 11 | +#' |
| 12 | +#' @return A vector or matrix with class `'compare.loo'` that has its own |
| 13 | +#' print method. If exactly two objects are provided in `...` or |
| 14 | +#' `x`, then the difference in expected predictive accuracy and the |
| 15 | +#' standard error of the difference are returned. If more than two objects are |
| 16 | +#' provided then a matrix of summary information is returned (see **Details**). |
| 17 | +#' |
| 18 | +#' @details |
| 19 | +#' When comparing two fitted models, we can estimate the difference in their |
| 20 | +#' expected predictive accuracy by the difference in `elpd_loo` or |
| 21 | +#' `elpd_waic` (or multiplied by -2, if desired, to be on the |
| 22 | +#' deviance scale). |
| 23 | +#' |
| 24 | +#' *When that difference, `elpd_diff`, is positive then the expected |
| 25 | +#' predictive accuracy for the second model is higher. A negative |
| 26 | +#' `elpd_diff` favors the first model.* |
| 27 | +#' |
| 28 | +#' When using `compare()` with more than two models, the values in the |
| 29 | +#' `elpd_diff` and `se_diff` columns of the returned matrix are |
| 30 | +#' computed by making pairwise comparisons between each model and the model |
| 31 | +#' with the best ELPD (i.e., the model in the first row). |
| 32 | +#' Although the `elpd_diff` column is equal to the difference in |
| 33 | +#' `elpd_loo`, do not expect the `se_diff` column to be equal to the |
| 34 | +#' the difference in `se_elpd_loo`. |
| 35 | +#' |
| 36 | +#' To compute the standard error of the difference in ELPD we use a |
| 37 | +#' paired estimate to take advantage of the fact that the same set of _N_ |
| 38 | +#' data points was used to fit both models. These calculations should be most |
| 39 | +#' useful when _N_ is large, because then non-normality of the |
| 40 | +#' distribution is not such an issue when estimating the uncertainty in these |
| 41 | +#' sums. These standard errors, for all their flaws, should give a better |
| 42 | +#' sense of uncertainty than what is obtained using the current standard |
| 43 | +#' approach of comparing differences of deviances to a Chi-squared |
| 44 | +#' distribution, a practice derived for Gaussian linear models or |
| 45 | +#' asymptotically, and which only applies to nested models in any case. |
| 46 | +#' |
| 47 | +#' @template loo-and-psis-references |
| 48 | +#' |
| 49 | +#' @examples |
| 50 | +#' \dontrun{ |
| 51 | +#' loo1 <- loo(log_lik1) |
| 52 | +#' loo2 <- loo(log_lik2) |
| 53 | +#' print(compare(loo1, loo2), digits = 3) |
| 54 | +#' print(compare(x = list(loo1, loo2))) |
| 55 | +#' |
| 56 | +#' waic1 <- waic(log_lik1) |
| 57 | +#' waic2 <- waic(log_lik2) |
| 58 | +#' compare(waic1, waic2) |
| 59 | +#' } |
| 60 | +#' |
| 61 | +compare <- function(..., x = list()) { |
| 62 | + .Deprecated("loo_compare") |
| 63 | + dots <- list(...) |
| 64 | + if (length(dots)) { |
| 65 | + if (length(x)) { |
| 66 | + stop("If 'x' is specified then '...' should not be specified.", |
| 67 | + call. = FALSE) |
| 68 | + } |
| 69 | + nms <- as.character(match.call(expand.dots = TRUE))[-1L] |
| 70 | + } else { |
| 71 | + if (!is.list(x) || !length(x)) { |
| 72 | + stop("'x' must be a list.", call. = FALSE) |
| 73 | + } |
| 74 | + dots <- x |
| 75 | + nms <- names(dots) |
| 76 | + if (!length(nms)) { |
| 77 | + nms <- paste0("model", seq_along(dots)) |
| 78 | + } |
| 79 | + } |
| 80 | + |
| 81 | + if (!all(sapply(dots, is.loo))) { |
| 82 | + stop("All inputs should have class 'loo'.") |
| 83 | + } |
| 84 | + if (length(dots) <= 1L) { |
| 85 | + stop("'compare' requires at least two models.") |
| 86 | + } else if (length(dots) == 2L) { |
| 87 | + loo1 <- dots[[1]] |
| 88 | + loo2 <- dots[[2]] |
| 89 | + comp <- compare_two_models(loo1, loo2) |
| 90 | + class(comp) <- c(class(comp), "old_compare.loo") |
| 91 | + return(comp) |
| 92 | + } else { |
| 93 | + Ns <- sapply(dots, function(x) nrow(x$pointwise)) |
| 94 | + if (!all(Ns == Ns[1L])) { |
| 95 | + stop("Not all models have the same number of data points.", call. = FALSE) |
| 96 | + } |
| 97 | + |
| 98 | + x <- sapply(dots, function(x) { |
| 99 | + est <- x$estimates |
| 100 | + setNames(c(est), nm = c(rownames(est), paste0("se_", rownames(est))) ) |
| 101 | + }) |
| 102 | + colnames(x) <- nms |
| 103 | + rnms <- rownames(x) |
| 104 | + comp <- x |
| 105 | + ord <- order(x[grep("^elpd", rnms), ], decreasing = TRUE) |
| 106 | + comp <- t(comp)[ord, ] |
| 107 | + patts <- c("elpd", "p_", "^waic$|^looic$", "^se_waic$|^se_looic$") |
| 108 | + col_ord <- unlist(sapply(patts, function(p) grep(p, colnames(comp))), |
| 109 | + use.names = FALSE) |
| 110 | + comp <- comp[, col_ord] |
| 111 | + |
| 112 | + # compute elpd_diff and se_elpd_diff relative to best model |
| 113 | + rnms <- rownames(comp) |
| 114 | + diffs <- mapply(elpd_diffs, dots[ord[1]], dots[ord]) |
| 115 | + elpd_diff <- apply(diffs, 2, sum) |
| 116 | + se_diff <- apply(diffs, 2, se_elpd_diff) |
| 117 | + comp <- cbind(elpd_diff = elpd_diff, se_diff = se_diff, comp) |
| 118 | + rownames(comp) <- rnms |
| 119 | + class(comp) <- c("compare.loo", class(comp), "old_compare.loo") |
| 120 | + comp |
| 121 | + } |
| 122 | +} |
| 123 | + |
| 124 | + |
| 125 | + |
| 126 | +# internal ---------------------------------------------------------------- |
| 127 | +compare_two_models <- function(loo_a, loo_b, return = c("elpd_diff", "se"), check_dims = TRUE) { |
| 128 | + if (check_dims) { |
| 129 | + if (dim(loo_a$pointwise)[1] != dim(loo_b$pointwise)[1]) { |
| 130 | + stop(paste("Models don't have the same number of data points.", |
| 131 | + "\nFound N_1 =", dim(loo_a$pointwise)[1], "and N_2 =", dim(loo_b$pointwise)[1]), call. = FALSE) |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + diffs <- elpd_diffs(loo_a, loo_b) |
| 136 | + comp <- c(elpd_diff = sum(diffs), se = se_elpd_diff(diffs)) |
| 137 | + structure(comp, class = "compare.loo") |
| 138 | +} |
0 commit comments