From d877053eecb820d3b73a3e5449e53f8ce9715fcc Mon Sep 17 00:00:00 2001
From: Aki Vehtari <Aki.Vehtari@aalto.fi>
Date: Sun, 29 Mar 2026 21:30:56 +0300
Subject: [PATCH 1/3] add proj_epred

---
 R/methods.R                      |  99 ++++++++++---
 tests/testthat/helpers/testers.R |  44 ++++++
 tests/testthat/setup.R           |   6 +
 tests/testthat/test_proj_epred.R | 242 +++++++++++++++++++++++++++++++
 4 files changed, 368 insertions(+), 23 deletions(-)
 create mode 100644 tests/testthat/test_proj_epred.R

diff --git a/R/methods.R b/R/methods.R
index c42f99d6d..57a810ddb 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -2,19 +2,39 @@
 
 #' Predictions from a submodel (after projection)
 #'
-#' After the projection of the reference model onto a submodel, the linear
-#' predictors (for the original or a new dataset) based on that submodel can be
-#' calculated by [proj_linpred()]. These linear predictors can also be
-#' transformed to response scale and averaged across the projected parameter
-#' draws. Furthermore, [proj_linpred()] returns the corresponding log predictive
-#' density values if the (original or new) dataset contains response values. The
-#' [proj_predict()] function draws from the predictive distributions (there is
-#' one such distribution for each observation from the original or new dataset)
-#' of the submodel that the reference model has been projected onto. If the
-#' projection has not been performed yet, both functions call [project()]
-#' internally to perform the projection. Both functions can also handle multiple
-#' submodels at once (for `object`s of class `vsel` or `object`s returned by a
-#' [project()] call to an object of class `vsel`; see [project()]).
+#' The [proj_predict()] function draws from the projected posterior
+#' predictive distribution of the submodel that the reference model
+#' has been projected onto. By definition, these draws have higher
+#' variability than draws of the expected value of the posterior
+#' predictive distribution computed by [proj_epred()]. This is because
+#' the aleatoric uncertainty from the data model is incorporated in
+#' [proj_predict()].
+#'
+#' The [proj_epred()] function draws from the distribution of the
+#' expected value of the projected posterior predictive distribution.
+#' By definition, these predictions have smaller variability than the
+#' projected posterior predictions performed by [proj_predict()]. This
+#' is because only the epistemic uncertainty in the expected value of
+#' the projected posterior predictive distribution is incorporated in
+#' the draws, while the aleatoric uncertainty from the data model is
+#' not included. However, the estimated means of both methods averaged
+#' across draws should be very similar.
+#' 
+#' The [proj_linpred()] function draws from the projected posterior of the
+#' linear predictors, that is, draws before applying any link functions
+#' or other transformations. These linear predictors can also be
+#' transformed to response scale with argument `transform = TRUE`, which
+#' produces draws equivalent to draws produced by [proj_epred()].
+#' Furthermore, [proj_linpred()] returns the corresponding log predictive
+#' density values if the (original or new) dataset contains response values.
+#'
+#' All these predictions can be performed for the data used to fit the
+#' reference model or for new data. If the projection has not been
+#' performed yet, all three functions call [project()] internally to
+#' perform the projection. All three functions can also handle
+#' multiple submodels at once (for `object`s of class `vsel` or
+#' `object`s returned by a [project()] call to an object of class
+#' `vsel`; see [project()]).
 #'
 #' @name pred-projection
 #'
@@ -26,13 +46,14 @@
 #'   for only those elements (submodels) with a number of predictor terms in
 #'   `filter_nterms`. Therefore, needs to be a numeric vector or `NULL`. If
 #'   `NULL`, use all submodels.
-#' @param transform For [proj_linpred()] only. A single logical value indicating
+#' @param transform For [proj_linpred()] only (not applicable for [proj_epred()]
+#'   which always uses `transform = TRUE` internally). A single logical value indicating
 #'   whether the linear predictor should be transformed to response scale using
 #'   the inverse-link function (`TRUE`) or not (`FALSE`). In case of the latent
 #'   projection, argument `transform` is similar in spirit to argument
 #'   `resp_oscale` from other functions and affects the scale of both output
 #'   elements `pred` and `lpd` (see sections "Details" and "Value" below).
-#' @param integrated For [proj_linpred()] only. A single logical value
+#' @param integrated For [proj_linpred()] and [proj_epred()] only. A single logical value
 #'   indicating whether the output should be averaged across the projected
 #'   posterior draws (`TRUE`) or not (`FALSE`).
 #' @param nresample_clusters For [proj_predict()] with clustered projection (and
@@ -42,8 +63,8 @@
 #'   gives the number of draws (*with* replacement) from the set of clustered
 #'   posterior draws after projection (with this set being determined by
 #'   argument `nclusters` of [project()]).
-#' @param allow_nonconst_wdraws_prj Only relevant for [proj_linpred()] and only
-#'   if `integrated` is `FALSE`. A single logical value indicating whether to
+#' @param allow_nonconst_wdraws_prj Only relevant for [proj_linpred()] and
+#'   [proj_epred()] and only if `integrated` is `FALSE`. A single logical value indicating whether to
 #'   allow projected draws with different (i.e., nonconstant) weights (`TRUE`)
 #'   or not (`FALSE`). If `return_draws_matrix` is `TRUE`,
 #'   `allow_nonconst_wdraws_prj` is internally set to `TRUE` as well.
@@ -53,16 +74,18 @@
 #'   matrices).
 #' @param return_draws_matrix A single logical value indicating whether to
 #'   return an object (in case of [proj_predict()]) or objects (in case of
-#'   [proj_linpred()]) of class `draws_matrix` (see
-#'   [posterior::draws_matrix()]). In case of [proj_linpred()] and projected
+#'   [proj_linpred()] and [proj_epred()]) of class `draws_matrix` (see
+#'   [posterior::draws_matrix()]). In case of [proj_linpred()] or
+#'   [proj_epred()] and projected
 #'   draws with nonconstant weights (as well as `integrated` being `FALSE`),
 #'   [posterior::weight_draws()] is applied internally.
 #' @param .seed Pseudorandom number generation (PRNG) seed by which the same
 #'   results can be obtained again if needed. Passed to argument `seed` of
 #'   [set.seed()], but can also be `NA` to not call [set.seed()] at all. If not
 #'   `NA`, then the PRNG state is reset (to the state before calling
-#'   [proj_linpred()] or [proj_predict()]) upon exiting [proj_linpred()] or
-#'   [proj_predict()]. Here, `.seed` is used for drawing new group-level effects
+#'   [proj_linpred()], [proj_epred()], or [proj_predict()]) upon exiting
+#'   [proj_linpred()], [proj_epred()], or [proj_predict()]. Here, `.seed` is
+#'   used for drawing new group-level effects
 #'   in case of a multilevel submodel (however, not yet in case of a GAMM) and
 #'   for drawing from the predictive distributions of the submodel(s) in case of
 #'   [proj_predict()]. If a clustered projection was performed, then in
@@ -140,6 +163,12 @@
 #'       `return_draws_matrix`, `allow_nonconst_wdraws_prj`, and `integrated`
 #'       are all `FALSE`, then projected draws with nonconstant weights cause an
 #'       error.)
+#'   * [proj_epred()] is a wrapper around [proj_linpred()] with `transform =
+#'   TRUE` and returns only the draws of the expected value of the projected
+#'   posterior predictive distribution on the response scale (i.e., the `pred`
+#'   element of the `list` returned by [proj_linpred()], without the `lpd`
+#'   element). The structure of the returned object is the same as that of the
+#'   `pred` element described for [proj_linpred()] above.
 #'   * [proj_predict()] returns an \eqn{S_{\mathrm{prj}} \times N}{S_prj x N}
 #'   matrix of predictions where \eqn{S_{\mathrm{prj}}}{S_prj} denotes
 #'   `nresample_clusters` in case of clustered projection (or, more generally,
@@ -179,14 +208,15 @@
 #' # Predictions (at the training points) from the submodel onto which the
 #' # reference model was projected:
 #' prjl <- proj_linpred(prj)
+#' prje <- proj_epred(prj)
 #' prjp <- proj_predict(prj, .seed = 7364)
 #'
 NULL
 
 # Function definitions ----------------------------------------------------
 
-## The 'helper' for proj_linpred and proj_predict, ie. does all the
-## functionality that is common to them. It essentially checks all the arguments
+## The 'helper' for proj_linpred, proj_epred, and proj_predict, ie. does all
+## the functionality that is common to them. It essentially checks all the arguments
 ## and sets them to their respective defaults and then loops over the
 ## projections. For each projection, it evaluates the fun-function, which
 ## calculates the linear predictor if called from proj_linpred and samples from
@@ -506,6 +536,29 @@ proj_predict <- function(object, newdata = NULL, offsetnew = NULL,
   )
 }
 
+#' @rdname pred-projection
+#' @export
+proj_epred <- function(object, newdata = NULL, offsetnew = NULL,
+                       weightsnew = NULL, filter_nterms = NULL,
+                       integrated = FALSE,
+                       allow_nonconst_wdraws_prj = return_draws_matrix,
+                       return_draws_matrix = FALSE, .seed = NA, ...) {
+  out <- proj_linpred(
+    object = object, newdata = newdata,
+    offsetnew = offsetnew, weightsnew = weightsnew,
+    filter_nterms = filter_nterms, transform = TRUE,
+    integrated = integrated,
+    allow_nonconst_wdraws_prj = allow_nonconst_wdraws_prj,
+    return_draws_matrix = return_draws_matrix,
+    .seed = .seed, ...
+  )
+  if (is.list(out) && "pred" %in% names(out)) {
+    return(out$pred)
+  }
+  ## Multiple submodels: each element is a list with `pred` and `lpd`.
+  return(lapply(out, "[[", "pred"))
+}
+
 ## function applied to each projected submodel in case of proj_predict()
 proj_predict_aux <- function(proj, newdata, offsetnew, weightsnew,
                              nresample_clusters = 1000, resp_oscale = TRUE,
diff --git a/tests/testthat/helpers/testers.R b/tests/testthat/helpers/testers.R
index f8ec6b4c7..5c82c55be 100644
--- a/tests/testthat/helpers/testers.R
+++ b/tests/testthat/helpers/testers.R
@@ -1881,6 +1881,50 @@ pl_tester <- function(pl,
   return(invisible(TRUE))
 }
 
+# A helper function for testing the structure of an object returned by
+# proj_epred().
+#
+# @param pe An object resulting from a call to proj_epred().
+# @param len_expected The number of `projection` objects used for `pe`.
+# @param nprjdraws_expected The expected number of projected draws in `pe`.
+# @param wdraws_prj_expected The expected value for the `wdraws_prj` attribute
+#   of `pe`.
+# @param nobsv_expected The expected number of observations in `pe`.
+# @param ncats_nlats_expected A list of length `len_expected`. Each element is
+#   either an empty integer (for the traditional or latent projection without
+#   categories) or a single integer giving the expected number of response
+#   categories (or latent terms).
+# @param info_str A single character string giving information to be printed in
+#   case of failure.
+#
+# @return `TRUE` (invisible).
+pe_tester <- function(pe,
+                      len_expected = 1,
+                      nprjdraws_expected = nclusters_pred_tst,
+                      wdraws_prj_expected = NULL,
+                      nobsv_expected = nobsv,
+                      ncats_nlats_expected = replicate(len_expected,
+                                                       integer(),
+                                                       simplify = FALSE),
+                      info_str) {
+  if (len_expected == 1) {
+    pe <- list(pe)
+  } else {
+    expect_type(pe, "list")
+    expect_length(pe, len_expected)
+  }
+  for (j in seq_along(pe)) {
+    expect_identical(
+      dim(pe[[!!j]]),
+      c(nprjdraws_expected, nobsv_expected, ncats_nlats_expected[[!!j]]),
+      info = info_str
+    )
+    expect_identical(attr(pe[[!!j]], "wdraws_prj"), wdraws_prj_expected,
+                     info = info_str)
+  }
+  return(invisible(TRUE))
+}
+
 # A helper function for testing the structure of an object returned by
 # proj_predict().
 #
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 7e32b2281..56c9ee109 100755
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -1582,6 +1582,8 @@ if (run_cvvs) {
 if (run_prj) {
   pls <- lapply(prjs, proj_linpred, allow_nonconst_wdraws_prj = TRUE,
                 .seed = seed2_tst)
+  pes <- lapply(prjs, proj_epred, allow_nonconst_wdraws_prj = TRUE,
+                .seed = seed2_tst)
   pps <- lapply(prjs, proj_predict, .seed = seed2_tst)
 }
 
@@ -1592,6 +1594,8 @@ if (run_prj) {
 if (run_vs) {
   pls_vs <- lapply(prjs_vs, proj_linpred, allow_nonconst_wdraws_prj = TRUE,
                    .seed = seed2_tst)
+  pes_vs <- lapply(prjs_vs, proj_epred, allow_nonconst_wdraws_prj = TRUE,
+                   .seed = seed2_tst)
   pps_vs <- lapply(prjs_vs, proj_predict, .seed = seed2_tst)
 }
 
@@ -1600,6 +1604,8 @@ if (run_vs) {
 if (run_cvvs) {
   pls_cvvs <- lapply(prjs_cvvs, proj_linpred, allow_nonconst_wdraws_prj = TRUE,
                      .seed = seed2_tst)
+  pes_cvvs <- lapply(prjs_cvvs, proj_epred, allow_nonconst_wdraws_prj = TRUE,
+                     .seed = seed2_tst)
   pps_cvvs <- lapply(prjs_cvvs, proj_predict, .seed = seed2_tst)
 }
 
diff --git a/tests/testthat/test_proj_epred.R b/tests/testthat/test_proj_epred.R
new file mode 100644
index 000000000..eb77be9ab
--- /dev/null
+++ b/tests/testthat/test_proj_epred.R
@@ -0,0 +1,242 @@
+# proj_epred() -------------------------------------------------------------
+
+context("proj_epred()")
+
+## object ------------------------------------------------------------------
+
+test_that("pe: `object` of class `projection` works", {
+  skip_if_not(run_prj)
+  for (tstsetup in names(prjs)) {
+    if (!is.null(refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats)) {
+      ncats_nlats_expected_crr <- length(
+        refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats
+      )
+    } else {
+      ncats_nlats_expected_crr <- integer()
+    }
+    ndr_ncl <- ndr_ncl_dtls(args_prj[[tstsetup]])
+    if (!has_const_wdr_prj(prjs[[tstsetup]])) {
+      wdr_crr <- prjs[[tstsetup]][["wdraws_prj"]]
+    } else {
+      wdr_crr <- NULL
+    }
+    pe_tester(pes[[tstsetup]],
+              nprjdraws_expected = ndr_ncl$nprjdraws,
+              wdraws_prj_expected = wdr_crr,
+              ncats_nlats_expected = list(ncats_nlats_expected_crr),
+              info_str = tstsetup)
+  }
+})
+
+test_that(paste(
+  "pe: `object` of (informal) class `proj_list` (based on varsel()) works"
+), {
+  skip_if_not(run_vs)
+  for (tstsetup in names(prjs_vs)) {
+    tstsetup_vs <- args_prj_vs[[tstsetup]]$tstsetup_vsel
+    nterms_crr <- args_prj_vs[[tstsetup]]$nterms
+    if (is.null(nterms_crr)) {
+      nterms_crr <- suggest_size(vss[[tstsetup_vs]], warnings = FALSE)
+    }
+    if (!is.null(
+      refmods[[args_prj_vs[[tstsetup]]$tstsetup_ref]]$family$cats
+    )) {
+      ncats_nlats_expected_crr <- length(
+        refmods[[args_prj_vs[[tstsetup]]$tstsetup_ref]]$family$cats
+      )
+    } else {
+      ncats_nlats_expected_crr <- integer()
+    }
+    ndr_ncl <- ndr_ncl_dtls(args_prj_vs[[tstsetup]])
+    if (!has_const_wdr_prj(prjs_vs[[tstsetup]])) {
+      if (length(nterms_crr) > 1) {
+        wdr_crr <- drop(unique(do.call(rbind, lapply(prjs_vs[[tstsetup]], "[[",
+                                                     "wdraws_prj"))))
+      } else {
+        wdr_crr <- prjs_vs[[tstsetup]][["wdraws_prj"]]
+      }
+    } else {
+      wdr_crr <- NULL
+    }
+    pe_tester(pes_vs[[tstsetup]],
+              len_expected = length(nterms_crr),
+              nprjdraws_expected = ndr_ncl$nprjdraws,
+              wdraws_prj_expected = wdr_crr,
+              ncats_nlats_expected = replicate(length(nterms_crr),
+                                               ncats_nlats_expected_crr,
+                                               simplify = FALSE),
+              info_str = tstsetup)
+  }
+})
+
+test_that(paste(
+  "pe: `object` of (informal) class `proj_list` (based on cv_varsel()) works"
+), {
+  skip_if_not(run_cvvs)
+  for (tstsetup in names(prjs_cvvs)) {
+    tstsetup_cvvs <- args_prj_cvvs[[tstsetup]]$tstsetup_vsel
+    nterms_crr <- args_prj_cvvs[[tstsetup]]$nterms
+    if (is.null(nterms_crr)) {
+      nterms_crr <- suggest_size(cvvss[[tstsetup_cvvs]], warnings = FALSE)
+    }
+    if (!is.null(
+      refmods[[args_prj_cvvs[[tstsetup]]$tstsetup_ref]]$family$cats
+    )) {
+      ncats_nlats_expected_crr <- length(
+        refmods[[args_prj_cvvs[[tstsetup]]$tstsetup_ref]]$family$cats
+      )
+    } else {
+      ncats_nlats_expected_crr <- integer()
+    }
+    ndr_ncl <- ndr_ncl_dtls(args_prj_cvvs[[tstsetup]])
+    if (!has_const_wdr_prj(prjs_cvvs[[tstsetup]])) {
+      if (length(nterms_crr) > 1) {
+        wdr_crr <- drop(unique(do.call(rbind, lapply(prjs_cvvs[[tstsetup]],
+                                                     "[[", "wdraws_prj"))))
+      } else {
+        wdr_crr <- prjs_cvvs[[tstsetup]][["wdraws_prj"]]
+      }
+    } else {
+      wdr_crr <- NULL
+    }
+    pe_tester(pes_cvvs[[tstsetup]],
+              len_expected = length(nterms_crr),
+              nprjdraws_expected = ndr_ncl$nprjdraws,
+              wdraws_prj_expected = wdr_crr,
+              ncats_nlats_expected = replicate(length(nterms_crr),
+                                               ncats_nlats_expected_crr,
+                                               simplify = FALSE),
+              info_str = tstsetup)
+  }
+})
+
+## Equivalence with proj_linpred(transform = TRUE) -------------------------
+
+test_that("pe: equivalent to proj_linpred(transform = TRUE)$pred", {
+  skip_if_not(run_prj)
+  for (tstsetup in names(prjs)) {
+    ndr_ncl <- ndr_ncl_dtls(args_prj[[tstsetup]])
+    pl_true <- proj_linpred(prjs[[tstsetup]], transform = TRUE,
+                            allow_nonconst_wdraws_prj = ndr_ncl$clust_used_gt1,
+                            .seed = seed2_tst)
+    pe_crr <- proj_epred(prjs[[tstsetup]],
+                         allow_nonconst_wdraws_prj = ndr_ncl$clust_used_gt1,
+                         .seed = seed2_tst)
+    expect_equal(pe_crr, pl_true$pred, info = tstsetup)
+  }
+})
+
+## newdata -----------------------------------------------------------------
+
+test_that("pe: `newdata` works", {
+  skip_if_not(run_prj)
+  for (tstsetup in names(prjs)) {
+    ndr_ncl <- ndr_ncl_dtls(args_prj[[tstsetup]])
+    if (!has_const_wdr_prj(prjs[[tstsetup]])) {
+      wdr_crr <- prjs[[tstsetup]][["wdraws_prj"]]
+    } else {
+      wdr_crr <- NULL
+    }
+    dat_crr <- get_dat(tstsetup)
+    for (nobsv_crr in nobsv_tst) {
+      if (args_prj[[tstsetup]]$mod_nm == "gamm") {
+        # TODO (GAMMs): Fix this.
+        next
+      }
+      if (!is.null(
+        refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats
+      )) {
+        ncats_nlats_expected_crr <- length(
+          refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats
+        )
+      } else {
+        ncats_nlats_expected_crr <- integer()
+      }
+      if (grepl("\\.with_wobs|\\.binom", tstsetup)) {
+        wobs_crr <- head(prjs[[tstsetup]]$refmodel$wobs, nobsv_crr)
+      } else {
+        wobs_crr <- NULL
+      }
+      if (grepl("\\.with_offs", tstsetup)) {
+        offs_crr <- head(prjs[[tstsetup]]$refmodel$offset, nobsv_crr)
+      } else {
+        offs_crr <- NULL
+      }
+      expect_warning(
+        pe_crr <- proj_epred(
+          prjs[[tstsetup]],
+          newdata = head(dat_crr, nobsv_crr),
+          weightsnew = wobs_crr,
+          offsetnew = offs_crr,
+          allow_nonconst_wdraws_prj = ndr_ncl$clust_used_gt1,
+          .seed = seed2_tst
+        ),
+        get_warn_wrhs_orhs(tstsetup, weightsnew = wobs_crr,
+                           offsetnew = offs_crr),
+        info = tstsetup
+      )
+      pe_tester(pe_crr,
+                nprjdraws_expected = ndr_ncl$nprjdraws,
+                wdraws_prj_expected = wdr_crr,
+                nobsv_expected = nobsv_crr,
+                ncats_nlats_expected = list(ncats_nlats_expected_crr),
+                info_str = paste(tstsetup, nobsv_crr, sep = "__"))
+    }
+  }
+})
+
+## integrated --------------------------------------------------------------
+
+test_that("pe: `integrated` works", {
+  skip_if_not(run_prj)
+  for (tstsetup in names(prjs)) {
+    ndr_ncl <- ndr_ncl_dtls(args_prj[[tstsetup]])
+    if (!is.null(
+      refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats
+    )) {
+      ncats_nlats_expected_crr <- length(
+        refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats
+      )
+    } else {
+      ncats_nlats_expected_crr <- integer()
+    }
+    pe_intgr <- proj_epred(prjs[[tstsetup]], integrated = TRUE,
+                           .seed = seed2_tst)
+    pe_tester(pe_intgr,
+              nprjdraws_expected = 1L,
+              ncats_nlats_expected = list(ncats_nlats_expected_crr),
+              info_str = paste(tstsetup, "integrated", sep = "__"))
+  }
+})
+
+## return_draws_matrix -----------------------------------------------------
+
+test_that(paste(
+  "pe: `return_draws_matrix` causes a conversion of the output type"
+), {
+  skip_if_not(run_prj)
+  skip_if_not_installed("posterior")
+  for (tstsetup in names(prjs)) {
+    ndr_ncl <- ndr_ncl_dtls(args_prj[[tstsetup]])
+    pe_orig <- pes[[tstsetup]]
+    pe_dr <- proj_epred(prjs[[tstsetup]],
+                        return_draws_matrix = TRUE,
+                        .seed = seed2_tst)
+    if (args_prj[[tstsetup]]$prj_nm == "augdat" ||
+        (args_prj[[tstsetup]]$prj_nm == "latent" && !is.null(
+          refmods[[args_prj[[tstsetup]]$tstsetup_ref]]$family$cats
+        ))) {
+      pe_orig_flat <- do.call(rbind, apply(pe_orig, 1, as.vector,
+                                           simplify = FALSE))
+    } else {
+      pe_orig_flat <- pe_orig
+    }
+    pe_dr_repl <- posterior::as_draws_matrix(pe_orig_flat)
+    if (!has_const_wdr_prj(prjs[[tstsetup]])) {
+      pe_dr_repl <- posterior::weight_draws(
+        pe_dr_repl, weights = prjs[[tstsetup]][["wdraws_prj"]]
+      )
+    }
+    expect_equal(pe_dr, pe_dr_repl, info = tstsetup)
+  }
+})

From b9c356371b3066413fb7ec8b45ea0a2edcf0b972 Mon Sep 17 00:00:00 2001
From: fweber144 <fweber144@protonmail.com>
Date: Tue, 31 Mar 2026 21:27:27 +0200
Subject: [PATCH 2/3] re-document

---
 NAMESPACE              |  1 +
 man/pred-projection.Rd | 85 +++++++++++++++++++++++++++++++-----------
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 3ae108cc2..68d7659ed 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -73,6 +73,7 @@ export(get_refmodel)
 export(init_refmodel)
 export(performances)
 export(predictor_terms)
+export(proj_epred)
 export(proj_linpred)
 export(proj_predict)
 export(project)
diff --git a/man/pred-projection.Rd b/man/pred-projection.Rd
index 54656715f..4a189e68b 100644
--- a/man/pred-projection.Rd
+++ b/man/pred-projection.Rd
@@ -4,6 +4,7 @@
 \alias{pred-projection}
 \alias{proj_linpred}
 \alias{proj_predict}
+\alias{proj_epred}
 \title{Predictions from a submodel (after projection)}
 \usage{
 proj_linpred(
@@ -32,6 +33,19 @@ proj_predict(
   resp_oscale = TRUE,
   ...
 )
+
+proj_epred(
+  object,
+  newdata = NULL,
+  offsetnew = NULL,
+  weightsnew = NULL,
+  filter_nterms = NULL,
+  integrated = FALSE,
+  allow_nonconst_wdraws_prj = return_draws_matrix,
+  return_draws_matrix = FALSE,
+  .seed = NA,
+  ...
+)
 }
 \arguments{
 \item{object}{An object returned by \code{\link[=project]{project()}} or an object that can be
@@ -57,19 +71,20 @@ for only those elements (submodels) with a number of predictor terms in
 \code{filter_nterms}. Therefore, needs to be a numeric vector or \code{NULL}. If
 \code{NULL}, use all submodels.}
 
-\item{transform}{For \code{\link[=proj_linpred]{proj_linpred()}} only. A single logical value indicating
+\item{transform}{For \code{\link[=proj_linpred]{proj_linpred()}} only (not applicable for \code{\link[=proj_epred]{proj_epred()}}
+which always uses \code{transform = TRUE} internally). A single logical value indicating
 whether the linear predictor should be transformed to response scale using
 the inverse-link function (\code{TRUE}) or not (\code{FALSE}). In case of the latent
 projection, argument \code{transform} is similar in spirit to argument
 \code{resp_oscale} from other functions and affects the scale of both output
 elements \code{pred} and \code{lpd} (see sections "Details" and "Value" below).}
 
-\item{integrated}{For \code{\link[=proj_linpred]{proj_linpred()}} only. A single logical value
+\item{integrated}{For \code{\link[=proj_linpred]{proj_linpred()}} and \code{\link[=proj_epred]{proj_epred()}} only. A single logical value
 indicating whether the output should be averaged across the projected
 posterior draws (\code{TRUE}) or not (\code{FALSE}).}
 
-\item{allow_nonconst_wdraws_prj}{Only relevant for \code{\link[=proj_linpred]{proj_linpred()}} and only
-if \code{integrated} is \code{FALSE}. A single logical value indicating whether to
+\item{allow_nonconst_wdraws_prj}{Only relevant for \code{\link[=proj_linpred]{proj_linpred()}} and
+\code{\link[=proj_epred]{proj_epred()}} and only if \code{integrated} is \code{FALSE}. A single logical value indicating whether to
 allow projected draws with different (i.e., nonconstant) weights (\code{TRUE})
 or not (\code{FALSE}). If \code{return_draws_matrix} is \code{TRUE},
 \code{allow_nonconst_wdraws_prj} is internally set to \code{TRUE} as well.
@@ -80,8 +95,9 @@ matrices).}
 
 \item{return_draws_matrix}{A single logical value indicating whether to
 return an object (in case of \code{\link[=proj_predict]{proj_predict()}}) or objects (in case of
-\code{\link[=proj_linpred]{proj_linpred()}}) of class \code{draws_matrix} (see
-\code{\link[posterior:draws_matrix]{posterior::draws_matrix()}}). In case of \code{\link[=proj_linpred]{proj_linpred()}} and projected
+\code{\link[=proj_linpred]{proj_linpred()}} and \code{\link[=proj_epred]{proj_epred()}}) of class \code{draws_matrix} (see
+\code{\link[posterior:draws_matrix]{posterior::draws_matrix()}}). In case of \code{\link[=proj_linpred]{proj_linpred()}} or
+\code{\link[=proj_epred]{proj_epred()}} and projected
 draws with nonconstant weights (as well as \code{integrated} being \code{FALSE}),
 \code{\link[posterior:weight_draws]{posterior::weight_draws()}} is applied internally.}
 
@@ -89,8 +105,9 @@ draws with nonconstant weights (as well as \code{integrated} being \code{FALSE})
 results can be obtained again if needed. Passed to argument \code{seed} of
 \code{\link[=set.seed]{set.seed()}}, but can also be \code{NA} to not call \code{\link[=set.seed]{set.seed()}} at all. If not
 \code{NA}, then the PRNG state is reset (to the state before calling
-\code{\link[=proj_linpred]{proj_linpred()}} or \code{\link[=proj_predict]{proj_predict()}}) upon exiting \code{\link[=proj_linpred]{proj_linpred()}} or
-\code{\link[=proj_predict]{proj_predict()}}. Here, \code{.seed} is used for drawing new group-level effects
+\code{\link[=proj_linpred]{proj_linpred()}}, \code{\link[=proj_epred]{proj_epred()}}, or \code{\link[=proj_predict]{proj_predict()}}) upon exiting
+\code{\link[=proj_linpred]{proj_linpred()}}, \code{\link[=proj_epred]{proj_epred()}}, or \code{\link[=proj_predict]{proj_predict()}}. Here, \code{.seed} is
+used for drawing new group-level effects
 in case of a multilevel submodel (however, not yet in case of a GAMM) and
 for drawing from the predictive distributions of the submodel(s) in case of
 \code{\link[=proj_predict]{proj_predict()}}. If a clustered projection was performed, then in
@@ -162,6 +179,11 @@ these draws stored in an attribute \code{wdraws_prj}. (If
 \code{return_draws_matrix}, \code{allow_nonconst_wdraws_prj}, and \code{integrated}
 are all \code{FALSE}, then projected draws with nonconstant weights cause an
 error.)
+\item \code{\link[=proj_epred]{proj_epred()}} is a wrapper around \code{\link[=proj_linpred]{proj_linpred()}} with \code{transform =   TRUE} and returns only the draws of the expected value of the projected
+posterior predictive distribution on the response scale (i.e., the \code{pred}
+element of the \code{list} returned by \code{\link[=proj_linpred]{proj_linpred()}}, without the \code{lpd}
+element). The structure of the returned object is the same as that of the
+\code{pred} element described for \code{\link[=proj_linpred]{proj_linpred()}} above.
 \item \code{\link[=proj_predict]{proj_predict()}} returns an \eqn{S_{\mathrm{prj}} \times N}{S_prj x N}
 matrix of predictions where \eqn{S_{\mathrm{prj}}}{S_prj} denotes
 \code{nresample_clusters} in case of clustered projection (or, more generally,
@@ -181,21 +203,41 @@ each submodel (the names of this \code{list} being the numbers of predictor
 terms of the submodels when counting the intercept, too).
 }
 \description{
-After the projection of the reference model onto a submodel, the linear
-predictors (for the original or a new dataset) based on that submodel can be
-calculated by \code{\link[=proj_linpred]{proj_linpred()}}. These linear predictors can also be
-transformed to response scale and averaged across the projected parameter
-draws. Furthermore, \code{\link[=proj_linpred]{proj_linpred()}} returns the corresponding log predictive
-density values if the (original or new) dataset contains response values. The
-\code{\link[=proj_predict]{proj_predict()}} function draws from the predictive distributions (there is
-one such distribution for each observation from the original or new dataset)
-of the submodel that the reference model has been projected onto. If the
-projection has not been performed yet, both functions call \code{\link[=project]{project()}}
-internally to perform the projection. Both functions can also handle multiple
-submodels at once (for \code{object}s of class \code{vsel} or \code{object}s returned by a
-\code{\link[=project]{project()}} call to an object of class \code{vsel}; see \code{\link[=project]{project()}}).
+The \code{\link[=proj_predict]{proj_predict()}} function draws from the projected posterior
+predictive distribution of the submodel that the reference model
+has been projected onto. By definition, these draws have higher
+variability than draws of the expected value of the posterior
+predictive distribution computed by \code{\link[=proj_epred]{proj_epred()}}. This is because
+the aleatoric uncertainty from the data model is incorporated in
+\code{\link[=proj_predict]{proj_predict()}}.
 }
 \details{
+The \code{\link[=proj_epred]{proj_epred()}} function draws from the distribution of the
+expected value of the projected posterior predictive distribution.
+By definition, these predictions have smaller variability than the
+projected posterior predictions performed by \code{\link[=proj_predict]{proj_predict()}}. This
+is because only the epistemic uncertainty in the expected value of
+the projected posterior predictive distribution is incorporated in
+the draws, while the aleatoric uncertainty from the data model is
+not included. However, the estimated means of both methods averaged
+across draws should be very similar.
+
+The \code{\link[=proj_linpred]{proj_linpred()}} function draws from the projected posterior of the
+linear predictors, that is, draws before applying any link functions
+or other transformations. These linear predictors can also be
+transformed to response scale with argument \code{transform = TRUE}, which
+produces draws equivalent to draws produced by \code{\link[=proj_epred]{proj_epred()}}.
+Furthermore, \code{\link[=proj_linpred]{proj_linpred()}} returns the corresponding log predictive
+density values if the (original or new) dataset contains response values.
+
+All these predictions can be performed for the data used to fit the
+reference model or for new data. If the projection has not been
+performed yet, all three functions call \code{\link[=project]{project()}} internally to
+perform the projection. All three functions can also handle
+multiple submodels at once (for \code{object}s of class \code{vsel} or
+\code{object}s returned by a \code{\link[=project]{project()}} call to an object of class
+\code{vsel}; see \code{\link[=project]{project()}}).
+
 Currently, \code{\link[=proj_predict]{proj_predict()}} ignores observation weights that are not
 equal to \code{1}. A corresponding warning is thrown if this is the case.
 
@@ -238,6 +280,7 @@ prj <- project(fit, predictor_terms = c("X1", "X3", "X5"), ndraws = 21,
 # Predictions (at the training points) from the submodel onto which the
 # reference model was projected:
 prjl <- proj_linpred(prj)
+prje <- proj_epred(prj)
 prjp <- proj_predict(prj, .seed = 7364)
 \dontshow{\}) # examplesIf}
 }

From 4adcca69c8c07e04982952e59f981e700ae04cd8 Mon Sep 17 00:00:00 2001
From: fweber144 <fweber144@protonmail.com>
Date: Tue, 31 Mar 2026 21:27:36 +0200
Subject: [PATCH 3/3] create NEWS entry

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index bbeddacb0..4c0efcf78 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,8 @@ If you read this from a place other than <https://mc-stan.org/projpred/news/inde
 
 ## Minor changes
 
+* Added `proj_epred()`, which is essentially a wrapper around `proj_linpred()` with `transform = TRUE`. (GitHub: #559, #560)
+
 ## Bug fixes