PecanProject · mdietze · Apr 22, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/base/db/R/get.trait.data.pft.R b/base/db/R/get.trait.data.pft.R
@@ -1,15 +1,54 @@
 ##' Get trait data from the database for a single PFT
 ##'
-##' @details `pft` should be a list containing at least `name` and `outdir`, and optionally `posteriorid` and `constants`. BEWARE: All existing files in `outir` will be deleted!
-##' @param pft list of settings for the pft whose traits to retrieve. See details
-##' @param modeltype type of model that is used, this is used to distinguish between different pfts with the same name.
+##' @md
+##' @description Queries BETYdb for trait observations and prior
+##' distributions for a single plant functional type (PFT). Results are saved
+##' to files in the PFT output directory (`pft$outdir`), and also registered
+##' in the database as posterior records when `write = TRUE`.
+##'
+##' @details
+##' `pft` should be a list containing at least `name` and `outdir`, and
+##' optionally `posteriorid` and `constants`.
+##'
+##' **File-based side effects (saved to `pft$outdir`):**
+##' \describe{
+##'   \item{`trait.data.Rdata`}{Contains a single object `trait.data`: a named
+##'     list of data frames, one per trait. Each data frame has columns from
+##'     BETYdb's traits/yields views (e.g., `mean`, `stat`, `n`, `site_id`,
+##'     `treatment_id`). Names correspond to trait variable names
+##'     (e.g., `"SLA"`, `"Vcmax"`).}
+##'   \item{`prior.distns.Rdata`}{Contains a single object `prior.distns`: a
+##'     data frame with one row per trait and columns `distn`, `parama`,
+##'     `paramb`, and `n`. Row names are trait variable names. Traits listed
+##'     in `pft$constants` are excluded.}
+##'   \item{`trait.data.csv`}{CSV export of `trait.data` (all traits
+##'     row-bound).}
+##'   \item{`prior.distns.csv`}{CSV export of `prior.distns`.}
+##'   \item{`species.csv` or `cultivars.csv`}{PFT membership list used to
+##'     detect changes between runs.}
+##' }
+##'
+##' **Downstream contract:** The files `trait.data.Rdata` and
+##' `prior.distns.Rdata` are expected by \code{run.meta.analysis.pft}, which
+##' loads them from `pft$outdir`. This implicit file-based coupling means
+##' the two functions must agree on directory path and object names. A future
+##' refactoring goal is to pass these objects directly via function arguments
+##' instead.
+##'
+##' @param pft list of settings for the pft whose traits to retrieve. See details.
+##' @param modeltype type of model that is used, this is used to distinguish
+##'   between different pfts with the same name.
 ##' @param dbfiles location where previous results are found
 ##' @param dbcon database connection
-##' @param forceupdate set this to true to force an update, auto will check to see if an update is needed.
+##' @param forceupdate set this to true to force an update, auto will check to
+##'   see if an update is needed.
 ##' @param write (Logical) If `TRUE` updated posteriors will be written to
-##'   BETYdb.  Defaults to FALSE.
+##'   BETYdb.  Defaults to `FALSE`.
 ##' @param trait.names list of trait names to retrieve
-##' @return updated pft with posteriorid
+##' @return The `pft` input list, updated with `pft$posteriorid` set to the
+##'   ID of the (possibly new) posterior record in BETYdb. The posterior ID can
+##'   be used to locate the output files (`trait.data.Rdata`, `prior.distns.Rdata`,
+##'   etc.) via BETYdb's `dbfiles` table.
 ##' @author David LeBauer, Shawn Serbin, Rob Kooper
 ##' @export
 get.trait.data.pft <-
@@ -27,10 +66,6 @@ get.trait.data.pft <-
     PEcAn.logger::logger.error(paste0("Couldn't create PFT output directory: ", pft$outdir))
   }
 
-  ## Remove old files.  Clean up.
-  old.files <- list.files(path = pft$outdir, full.names = TRUE, include.dirs = FALSE)
-  file.remove(old.files)
-
   # find appropriate pft
   pftres <- query_pfts(dbcon, pft[["name"]], modeltype)
   pfttype <- pftres[["pft_type"]]

diff --git a/base/db/man/get.trait.data.pft.Rd b/base/db/man/get.trait.data.pft.Rd
diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R
@@ -1,28 +1,62 @@
 #' Write model-specific run scripts and configuration files
 #'
-#' Generates run scripts and configuration files for all analyses specified
-#' in the provided settings. Most of the heavy lifting is done by the
-#' \code{write.config.*} function for your specific ecosystem model
-#' (e.g. write.config.ED2, write.config.SIPNET).
+#' @md
+#' @description Generates run scripts and configuration files for all
+#' analyses (ensemble and/or sensitivity analysis) specified in the provided
+#' settings. Delegates the model-specific config writing to the appropriate
+#' `write.config.*` function (e.g. `write.config.ED2`, `write.config.SIPNET`).
 #'
+#' @details
+#' **Upstream contract (reads from `settings$outdir`):**
+#' \describe{
+#'   \item{`samples.Rdata`}{Produced by \code{\link[PEcAn.uncertainty]{get.parameter.samples}}.
+#'     Contains 5 bundled objects: `trait.samples`,
+#'     `sa.samples`, `ensemble.samples`, `runs.samples`, `env.samples`.
+#'     This function loads `trait.samples` and `sa.samples` to build
+#'     model configuration files. If `input_design` contains a `param`
+#'     column, `ensemble.samples` is rebuilt by subsetting `trait.samples`
+#'     according to the design indices.}
+#' }
+#'
+#' **File-based side effects (saved to `settings$outdir`):**
+#' \describe{
+#'   \item{`sensitivity.samples.<ensemble_id>.Rdata`}{Contains `sa.run.ids`
+#'     (named list of run IDs per PFT/trait/quantile), `sa.ensemble.id`,
+#'     `sa.samples`, `pft.names`, and `trait.names`. Saved when sensitivity
+#'     analysis is configured.}
+#'   \item{`ensemble.samples.<ensemble_id>.Rdata`}{Contains `ens.run.ids`
+#'     (vector of run IDs), `ens.ensemble.id`, `ens.samples`, `pft.names`,
+#'     and `trait.names`. Saved when ensemble is configured.}
+#'   \item{`runs_manifest.csv`}{A CSV table tracking all runs created,
+#'     appended across ensemble and SA analyses.}
+#' }
+#'
+#' **Downstream contract:** The `sensitivity.samples.*.Rdata` and
+#' `ensemble.samples.*.Rdata` files are loaded by \code{\link[PEcAn.uncertainty]{get.results}}
+#'  to match model outputs to their corresponding
+#'  parameter sets. This implicit file-based coupling is a refactoring target.
+#'
+#' The default value for `posterior.files` is NA, in which case the
+#'    most recent posterior or prior (in that order) for the workflow is used.
+#'    When specified, `posterior.files` should be a vector of filenames with one
+#'    entry for each PFT. Specify filenames with no path; PFT outdirs will be
+#'    prepended. This forces use of only files within this workflow, to avoid
+#'    confusion.
 #'
 #' @param settings a PEcAn settings list
 #' @param ensemble.size number of ensemble runs
-#' @param input_design Input design data.frame coordinating input files across runs.
-#'   Contains columns for each sampled input (met, param, etc.) with row indices,
-#'   as documented in \code{runModule.run.write.configs()}.
+#' @param input_design Input design data.frame coordinating input files across
+#'   runs. Contains columns for each sampled input (met, param, etc.) with row
+#'   indices, as documented in \code{\link[PEcAn.workflow]{runModule.run.write.configs}}.
 #' @param write should the runs be written to the database?
-#' @param posterior.files Filenames for posteriors for drawing samples for ensemble and sensitivity
-#'    analysis (e.g. post.distns.Rdata, or prior.distns.Rdata)
+#' @param posterior.files Filenames for posteriors for drawing samples for
+#'   ensemble and sensitivity analysis (e.g. `post.distns.Rdata`, or
+#'   `prior.distns.Rdata`).
 #' @param overwrite logical: Replace output files that already exist?
 #'
-#' @details The default value for \code{posterior.files} is NA, in which case the
-#'    most recent posterior or prior (in that order) for the workflow is used.
-#'    When specified, \code{posterior.files} should be a vector of filenames with one entry for each PFT.
-#'    Specify filenames with no path; PFT outdirs will be appended. This forces use of only
-#'    files within this workflow, to avoid confusion.
-#'
-#' @return an updated settings list, which includes ensemble IDs for SA and ensemble analysis
+#' @return The `settings` list (invisibly), updated with ensemble IDs for SA
+#'   and ensemble analysis (e.g. `settings$sensitivity.analysis$ensemble.id`,
+#'   `settings$ensemble$ensemble.id`).
 #' @export
 #'
 #' @author David LeBauer, Shawn Serbin, Ryan Kelly, Mike Dietze, Akash B V

diff --git a/base/workflow/man/run.write.configs.Rd b/base/workflow/man/run.write.configs.Rd
diff --git a/modules/meta.analysis/R/run.meta.analysis.R b/modules/meta.analysis/R/run.meta.analysis.R
@@ -183,16 +183,66 @@ check_consistent <- function(point, prior,
 }
 
 #' "Workflow" version of run.meta.analysis.pft
+#' Run Bayesian meta-analysis for a single PFT (file-based wrapper)
 #'
-#' Thin wrapper around `meta_analysis_standalone` that also reads/writes files 
-#' and registers results in the PEcAn database. 
+#' @md
+#' @description Thin wrapper around \code{\link[PEcAn.MA]{meta_analysis_standalone}}
+#' that reads trait data and priors from disk, runs the meta-analysis, and saves
+#' results back to disk. Also registers result files in the BETYdb posteriors table.
+#'
+#' @details
+#' **Upstream contract (reads from `pft$outdir`):**
+#' \describe{
+#'   \item{`trait.data.Rdata`}{Named list of data frames produced by
+#'     \code{\link[PEcAn.DB]{get.trait.data.pft}}. Loaded into `trait_env$trait.data`.}
+#'   \item{`prior.distns.Rdata`}{Data frame of prior distributions produced by
+#'     \code{\link[PEcAn.DB]{get.trait.data.pft}}. Loaded into `prior_env$prior.distns`.}
+#' }
+#'
+#' **File-based side effects (saved to `pft$outdir`):**
+#' \describe{
+#'   \item{`trait.mcmc.Rdata`}{Contains `trait.mcmc`: a named list of
+#'     `mcmc.list` objects (one per trait) with posterior MCMC samples from
+#'     JAGS. Each element has columns `beta.o` (overall mean) and optionally
+#'     `sd.o` (overall SD).}
+#'   \item{`post.distns.MA.Rdata`}{Contains `post.distns`: a data frame with
+#'     one row per trait and columns `distn`, `parama`, `paramb`, `n`
+#'     summarizing the fitted posterior distribution.}
+#'   \item{`post.distns.Rdata`}{Symlink to `post.distns.MA.Rdata`.}
+#'   \item{`jagged.data.Rdata`}{Contains `jagged.data`: a named list of data
+#'     frames (one per trait) formatted for use in the JAGS meta-analysis
+#'     model (see \code{\link[PEcAn.MA]{jagify}}).}
+#' }
+#'
+#' **Downstream contract:** The files `trait.mcmc.Rdata` and
+#' `post.distns.Rdata` are expected by \code{\link[PEcAn.uncertainty]{get.parameter.samples}} (in
+#' `PEcAn.uncertainty`), which loads them to generate ensemble and sensitivity
+#' analysis samples.
+#'
+#' **Note:** The core computation is performed by \code{\link[PEcAn.MA]{meta_analysis_standalone}},
+#' which accepts and returns R objects directly — see its documentation for
+#' the pure-function interface.
 #'
 #' @param pft (list) PFT list object, as defined in settings. Must include the
 #'  following: `outdir`, `name`, `posteriorid`
 #' @param dbfiles (character) directory where previous results are found
 #' @param dbcon (DBI connection object) BETY database connection object
-#' @param update (boolean; default = TRUE) If `TRUE`, replace existing
-#'  posteriors with new ones
+#' @param update (boolean; default = FALSE) If `TRUE`, replace existing
+#'   posteriors with new ones
+#'
+#' @return The `pft` list (invisibly), or `NA` if no trait data are available.
+#'   The returned `pft` list is a named list with the following elements:
+#'   \describe{
+#'     \item{`name`}{(character) PFT name, e.g. `"temperate.deciduous"`.}
+#'     \item{`outdir`}{(character) Path to directory where output files are
+#'       stored (trait data, priors, posteriors, MCMC samples).}
+#'     \item{`posteriorid`}{(integer) Row ID of the posterior record in
+#'       BETYdb's `posteriors` table.}
+#'     \item{`constants`}{(named list, optional) Trait values to treat as
+#'       fixed constants, bypassing the meta-analysis.}
+#'   }
+#'   The function's primary outputs are communicated through files saved in
+#'   `pft$outdir`.
 #'
 #' @inheritParams meta_analysis_standalone
 run.meta.analysis.pft <- function(pft, iterations, random = TRUE, threshold = 1.2, dbfiles, dbcon, use_ghs = TRUE, update = FALSE) {
@@ -299,19 +349,23 @@ run.meta.analysis.pft <- function(pft, iterations, random = TRUE, threshold = 1.
 } # run.meta.analysis.pft
 
 ##--------------------------------------------------------------------------------------------------##
-##' Run meta analysis
+##' Run meta-analysis across all PFTs
+##'
+##' @md
+##' @description Iterates over a list of PFTs and runs \code{\link[PEcAn.MA]{run.meta.analysis.pft}}
+##' for each one. This is the main entry point called by \code{\link[PEcAn.MA]{runModule.run.meta.analysis}}.
 ##'
 ##' This will use the following items from settings:
-##' - settings$pfts
-##' - settings$database$bety
-##' - settings$database$dbfiles
-##' - settings$meta.analysis$update
+##' - `settings$pfts`
+##' - `settings$database$bety`
+##' - `settings$database$dbfiles`
+##' - `settings$meta.analysis$update`
 ##'
 ##' @param pfts the list of pfts to get traits for
 ##' @param database database connection parameters
 ##' @param update logical: Rerun the meta-analysis if result files already exist?
 ##' @param threshold Gelman-Rubin convergence diagnostic, passed on to
-##'   \code{\link{pecan.ma.summary}}
+##'  \code{\link{pecan.ma.summary}}
 ##' @inheritParams meta_analysis_standalone
 ##' @inheritParams run.meta.analysis.pft
 ##'