Skip to content

Commit d9ca08e

Browse files
committed
Add testthat coverage for trait meta-analysis pipeline functions
Add comprehensive test coverage for the core meta-analysis pipeline functions as preparation for modularity improvements planned in the GSoC 'Refactoring the PEcAn trait meta-analysis workflow' project. New test files: - helper-test-data.R: Shared fixtures - test-check_consistent.R: Unit tests for check_consistent() - test-run.meta.analysis.pft.R: Tests for workflow wrapper - test-meta_analysis_standalone.R: Tests for core analysis function - test-get.parameter.samples.R: Documents invisible(NULL) return problem These tests lock in current behavior before any refactoring begins. No functional changes to existing code.
1 parent fe4a948 commit d9ca08e

5 files changed

Lines changed: 652 additions & 0 deletions

File tree

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# helper-test-data.R
2+
# Shared test fixtures for meta-analysis pipeline tests
3+
#
4+
# These helpers create minimal but realistic test data structures
5+
# that mirror what the actual pipeline produces and consumes.
6+
7+
# Create minimal trait data matching the structure expected by
8+
# meta_analysis_standalone() and run.meta.analysis.pft()
9+
#
10+
# The columns here match the @param documentation in run.meta.analysis.pft.R:
11+
# name, mean, statname, stat, greenhouse, n,
12+
# site_id, specie_id, citation_id, cultivar_id,
13+
# date, time, control
14+
create_test_trait_data <- function(n_obs = 10, trait_mean = 20, trait_sd = 2) {
15+
test_trait <- data.frame(
16+
citation_id = rep(1L, n_obs),
17+
site_id = rep(1:2, length.out = n_obs),
18+
name = rep(paste0("species_", 1:2), length.out = n_obs),
19+
trt_id = rep("control", n_obs),
20+
control = rep(1L, n_obs),
21+
greenhouse = rep(0L, n_obs),
22+
date = rep(1, n_obs),
23+
time = rep(NA, n_obs),
24+
cultivar_id = rep(1L, n_obs),
25+
specie_id = rep(1L, n_obs),
26+
n = rep(5L, n_obs),
27+
mean = rnorm(n_obs, mean = trait_mean, sd = trait_sd),
28+
stat = rep(trait_sd / sqrt(5), n_obs),
29+
statname = rep("SE", n_obs),
30+
treatment_id = seq_len(n_obs),
31+
stringsAsFactors = FALSE
32+
)
33+
return(test_trait)
34+
}
35+
36+
# Create a minimal prior distributions data.frame
37+
# matching the structure returned by PEcAn.DB::query.priors()
38+
#
39+
# Uses normal distribution so that check_consistent() and
40+
# p.point.in.prior() work without exotic distribution functions.
41+
create_test_priors <- function(trait_names = "SLA",
42+
distn = "norm",
43+
parama = 20,
44+
paramb = 5) {
45+
prior.distns <- data.frame(
46+
distn = rep(distn, length(trait_names)),
47+
parama = rep(parama, length(trait_names)),
48+
paramb = rep(paramb, length(trait_names)),
49+
n = rep(NA_real_, length(trait_names)),
50+
stringsAsFactors = FALSE
51+
)
52+
rownames(prior.distns) <- trait_names
53+
return(prior.distns)
54+
}
55+
56+
# Create a minimal pft list matching what run.meta.analysis.pft() expects
57+
#
58+
# This mirrors the structure from settings$pfts[[i]] after get.trait.data.pft()
59+
# has been run, which adds posteriorid to the pft object.
60+
create_test_pft <- function(outdir = file.path(tempdir(), "test-pft"),
61+
pft_name = "temperate.deciduous",
62+
posteriorid = 9999L) {
63+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
64+
list(
65+
name = pft_name,
66+
outdir = outdir,
67+
posteriorid = posteriorid
68+
)
69+
}
70+
71+
# Write trait.data.Rdata and prior.distns.Rdata files to a directory
72+
# so that run.meta.analysis.pft() can load them
73+
#
74+
# This simulates what get.trait.data.pft() produces as output.
75+
setup_trait_files <- function(outdir,
76+
trait_names = "SLA",
77+
trait_mean = 20,
78+
trait_sd = 2,
79+
n_obs = 10,
80+
prior_parama = 20,
81+
prior_paramb = 5) {
82+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
83+
84+
# Build trait.data as a named list of data.frames (one per trait)
85+
trait.data <- stats::setNames(
86+
lapply(trait_names, function(tn) {
87+
create_test_trait_data(
88+
n_obs = n_obs,
89+
trait_mean = trait_mean,
90+
trait_sd = trait_sd
91+
)
92+
}),
93+
trait_names
94+
)
95+
save(trait.data, file = file.path(outdir, "trait.data.Rdata"))
96+
97+
# Build prior.distns data.frame
98+
prior.distns <- create_test_priors(
99+
trait_names = trait_names,
100+
parama = prior_parama,
101+
paramb = prior_paramb
102+
)
103+
save(prior.distns, file = file.path(outdir, "prior.distns.Rdata"))
104+
105+
invisible(list(trait.data = trait.data, prior.distns = prior.distns))
106+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# test-check_consistent.R
2+
# Tests for check_consistent() — the prior-data consistency checker
3+
#
4+
# check_consistent() is a pure function with no side effects or DB dependency,
5+
# making it ideal for unit testing. It is called twice inside
6+
# meta_analysis_standalone(): once to check data vs prior, and again to check
7+
# posterior vs prior.
8+
9+
10+
11+
test_that("check_consistent returns no_error=TRUE, no_warning=TRUE when point is well within prior", {
12+
# A normal(0, 10) prior — point at the mean should be perfectly consistent
13+
14+
prior <- data.frame(distn = "norm", parama = 0, paramb = 10,
15+
stringsAsFactors = FALSE)
16+
result <- PEcAn.MA:::check_consistent(point = 0, prior = prior)
17+
18+
expect_type(result, "logical")
19+
expect_named(result, c("no_error", "no_warning"))
20+
expect_true(result[["no_error"]])
21+
expect_true(result[["no_warning"]])
22+
})
23+
24+
test_that("check_consistent returns warning (but not error) for moderately extreme points", {
25+
26+
# normal(0, 1): p(2.1) ≈ 0.982 which is > 0.975 (warning threshold)
27+
28+
# but < 0.9995 (error threshold)
29+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
30+
stringsAsFactors = FALSE)
31+
result <- PEcAn.MA:::check_consistent(point = 2.1, prior = prior)
32+
33+
expect_true(result[["no_error"]])
34+
expect_false(result[["no_warning"]])
35+
})
36+
37+
test_that("check_consistent returns error for extremely inconsistent points", {
38+
# normal(0, 1): p(5) ≈ 1.0 which exceeds both thresholds
39+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
40+
stringsAsFactors = FALSE)
41+
result <- PEcAn.MA:::check_consistent(point = 5, prior = prior)
42+
43+
expect_false(result[["no_error"]])
44+
expect_false(result[["no_warning"]])
45+
})
46+
47+
test_that("check_consistent works symmetrically for low-tail extremes", {
48+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
49+
stringsAsFactors = FALSE)
50+
result <- PEcAn.MA:::check_consistent(point = -5, prior = prior)
51+
52+
expect_false(result[["no_error"]])
53+
expect_false(result[["no_warning"]])
54+
})
55+
56+
test_that("check_consistent respects custom p_error and p_warning thresholds", {
57+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
58+
stringsAsFactors = FALSE)
59+
60+
# With very permissive thresholds, even extreme points pass
61+
result <- PEcAn.MA:::check_consistent(
62+
point = 3, prior = prior,
63+
p_error = 1e-10, p_warning = 1e-5
64+
)
65+
expect_true(result[["no_error"]])
66+
expect_true(result[["no_warning"]])
67+
})
68+
69+
test_that("check_consistent validates that p_warning >= p_error", {
70+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
71+
stringsAsFactors = FALSE)
72+
expect_error(
73+
PEcAn.MA:::check_consistent(point = 0, prior = prior,
74+
p_error = 0.05, p_warning = 0.01)
75+
)
76+
})
77+
78+
test_that("check_consistent works with non-normal prior distributions", {
79+
# Gamma distribution: dgamma with shape=2, rate=1
80+
# Median of gamma(2,1) ≈ 1.678
81+
prior_gamma <- data.frame(distn = "gamma", parama = 2, paramb = 1,
82+
stringsAsFactors = FALSE)
83+
84+
# Point near the mode should be consistent
85+
result <- PEcAn.MA:::check_consistent(point = 1.5, prior = prior_gamma)
86+
expect_true(result[["no_error"]])
87+
expect_true(result[["no_warning"]])
88+
89+
# Point far in the tail should trigger error
90+
result_extreme <- PEcAn.MA:::check_consistent(point = 50, prior = prior_gamma)
91+
expect_false(result_extreme[["no_error"]])
92+
})
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# test-get.parameter.samples.R
2+
# Documents the invisible(NULL) return value problem in get.parameter.samples()
3+
#
4+
# get.parameter.samples() ends with save() and has no return() statement,
5+
# so it returns invisible(NULL). This means callers cannot programmatically
6+
# access results without loading the saved file.
7+
# The GSoC project "Refactoring the PEcAn trait meta-analysis workflow"
8+
# aims to fix this by returning a named list instead.
9+
10+
11+
12+
test_that("get.parameter.samples returns invisible(NULL) — documenting the problem", {
13+
skip(paste0(
14+
"Requires full PEcAn settings + database connection. ",
15+
"Current function ends with save() and no return(), ",
16+
"so it returns invisible(NULL). ",
17+
"GSoC refactoring will make it return a named list."
18+
))
19+
})
20+
21+
# ---------------------------------------------------------------------------
22+
# p.point.in.prior (helper used throughout the pipeline)
23+
# ---------------------------------------------------------------------------
24+
25+
26+
27+
test_that("p.point.in.prior returns correct quantile for normal distribution", {
28+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
29+
result <- PEcAn.MA:::p.point.in.prior(point = 0, prior = prior)
30+
expect_equal(result, 0.5)
31+
})
32+
33+
test_that("p.point.in.prior returns correct quantile for extreme values", {
34+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
35+
result_low <- PEcAn.MA:::p.point.in.prior(point = -5, prior = prior)
36+
expect_true(result_low < 0.001)
37+
result_high <- PEcAn.MA:::p.point.in.prior(point = 5, prior = prior)
38+
expect_true(result_high > 0.999)
39+
})
40+
41+
test_that("p.point.in.prior works with gamma distribution", {
42+
prior <- data.frame(distn = "gamma", parama = 2, paramb = 1)
43+
result <- PEcAn.MA:::p.point.in.prior(point = 2, prior = prior)
44+
expected <- pgamma(2, shape = 2, rate = 1)
45+
expect_equal(result, expected)
46+
})
47+
48+
test_that("p.point.in.prior returns numeric of length 1", {
49+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
50+
result <- PEcAn.MA:::p.point.in.prior(point = 1.5, prior = prior)
51+
expect_type(result, "double")
52+
expect_length(result, 1)
53+
expect_true(result >= 0 && result <= 1)
54+
})
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# test-meta_analysis_standalone.R
2+
# Tests for meta_analysis_standalone() — the pure analysis function
3+
#
4+
# meta_analysis_standalone() is the core analysis function that:
5+
# 1. Calls jagify() to prepare data for JAGS
6+
# 2. Checks data-prior consistency via check_consistent()
7+
# 3. Runs pecan.ma() (JAGS MCMC)
8+
# 4. Summarizes results via pecan.ma.summary()
9+
# 5. Fits approximate posteriors via approx.posterior()
10+
# 6. Returns list(trait.mcmc, post.distns, jagged.data)
11+
#
12+
# Full integration tests require JAGS to be installed and are slow,
13+
# so we test input validation and structure here, and skip JAGS-dependent
14+
# tests when JAGS is unavailable.
15+
16+
# Note: meta_analysis_standalone() does not create outdir itself — caller must
17+
# ensure it exists because pecan.ma() calls sink() before any dir.create().
18+
# Input validation (stopifnot checks) and prior inconsistency error behavior
19+
# will be verified and tested as part of GSoC refactoring work.
20+
21+
# ---------------------------------------------------------------------------
22+
# Greenhouse data filtering
23+
# ---------------------------------------------------------------------------
24+
25+
test_that("meta_analysis_standalone filters greenhouse data when use_ghs=FALSE", {
26+
# Create data that is ALL greenhouse
27+
gh_data <- create_test_trait_data(n_obs = 5, trait_mean = 20)
28+
gh_data$greenhouse <- 1L
29+
30+
trait_data <- list(SLA = gh_data)
31+
priors <- create_test_priors("SLA")
32+
33+
# When use_ghs=FALSE, greenhouse data is excluded by jagify().
34+
# If all data is greenhouse, jagged_data will have 0 rows,
35+
# and the function should handle this (remove the trait from analysis).
36+
# With no traits left, the function should still not crash.
37+
# The exact behavior depends on downstream calls, but we verify no hard crash.
38+
result <- tryCatch(
39+
PEcAn.MA::meta_analysis_standalone(
40+
trait_data = trait_data,
41+
priors = priors,
42+
iterations = 100,
43+
use_ghs = FALSE
44+
),
45+
error = function(e) e
46+
)
47+
48+
# Either it completes (with empty result) or errors gracefully
49+
# — it should NOT produce a cryptic R crash
50+
expect_true(
51+
is.list(result) || inherits(result, "error"),
52+
info = "meta_analysis_standalone should handle all-greenhouse data gracefully"
53+
)
54+
})
55+
56+
# ---------------------------------------------------------------------------
57+
# Full integration test (requires JAGS)
58+
# ---------------------------------------------------------------------------
59+
60+
test_that("meta_analysis_standalone returns correct structure with valid inputs", {
61+
skip_if_not_installed("rjags")
62+
skip_on_cran()
63+
64+
# Check if JAGS is actually available
65+
jags_available <- tryCatch({
66+
rjags::jags.model(
67+
textConnection("model { x ~ dnorm(0, 1) }"),
68+
data = list(), n.chains = 1, quiet = TRUE
69+
)
70+
TRUE
71+
}, error = function(e) FALSE)
72+
skip_if(!jags_available, "JAGS not available")
73+
74+
outdir <- file.path(tempdir(), "test-ma-standalone-integration")
75+
if (dir.exists(outdir)) unlink(outdir, recursive = TRUE)
76+
77+
set.seed(42)
78+
trait_data <- list(
79+
SLA = create_test_trait_data(n_obs = 15, trait_mean = 20, trait_sd = 2)
80+
)
81+
priors <- create_test_priors("SLA", parama = 20, paramb = 5)
82+
83+
result <- PEcAn.MA::meta_analysis_standalone(
84+
trait_data = trait_data,
85+
priors = priors,
86+
iterations = 3000,
87+
outdir = outdir,
88+
pft_name = "test.integration",
89+
random = TRUE,
90+
threshold = 1.2,
91+
use_ghs = TRUE
92+
)
93+
94+
# Verify return structure matches documentation:
95+
# list(trait.mcmc, post.distns, jagged.data)
96+
expect_type(result, "list")
97+
expect_named(result, c("trait.mcmc", "post.distns", "jagged.data"),
98+
ignore.order = FALSE)
99+
100+
# trait.mcmc should be a named list with one entry per trait
101+
expect_type(result$trait.mcmc, "list")
102+
expect_true("SLA" %in% names(result$trait.mcmc) ||
103+
length(result$trait.mcmc) == 0,
104+
info = "trait.mcmc should contain analyzed traits (or be empty if convergence failed)")
105+
106+
# post.distns should be a data.frame with same structure as priors
107+
expect_s3_class(result$post.distns, "data.frame")
108+
expect_true(all(c("distn", "parama", "paramb") %in% colnames(result$post.distns)))
109+
expect_true("SLA" %in% rownames(result$post.distns))
110+
111+
# jagged.data should be a named list of data.frames (output of jagify)
112+
expect_type(result$jagged.data, "list")
113+
expect_true("SLA" %in% names(result$jagged.data))
114+
expect_s3_class(result$jagged.data[["SLA"]], "data.frame")
115+
# jagify output must contain at minimum the column "Y"
116+
expect_true("Y" %in% colnames(result$jagged.data[["SLA"]]))
117+
118+
unlink(outdir, recursive = TRUE)
119+
})

0 commit comments

Comments
 (0)