Skip to content

Commit 6783775

Browse files
committed
Add testthat coverage for trait meta-analysis pipeline functions
Add comprehensive test coverage for the core meta-analysis pipeline functions as preparation for modularity improvements planned in the GSoC 'Refactoring the PEcAn trait meta-analysis workflow' project. New test files: - helper-test-data.R: Shared fixtures - test-check_consistent.R: Unit tests for check_consistent() - test-run.meta.analysis.pft.R: Tests for workflow wrapper - test-meta_analysis_standalone.R: Tests for core analysis function - test-get.parameter.samples.R: Documents invisible(NULL) return problem These tests lock in current behavior before any refactoring begins. No functional changes to existing code.
1 parent fe4a948 commit 6783775

5 files changed

Lines changed: 649 additions & 0 deletions

File tree

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# helper-test-data.R
2+
# Shared test fixtures for meta-analysis pipeline tests
3+
#
4+
# These helpers create minimal but realistic test data structures
5+
# that mirror what the actual pipeline produces and consumes.
6+
7+
# Create minimal trait data matching the structure expected by
8+
# meta_analysis_standalone() and run.meta.analysis.pft()
9+
#
10+
# The columns here match the @param documentation in run.meta.analysis.pft.R:
11+
# name, mean, statname, stat, greenhouse, n,
12+
# site_id, specie_id, citation_id, cultivar_id,
13+
# date, time, control
14+
create_test_trait_data <- function(n_obs = 10, trait_mean = 20, trait_sd = 2, seed = 42) {
15+
set.seed(seed)
16+
test_trait <- data.frame(
17+
citation_id = rep(1L, n_obs),
18+
site_id = rep(1:2, length.out = n_obs),
19+
name = rep(paste0("species_", 1:2), length.out = n_obs),
20+
trt_id = rep("control", n_obs),
21+
control = rep(1L, n_obs),
22+
greenhouse = rep(0L, n_obs),
23+
date = rep(1, n_obs),
24+
time = rep(NA, n_obs),
25+
cultivar_id = rep(1L, n_obs),
26+
specie_id = rep(1L, n_obs),
27+
n = rep(5L, n_obs),
28+
mean = rnorm(n_obs, mean = trait_mean, sd = trait_sd),
29+
stat = rep(trait_sd / sqrt(5), n_obs),
30+
statname = rep("SE", n_obs),
31+
treatment_id = seq_len(n_obs),
32+
stringsAsFactors = FALSE
33+
)
34+
return(test_trait)
35+
}
36+
37+
# Create a minimal prior distributions data.frame
38+
# matching the structure returned by PEcAn.DB::query.priors()
39+
#
40+
# Uses normal distribution so that check_consistent() and
41+
# p.point.in.prior() work without exotic distribution functions.
42+
create_test_priors <- function(trait_names = "SLA",
43+
distn = "norm",
44+
parama = 20,
45+
paramb = 5) {
46+
prior.distns <- data.frame(
47+
distn = rep(distn, length(trait_names)),
48+
parama = rep(parama, length(trait_names)),
49+
paramb = rep(paramb, length(trait_names)),
50+
n = rep(NA_real_, length(trait_names)),
51+
stringsAsFactors = FALSE
52+
)
53+
rownames(prior.distns) <- trait_names
54+
return(prior.distns)
55+
}
56+
57+
# Create a minimal pft list matching what run.meta.analysis.pft() expects
58+
#
59+
# This mirrors the structure from settings$pfts[[i]] after get.trait.data.pft()
60+
# has been run, which adds posteriorid to the pft object.
61+
create_test_pft <- function(outdir = file.path(tempdir(), "test-pft"),
62+
pft_name = "temperate.deciduous",
63+
posteriorid = 9999L) {
64+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
65+
list(
66+
name = pft_name,
67+
outdir = outdir,
68+
posteriorid = posteriorid
69+
)
70+
}
71+
72+
# Write trait.data.Rdata and prior.distns.Rdata files to a directory
73+
# so that run.meta.analysis.pft() can load them
74+
#
75+
# This simulates what get.trait.data.pft() produces as output.
76+
setup_trait_files <- function(outdir,
77+
trait_names = "SLA",
78+
trait_mean = 20,
79+
trait_sd = 2,
80+
n_obs = 10,
81+
prior_parama = 20,
82+
prior_paramb = 5) {
83+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
84+
85+
# Build trait.data as a named list of data.frames (one per trait)
86+
trait.data <- stats::setNames(
87+
lapply(trait_names, function(tn) {
88+
create_test_trait_data(
89+
n_obs = n_obs,
90+
trait_mean = trait_mean,
91+
trait_sd = trait_sd
92+
)
93+
}),
94+
trait_names
95+
)
96+
save(trait.data, file = file.path(outdir, "trait.data.Rdata"))
97+
98+
# Build prior.distns data.frame
99+
prior.distns <- create_test_priors(
100+
trait_names = trait_names,
101+
parama = prior_parama,
102+
paramb = prior_paramb
103+
)
104+
save(prior.distns, file = file.path(outdir, "prior.distns.Rdata"))
105+
106+
invisible(list(trait.data = trait.data, prior.distns = prior.distns))
107+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# test-check_consistent.R
2+
# Tests for check_consistent() — the prior-data consistency checker
3+
#
4+
# check_consistent() is a pure function with no side effects or DB dependency,
5+
# making it ideal for unit testing. It is called twice inside
6+
# meta_analysis_standalone(): once to check data vs prior, and again to check
7+
# posterior vs prior.
8+
9+
# Note: We use ::: to access internal (non-exported) functions.
10+
# This is standard practice for testing internal functions in R packages.
11+
# See: https://r-pkgs.org/testing-basics.html
12+
13+
test_that("check_consistent returns no_error=TRUE, no_warning=TRUE when point is well within prior", {
14+
# A normal(0, 10) prior — point at the mean should be perfectly consistent
15+
16+
prior <- data.frame(distn = "norm", parama = 0, paramb = 10,
17+
stringsAsFactors = FALSE)
18+
result <- PEcAn.MA:::check_consistent(point = 0, prior = prior)
19+
20+
expect_type(result, "logical")
21+
expect_named(result, c("no_error", "no_warning"))
22+
expect_true(result[["no_error"]])
23+
expect_true(result[["no_warning"]])
24+
})
25+
26+
test_that("check_consistent returns warning (but not error) for moderately extreme points", {
27+
28+
# normal(0, 1): p(2.1) ≈ 0.982 which is > 0.975 (warning threshold)
29+
30+
# but < 0.9995 (error threshold)
31+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
32+
stringsAsFactors = FALSE)
33+
result <- PEcAn.MA:::check_consistent(point = 2.1, prior = prior)
34+
35+
expect_true(result[["no_error"]])
36+
expect_false(result[["no_warning"]])
37+
})
38+
39+
test_that("check_consistent returns error for extremely inconsistent points", {
40+
# normal(0, 1): p(5) ≈ 1.0 which exceeds both thresholds
41+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
42+
stringsAsFactors = FALSE)
43+
result <- PEcAn.MA:::check_consistent(point = 5, prior = prior)
44+
45+
expect_false(result[["no_error"]])
46+
expect_false(result[["no_warning"]])
47+
})
48+
49+
test_that("check_consistent works symmetrically for low-tail extremes", {
50+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
51+
stringsAsFactors = FALSE)
52+
result <- PEcAn.MA:::check_consistent(point = -5, prior = prior)
53+
54+
expect_false(result[["no_error"]])
55+
expect_false(result[["no_warning"]])
56+
})
57+
58+
test_that("check_consistent respects custom p_error and p_warning thresholds", {
59+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
60+
stringsAsFactors = FALSE)
61+
62+
# With very permissive thresholds, even extreme points pass
63+
result <- PEcAn.MA:::check_consistent(
64+
point = 3, prior = prior,
65+
p_error = 1e-10, p_warning = 1e-5
66+
)
67+
expect_true(result[["no_error"]])
68+
expect_true(result[["no_warning"]])
69+
})
70+
71+
test_that("check_consistent validates that p_warning >= p_error", {
72+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
73+
stringsAsFactors = FALSE)
74+
expect_error(
75+
PEcAn.MA:::check_consistent(point = 0, prior = prior,
76+
p_error = 0.05, p_warning = 0.01)
77+
)
78+
})
79+
80+
test_that("check_consistent works with non-normal prior distributions", {
81+
# Gamma distribution: dgamma with shape=2, rate=1
82+
# Median of gamma(2,1) ≈ 1.678
83+
prior_gamma <- data.frame(distn = "gamma", parama = 2, paramb = 1,
84+
stringsAsFactors = FALSE)
85+
86+
# Point near the mode should be consistent
87+
result <- PEcAn.MA:::check_consistent(point = 1.5, prior = prior_gamma)
88+
expect_true(result[["no_error"]])
89+
expect_true(result[["no_warning"]])
90+
91+
# Point far in the tail should trigger error
92+
result_extreme <- PEcAn.MA:::check_consistent(point = 50, prior = prior_gamma)
93+
expect_false(result_extreme[["no_error"]])
94+
})
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# test-get.parameter.samples.R
2+
# Documents the invisible(NULL) return value problem in get.parameter.samples()
3+
#
4+
# get.parameter.samples() ends with save() and has no return() statement,
5+
# so it returns invisible(NULL). This means callers cannot programmatically
6+
# access results without loading the saved file.
7+
# The GSoC project "Refactoring the PEcAn trait meta-analysis workflow"
8+
# aims to fix this by returning a named list instead.
9+
10+
11+
12+
test_that("get.parameter.samples returns invisible(NULL) — documenting the problem", {
13+
skip(paste0(
14+
"Requires full PEcAn settings + database connection. ",
15+
"Current function ends with save() and no return(), ",
16+
"so it returns invisible(NULL). ",
17+
"GSoC refactoring will make it return a named list."
18+
))
19+
})
20+
21+
# ---------------------------------------------------------------------------
22+
# p.point.in.prior (helper used throughout the pipeline)
23+
# ---------------------------------------------------------------------------
24+
25+
26+
27+
test_that("p.point.in.prior returns correct quantile for normal distribution", {
28+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
29+
result <- PEcAn.MA:::p.point.in.prior(point = 0, prior = prior)
30+
expect_equal(result, 0.5)
31+
})
32+
33+
test_that("p.point.in.prior returns correct quantile for extreme values", {
34+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
35+
result_low <- PEcAn.MA:::p.point.in.prior(point = -5, prior = prior)
36+
expect_true(result_low < 0.001)
37+
result_high <- PEcAn.MA:::p.point.in.prior(point = 5, prior = prior)
38+
expect_true(result_high > 0.999)
39+
})
40+
41+
test_that("p.point.in.prior works with gamma distribution", {
42+
prior <- data.frame(distn = "gamma", parama = 2, paramb = 1)
43+
result <- PEcAn.MA:::p.point.in.prior(point = 2, prior = prior)
44+
expected <- pgamma(2, shape = 2, rate = 1)
45+
expect_equal(result, expected)
46+
})
47+
48+
test_that("p.point.in.prior returns numeric of length 1", {
49+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
50+
result <- PEcAn.MA:::p.point.in.prior(point = 1.5, prior = prior)
51+
expect_type(result, "double")
52+
expect_length(result, 1)
53+
expect_true(result >= 0 && result <= 1)
54+
})
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# test-meta_analysis_standalone.R
2+
# Tests for meta_analysis_standalone() — the pure analysis function
3+
#
4+
# meta_analysis_standalone() is the core analysis function that:
5+
# 1. Calls jagify() to prepare data for JAGS
6+
# 2. Checks data-prior consistency via check_consistent()
7+
# 3. Runs pecan.ma() (JAGS MCMC)
8+
# 4. Summarizes results via pecan.ma.summary()
9+
# 5. Fits approximate posteriors via approx.posterior()
10+
# 6. Returns list(trait.mcmc, post.distns, jagged.data)
11+
12+
# ---------------------------------------------------------------------------
13+
# Greenhouse data filtering
14+
# ---------------------------------------------------------------------------
15+
16+
test_that("meta_analysis_standalone filters greenhouse data when use_ghs=FALSE", {
17+
skip_if_not_installed("rjags")
18+
skip_on_cran()
19+
20+
jags_available <- tryCatch({
21+
rjags::jags.model(
22+
textConnection("model { x ~ dnorm(0, 1) }"),
23+
data = list(), n.chains = 1, quiet = TRUE
24+
)
25+
TRUE
26+
}, error = function(e) FALSE)
27+
skip_if(!jags_available, "JAGS not available")
28+
29+
outdir <- file.path(tempdir(), "test-ma-greenhouse")
30+
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
31+
on.exit(unlink(outdir, recursive = TRUE), add = TRUE)
32+
33+
# Create data that is ALL greenhouse
34+
gh_data <- create_test_trait_data(n_obs = 10, trait_mean = 20)
35+
gh_data$greenhouse <- 1L
36+
trait_data <- list(SLA = gh_data)
37+
priors <- create_test_priors("SLA")
38+
39+
# When use_ghs=FALSE, all greenhouse data is excluded.
40+
# With no data left, function should handle this gracefully.
41+
result <- tryCatch(
42+
PEcAn.MA::meta_analysis_standalone(
43+
trait_data = trait_data,
44+
priors = priors,
45+
iterations = 1000,
46+
outdir = outdir,
47+
use_ghs = FALSE
48+
),
49+
error = function(e) e
50+
)
51+
52+
expect_true(
53+
is.list(result),
54+
info = "Should return list or error object, not crash"
55+
)
56+
})
57+
58+
# ---------------------------------------------------------------------------
59+
# Full integration test (requires JAGS)
60+
# ---------------------------------------------------------------------------
61+
62+
test_that("meta_analysis_standalone returns correct structure with valid inputs", {
63+
skip_if_not_installed("rjags")
64+
skip_on_cran()
65+
66+
jags_available <- tryCatch({
67+
rjags::jags.model(
68+
textConnection("model { x ~ dnorm(0, 1) }"),
69+
data = list(), n.chains = 1, quiet = TRUE
70+
)
71+
TRUE
72+
}, error = function(e) FALSE)
73+
skip_if(!jags_available, "JAGS not available")
74+
75+
outdir <- file.path(tempdir(), "test-ma-standalone-integration")
76+
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
77+
on.exit(unlink(outdir, recursive = TRUE), add = TRUE)
78+
79+
set.seed(42)
80+
trait_data <- list(
81+
SLA = create_test_trait_data(n_obs = 15, trait_mean = 20, trait_sd = 2)
82+
)
83+
priors <- create_test_priors("SLA", parama = 20, paramb = 5)
84+
85+
result <- PEcAn.MA::meta_analysis_standalone(
86+
trait_data = trait_data,
87+
priors = priors,
88+
iterations = 3000,
89+
outdir = outdir,
90+
pft_name = "test.integration",
91+
random = TRUE,
92+
threshold = 1.2,
93+
use_ghs = TRUE
94+
)
95+
96+
# Verify return structure
97+
expect_type(result, "list")
98+
expect_named(result, c("trait.mcmc", "post.distns", "jagged.data"),
99+
ignore.order = FALSE)
100+
101+
# post.distns should be a data.frame
102+
expect_s3_class(result$post.distns, "data.frame")
103+
expect_true(all(c("distn", "parama", "paramb") %in% colnames(result$post.distns)))
104+
expect_true("SLA" %in% rownames(result$post.distns))
105+
106+
# jagged.data should be a named list
107+
expect_type(result$jagged.data, "list")
108+
expect_true("SLA" %in% names(result$jagged.data))
109+
expect_s3_class(result$jagged.data[["SLA"]], "data.frame")
110+
expect_true("Y" %in% colnames(result$jagged.data[["SLA"]]))
111+
})

0 commit comments

Comments
 (0)