From 0f0de9dc82e02491515f717839097bbb50f60b28 Mon Sep 17 00:00:00 2001 From: Tejas Date: Sun, 31 May 2026 16:15:28 -0500 Subject: [PATCH] Add PEcAn-to-ILAMB GeoTIFF conversion pipeline (GH-4019) Adds a Python pipeline in PEcAn.benchmark to convert downscaled SDA reanalysis GeoTIFF ensemble maps into ILAMB-compatible CF-1.8 netCDF for cVeg, cSoil, mrsol, and lai, with unit conversions and 13 unit tests. --- CHANGELOG.md | 1 + CITATION.cff | 1 + modules/benchmark/NEWS.md | 4 + modules/benchmark/inst/ilamb/README.md | 105 +++++++ .../inst/ilamb/convert_geotiff_to_ilamb.py | 269 ++++++++++++++++++ modules/benchmark/inst/ilamb/test_convert.py | 162 +++++++++++ 6 files changed, 542 insertions(+) create mode 100644 modules/benchmark/inst/ilamb/README.md create mode 100644 modules/benchmark/inst/ilamb/convert_geotiff_to_ilamb.py create mode 100644 modules/benchmark/inst/ilamb/test_convert.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 55550127e09..17f279b811c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha ## Unreleased ### Added +- Added `inst/ilamb/` pipeline in PEcAn.benchmark to convert downscaled SDA reanalysis GeoTIFFs into ILAMB-compatible CF netCDF for carbon-cycle benchmarking (#4019). - Added PEcAn.PEPRMT model, including a demo run with example data - Add `format_try_for_ma()` and `try_trait_mapping()` to `PEcAn.data.remote` to convert trait data from the external TRY database into the tabular format required by the PEcAn meta-analysis module (#3717). - Add function `qsub_sda()` for submitting SDA batch jobs by splitting a large number of sites into multiple small groups of sites (#3634). diff --git a/CITATION.cff b/CITATION.cff index 23bb09dd918..b50e09b7227 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -30,6 +30,7 @@ authors: - given-names: David LeBauer affiliation: University of Arizona orcid: 'https://orcid.org/0000-0001-7228-053X' + - given-names: Tejas Dahiya - given-names: Michael Dietze orcid: 'https://orcid.org/0000-0002-2324-2518' affiliation: Boston University diff --git a/modules/benchmark/NEWS.md b/modules/benchmark/NEWS.md index 521e5560b30..c09da5ed37e 100644 --- a/modules/benchmark/NEWS.md +++ b/modules/benchmark/NEWS.md @@ -1,5 +1,9 @@ # PEcAn.benchmark 1.7.5.9000 +## Added + +* Added Python pipeline (`inst/ilamb/`) to convert PEcAn SDA reanalysis GeoTIFFs to ILAMB-compatible CF netCDF (cVeg, cSoil, mrsol, lai) with unit tests (#4019). + ## Fixed * `metric_PPMC()`: added `use = "pairwise.complete.obs"` to `stats::cor()` so the Pearson correlation is computed on available pairs rather than returning `NA` whenever any observation is missing. This matches the behaviour of `metric_cor()` in the same package. diff --git a/modules/benchmark/inst/ilamb/README.md b/modules/benchmark/inst/ilamb/README.md new file mode 100644 index 00000000000..3be450b9d94 --- /dev/null +++ b/modules/benchmark/inst/ilamb/README.md @@ -0,0 +1,105 @@ +# PEcAn-to-ILAMB Conversion + +Converts PEcAn SDA carbon reanalysis outputs (downscaled GeoTIFF ensemble maps) +into ILAMB-compatible CF-convention netCDF files for benchmarking against +TRENDY, CMIP, and observational datasets. + +## Overview + +The PEcAn North American carbon reanalysis (Zhang et al. 2026) provides +downscaled 1 km ensemble maps of four state variables. This tool reads those +GeoTIFFs, computes the ensemble mean, coarsens to ILAMB's default 0.5 degree +resolution, applies unit conversions, and writes CF-1.8 compliant netCDF that +ILAMB can ingest directly. + +## Input + +GeoTIFF ensemble maps organized as: + +``` +//_/ensemble___.tiff +``` + +- 13 years (2012-2024), annual snapshots fixed to July 15 +- 4 variables, 100 ensemble members each +- 1 km resolution (9360 x 19080), EPSG:4326 + +## Variable mapping and unit conversions + +| PEcAn variable | CMOR name | Source units | Conversion | ILAMB units | +|----------------|-----------|--------------|------------|-------------| +| AbvGrndWood | cVeg | Mg C ha-1 | x 0.1 | kg m-2 | +| TotSoilCarb | cSoil | kg C m-2 | none | kg m-2 | +| SoilMoistFrac | mrsol | vol. percent | x 9.98 | kg m-2 | +| LAI | lai | m2 m-2 | none | m2 m-2 | + +**Aboveground biomass** is already a carbon density (Mg C ha-1), so the +conversion to kg m-2 is purely unit scaling: 1 Mg ha-1 = 0.1 kg m-2. + +**Soil moisture** is volumetric water content expressed as percent over the +0-100 cm root zone. Conversion to mass per area: + +``` +kg m-2 = percent / 100 x 1.0 m depth x 998 kg/m3 = percent x 9.98 +``` + +The 0-100 cm root-zone depth was confirmed with the dataset author +(D. Zhang, pers. comm.), and matches the depth span of the ILAMB Wang2021 +soil-moisture benchmark (0-10, 10-30, 30-50, 50-100 cm layers). + +## Output + +CF-1.8 compliant netCDF on a 0.5 degree regular grid (156 x 318) covering the +North American study area (7-85N, 179-20W): + +- `//_.nc` (one file per year) +- `/.nc` (merged multi-year file) + +Latitude is monotonically increasing (south to north); coordinates are rounded +to 0.01 degrees; time is encoded as days since 1850-01-01 with full-year +bounds. + +## Usage + +```bash +module load python3 gcc/13.2.0 +export PATH=$HOME/.local/bin:$PATH + +# Convert all variables, all years +python convert_geotiff_to_ilamb.py \ + --input_dir /path/to/NA_SDA_maps_zipped \ + --output_dir /path/to/output + +# Single variable / year range +python convert_geotiff_to_ilamb.py --variables AbvGrndWood --years 2014 2014 + +# Skip the merge step +python convert_geotiff_to_ilamb.py --skip-merge +``` + +On an HPC system, reading 100 full-resolution members per variable exceeds +interactive CPU limits; submit the full run as a batch job. + +## Testing + +```bash +pytest test_convert.py -v +``` + +13 tests cover file existence, CMOR variable naming, all four unit +conversions, output grid shape, CF-1.8 compliance, latitude direction, +spatial coverage, chronological multi-year merging, and ILAMB `ModelResult` +loading. Set `ILAMB_OUTPUT_DIR` to point the tests at your output directory. + +## Dependencies + +`numpy`, `xarray`, `rasterio`, `netCDF4`, and `ILAMB` (for the loading test). + +## Notes + +- Fluxes (GPP, NEE) are not included; the downscaled product covers only the + four state variables above. Flux benchmarking will draw from the raw SDA + netCDF outputs in a later contribution. +- A known structural discontinuity exists in the underlying LandTrendr input + around 2017-2018 (see the ORNL DAAC documentation); it is preserved as-is in + the converted output rather than adjusted here. diff --git a/modules/benchmark/inst/ilamb/convert_geotiff_to_ilamb.py b/modules/benchmark/inst/ilamb/convert_geotiff_to_ilamb.py new file mode 100644 index 00000000000..21913902d02 --- /dev/null +++ b/modules/benchmark/inst/ilamb/convert_geotiff_to_ilamb.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +""" +convert_geotiff_to_ilamb.py + +Convert PEcAn SDA carbon reanalysis outputs (downscaled GeoTIFF ensemble maps) +into ILAMB-compatible CF-convention netCDF files. + +The PEcAn North American carbon reanalysis (Zhang et al. 2026) provides +downscaled 1 km ensemble maps of four state variables. This tool: + + 1. Reads the 100 GeoTIFF ensemble members for each variable/year + 2. Computes the ensemble mean (memory-safe, one member at a time) + 3. Coarsens from 1 km to ILAMB's default 0.5 degree resolution + 4. Applies unit conversions to CF/CMOR standards + 5. Writes CF-1.8 compliant netCDF that ILAMB can ingest directly + +Input GeoTIFF layout: + //_/ensemble___.tiff + +Output netCDF layout: + //_.nc (annual files) + /.nc (merged multi-year) + +Usage: + python convert_geotiff_to_ilamb.py --input_dir DIR --output_dir DIR + python convert_geotiff_to_ilamb.py --variables AbvGrndWood --years 2014 2014 + +Author: Tejas Dahiya (Google Summer of Code 2026, PEcAn Project) +""" + +import argparse +import glob +import os +from datetime import datetime + +import numpy as np +import rasterio +import xarray as xr + + +# ----------------------------------------------------------------------------- +# Configuration +# ----------------------------------------------------------------------------- + +# Maps each PEcAn GeoTIFF variable to its CMOR name, target units, the +# multiplicative scale factor needed to reach those units, and a descriptive +# long name. Unit conversions are documented in the README. +# +# AbvGrndWood : Mg C ha-1 -> kg m-2 requires x 0.1 +# TotSoilCarb : kg C m-2 -> kg m-2 no conversion +# SoilMoistFrac: vol. % -> kg m-2 requires x 9.98 +# (percent / 100 x 1.0 m depth x 998 kg/m3 water density) +# LAI : m2 m-2 -> m2 m-2 no conversion +VARIABLE_MAP = { + "AbvGrndWood": {"cmor": "cVeg", "units": "kg m-2", "scale": 0.1, + "long_name": "Carbon Mass in Vegetation"}, + "TotSoilCarb": {"cmor": "cSoil", "units": "kg m-2", "scale": 1.0, + "long_name": "Carbon Mass in Soil Pool"}, + "SoilMoistFrac": {"cmor": "mrsol", "units": "kg m-2", "scale": 9.98, + "long_name": "Total Soil Moisture Content"}, + "LAI": {"cmor": "lai", "units": "m2 m-2", "scale": 1.0, + "long_name": "Leaf Area Index"}, +} + +# Coarsening factor: native 0.00833 deg (1 km) x 60 = 0.5 deg (ILAMB default). +# The native grid (9360 x 19080) divides evenly: 156 x 318. +COARSEN_FACTOR = 60 + +# GeoTIFF NoData sentinel per the ORNL DAAC documentation. Files on the SCC +# use NaN, but we guard against -9999 as well for robustness. +NODATA_VALUE = -9999 + + +# ----------------------------------------------------------------------------- +# Core processing +# ----------------------------------------------------------------------------- + +def process_variable_year(base_dir, year, pecan_var, output_dir): + """Convert one variable for one year to an ILAMB-compatible netCDF. + + Reads all ensemble GeoTIFFs, computes the ensemble mean one member at a + time (to stay within memory limits), applies the unit conversion, coarsens + to 0.5 degrees, orients latitude south-to-north, and writes a CF-1.8 file. + + Parameters + ---------- + base_dir : str + Root directory of the GeoTIFF ensemble maps. + year : int + Year to process (e.g. 2014). + pecan_var : str + PEcAn variable name (key in VARIABLE_MAP). + output_dir : str + Directory to write the netCDF output. + """ + info = VARIABLE_MAP[pecan_var] + cmor_name = info["cmor"] + var_dir = os.path.join(base_dir, str(year), f"{pecan_var}_{year}") + + if not os.path.isdir(var_dir): + print(f" WARNING: {var_dir} not found, skipping") + return + + files = sorted( + os.path.join(var_dir, f) + for f in os.listdir(var_dir) + if f.endswith((".tiff", ".tif")) + ) + n_ens = len(files) + print(f" {pecan_var} -> {cmor_name}: {n_ens} ensemble files") + + # Grid metadata from the first member (all members share the same grid). + with rasterio.open(files[0]) as src: + shape = src.shape + bounds = src.bounds + + # Accumulate the ensemble mean with a running sum / count so we never hold + # more than one full-resolution member in memory at once. + running_sum = np.zeros(shape, dtype=np.float64) + running_count = np.zeros(shape, dtype=np.float64) + + for i, f in enumerate(files): + with rasterio.open(f) as src: + data = src.read(1).astype(np.float64) + data = np.where(data == NODATA_VALUE, np.nan, data) + valid = np.isfinite(data) + running_sum[valid] += data[valid] + running_count[valid] += 1 + if (i + 1) % 25 == 0: + print(f" Read {i + 1}/{n_ens} members") + + ens_mean = np.where(running_count > 0, running_sum / running_count, np.nan) + + # Apply unit conversion to reach CMOR units. + ens_mean *= info["scale"] + + valid_vals = ens_mean[np.isfinite(ens_mean)] + print(f" Mean range: {valid_vals.min():.3f} to " + f"{valid_vals.max():.3f} {info['units']}") + + # Coarsen 1 km -> 0.5 deg by block-averaging COARSEN_FACTOR^2 pixels, + # ignoring NaN (ocean / non-land) within each block. + nr = shape[0] // COARSEN_FACTOR + nc = shape[1] // COARSEN_FACTOR + trimmed = ens_mean[:nr * COARSEN_FACTOR, :nc * COARSEN_FACTOR] + reshaped = trimmed.reshape(nr, COARSEN_FACTOR, nc, COARSEN_FACTOR) + with np.errstate(all="ignore"): + coarsened = np.nanmean(reshaped, axis=(1, 3)) + + # GeoTIFF rows run north-to-south; flip so latitude increases south-to-north + # (the convention expected by ILAMB and most CF tools). Coordinates are + # rounded to remove sub-millidegree floating point noise from the bounds. + coarsened = np.flip(coarsened, axis=0) + lat = np.round( + np.flip(np.linspace(bounds.top - 0.25, bounds.bottom + 0.25, nr)), 2) + lon = np.round( + np.linspace(bounds.left + 0.25, bounds.right - 0.25, nc), 2) + + # Time: annual snapshot fixed to July 15 (per ORNL documentation), encoded + # as days since 1850-01-01 with full-year bounds. + time_val = (datetime(year, 7, 15) - datetime(1850, 1, 1)).days + tb_start = (datetime(year, 1, 1) - datetime(1850, 1, 1)).days + tb_end = (datetime(year, 12, 31) - datetime(1850, 1, 1)).days + + ds = xr.Dataset( + { + cmor_name: xr.DataArray( + data=coarsened[np.newaxis, :, :].astype(np.float32), + dims=["time", "lat", "lon"], + attrs={"units": info["units"], "long_name": info["long_name"]}, + ), + "time_bounds": xr.DataArray( + data=np.array([[tb_start, tb_end]], dtype=np.float64), + dims=["time", "nb"], + ), + }, + coords={ + "time": ("time", [time_val], { + "units": "days since 1850-01-01 00:00:00", + "calendar": "standard", + "bounds": "time_bounds", + }), + "lat": ("lat", lat, {"units": "degrees_north"}), + "lon": ("lon", lon, {"units": "degrees_east"}), + }, + attrs={ + "Conventions": "CF-1.8", + "source": ("PEcAn SDA Reanalysis (Zhang et al. 2026)"), + }, + ) + + out_var_dir = os.path.join(output_dir, cmor_name) + os.makedirs(out_var_dir, exist_ok=True) + outpath = os.path.join(out_var_dir, f"{cmor_name}_{year}.nc") + ds.to_netcdf(outpath, encoding={cmor_name: {"zlib": True, "complevel": 4}}) + print(f" Saved: {outpath}") + + +def merge_annual_files(output_dir): + """Merge per-year files into one multi-year netCDF per variable.""" + for info in VARIABLE_MAP.values(): + cmor_name = info["cmor"] + var_dir = os.path.join(output_dir, cmor_name) + if not os.path.isdir(var_dir): + continue + files = sorted(glob.glob(os.path.join(var_dir, f"{cmor_name}_????.nc"))) + if len(files) < 2: + continue + print(f"\nMerging {len(files)} files for {cmor_name}...") + ds = xr.open_mfdataset(files, combine="by_coords") + merged_path = os.path.join(output_dir, f"{cmor_name}.nc") + ds.to_netcdf(merged_path, + encoding={cmor_name: {"zlib": True, "complevel": 4}}) + print(f" Saved: {merged_path}") + ds.close() + + +# ----------------------------------------------------------------------------- +# Command-line interface +# ----------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Convert PEcAn SDA GeoTIFF outputs to ILAMB netCDF") + parser.add_argument( + "--input_dir", + default=("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/" + "SDA_8k_site/NA_SDA_maps_zipped"), + help="Root directory of GeoTIFF ensemble maps") + parser.add_argument( + "--output_dir", + default="/projectnb/dietzelab/tdahiya/ilamb_outputs/pecan_v2", + help="Directory to write ILAMB-compatible netCDF") + parser.add_argument( + "--variables", nargs="+", default=None, + help="PEcAn variable names to convert (default: all four)") + parser.add_argument( + "--years", nargs=2, type=int, default=None, metavar=("START", "END"), + help="Inclusive year range to process (default: 2012 2024)") + parser.add_argument( + "--skip-merge", action="store_true", + help="Skip merging annual files into multi-year files") + args = parser.parse_args() + + variables = args.variables or list(VARIABLE_MAP.keys()) + years = (range(args.years[0], args.years[1] + 1) + if args.years else range(2012, 2025)) + + print("GeoTIFF -> ILAMB Pipeline") + print(f" Input: {args.input_dir}") + print(f" Output: {args.output_dir}") + print(f" Variables: {variables}") + print(f" Years: {list(years)}") + + os.makedirs(args.output_dir, exist_ok=True) + + for year in years: + print(f"\n=== Year {year} ===") + for var in variables: + process_variable_year(args.input_dir, year, var, args.output_dir) + + if not args.skip_merge: + merge_annual_files(args.output_dir) + + print("\nDone!") + + +if __name__ == "__main__": + main() diff --git a/modules/benchmark/inst/ilamb/test_convert.py b/modules/benchmark/inst/ilamb/test_convert.py new file mode 100644 index 00000000000..86f10545653 --- /dev/null +++ b/modules/benchmark/inst/ilamb/test_convert.py @@ -0,0 +1,162 @@ +""" +test_convert.py + +Unit tests for convert_geotiff_to_ilamb.py. + +Validates the converted ILAMB-compatible netCDF outputs: file existence, +CMOR variable naming, unit conversions, output grid shape, CF-1.8 compliance, +latitude orientation, spatial coverage, multi-year merging, and that ILAMB's +ModelResult can load the results. + +Run: + module load python3 gcc/13.2.0 + export PATH=$HOME/.local/bin:$PATH + pytest test_convert.py -v + +Author: Tejas Dahiya (Google Summer of Code 2026, PEcAn Project) +""" + +import os + +import numpy as np +import xarray as xr + +# Directory holding the converted outputs. Override with the ILAMB_OUTPUT_DIR +# environment variable if your outputs live elsewhere. +OUTPUT_DIR = os.environ.get( + "ILAMB_OUTPUT_DIR", + "/projectnb/dietzelab/tdahiya/ilamb_outputs/pecan_v2", +) + +CMOR_VARS = ["cVeg", "cSoil", "mrsol", "lai"] +YEARS = range(2012, 2025) + + +def test_all_files_exist(): + """Every variable has 13 annual files plus a merged multi-year file.""" + for var in CMOR_VARS: + merged = os.path.join(OUTPUT_DIR, f"{var}.nc") + assert os.path.exists(merged), f"Missing merged file: {merged}" + for year in YEARS: + annual = os.path.join(OUTPUT_DIR, var, f"{var}_{year}.nc") + assert os.path.exists(annual), f"Missing annual file: {annual}" + + +def test_variable_names(): + """Each output uses the correct CMOR variable name.""" + for cmor_name in CMOR_VARS: + ds = xr.open_dataset( + os.path.join(OUTPUT_DIR, cmor_name, f"{cmor_name}_2014.nc")) + assert cmor_name in ds.data_vars, f"{cmor_name} not in dataset" + ds.close() + + +def test_output_shape(): + """Coarsened grid is the expected 0.5 degree resolution (156 x 318).""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "cVeg", "cVeg_2014.nc")) + assert ds.sizes["time"] == 1 + assert ds.sizes["lat"] == 156 + assert ds.sizes["lon"] == 318 + ds.close() + + +def test_cveg_unit_conversion(): + """cVeg (AGB) is converted from Mg C ha-1 to kg m-2 (x 0.1).""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "cVeg", "cVeg_2014.nc")) + valid = ds["cVeg"].values[np.isfinite(ds["cVeg"].values)] + assert valid.min() >= 0, "cVeg has negative values" + assert valid.max() < 60, f"cVeg max {valid.max()} too high (units?)" + assert valid.mean() > 0.1, "cVeg mean suspiciously low" + assert ds["cVeg"].attrs["units"] == "kg m-2" + ds.close() + + +def test_csoil_values(): + """cSoil (SOC) stays in kg m-2 with a physically reasonable range.""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "cSoil", "cSoil_2014.nc")) + valid = ds["cSoil"].values[np.isfinite(ds["cSoil"].values)] + assert valid.min() > 0, "cSoil has zero/negative values" + assert valid.max() < 300, f"cSoil max {valid.max()} unreasonably high" + assert ds["cSoil"].attrs["units"] == "kg m-2" + ds.close() + + +def test_lai_values(): + """lai stays in m2 m-2 with a physically reasonable range.""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "lai", "lai_2014.nc")) + valid = ds["lai"].values[np.isfinite(ds["lai"].values)] + assert valid.min() >= 0, "LAI has negative values" + assert valid.max() < 15, f"LAI max {valid.max()} unreasonably high" + assert ds["lai"].attrs["units"] == "m2 m-2" + ds.close() + + +def test_mrsol_values(): + """mrsol is converted from volumetric percent to kg m-2 (x 9.98).""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "mrsol", "mrsol_2014.nc")) + valid = ds["mrsol"].values[np.isfinite(ds["mrsol"].values)] + assert valid.min() >= 0, "mrsol has negative values" + assert valid.max() < 600, f"mrsol max {valid.max()} too high for kg m-2" + assert valid.mean() > 50, f"mrsol mean {valid.mean()} too low for kg m-2" + assert ds["mrsol"].attrs["units"] == "kg m-2" + ds.close() + + +def test_cf_time_encoding(): + """Time is CF-encoded as days since 1850-01-01 with bounds.""" + ds = xr.open_dataset( + os.path.join(OUTPUT_DIR, "cVeg", "cVeg_2014.nc"), decode_times=False) + assert "time" in ds.coords + assert ds.time.attrs.get("units") == "days since 1850-01-01 00:00:00" + assert ds.time.attrs.get("calendar") == "standard" + assert "time_bounds" in ds.data_vars + ds.close() + + +def test_cf_coordinates(): + """Coordinate attributes and global Conventions are CF-compliant.""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "cVeg", "cVeg_2014.nc")) + assert ds.lat.attrs.get("units") == "degrees_north" + assert ds.lon.attrs.get("units") == "degrees_east" + assert ds.attrs.get("Conventions") == "CF-1.8" + ds.close() + + +def test_lat_increasing(): + """Latitude is monotonically increasing (south to north).""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "cVeg", "cVeg_2014.nc")) + assert ds.lat.values[1] > ds.lat.values[0], "Latitude not increasing" + assert ds.lat.values[0] == 7.25, f"First lat {ds.lat.values[0]} != 7.25" + assert ds.lat.values[-1] == 84.75, f"Last lat {ds.lat.values[-1]} != 84.75" + ds.close() + + +def test_spatial_coverage(): + """Grid covers the North American study area (7-85N, 179-20W).""" + ds = xr.open_dataset(os.path.join(OUTPUT_DIR, "cVeg", "cVeg_2014.nc")) + assert float(ds.lat.min()) < 10, "Southern boundary too far north" + assert float(ds.lat.max()) > 80, "Northern boundary too far south" + assert float(ds.lon.min()) < -170, "Western boundary too far east" + assert float(ds.lon.max()) > -25, "Eastern boundary too far west" + ds.close() + + +def test_merged_file_years(): + """Merged file spans all 13 years in chronological order.""" + ds = xr.open_dataset( + os.path.join(OUTPUT_DIR, "cVeg.nc"), decode_times=False) + assert ds.sizes["time"] == 13, f"Expected 13 years, got {ds.sizes['time']}" + times = ds.time.values + assert all(times[i] < times[i + 1] for i in range(len(times) - 1)), \ + "Merged time axis is not monotonically increasing" + ds.close() + + +def test_ilamb_loads(): + """ILAMB's ModelResult can load all four converted variables.""" + from ILAMB.ModelResult import ModelResult + m = ModelResult(OUTPUT_DIR, modelname="PEcAn") + assert m.name == "PEcAn" + expected = set(CMOR_VARS) + found = set(m.variables.keys()) & expected + assert found == expected, f"Missing variables: {expected - found}"