Skip to content
Closed
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
eed755f
add a user warning when data is not lazy
valeriupredoi Aug 8, 2025
f13b642
add a test module for zarrs
valeriupredoi Aug 8, 2025
3f7cf4a
add tiny Zarr sample data
valeriupredoi Aug 8, 2025
91ce068
add test
valeriupredoi Aug 8, 2025
edd6c36
make warning more robust
valeriupredoi Aug 8, 2025
105031f
add Zarr3 test data
valeriupredoi Aug 8, 2025
16fb588
rm erroneous file
valeriupredoi Aug 8, 2025
f6c2766
add zarr3 test
valeriupredoi Aug 8, 2025
7846f77
make the warning better
valeriupredoi Aug 8, 2025
2d2c661
full test suite
valeriupredoi Aug 8, 2025
1796100
more general search meth
valeriupredoi Aug 8, 2025
ba10ec7
add extra deps and trigger GHA locally
valeriupredoi Aug 11, 2025
74a7c0e
run just a simple pytest session
valeriupredoi Aug 11, 2025
d7a2698
pop a conda env file
valeriupredoi Aug 11, 2025
16e28ca
use conda env file in GHA
valeriupredoi Aug 11, 2025
6fdb0b9
run just simple pytest
valeriupredoi Aug 11, 2025
1d6cddd
Merge branch 'main' into add_warning_and_tests
pp-mo Aug 11, 2025
01cce51
Update lib/ncdata/xarray.py
valeriupredoi Aug 13, 2025
b7ad9f4
unreference uneeded conda env file
valeriupredoi Aug 13, 2025
cc79ff1
restore GHA workflow to original
valeriupredoi Aug 13, 2025
e2f607f
add comment
valeriupredoi Aug 13, 2025
773de45
Update lib/ncdata/xarray.py
valeriupredoi Aug 13, 2025
edf06ac
remove user warning
valeriupredoi Aug 13, 2025
aba9fa6
remove test for warning
valeriupredoi Aug 13, 2025
5f96f32
Merge branch 'add_warning_and_tests' of https://github.com/valeriupre…
valeriupredoi Aug 13, 2025
1fb1db7
readd empty line
valeriupredoi Aug 13, 2025
75e24cf
unrun GHA on push
valeriupredoi Aug 13, 2025
b48088a
Update tests/integration/test_zarr_to_iris.py
valeriupredoi Aug 13, 2025
7365729
Update tests/integration/test_zarr_to_iris.py
valeriupredoi Aug 13, 2025
33a9c80
Update tests/integration/test_zarr_to_iris.py
valeriupredoi Aug 13, 2025
0518afe
shorten imports
valeriupredoi Aug 13, 2025
5039d07
Merge branch 'add_warning_and_tests' of https://github.com/valeriupre…
valeriupredoi Aug 13, 2025
8f4e4da
correct test for s3 connection
valeriupredoi Aug 13, 2025
1c21daa
add dependency
valeriupredoi Aug 13, 2025
ec91745
remove zarr sample data
valeriupredoi Aug 13, 2025
0d74e97
move zzarr sample data
valeriupredoi Aug 13, 2025
b6dd8cb
fix test for new sample data path
valeriupredoi Aug 13, 2025
f8dd7be
run pre-commit
valeriupredoi Aug 13, 2025
e7df68d
remove unify chunks
valeriupredoi Aug 14, 2025
f1a4c46
Merge branch 'main' into add_warning_and_tests
valeriupredoi Aug 14, 2025
2ee710b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
push:
branches:
- "main"
- "v*x"
- add_warning_and_tests
tags:
- "v*"
pull_request:
Expand Down Expand Up @@ -43,13 +43,14 @@ jobs:
with:
miniforge-version: latest
activate-environment: testenv
environment-file: environment.yml
python-version: ${{ matrix.version }}
channels: conda-forge
show-channel-urls: true

- name: "Install dependencies"
run: |
conda install --yes numpy pytest pytest-mock iris xarray filelock requests
conda install --yes numpy pytest pytest-mock iris xarray filelock requests zarr aiohttp

- name: "Install *latest* Iris"
run: |
Expand All @@ -74,11 +75,16 @@ jobs:
mkdir --parents ${GITHUB_WORKSPACE}/iris_test_data
mv iris-test-data-${IRIS_TEST_DATA_VERSION} ${GITHUB_WORKSPACE}/iris_test_data_download

- name: "Run tests"
#- name: "Run tests"
# if: matrix.session == 'tests'
# run: |
# ls ${GITHUB_WORKSPACE}/iris_test_data_download/test_data
# OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/iris_test_data_download/test_data PYTHONPATH=./tests:$PYTHONPATH pytest -v ./tests
- name: "Run simple pytest"
if: matrix.session == 'tests'
run: |
ls ${GITHUB_WORKSPACE}/iris_test_data_download/test_data
OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/iris_test_data_download/test_data PYTHONPATH=./tests:$PYTHONPATH pytest -v ./tests
pytest
pytest tests/integration/test_xarray_load_and_save_equivalence.py::test_save_direct_vs_viancdata[testdata____lambert_conformal__test_lcc]

- name: "Run doctests: Docs"
if: matrix.session == 'doctests-docs'
Expand Down
9 changes: 9 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
name: ncdata
channels:
- conda-forge
- nodefaults

dependencies:
- dask
- netCDF4
21 changes: 21 additions & 0 deletions lib/ncdata/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# Hopefully a minimal amount.
# The structure of an NcData object makes it fairly painless.
#
import warnings

from pathlib import Path
from typing import AnyStr, Union
Expand All @@ -21,6 +22,16 @@
from . import NcAttribute, NcData, NcDimension, NcVariable


def _raise_warning(var):
"""Raise a warnings.warning if variable data not lazy."""
warn_msg = (
f"Variable {var} has fully realized "
Comment thread
valeriupredoi marked this conversation as resolved.
Outdated
"data, if you need lazy data, then add "
"chunks={} as argument to Xarray open_dataset."
)
warnings.warn(warn_msg, UserWarning, stacklevel=2)


class _XarrayNcDataStore(NetCDF4DataStore):
"""
An adapter class presenting ncdata as an xarray datastore.
Expand Down Expand Up @@ -96,6 +107,16 @@ def store(

# Install variables, creating dimensions as we go.
for varname, var in new_variables.items():
if isinstance(var.data, np.ndarray):
# Zarr2 metadata
if "axis" not in var.attrs:
std_axes = ["latitude", "longitude", "time"]
if not list(set(var.attrs.values()) & set(std_axes)):
Comment thread
valeriupredoi marked this conversation as resolved.
Outdated
_raise_warning(var)
# Zarr3 metadata
else:
if var.attrs["axis"] not in ["X", "Y", "Z", "T"]:
_raise_warning(var)
if varname in self.ncdata.variables:
raise ValueError(f'duplicate variable : "{varname}"')

Expand Down
118 changes: 118 additions & 0 deletions tests/integration/test_zarr_to_iris.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Test conversion of remote and local Zarr store to iris Cube."""
from importlib.resources import files as importlib_files
from pathlib import Path

import iris
import pytest
import xarray as xr
import ncdata
import ncdata.iris_xarray
import zarr
Comment thread
valeriupredoi marked this conversation as resolved.
Outdated


def _return_kwargs():
    """Build the keyword arguments shared by every ``xr.open_dataset`` call."""
    # Times are decoded through cftime objects.
    cf_time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
    return {
        "consolidated": True,
        "decode_times": cf_time_coder,
        "engine": "zarr",
        # An empty mapping (rather than None) requests lazy, chunked loading.
        "chunks": {},
        "backend_kwargs": {},
    }


def _run_checks(cube):
"""Run some standard checks."""
assert cube.var_name == "q"
assert cube.standard_name == "specific_humidity"
assert cube.long_name is None
coords = cube.coords()
coord_names = [coord.standard_name for coord in coords]
assert "longitude" in coord_names
assert "latitude" in coord_names


def test_load_zarr2_local():
    """Test loading a Zarr2 store from local FS."""
    zarr_path = (
        Path(importlib_files("tests"))
        / "zarr-sample-data"
        / "example_field_0.zarr2"
    )

    # No unify_chunks() call here: it returns a new Dataset rather than
    # modifying in place, so calling it and discarding the result did nothing.
    zarr_xr = xr.open_dataset(zarr_path, **_return_kwargs())

    cubes = ncdata.iris_xarray.cubes_from_xarray(zarr_xr)

    assert len(cubes) == 1
    _run_checks(cubes[0])


def test_load_zarr3_local():
    """Test loading a Zarr3 store from local FS."""
    zarr_path = (
        Path(importlib_files("tests"))
        / "zarr-sample-data"
        / "example_field_0.zarr3"
    )

    # No unify_chunks() call here: it returns a new Dataset rather than
    # modifying in place, so calling it and discarding the result did nothing.
    zarr_xr = xr.open_dataset(zarr_path, **_return_kwargs())

    cubes = ncdata.iris_xarray.cubes_from_xarray(zarr_xr)

    assert len(cubes) == 1
    _run_checks(cubes[0])


def test_load_remote_zarr():
    """Test loading a remote Zarr store.

    This is a ~250MB compressed Zarr in an S3 bucket.
    Conversion is done fully lazily, by passing chunks={}
    to Xarray loader. Test takes ~3-4s and needs ~400MB res mem.
    """
    zarr_path = (
        "https://uor-aces-o.s3-ext.jc.rl.ac.uk/"
        "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3"
    )

    # No unify_chunks() call here: it returns a new Dataset rather than
    # modifying in place, so calling it and discarding the result did nothing.
    zarr_xr = xr.open_dataset(zarr_path, **_return_kwargs())

    cubes = ncdata.iris_xarray.cubes_from_xarray(zarr_xr)

    assert isinstance(cubes, iris.cube.CubeList)
    assert len(cubes) == 1
    # The point of the test: the data must not have been realized.
    assert cubes[0].has_lazy_data()


def test_load_remote_zarr_realized_data():
    """Test with the same remote Zarr store but chunks=None."""
    import re

    zarr_path = (
        "https://uor-aces-o.s3-ext.jc.rl.ac.uk/"
        "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3"
    )

    xr_kwargs = _return_kwargs()
    # chunks=None makes xarray load without dask, i.e. non-lazy data.
    xr_kwargs["chunks"] = None
    zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs)

    msg = (
        "has fully realized data, if you need lazy data, "
        "then add chunks={} as argument to Xarray open_dataset."
    )
    # ``match`` is a regular expression, so the literal text must be escaped:
    # the message contains "{}" and "." which are regex metacharacters.
    with pytest.warns(UserWarning, match=re.escape(msg)):
        ncdata.iris_xarray.cubes_from_xarray(zarr_xr)
3 changes: 3 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
Comment thread
pp-mo marked this conversation as resolved.
"Conventions": "CF-1.12"
}
3 changes: 3 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
171 changes: 171 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/.zmetadata
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
{
"metadata": {
".zattrs": {
"Conventions": "CF-1.12"
},
".zgroup": {
"zarr_format": 2
},
"lat/.zarray": {
"chunks": [
5
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5
],
"zarr_format": 2
},
"lat/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat"
],
"bounds": "lat_bnds",
"standard_name": "latitude",
"units": "degrees_north"
},
"lat_bnds/.zarray": {
"chunks": [
3,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
2
],
"zarr_format": 2
},
"lat_bnds/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat",
"bounds2"
]
},
"lon/.zarray": {
"chunks": [
8
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
8
],
"zarr_format": 2
},
"lon/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lon"
],
"bounds": "lon_bnds",
"standard_name": "longitude",
"units": "degrees_east"
},
"lon_bnds/.zarray": {
"chunks": [
4,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
8,
2
],
"zarr_format": 2
},
"lon_bnds/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lon",
"bounds2"
]
},
"q/.zarray": {
"chunks": [
3,
4
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
8
],
"zarr_format": 2
},
"q/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat",
"lon"
],
"cell_methods": "area: mean",
"coordinates": "time",
"project": "research",
"standard_name": "specific_humidity",
"units": "1"
},
"time/.zarray": {
"chunks": [],
"compressor": null,
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [],
"zarr_format": 2
},
"time/.zattrs": {
"_ARRAY_DIMENSIONS": [],
"standard_name": "time",
"units": "days since 2018-12-01"
}
},
"zarr_consolidated_format": 1
}
Loading
Loading