Skip to content

Commit fbd2e93

Browse files
pp-mo and valeriupredoi authored
Zarr to iris Cube: add tests and Zarr sample data (v2) (#152)
* add a user warning when data is not lazy * add a test module for zarrs * add tiny Zarr sample data * add test * make warning more robust * add Zarr3 test data * rm erroneous file * add zarr3 test * make the warning better * full test suite * more general search method * add extra deps and trigger GHA locally * run just a simple pytest session * pop a conda env file * use conda env file in GHA * run just simple pytest * Update lib/ncdata/xarray.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * unreference unneeded conda env file * restore GHA workflow to original * add comment * Update lib/ncdata/xarray.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * remove user warning * remove test for warning * re-add empty line * unrun GHA on push * Update tests/integration/test_zarr_to_iris.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * Update tests/integration/test_zarr_to_iris.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * Update tests/integration/test_zarr_to_iris.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * shorten imports * correct test for s3 connection * add dependency * remove zarr sample data * move zarr sample data * fix test for new sample data path * run pre-commit * remove unify chunks * Added changenote. --------- Co-authored-by: Valeriu Predoi <valeriu.predoi@gmail.com>
1 parent 1d36d41 commit fbd2e93

42 files changed

Lines changed: 1021 additions & 2 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci-tests.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ on:
1111
push:
1212
branches:
1313
- "main"
14-
- "v*x"
1514
tags:
1615
- "v*"
1716
pull_request:
@@ -49,7 +48,7 @@ jobs:
4948

5049
- name: "Install dependencies"
5150
run: |
52-
conda install --yes numpy pytest pytest-mock iris xarray filelock requests
51+
conda install --yes numpy pytest pytest-mock iris xarray filelock requests zarr aiohttp fsspec
5352
5453
- name: "Install *latest* Iris"
5554
run: |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
@valeriupredoi added test for Zarr conversion to Iris cubes.
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""Test conversion of remote and local Zarr store to iris Cube."""
2+
3+
from importlib.resources import files as importlib_files
4+
from pathlib import Path
5+
6+
import fsspec
7+
import iris
8+
import pytest
9+
import xarray as xr
10+
from ncdata.iris_xarray import cubes_from_xarray as conversion_func
11+
12+
13+
def _return_kwargs():
    """Return the keyword arguments used for every ``xr.open_dataset`` call.

    A fresh dict is built on each call.  Times are decoded through a
    cftime-based ``CFDatetimeCoder``, the Zarr engine is selected with
    consolidated metadata, and ``chunks={}`` is passed so the tests can
    exercise lazy loading.
    """
    return {
        "consolidated": True,
        "decode_times": xr.coders.CFDatetimeCoder(use_cftime=True),
        "engine": "zarr",
        "chunks": {},
        "backend_kwargs": {},
    }
22+
23+
24+
def _run_checks(cube):
    """Assert that ``cube`` carries the metadata of the sample field.

    The cube must be named ``q`` with standard name ``specific_humidity``,
    have no long name, and have coordinates whose standard names include
    both ``longitude`` and ``latitude``.
    """
    assert cube.var_name == "q"
    assert cube.standard_name == "specific_humidity"
    assert cube.long_name is None

    found_names = {coord.standard_name for coord in cube.coords()}
    for required in ("longitude", "latitude"):
        assert required in found_names
33+
34+
35+
def test_load_zarr2_local():
    """Convert the local Zarr2 sample store and check the resulting cube."""
    sample_store = Path(importlib_files("tests")).joinpath(
        "testdata",
        "zarr-sample-data",
        "example_field_0.zarr2",
    )

    dataset = xr.open_dataset(sample_store, **_return_kwargs())
    cubes = conversion_func(dataset)

    # The sample store holds a single data variable, so exactly one cube.
    assert len(cubes) == 1
    _run_checks(cubes[0])
52+
53+
54+
def test_load_zarr3_local():
    """Convert the local Zarr3 sample store and check the resulting cube."""
    sample_store = Path(importlib_files("tests")).joinpath(
        "testdata",
        "zarr-sample-data",
        "example_field_0.zarr3",
    )

    dataset = xr.open_dataset(sample_store, **_return_kwargs())
    cubes = conversion_func(dataset)

    # The sample store holds a single data variable, so exactly one cube.
    assert len(cubes) == 1
    _run_checks(cubes[0])
71+
72+
73+
def _is_url_ok(url):
    """Return True if ``url`` looks like a reachable Zarr store over HTTP.

    The store counts as reachable when one of its top-level metadata files
    can be opened: ``zarr.json`` (Zarr3) is tried first, then
    ``.zmetadata`` (Zarr2).  Any exception raised while opening is treated
    as "not available", so this is a best-effort probe and never raises
    for an unreachable URL.

    Parameters
    ----------
    url : str or path-like
        Base URL of the Zarr store, without a trailing slash.

    Returns
    -------
    bool
        True if either metadata file could be opened, False otherwise.
    """
    fs = fsspec.filesystem("http")
    for metadata_name in ("zarr.json", ".zmetadata"):  # Zarr3, then Zarr2
        try:
            handle = fs.open(str(url) + "/" + metadata_name, "rb")
        except Exception:  # noqa: BLE001, PERF203, S112
            # Unreachable or missing: fall through to the next candidate.
            continue
        # Only the ability to open matters; close straight away so the
        # probe does not leak an open file object.
        handle.close()
        return True

    return False
85+
86+
87+
# URL of the remote Zarr3 store used by the remote-loading test.
S3_TEST_PATH = (
    "https://uor-aces-o.s3-ext.jc.rl.ac.uk/"
    "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3"
)
# Probed once, when this module is imported; the result is used as the
# skip condition for the remote test.
_S3_accessible = _is_url_ok(S3_TEST_PATH)
92+
93+
94+
@pytest.mark.skipif(not _S3_accessible, reason="S3 url not accessible")
def test_load_remote_zarr():
    """Convert a remote Zarr store to iris cubes without realising its data.

    The store is a ~250MB compressed Zarr held in an S3 bucket.  Passing
    ``chunks={}`` to the Xarray loader keeps the conversion fully lazy.
    The test takes ~3-4s and needs ~400MB of resident memory.
    """
    remote_dataset = xr.open_dataset(S3_TEST_PATH, **_return_kwargs())
    cubes = conversion_func(remote_dataset)

    assert isinstance(cubes, iris.cube.CubeList)
    assert len(cubes) == 1
    # The payload must still be lazy after conversion.
    assert cubes[0].has_lazy_data()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"Conventions": "CF-1.12"
3+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
{
2+
"metadata": {
3+
".zattrs": {
4+
"Conventions": "CF-1.12"
5+
},
6+
".zgroup": {
7+
"zarr_format": 2
8+
},
9+
"lat/.zarray": {
10+
"chunks": [
11+
5
12+
],
13+
"compressor": {
14+
"blocksize": 0,
15+
"clevel": 5,
16+
"cname": "lz4",
17+
"id": "blosc",
18+
"shuffle": 1
19+
},
20+
"dtype": "<f8",
21+
"fill_value": "NaN",
22+
"filters": null,
23+
"order": "C",
24+
"shape": [
25+
5
26+
],
27+
"zarr_format": 2
28+
},
29+
"lat/.zattrs": {
30+
"_ARRAY_DIMENSIONS": [
31+
"lat"
32+
],
33+
"bounds": "lat_bnds",
34+
"standard_name": "latitude",
35+
"units": "degrees_north"
36+
},
37+
"lat_bnds/.zarray": {
38+
"chunks": [
39+
3,
40+
2
41+
],
42+
"compressor": {
43+
"blocksize": 0,
44+
"clevel": 5,
45+
"cname": "lz4",
46+
"id": "blosc",
47+
"shuffle": 1
48+
},
49+
"dtype": "<f8",
50+
"fill_value": "NaN",
51+
"filters": null,
52+
"order": "C",
53+
"shape": [
54+
5,
55+
2
56+
],
57+
"zarr_format": 2
58+
},
59+
"lat_bnds/.zattrs": {
60+
"_ARRAY_DIMENSIONS": [
61+
"lat",
62+
"bounds2"
63+
]
64+
},
65+
"lon/.zarray": {
66+
"chunks": [
67+
8
68+
],
69+
"compressor": {
70+
"blocksize": 0,
71+
"clevel": 5,
72+
"cname": "lz4",
73+
"id": "blosc",
74+
"shuffle": 1
75+
},
76+
"dtype": "<f8",
77+
"fill_value": "NaN",
78+
"filters": null,
79+
"order": "C",
80+
"shape": [
81+
8
82+
],
83+
"zarr_format": 2
84+
},
85+
"lon/.zattrs": {
86+
"_ARRAY_DIMENSIONS": [
87+
"lon"
88+
],
89+
"bounds": "lon_bnds",
90+
"standard_name": "longitude",
91+
"units": "degrees_east"
92+
},
93+
"lon_bnds/.zarray": {
94+
"chunks": [
95+
4,
96+
2
97+
],
98+
"compressor": {
99+
"blocksize": 0,
100+
"clevel": 5,
101+
"cname": "lz4",
102+
"id": "blosc",
103+
"shuffle": 1
104+
},
105+
"dtype": "<f8",
106+
"fill_value": "NaN",
107+
"filters": null,
108+
"order": "C",
109+
"shape": [
110+
8,
111+
2
112+
],
113+
"zarr_format": 2
114+
},
115+
"lon_bnds/.zattrs": {
116+
"_ARRAY_DIMENSIONS": [
117+
"lon",
118+
"bounds2"
119+
]
120+
},
121+
"q/.zarray": {
122+
"chunks": [
123+
3,
124+
4
125+
],
126+
"compressor": {
127+
"blocksize": 0,
128+
"clevel": 5,
129+
"cname": "lz4",
130+
"id": "blosc",
131+
"shuffle": 1
132+
},
133+
"dtype": "<f8",
134+
"fill_value": "NaN",
135+
"filters": null,
136+
"order": "C",
137+
"shape": [
138+
5,
139+
8
140+
],
141+
"zarr_format": 2
142+
},
143+
"q/.zattrs": {
144+
"_ARRAY_DIMENSIONS": [
145+
"lat",
146+
"lon"
147+
],
148+
"cell_methods": "area: mean",
149+
"coordinates": "time",
150+
"project": "research",
151+
"standard_name": "specific_humidity",
152+
"units": "1"
153+
},
154+
"time/.zarray": {
155+
"chunks": [],
156+
"compressor": null,
157+
"dtype": "<f8",
158+
"fill_value": "NaN",
159+
"filters": null,
160+
"order": "C",
161+
"shape": [],
162+
"zarr_format": 2
163+
},
164+
"time/.zattrs": {
165+
"_ARRAY_DIMENSIONS": [],
166+
"standard_name": "time",
167+
"units": "days since 2018-12-01"
168+
}
169+
},
170+
"zarr_consolidated_format": 1
171+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"chunks": [
3+
5
4+
],
5+
"compressor": {
6+
"blocksize": 0,
7+
"clevel": 5,
8+
"cname": "lz4",
9+
"id": "blosc",
10+
"shuffle": 1
11+
},
12+
"dtype": "<f8",
13+
"fill_value": "NaN",
14+
"filters": null,
15+
"order": "C",
16+
"shape": [
17+
5
18+
],
19+
"zarr_format": 2
20+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"_ARRAY_DIMENSIONS": [
3+
"lat"
4+
],
5+
"bounds": "lat_bnds",
6+
"standard_name": "latitude",
7+
"units": "degrees_north"
8+
}
56 Bytes
Binary file not shown.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"chunks": [
3+
3,
4+
2
5+
],
6+
"compressor": {
7+
"blocksize": 0,
8+
"clevel": 5,
9+
"cname": "lz4",
10+
"id": "blosc",
11+
"shuffle": 1
12+
},
13+
"dtype": "<f8",
14+
"fill_value": "NaN",
15+
"filters": null,
16+
"order": "C",
17+
"shape": [
18+
5,
19+
2
20+
],
21+
"zarr_format": 2
22+
}

0 commit comments

Comments
 (0)