Skip to content

Commit edded0c

Browse files
authored
Strengthen grid parser format checks (#1478)
* Strengthen grid parser format checks * Update grid parser docstring
1 parent c4e2758 commit edded0c

File tree

2 files changed

+149
-21
lines changed

2 files changed

+149
-21
lines changed

test/io/test_utils.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import numpy as np
2+
import pytest
3+
import xarray as xr
4+
5+
from uxarray.io.utils import _parse_grid_type
6+
7+
8+
@pytest.mark.parametrize(
    ("path_args", "expected_spec"),
    [
        (("exodus", "outCSne8", "outCSne8.g"), "Exodus"),
        (("scrip", "outCSne8", "outCSne8.nc"), "Scrip"),
        (("ugrid", "outCSne30", "outCSne30.ug"), "UGRID"),
        (("mpas", "QU", "mesh.QU.1920km.151026.nc"), "MPAS"),
        (("esmf", "ne30", "ne30pg3.grid.nc"), "ESMF"),
        (("geos-cs", "c12", "test-c12.native.nc4"), "GEOS-CS"),
        (("icon", "R02B04", "icon_grid_0010_R02B04_G.nc"), "ICON"),
        (("fesom", "soufflet-netcdf", "grid.nc"), "FESOM2"),
    ],
)
def test_parse_grid_type_detects_supported_formats(gridpath, path_args, expected_spec):
    """Each sample grid file must be classified as its expected format."""
    # ``gridpath`` is a fixture that turns the path components into a file path.
    grid_file = gridpath(*path_args)
    with xr.open_dataset(grid_file) as opened:
        detected = _parse_grid_type(opened)

    spec, lon, lat = detected

    assert spec == expected_spec
    # None of these cases is expected to report coordinate names.
    assert lon is None
    assert lat is None
28+
29+
30+
def test_parse_grid_type_detects_structured_grid():
    """1-D lon/lat coordinates with ``standard_name`` attrs parse as structured."""
    # Each coordinate is given in xarray's ``(dims, data, attrs)`` tuple form.
    coords = {
        "lon": ("lon", np.array([0.0, 1.0, 2.0]), {"standard_name": "longitude"}),
        "lat": ("lat", np.array([-1.0, 0.0, 1.0]), {"standard_name": "latitude"}),
    }
    ds = xr.Dataset(coords=coords)

    spec, lon_found, lat_found = _parse_grid_type(ds)

    assert spec == "Structured"
    # Structured grids also report which coordinates hold longitude/latitude.
    assert lon_found == "lon"
    assert lat_found == "lat"
48+
49+
50+
@pytest.mark.parametrize(
    "dataset",
    [
        # A single SCRIP-style variable without the rest of the required set.
        xr.Dataset({"grid_center_lon": ("grid_size", [0.0])}),
        # Exodus-style split coordinates with no connectivity signal.
        xr.Dataset(
            {
                "coordx": ("num_nodes", [0.0, 1.0]),
                "coordy": ("num_nodes", [0.0, 1.0]),
            }
        ),
        # MPAS-style connectivity with none of its companion variables.
        xr.Dataset({"verticesOnCell": (("nCells", "nVert"), [[1, 2, 3]])}),
    ],
)
def test_parse_grid_type_rejects_incomplete_format_signals(dataset):
    """A lone format marker must not be enough to identify a grid format."""
    expected_message = "Could not recognize dataset format"
    with pytest.raises(RuntimeError, match=expected_message):
        _parse_grid_type(dataset)

uxarray/io/utils.py

Lines changed: 84 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,51 +7,114 @@
77
from uxarray.io._ugrid import _is_ugrid, _read_ugrid
88

99

10+
def _is_exodus(dataset: xr.Dataset) -> bool:
11+
"""Check whether a dataset looks like an Exodus mesh."""
12+
has_packed_coords = "coord" in dataset
13+
has_split_coords = {"coordx", "coordy"}.issubset(dataset.variables)
14+
has_connectivity = any(
15+
name.startswith("connect") for name in dataset.variables
16+
) or any("num_nod_per_el" in dim for dim in dataset.dims)
17+
18+
return has_connectivity and (has_packed_coords or has_split_coords)
19+
20+
21+
def _is_scrip(dataset: xr.Dataset) -> bool:
    """Check whether a dataset looks like an unstructured SCRIP grid.

    All four center/corner longitude and latitude variables must be present,
    together with at least one of ``grid_imask``, ``grid_rank`` or
    ``grid_area``.
    """
    known_names = dataset.variables
    for needed in (
        "grid_center_lon",
        "grid_center_lat",
        "grid_corner_lon",
        "grid_corner_lat",
    ):
        if needed not in known_names:
            return False

    # The core variables alone are not enough; require one extra marker.
    for marker in ("grid_imask", "grid_rank", "grid_area"):
        if marker in dataset:
            return True
    return False
34+
35+
36+
def _is_mpas(dataset: xr.Dataset) -> bool:
    """Check whether a dataset looks like an MPAS grid.

    ``verticesOnCell`` is mandatory, and it must be accompanied by at least
    one complete group of companion variables.
    """
    if "verticesOnCell" in dataset:
        known_names = dataset.variables
        companions = [
            ("nEdgesOnCell",),
            ("latCell", "lonCell"),
            ("latVertex", "lonVertex"),
            ("xCell", "yCell", "zCell"),
            ("xVertex", "yVertex", "zVertex"),
        ]
        # A group only counts when every one of its members is present.
        for members in companions:
            if all(member in known_names for member in members):
                return True
    return False
50+
51+
52+
def _is_esmf(dataset: xr.Dataset) -> bool:
    """Check whether a dataset looks like an ESMF mesh.

    Requires both the ``maxNodePElement`` dimension and the ``elementConn``
    variable.
    """
    if "maxNodePElement" not in dataset.dims:
        return False
    return "elementConn" in dataset
55+
56+
57+
def _is_geos_cs(dataset: xr.Dataset) -> bool:
    """Check whether a dataset looks like a GEOS cube-sphere grid.

    Requires the ``nf``, ``YCdim`` and ``XCdim`` dimensions as well as the
    ``corner_lons`` and ``corner_lats`` variables.
    """
    dim_sizes = dataset.sizes
    if not all(dim in dim_sizes for dim in ("nf", "YCdim", "XCdim")):
        return False

    known_names = dataset.variables
    return "corner_lons" in known_names and "corner_lats" in known_names
65+
66+
67+
def _is_icon(dataset: xr.Dataset) -> bool:
    """Check whether a dataset looks like an ICON grid.

    Every one of ``vertex_of_cell``, ``clon``, ``clat``, ``vlon`` and
    ``vlat`` must be present among the dataset's variables.
    """
    known_names = dataset.variables
    needed = ("vertex_of_cell", "clon", "clat", "vlon", "vlat")
    return all(name in known_names for name in needed)
71+
72+
73+
def _is_fesom2(dataset: xr.Dataset) -> bool:
    """Check whether a dataset looks like a FESOM2 grid.

    The only signal used is the presence of ``triag_nodes``.
    """
    marker = "triag_nodes"
    return marker in dataset
76+
77+
1078
def _parse_grid_type(dataset):
11-
"""Checks input and contents to determine grid type. Supports detection of
12-
UGrid, SCRIP, Exodus, ESMF, and shape file.
79+
"""Determine the grid type represented by an input dataset.
1380
1481
Parameters
1582
----------
1683
dataset : Xarray dataset
17-
Xarray dataset of the grid
84+
Xarray dataset containing grid topology information.
1885
1986
Returns
2087
-------
21-
mesh_type : str
22-
File type of the file, ug, exo, scrip or shp
88+
tuple[str, str | None, str | None]
89+
A 3-tuple of ``(mesh_type, lon_name, lat_name)``. ``mesh_type`` is one
90+
of ``"Exodus"``, ``"Scrip"``, ``"UGRID"``, ``"MPAS"``, ``"ESMF"``,
91+
``"GEOS-CS"``, ``"ICON"``, ``"FESOM2"``, or ``"Structured"``. The
92+
longitude and latitude coordinate names are only returned for structured
93+
grids and are otherwise ``None``.
2394
2495
Raises
2596
------
2697
RuntimeError
27-
If invalid file type
28-
ValueError
29-
If file is not in UGRID format
98+
If the dataset format cannot be recognized.
3099
"""
31100

32101
_structured, lon_name, lat_name = _is_structured(dataset)
33102

34-
if "coord" in dataset:
35-
# exodus with coord or coordx
36-
mesh_type = "Exodus"
37-
elif "coordx" in dataset:
103+
if _is_exodus(dataset):
38104
mesh_type = "Exodus"
39-
elif "grid_center_lon" in dataset:
40-
# scrip with grid_center_lon
105+
elif _is_scrip(dataset):
41106
mesh_type = "Scrip"
42107
elif _is_ugrid(dataset):
43-
# ugrid topology is present
44108
mesh_type = "UGRID"
45-
elif "verticesOnCell" in dataset:
109+
elif _is_mpas(dataset):
46110
mesh_type = "MPAS"
47-
elif "maxNodePElement" in dataset.dims:
111+
elif _is_esmf(dataset):
48112
mesh_type = "ESMF"
49-
elif all(key in dataset.sizes for key in ["nf", "YCdim", "XCdim"]):
50-
# expected dimensions for a GEOS cube sphere grid
113+
elif _is_geos_cs(dataset):
51114
mesh_type = "GEOS-CS"
52-
elif "vertex_of_cell" in dataset:
115+
elif _is_icon(dataset):
53116
mesh_type = "ICON"
54-
elif "triag_nodes" in dataset:
117+
elif _is_fesom2(dataset):
55118
mesh_type = "FESOM2"
56119
elif _structured:
57120
mesh_type = "Structured"

0 commit comments

Comments
 (0)