Skip to content

Commit bacb58e

Browse files
authored
Merge pull request #85 from d-v-b/chore/update-multiscales
Refactor multiscales and update test JSON documents
2 parents 6fe358f + e1daab7 commit bacb58e

29 files changed

Lines changed: 859 additions & 478 deletions

src/eopf_geozarr/data_api/geozarr/common.py

Lines changed: 38 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
"""Common utilities for GeoZarr data API."""
22

3+
from __future__ import annotations
4+
35
import io
46
import urllib
57
import urllib.request
68
from dataclasses import dataclass
7-
from typing import Annotated, Any, Mapping, Self, TypeGuard, TypeVar
9+
from typing import Annotated, Any, Mapping, NotRequired, Self, TypeGuard, TypeVar
810
from urllib.error import URLError
911

1012
from cf_xarray.utils import parse_cf_standard_name_table
1113
from pydantic import AfterValidator, BaseModel, Field, model_validator
1214
from pydantic.experimental.missing_sentinel import MISSING
13-
from typing_extensions import Final, Literal, Protocol, runtime_checkable
15+
from typing_extensions import Final, Literal, Protocol, TypedDict, runtime_checkable
1416

1517
from eopf_geozarr.data_api.geozarr.projjson import ProjJSON
16-
from eopf_geozarr.data_api.geozarr.types import ResamplingMethod
1718

1819

1920
@dataclass(frozen=True)
@@ -28,6 +29,35 @@ class UNSET_TYPE:
2829
GEO_PROJ_VERSION: Final = "0.1"
2930

3031

32+
class ZarrConventionMetadata(BaseModel):
    """
    Metadata record identifying a Zarr convention.

    Every field defaults to the ``MISSING`` sentinel, but an instance is only
    accepted when at least one of ``uuid``, ``schema_url`` or ``spec_url`` is
    supplied.

    Attributes
    ----------
    uuid : str, optional
    schema_url : str, optional
    spec_url : str, optional
    name : str, optional
    description : str, optional
    """

    uuid: str | MISSING = MISSING
    schema_url: str | MISSING = MISSING
    spec_url: str | MISSING = MISSING
    name: str | MISSING = MISSING
    description: str | MISSING = MISSING

    @model_validator(mode="after")
    def ensure_identifiable(self) -> Self:
        """
        Reject instances that carry none of the identifying fields.

        Raises
        ------
        ValueError
            If ``uuid``, ``schema_url`` and ``spec_url`` are all ``MISSING``.
        """
        # Only these three fields count as identifiers; name and description
        # do not make a convention identifiable on their own.
        identifying_values = (self.uuid, self.schema_url, self.spec_url)
        if all(value is MISSING for value in identifying_values):
            raise ValueError(
                "At least one of uuid, schema_url, or spec_url must be provided."
            )

        return self
51+
52+
53+
class ZarrConventionMetadataJSON(TypedDict):
54+
uuid: NotRequired[str]
55+
schema_url: NotRequired[str]
56+
name: NotRequired[str]
57+
description: NotRequired[str]
58+
spec_url: NotRequired[str]
59+
60+
3161
class ProjAttrs(BaseModel, extra="allow"):
3262
"""
3363
Zarr attributes for coordinate reference system (CRS) encoding.
@@ -220,57 +250,6 @@ def array_dimensions(self) -> tuple[str, ...]: ...
220250
attributes: BaseDataArrayAttrs
221251

222252

223-
class TileMatrixLimit(BaseModel):
224-
""""""
225-
226-
tileMatrix: str
227-
minTileCol: int
228-
minTileRow: int
229-
maxTileCol: int
230-
maxTileRow: int
231-
232-
233-
class TileMatrix(BaseModel):
234-
id: str
235-
scaleDenominator: float
236-
cellSize: float
237-
pointOfOrigin: tuple[float, float]
238-
tileWidth: int
239-
tileHeight: int
240-
matrixWidth: int
241-
matrixHeight: int
242-
243-
244-
class TileMatrixSet(BaseModel):
245-
id: str
246-
title: str | None = None
247-
crs: str | None = None
248-
supportedCRS: str | None = None
249-
orderedAxes: tuple[str, str] | None = None
250-
tileMatrices: tuple[TileMatrix, ...]
251-
252-
253-
class TMSMultiscales(BaseModel, extra="allow"):
254-
"""
255-
Multiscale metadata for a GeoZarr dataset based on the OGC TileMatrixSet standard
256-
257-
Attributes
258-
----------
259-
tile_matrix_set : str
260-
The tile matrix set identifier for the multiscale dataset.
261-
resampling_method : ResamplingMethod
262-
The name of the resampling method for the multiscale dataset.
263-
tile_matrix_set_limits : dict[str, TileMatrixSetLimits] | None, optional
264-
The tile matrix set limits for the multiscale dataset.
265-
"""
266-
267-
tile_matrix_set: TileMatrixSet
268-
resampling_method: ResamplingMethod
269-
# TODO: ensure that the keys match tile_matrix_set.tileMatrices[$index].id
270-
# TODO: ensure that the keys match the tileMatrix attribute
271-
tile_matrix_limits: dict[str, TileMatrixLimit] | None = None
272-
273-
274253
class DatasetAttrs(BaseModel, extra="allow"):
275254
"""
276255
Attributes for a GeoZarr dataset.
@@ -295,25 +274,15 @@ def check_grid_mapping(model: TDataSetLike) -> TDataSetLike:
295274
"""
296275
if model.members is not None:
297276
for name, member in model.members.items():
298-
if member.attributes.grid_mapping not in model.members:
277+
if (
278+
hasattr(member.attributes, "grid_mapping")
279+
and isinstance(member.attributes.grid_mapping, str)
280+
and member.attributes.grid_mapping not in model.members
281+
):
299282
msg = f"Grid mapping variable '{member.attributes.grid_mapping}' declared by {name} was not found in dataset members"
300283
raise ValueError(msg)
301284
return model
302285

303286

304-
class MultiscaleGroupAttrs(BaseModel, extra="allow"):
305-
"""
306-
Attributes for Multiscale GeoZarr dataset.
307-
308-
A Multiscale dataset is a collection of Dataet
309-
310-
Attributes
311-
----------
312-
multiscales: MultiscaleAttrs
313-
"""
314-
315-
multiscales: TMSMultiscales
316-
317-
318287
def is_none(data: object) -> TypeGuard[None]:
319288
return data is None

src/eopf_geozarr/data_api/geozarr/multiscales.py

Lines changed: 0 additions & 90 deletions
This file was deleted.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
"""Zarr multiscales convention support."""
2+
3+
from .geozarr import MultiscaleGroupAttrs, MultiscaleMeta
4+
from .zcm import MULTISCALE_CONVENTION_METADATA, Multiscales, ScaleLevel, ScaleLevelJSON
5+
6+
__all__ = [
7+
"MultiscaleGroupAttrs",
8+
"MultiscaleMeta",
9+
"MULTISCALE_CONVENTION_METADATA",
10+
"Multiscales",
11+
"ScaleLevel",
12+
"ScaleLevelJSON",
13+
]
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from __future__ import annotations
2+
3+
from typing import Self
4+
5+
from pydantic import BaseModel, model_validator
6+
from pydantic.experimental.missing_sentinel import MISSING
7+
from typing_extensions import NotRequired, TypedDict
8+
9+
from eopf_geozarr.data_api.geozarr.common import ZarrConventionMetadata
10+
11+
from . import tms, zcm
12+
13+
14+
class MultiscaleMeta(BaseModel):
    """
    Multiscale metadata for a GeoZarr dataset.

    The fields may describe a ZCM multiscale, a TMS multiscale, or a mix of
    both. Every field defaults to the ``MISSING`` sentinel.

    Attributes
    ----------
    layout : tuple[zcm.ScaleLevel, ...], optional
        When present, the whole model is also validated as ``zcm.Multiscales``.
    resampling_method : str, optional
    tile_matrix_set : tms.TileMatrixSet, optional
        When present, the whole model is also validated as ``tms.Multiscales``.
    tile_matrix_limits : dict[str, tms.TileMatrixLimit], optional
    """

    layout: tuple[zcm.ScaleLevel, ...] | MISSING = MISSING
    resampling_method: str | MISSING = MISSING
    tile_matrix_set: tms.TileMatrixSet | MISSING = MISSING
    tile_matrix_limits: dict[str, tms.TileMatrixLimit] | MISSING = MISSING

    @model_validator(mode="after")
    def valid_zcm(self) -> Self:
        """
        Check the ZCM view of this metadata when a ``layout`` is given.

        The dumped fields are passed to ``zcm.Multiscales`` purely for
        validation; the constructed object is discarded.
        """
        if self.layout is MISSING:
            return self

        dumped = self.model_dump()
        zcm.Multiscales(**dumped)
        return self

    @model_validator(mode="after")
    def valid_tms(self) -> Self:
        """
        Check the TMS view of this metadata when a ``tile_matrix_set`` is given.

        The dumped fields are passed to ``tms.Multiscales`` purely for
        validation; the constructed object is discarded.
        """
        if self.tile_matrix_set is MISSING:
            return self

        dumped = self.model_dump()
        tms.Multiscales(**dumped)
        return self
44+
45+
46+
class MultiscaleGroupAttrs(BaseModel):
    """
    Attributes for Multiscale GeoZarr dataset.

    A Multiscale dataset is a Zarr group containing multiscale metadata.
    That metadata can be either in the Zarr Convention Metadata (ZCM) format, or
    the Tile Matrix Set (TMS) format, or both.

    Attributes
    ----------
    zarr_conventions : tuple[ZarrConventionMetadata, ...], optional
        Convention metadata records. When present, the multiscale metadata is
        validated as ZCM multiscales.
    multiscales : MultiscaleMeta
        The multiscale metadata itself.
    """

    zarr_conventions: tuple[ZarrConventionMetadata, ...] | MISSING = MISSING
    multiscales: MultiscaleMeta

    # Validated views of ``multiscales``, filled in by ``valid_zcm_and_tms``.
    _zcm_multiscales: zcm.Multiscales | None = None
    _tms_multiscales: tms.Multiscales | None = None

    @model_validator(mode="after")
    def valid_zcm_and_tms(self) -> Self:
        """
        Ensure that the ZCM metadata, if present, is valid, and that TMS metadata, if present,
        is valid, and that at least one of the two is present.

        Raises
        ------
        ValueError
            If neither a ZCM nor a TMS multiscale could be built.
        """
        meta = self.multiscales

        # NOTE(review): ZCM presence is keyed on ``zarr_conventions`` rather
        # than on ``multiscales.layout`` -- confirm this is intended.
        if self.zarr_conventions is not MISSING:
            self._zcm_multiscales = zcm.Multiscales(
                layout=meta.layout,
                resampling_method=meta.resampling_method,
            )

        # TMS metadata counts as present when either TMS-specific field is set.
        # ``MultiscaleMeta.valid_tms`` keys on ``tile_matrix_set``, so keying
        # only on ``tile_matrix_limits`` here would silently ignore a tile
        # matrix set that has no limits. Limits without a set still reach
        # ``tms.Multiscales`` so that it can reject them.
        tms_present = (
            meta.tile_matrix_set is not MISSING
            or meta.tile_matrix_limits is not MISSING
        )
        if tms_present:
            candidate_fields = {
                "tile_matrix_set": meta.tile_matrix_set,
                "tile_matrix_limits": meta.tile_matrix_limits,
                "resampling_method": meta.resampling_method,
            }
            # Absent fields are omitted (not passed as MISSING) so that
            # ``tms.Multiscales`` applies its own defaults or requirements.
            supplied_fields = {
                key: value
                for key, value in candidate_fields.items()
                if value is not MISSING
            }
            self._tms_multiscales = tms.Multiscales(**supplied_fields)

        if self._tms_multiscales is None and self._zcm_multiscales is None:
            raise ValueError(
                "Either ZCM multiscales or TMS multiscales must be present"
            )
        return self

    @property
    def multiscale_meta(self) -> MultiscaleMetaDict:
        """
        Return the validated multiscale objects keyed by format.

        The result holds a ``"tms"`` entry and/or a ``"zcm"`` entry, depending
        on which of the two was built during validation.
        """
        out: MultiscaleMetaDict = {}
        if self._tms_multiscales is not None:
            out["tms"] = self._tms_multiscales
        if self._zcm_multiscales is not None:
            out["zcm"] = self._zcm_multiscales
        return out
96+
97+
98+
class MultiscaleMetaDict(TypedDict):
    """
    Validated multiscale objects keyed by metadata format.

    Both keys are optional: ``tms`` holds a ``tms.Multiscales`` and ``zcm``
    holds a ``zcm.Multiscales``.
    """

    tms: NotRequired[tms.Multiscales]
    zcm: NotRequired[zcm.Multiscales]

0 commit comments

Comments
 (0)