|
| 1 | +"""Common utilities for GeoZarr data API.""" |
| 2 | + |
| 3 | +import io |
| 4 | +import urllib |
| 5 | +import urllib.request |
| 6 | +from typing import Annotated, Any, Mapping, TypeVar |
| 7 | + |
| 8 | +from cf_xarray.utils import parse_cf_standard_name_table |
| 9 | +from pydantic import AfterValidator, BaseModel |
| 10 | +from pydantic.experimental.missing_sentinel import MISSING |
| 11 | +from typing_extensions import Protocol, runtime_checkable |
| 12 | + |
| 13 | +from eopf_geozarr.data_api.geozarr.types import ResamplingMethod |
| 14 | + |
| 15 | + |
class BaseDataArrayAttrs(BaseModel, extra="allow"):
    """
    Base attributes for a GeoZarr DataArray.

    Extra fields are permitted.

    Attributes
    ----------
    grid_mapping : str, optional
        Value of the ``grid_mapping`` attribute. Defaults to the ``MISSING``
        sentinel when the attribute is not provided.
    """

    # `check_grid_mapping` looks this value up among the members of the enclosing dataset.
    grid_mapping: str | MISSING = MISSING
| 25 | + |
| 26 | + |
class GridMappingAttrs(BaseModel, extra="allow"):
    """
    Grid mapping attributes for a GeoZarr grid mapping variable.

    Only ``grid_mapping_name`` is required. Extra fields are permitted, because
    additional attributes might be present depending on the type of grid mapping.

    Attributes
    ----------
    grid_mapping_name : str
        The name of the grid mapping.

    References
    ----------
    https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#grid-mappings-and-projections
    """

    grid_mapping_name: str
| 46 | + |
| 47 | + |
def get_cf_standard_names(url: str, timeout: float | None = 30.0) -> tuple[str, ...]:
    """
    Retrieve the CF standard names from a standard name table and return them as a tuple.

    Parameters
    ----------
    url : str
        Location of the CF standard name table (XML document).
    timeout : float | None, default 30.0
        Timeout in seconds for blocking network operations, forwarded to
        ``urllib.request.urlopen``. ``None`` disables the timeout.

    Returns
    -------
    tuple[str, ...]
        The keys of the table parsed by ``parse_cf_standard_name_table``.

    Raises
    ------
    urllib.error.URLError
        Propagated unchanged from ``urllib.request.urlopen`` when the request fails.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "eopf_geozarr"})

    # Errors from urlopen propagate to the caller as-is; there is nothing useful to add
    # here. The timeout keeps a stalled connection from blocking forever, which matters
    # because this function is called at import time.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        # Read the entire response body into memory while the connection is still open.
        content = response.read()

    _info, table, _aliases = parse_cf_standard_name_table(source=io.BytesIO(content))
    return tuple(table.keys())
| 64 | + |
| 65 | + |
# Location of the current CF standard name table (XML) in the cf-convention GitHub repository.
CF_STANDARD_NAME_URL = (
    "https://raw.githubusercontent.com/"
    "cf-convention/cf-convention.github.io/master/"
    "Data/cf-standard-names/current/src/cf-standard-name-table.xml"
)

# NOTE: this runs at import time, so importing this module does IO against GitHub.
# Consider storing this data locally instead if fetching it every time is problematic.
CF_STANDARD_NAMES = get_cf_standard_names(CF_STANDARD_NAME_URL)
| 75 | + |
| 76 | + |
def check_standard_name(name: str) -> str:
    """
    Validate a name against the CF standard names loaded in ``CF_STANDARD_NAMES``.

    Parameters
    ----------
    name : str
        The candidate standard name.

    Returns
    -------
    str
        ``name`` unchanged, when it is a known CF standard name.

    Raises
    ------
    ValueError
        If ``name`` is not one of the CF standard names.
    """
    # Guard clause: reject unknown names first, so the success path is a plain return.
    if name not in CF_STANDARD_NAMES:
        raise ValueError(
            f"Invalid standard name: {name}. This name was not found in the list of CF standard names."
        )
    return name


# A string type that pydantic validates with `check_standard_name` after parsing.
CFStandardName = Annotated[str, AfterValidator(check_standard_name)]
| 105 | + |
| 106 | + |
@runtime_checkable
class GroupLike(Protocol):
    """
    Protocol for group-like objects: anything that exposes ``members`` and ``attributes``.
    """

    # Mapping from member name to member object, or None.
    members: Mapping[str, Any] | None
    attributes: Any


# Type variable for functions that take a GroupLike and return the same type.
TGroupLike = TypeVar("TGroupLike", bound=GroupLike)
| 114 | + |
| 115 | + |
def check_valid_coordinates(model: TGroupLike) -> TGroupLike:
    """
    Check if the coordinates of the DataArrayLike objects listed in GroupLike objects are valid.

    For each DataArrayLike in the model, we check the dimensions associated with the DataArrayLike.
    For each dimension associated with a data variable, a member with the name of that
    dimension must be present in the members of the group, it must not be a group, and the
    length of its first axis must equal the length of the data variable along that dimension.

    Parameters
    ----------
    model : GroupLike
        An object that implements the GroupLike protocol.

    Returns
    -------
    GroupLike
        A GroupLike object with referentially valid coordinates.

    Raises
    ------
    ValueError
        If ``model.members`` is None, if a dimension name is not a member of the group,
        if the member named by a dimension is a group, or if the shapes do not match.
    """
    if model.members is None:
        raise ValueError("Model members cannot be None")

    # Only array-like members declare dimensions; everything else is skipped here.
    arrays: dict[str, DataArrayLike] = {
        k: v for k, v in model.members.items() if isinstance(v, DataArrayLike)
    }
    for key, array in arrays.items():
        for idx, dim in enumerate(array.array_dimensions):
            if dim not in model.members:
                raise ValueError(
                    f"Dimension '{dim}' for array '{key}' is not defined in the model members."
                )
            member = model.members[dim]
            if isinstance(member, GroupLike):
                # This branch fires when the coordinate is a group, so the message
                # must report a group where an array was expected.
                raise ValueError(
                    f"Dimension '{dim}' for array '{key}' should be an array. Found a group instead."
                )
            # Axis `idx` of the data variable is compared with the first axis of the coordinate.
            if member.shape[0] != array.shape[idx]:
                raise ValueError(
                    f"Dimension '{dim}' for array '{key}' has a shape mismatch: "
                    f"{member.shape[0]} != {array.shape[idx]}."
                )
    return model
| 157 | + |
| 158 | + |
@runtime_checkable
class DataArrayLike(Protocol):
    """
    This is a protocol that models the relevant properties of Zarr V2 and Zarr V3 DataArrays.
    """

    # Dimension names; `check_valid_coordinates` pairs entry i with `shape[i]`.
    @property
    def array_dimensions(self) -> tuple[str, ...]: ...

    shape: tuple[int, ...]
    attributes: BaseDataArrayAttrs
| 170 | + |
| 171 | + |
class TileMatrixLimit(BaseModel):
    """
    Minimum and maximum tile column and row indices for one tile matrix.
    """

    # NOTE(review): field names look like the OGC Two Dimensional Tile Matrix Set
    # "TileMatrixLimits" object — confirm against the spec. The TODOs on
    # `Multiscales.tile_matrix_limits` expect `tileMatrix` to match that mapping's key.
    tileMatrix: str
    minTileCol: int
    minTileRow: int
    maxTileCol: int
    maxTileRow: int
| 180 | + |
| 181 | + |
class TileMatrix(BaseModel):
    """
    A single tile matrix; ``TileMatrixSet.tileMatrices`` holds a tuple of these.
    """

    # NOTE(review): field names look like the OGC Two Dimensional Tile Matrix Set
    # "TileMatrix" object — confirm against the spec before relying on their semantics.
    id: str
    scaleDenominator: float
    cellSize: float
    pointOfOrigin: tuple[float, float]
    tileWidth: int
    tileHeight: int
    matrixWidth: int
    matrixHeight: int
| 191 | + |
| 192 | + |
class TileMatrixSet(BaseModel):
    """
    A set of tile matrices.

    Only ``id`` and ``tileMatrices`` are required; every other field defaults to None.
    """

    id: str
    title: str | None = None
    crs: str | None = None
    supportedCRS: str | None = None
    orderedAxes: tuple[str, str] | None = None
    tileMatrices: tuple[TileMatrix, ...]
| 200 | + |
| 201 | + |
class Multiscales(BaseModel, extra="allow"):
    """
    Multiscale metadata for a GeoZarr dataset.

    Extra fields are permitted.

    Attributes
    ----------
    tile_matrix_set : TileMatrixSet
        The tile matrix set for the multiscale dataset.
    resampling_method : ResamplingMethod
        The name of the resampling method for the multiscale dataset.
    tile_matrix_limits : dict[str, TileMatrixLimit] | None, optional
        The tile matrix limits for the multiscale dataset. Defaults to None.
    """

    tile_matrix_set: TileMatrixSet
    resampling_method: ResamplingMethod
    # TODO: ensure that the keys match tile_matrix_set.tileMatrices[$index].id
    # TODO: ensure that the keys match the tileMatrix attribute
    tile_matrix_limits: dict[str, TileMatrixLimit] | None = None
| 221 | + |
| 222 | + |
class DatasetAttrs(BaseModel, extra="allow"):
    """
    Attributes for a GeoZarr dataset.

    A dataset is a collection of DataArrays. This class models the attributes of a dataset.
    It declares no fields of its own; extra fields are permitted.
    """
| 231 | + |
| 232 | + |
@runtime_checkable
class DatasetLike(Protocol):
    """
    Protocol for dataset-like objects: anything that exposes a ``members`` mapping of arrays.
    """

    # Mapping from member name to array-like member, or None.
    members: Mapping[str, DataArrayLike] | None


# Type variable for functions that take a DatasetLike and return the same type.
TDataSetLike = TypeVar("TDataSetLike", bound=DatasetLike)
| 239 | + |
| 240 | + |
def check_grid_mapping(model: TDataSetLike) -> TDataSetLike:
    """
    Verify that every member's ``grid_mapping`` attribute names a member of the dataset.

    Parameters
    ----------
    model : DatasetLike
        An object that implements the DatasetLike protocol.

    Returns
    -------
    DatasetLike
        The same object, returned unchanged. A model whose ``members`` is None is
        returned without any checks.

    Raises
    ------
    ValueError
        If the ``grid_mapping`` value of any member is not a key of ``model.members``.
    """
    members = model.members
    if members is None:
        return model

    for member_name, member in members.items():
        grid_mapping = member.attributes.grid_mapping
        # NOTE(review): every member is checked, so a member whose grid_mapping was
        # never set is rejected as well — confirm that this is intended.
        if grid_mapping in members:
            continue
        msg = f"Grid mapping variable '{grid_mapping}' declared by {member_name} was not found in dataset members"
        raise ValueError(msg)
    return model
| 251 | + |
| 252 | + |
class MultiscaleGroupAttrs(BaseModel, extra="allow"):
    """
    Attributes for a Multiscale GeoZarr dataset.

    A Multiscale dataset is a collection of Datasets. Extra fields are permitted.

    Attributes
    ----------
    multiscales : Multiscales
        The multiscale metadata for the group.
    """

    multiscales: Multiscales
0 commit comments