|
| 1 | +"""Tests for the eopf-geozarr package.""" |
| 2 | + |
| 3 | +import pathlib |
| 4 | + |
| 5 | +import xarray as xr |
| 6 | + |
| 7 | + |
| 8 | +def _verify_basic_structure(output_path: pathlib.Path, groups: list[str]) -> None: |
| 9 | + """Verify the basic Zarr store structure.""" |
| 10 | + print("Verifying basic structure...") |
| 11 | + |
| 12 | + # Check that the main zarr store exists |
| 13 | + assert (output_path / "zarr.json").exists() |
| 14 | + |
| 15 | + # Check that each group has been created |
| 16 | + for group in groups: |
| 17 | + group_path = output_path / group.lstrip("/") |
| 18 | + assert group_path.exists(), f"Group {group} not found" |
| 19 | + assert (group_path / "zarr.json").exists(), f"Group {group} missing zarr.json" |
| 20 | + |
| 21 | + # Check that level 0 (native resolution) exists |
| 22 | + level_0_path = group_path / "0" |
| 23 | + assert level_0_path.exists(), f"Level 0 not found for {group}" |
| 24 | + assert (level_0_path / "zarr.json").exists(), ( |
| 25 | + f"Level 0 missing zarr.json for {group}" |
| 26 | + ) |
| 27 | + |
| 28 | + |
def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> None:
    """
    Verify GeoZarr specification compliance following the notebook verification.

    This replicates the compliance checks from the notebook:
    - _ARRAY_DIMENSIONS attributes on all arrays
    - CF standard names properly set
    - Grid mapping attributes reference correct CRS variables
    - GeoTransform attributes in grid_mapping variables
    - Native CRS preservation

    Parameters
    ----------
    output_path : pathlib.Path
        Root of the converted Zarr store.
    group : str
        Group path within the store (a leading "/" is tolerated).

    Raises
    ------
    AssertionError
        If any mandatory GeoZarr attribute is missing or has the wrong value.
    """
    print(f"Verifying GeoZarr-spec compliance for {group}...")

    # Open the native resolution dataset (level 0). A context manager closes
    # the store even when an assertion below fails — the previous explicit
    # ds.close() at the end of the function was skipped on any failure,
    # leaking the open store.
    group_path = str(output_path / group.lstrip("/") / "0")
    with xr.open_dataset(group_path, engine="zarr", zarr_format=3) as ds:
        print(f" Variables: {list(ds.data_vars)}")
        print(f" Coordinates: {list(ds.coords)}")

        # Check 1: _ARRAY_DIMENSIONS attributes (required by GeoZarr spec)
        for var_name in ds.data_vars:
            if var_name != "spatial_ref":  # Skip grid_mapping variable
                assert "_ARRAY_DIMENSIONS" in ds[var_name].attrs, (
                    f"Missing _ARRAY_DIMENSIONS for {var_name} in {group}"
                )
                # The attribute must mirror the variable's actual dimensions.
                assert ds[var_name].attrs["_ARRAY_DIMENSIONS"] == list(
                    ds[var_name].dims
                ), f"Incorrect _ARRAY_DIMENSIONS for {var_name} in {group}"
                print(
                    f" ✅ _ARRAY_DIMENSIONS: {ds[var_name].attrs['_ARRAY_DIMENSIONS']}"
                )

        # Check coordinates
        for coord_name in ds.coords:
            if coord_name not in ["spatial_ref"]:  # Skip CRS coordinate
                assert "_ARRAY_DIMENSIONS" in ds[coord_name].attrs, (
                    f"Missing _ARRAY_DIMENSIONS for coordinate {coord_name} in {group}"
                )
                print(
                    f" ✅ {coord_name} _ARRAY_DIMENSIONS: {ds[coord_name].attrs['_ARRAY_DIMENSIONS']}"
                )

        # Check 2: CF standard names (required by GeoZarr spec)
        for var_name in ds.data_vars:
            if var_name != "spatial_ref":
                assert "standard_name" in ds[var_name].attrs, (
                    f"Missing standard_name for {var_name} in {group}"
                )
                # All reflectance bands are expected to share this CF name.
                assert (
                    ds[var_name].attrs["standard_name"] == "toa_bidirectional_reflectance"
                ), f"Incorrect standard_name for {var_name} in {group}"
                print(f" ✅ standard_name: {ds[var_name].attrs['standard_name']}")

        # Check 3: Grid mapping attributes (required by GeoZarr spec)
        for var_name in ds.data_vars:
            if var_name != "spatial_ref":
                assert "grid_mapping" in ds[var_name].attrs, (
                    f"Missing grid_mapping for {var_name} in {group}"
                )
                assert ds[var_name].attrs["grid_mapping"] == "spatial_ref", (
                    f"Incorrect grid_mapping for {var_name} in {group}"
                )
                print(f" ✅ grid_mapping: {ds[var_name].attrs['grid_mapping']}")

        # Check 4: Spatial reference variable (as in notebook)
        assert "spatial_ref" in ds, f"Missing spatial_ref variable in {group}"
        assert "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs, (
            f"Missing _ARRAY_DIMENSIONS for spatial_ref in {group}"
        )
        # spatial_ref is a scalar CRS holder, so its dimension list is empty.
        assert ds["spatial_ref"].attrs["_ARRAY_DIMENSIONS"] == [], (
            f"Incorrect _ARRAY_DIMENSIONS for spatial_ref in {group}"
        )
        print(
            f" ✅ spatial_ref _ARRAY_DIMENSIONS: {ds['spatial_ref'].attrs['_ARRAY_DIMENSIONS']}"
        )

        # Check 5: GeoTransform attribute (from notebook verification) —
        # advisory only, so a missing attribute warns rather than fails.
        if "GeoTransform" in ds["spatial_ref"].attrs:
            print(f" ✅ GeoTransform: {ds['spatial_ref'].attrs['GeoTransform']}")
        else:
            print(" ⚠️ Missing GeoTransform attribute")

        # Check 6: CRS information (from notebook verification) — advisory.
        if "crs_wkt" in ds["spatial_ref"].attrs:
            print(" ✅ CRS WKT present")
        else:
            print(" ⚠️ Missing CRS WKT")

        # Check 7: Coordinate standard names (from notebook verification).
        # Only validated when the coordinate and its standard_name exist.
        for coord in ["x", "y"]:
            if coord in ds.coords:
                if "standard_name" in ds[coord].attrs:
                    expected_name = (
                        "projection_x_coordinate"
                        if coord == "x"
                        else "projection_y_coordinate"
                    )
                    assert ds[coord].attrs["standard_name"] == expected_name, (
                        f"Incorrect standard_name for {coord} coordinate in {group}"
                    )
                    print(
                        f" ✅ {coord} standard_name: {ds[coord].attrs['standard_name']}"
                    )
| 136 | + |
def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None:
    """Verify multiscale structure following notebook patterns.

    Checks that at least one overview level exists, that level 0 (native
    resolution) is present, that every level contains data variables with
    spatial dimensions, and that consecutive levels follow COG-style /2
    downsampling.

    Raises
    ------
    AssertionError
        If the level layout, dataset contents, or downsampling ratios are
        not as expected.
    """
    print(f"Verifying multiscale structure for {group}...")

    group_path = output_path / group.lstrip("/")

    # Check that at least one level exists (level 0 is always created).
    # Overview levels are stored as numerically named subdirectories.
    level_dirs = [d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()]
    assert len(level_dirs) >= 1, (
        f"Expected at least 1 overview level for {group}, found {len(level_dirs)}"
    )
    print(
        f" Found {len(level_dirs)} overview levels: {sorted([d.name for d in level_dirs])}"
    )

    # For larger datasets, expect multiple levels. The context manager closes
    # the store even if an assertion fires before the explicit close would
    # have run (the original ds_0.close() leaked on failure).
    level_0_path = str(group_path / "0")
    with xr.open_dataset(level_0_path, engine="zarr", zarr_format=3) as ds_0:
        native_size = min(ds_0.sizes["y"], ds_0.sizes["x"])

    if native_size >= 512:  # Larger datasets should have multiple levels
        assert len(level_dirs) >= 2, (
            f"Expected multiple overview levels for large dataset {group} (size {native_size}), found {len(level_dirs)}"
        )
    else:
        print(f" Small dataset (size {native_size}), single level is acceptable")

    # Verify level 0 (native resolution) exists
    assert (group_path / "0").exists(), f"Level 0 missing for {group}"

    # Check that each level contains valid data
    level_shapes = {}
    for level_dir in sorted(level_dirs, key=lambda x: int(x.name)):
        level_num = int(level_dir.name)
        level_path = str(level_dir)
        with xr.open_dataset(level_path, engine="zarr", zarr_format=3) as ds:
            # Verify that the dataset has data variables
            assert len(ds.data_vars) > 0, f"No data variables in {level_path}"

            # Verify that spatial dimensions exist
            assert "x" in ds.dims and "y" in ds.dims, (
                f"Missing spatial dimensions in {level_path}"
            )

            # Store shape for progression verification. Use ds.sizes: indexing
            # Dataset.dims by name is deprecated in xarray (and the level-0
            # check above already uses ds.sizes).
            level_shapes[level_num] = (ds.sizes["y"], ds.sizes["x"])
            print(f" Level {level_num}: {level_shapes[level_num]} pixels")

    # Verify that overview levels have progressively smaller dimensions (COG-style /2 downsampling)
    if len(level_shapes) > 1:
        for level in sorted(level_shapes.keys())[1:]:
            prev_level = level - 1
            if prev_level in level_shapes:
                prev_height, prev_width = level_shapes[prev_level]
                curr_height, curr_width = level_shapes[level]

                # Check that dimensions are roughly half (allowing for rounding)
                height_ratio = prev_height / curr_height
                width_ratio = prev_width / curr_width

                assert 1.8 <= height_ratio <= 2.2, (
                    f"Height ratio between level {prev_level} and {level} should be ~2, got {height_ratio:.2f}"
                )
                assert 1.8 <= width_ratio <= 2.2, (
                    f"Width ratio between level {prev_level} and {level} should be ~2, got {width_ratio:.2f}"
                )

                print(
                    f" Level {prev_level}→{level} downsampling ratio: {height_ratio:.2f}x{width_ratio:.2f}"
                )
| 212 | + |
def _verify_rgb_data_access(output_path: pathlib.Path, groups: list[str]) -> None:
    """Verify RGB data access patterns from the notebook.

    Finds groups that expose the Sentinel-2 RGB bands (b04/b03/b02), then
    loads those bands from up to the first three overview levels and checks
    the arrays are non-empty and shape-consistent.

    Raises
    ------
    AssertionError
        If RGB band shapes disagree or any band array is empty.
    """
    print("Verifying RGB data access patterns...")

    # Find groups with RGB bands (following notebook logic). Context managers
    # close each store even when an assertion fails — the original explicit
    # ds.close() calls were skipped on failure, leaking open stores.
    rgb_groups = []
    for group in groups:
        group_path_str = str(output_path / group.lstrip("/") / "0")
        with xr.open_dataset(group_path_str, engine="zarr", zarr_format=3) as ds:
            # Check for RGB bands (b04=red, b03=green, b02=blue for Sentinel-2)
            has_rgb = all(band in ds.data_vars for band in ["b04", "b03", "b02"])
            if has_rgb:
                rgb_groups.append(group)
                print(f" Found RGB bands in {group}")

    # Test data access for RGB groups (following notebook access patterns)
    for group in rgb_groups:
        print(f" Testing data access for {group}...")

        # Test access to different overview levels (as in notebook)
        group_path = output_path / group.lstrip("/")
        level_dirs = [
            d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()
        ]

        # Test first 3 levels only to keep the check fast.
        for level_dir in sorted(level_dirs, key=lambda x: int(x.name))[:3]:
            level_num = int(level_dir.name)
            level_path = str(level_dir)

            # Open dataset and access RGB bands (following notebook pattern)
            with xr.open_dataset(level_path, engine="zarr", zarr_format=3) as ds:
                # Access RGB data (as in notebook)
                red_data = ds["b04"].values
                green_data = ds["b03"].values
                blue_data = ds["b02"].values

                # Verify data shapes match
                assert red_data.shape == green_data.shape == blue_data.shape, (
                    f"RGB band shapes don't match in {group} level {level_num}"
                )

                # Verify data is not empty
                assert red_data.size > 0, f"Empty red data in {group} level {level_num}"
                assert green_data.size > 0, f"Empty green data in {group} level {level_num}"
                assert blue_data.size > 0, f"Empty blue data in {group} level {level_num}"

                print(
                    f" Level {level_num}: RGB access successful, shape {red_data.shape}"
                )
0 commit comments