22
33from __future__ import annotations
44
5- from typing import Any
6- from typing import TYPE_CHECKING
75import logging
6+ from typing import TYPE_CHECKING
7+ from typing import Any
88
9- import numpy as np
109import dask
1110from tqdm .auto import tqdm
1211from tqdm .dask import TqdmCallback
1312from xarray import DataArray
1413
14+ from mdio .api .io import _normalize_path
1515from mdio .api .io import open_mdio
1616from mdio .api .io import to_mdio
17- from mdio .api .io import _normalize_path
1817from mdio .builder .xarray_builder import _compressor_to_encoding
1918from mdio .core .config import MDIOSettings
2019
21-
2220logger = logging .getLogger (__name__ )
2321
2422
2523if TYPE_CHECKING :
26- from collections .abc import Mapping
2724 from pathlib import Path
25+
2826 from upath import UPath
27+ from xarray import Dataset
2928
29+ from mdio .builder .schemas .chunk_grid import RectilinearChunkGrid
30+ from mdio .builder .schemas .chunk_grid import RegularChunkGrid
3031 from mdio .builder .schemas .compressors import ZFP
3132 from mdio .builder .schemas .compressors import Blosc
32- from mdio .builder .schemas .chunk_grid import RegularChunkGrid
33- from mdio .builder .schemas .chunk_grid import RectilinearChunkGrid
3433
3534
36- def _remove_fillvalue_attrs (dataset : Any ) -> None :
35+ def _remove_fillvalue_attrs (dataset : Dataset ) -> None :
3736 """Remove _FillValue from all variable attrs to avoid conflicts with consolidated metadata.
3837
3938 This is only relevant for Zarr v2 format.
@@ -45,31 +44,33 @@ def _remove_fillvalue_attrs(dataset: Any) -> None:
4544
4645def _validate_inputs (
4746 new_variable : str | list [str ],
48- chunk_grid : "RegularChunkGrid"
49- | "RectilinearChunkGrid"
50- | list ["RegularChunkGrid" | "RectilinearChunkGrid" ],
51- compressor : "ZFP" | "Blosc" | list ["ZFP" | "Blosc" ] | None ,
47+ chunk_grid : RegularChunkGrid | RectilinearChunkGrid | list [RegularChunkGrid | RectilinearChunkGrid ],
48+ compressor : ZFP | Blosc | list [ZFP | Blosc ] | None ,
5249) -> None :
5350 """Validate basic shapes/types (no broadcasting here)."""
54-
5551 # new_variable must be str or non-empty list[str]
5652 if isinstance (new_variable , str ):
5753 pass
5854 elif isinstance (new_variable , list ):
5955 if not new_variable :
60- raise ValueError ("new_variable list must not be empty" )
56+ msg = "new_variable list must not be empty"
57+ raise ValueError (msg )
6158 if not all (isinstance (v , str ) for v in new_variable ):
62- raise TypeError ("All entries in new_variable must be strings" )
59+ msg = "All entries in new_variable must be strings"
60+ raise TypeError (msg )
6361 else :
64- raise TypeError ("new_variable must be a string or a list of strings" )
62+ msg = "new_variable must be a string or a list of strings"
63+ raise TypeError (msg )
6564
6665 # chunk_grid can be a single grid or non-empty list of grids
6766 if isinstance (chunk_grid , list ) and not chunk_grid :
68- raise ValueError ("chunk_grid list must not be empty" )
67+ msg = "chunk_grid list must not be empty"
68+ raise ValueError (msg )
6969
7070 # compressor can be None, a single compressor, or non-empty list
7171 if isinstance (compressor , list ) and not compressor :
72- raise ValueError ("compressor list must not be empty" )
72+ msg = "compressor list must not be empty"
73+ raise ValueError (msg )
7374
7475
7576def _normalize_new_variable (
@@ -83,28 +84,25 @@ def _normalize_new_variable(
8384
8485
8586def _normalize_chunk_grid (
86- chunk_grid : "RegularChunkGrid"
87- | "RectilinearChunkGrid"
88- | list ["RegularChunkGrid" | "RectilinearChunkGrid" ],
87+ chunk_grid : RegularChunkGrid | RectilinearChunkGrid | list [RegularChunkGrid | RectilinearChunkGrid ],
8988 num_variables : int ,
90- ) -> list [" RegularChunkGrid" | " RectilinearChunkGrid" ]:
89+ ) -> list [RegularChunkGrid | RectilinearChunkGrid ]:
9190 """Broadcast chunk_grid to match num_variables."""
9291 if isinstance (chunk_grid , list ):
9392 if len (chunk_grid ) == 1 and num_variables > 1 :
9493 return chunk_grid * num_variables
9594 if len (chunk_grid ) == num_variables :
9695 return list (chunk_grid )
97- raise ValueError (
98- "chunk_grid list length must be 1 or equal to the number of new variables"
99- )
96+ msg = "chunk_grid list length must be 1 or equal to the number of new variables"
97+ raise ValueError (msg )
10098 # single grid reused for all variables
10199 return [chunk_grid ] * num_variables
102100
103101
104102def _normalize_compressor (
105- compressor : " ZFP" | " Blosc" | list [" ZFP" | " Blosc" ] | None ,
103+ compressor : ZFP | Blosc | list [ZFP | Blosc ] | None ,
106104 num_variables : int ,
107- ) -> list [" ZFP" | " Blosc" | None ]:
105+ ) -> list [ZFP | Blosc | None ]:
108106 """Broadcast compressor to match num_variables."""
109107 if compressor is None :
110108 return [None ] * num_variables
@@ -114,22 +112,19 @@ def _normalize_compressor(
114112 return compressor * num_variables
115113 if len (compressor ) == num_variables :
116114 return list (compressor )
117- raise ValueError (
118- "compressor list length must be 1 or equal to the number of new variables"
119- )
115+ msg = "compressor list length must be 1 or equal to the number of new variables"
116+ raise ValueError (msg )
120117
121118 # single compressor reused for all variables
122119 return [compressor ] * num_variables
123120
124121
125- def from_variable (
126- dataset_path : " UPath | Path | str" ,
122+ def from_variable ( # noqa: PLR0913
123+ dataset_path : UPath | Path | str ,
127124 source_variable : str ,
128125 new_variable : str | list [str ],
129- chunk_grid : "RegularChunkGrid"
130- | "RectilinearChunkGrid"
131- | list ["RegularChunkGrid" | "RectilinearChunkGrid" ],
132- compressor : "ZFP" | "Blosc" | list ["ZFP" | "Blosc" ] | None = None ,
126+ chunk_grid : RegularChunkGrid | RectilinearChunkGrid | list [RegularChunkGrid | RectilinearChunkGrid ],
127+ compressor : ZFP | Blosc | list [ZFP | Blosc ] | None = None ,
133128 copy_metadata : bool = True ,
134129) -> None :
135130 """Add new Variable(s) to the Dataset with different chunking and compression.
@@ -169,8 +164,7 @@ def from_variable(
169164 shape = source_var .shape
170165 store_chunks = source_var .encoding .get ("chunks" , None )
171166
172- logger .debug ("Source variable %r: dims=%r, shape=%r, store_chunks=%r" ,
173- source_variable , dims , shape , store_chunks )
167+ logger .debug ("Source variable %r: dims=%r, shape=%r, store_chunks=%r" , source_variable , dims , shape , store_chunks )
174168
175169 settings = MDIOSettings ()
176170 num_workers = settings .export_cpus
@@ -183,7 +177,7 @@ def from_variable(
183177 zip (new_variables , chunk_grids , compressors , strict = True ),
184178 total = len (new_variables ),
185179 desc = "Generating newly chunked Variables" ,
186- unit = "variable"
180+ unit = "variable" ,
187181 ):
188182 new_chunks = tuple (grid .configuration .chunk_shape )
189183
@@ -206,10 +200,12 @@ def from_variable(
206200 else :
207201 rechunked = source_var .chunk (dest_mapping )
208202
209- logger .debug ("Variable %r: nominal_chunks=%r, task graph has %d tasks" ,
210- name ,
211- tuple (dim_chunks [0 ] for dim_chunks in rechunked .chunks ),
212- len (rechunked .__dask_graph__ ()))
203+ logger .debug (
204+ "Variable %r: nominal_chunks=%r, task graph has %d tasks" ,
205+ name ,
206+ tuple (dim_chunks [0 ] for dim_chunks in rechunked .chunks ),
207+ len (rechunked .__dask_graph__ ()),
208+ )
213209
214210 # Build DataArray for the new variable
215211 attrs = source_var .attrs .copy () if copy_metadata else {}
@@ -223,9 +219,7 @@ def from_variable(
223219 new_ds = new_da .to_dataset (name = name )
224220
225221 # Per-variable encoding
226- encoding : dict [str , Any ] = (
227- source_var .encoding .copy () if copy_metadata else {}
228- )
222+ encoding : dict [str , Any ] = source_var .encoding .copy () if copy_metadata else {}
229223 encoding ["chunks" ] = new_chunks
230224 if comp is not None :
231225 compressor_encoding = _compressor_to_encoding (comp )
0 commit comments