33from __future__ import annotations
44
55from typing import TYPE_CHECKING
6- from typing import Any
76from typing import Literal
87
98import zarr
9+ from obstore .store import from_url as obstore_from_url
1010from upath import UPath
1111from xarray import Dataset as xr_Dataset
1212from xarray import open_zarr as xr_open_zarr
1313from xarray .backends .writers import to_zarr as xr_to_zarr
14+ from zarr .storage import FsspecStore
15+ from zarr .storage import ObjectStore
1416
1517from mdio .constants import ZarrFormat
1618from mdio .core .zarr_io import zarr_warnings_suppress_unstable_structs_v3
1719
1820if TYPE_CHECKING :
1921 from collections .abc import Mapping
2022 from pathlib import Path
23+ from typing import Any
2124
2225 from xarray import Dataset
2326 from xarray .core .types import T_Chunks
2427 from xarray .core .types import ZarrWriteModes
28+ from zarr .abc .store import Store
29+
30+
31+ StorageBackendT = Literal ["fsspec" , "obstore" ]
2532
2633
2734def _normalize_path (path : UPath | Path | str ) -> UPath :
2835 return UPath (path )
2936
3037
31- def _normalize_storage_options (path : UPath ) -> dict [str , Any ] | None :
32- return None if len (path .storage_options ) == 0 else path .storage_options
38+ def _get_store (upath : UPath , storage_backend : StorageBackendT ) -> Store :
39+ if storage_backend == "obstore" :
40+ uri = upath .as_posix ()
41+ storage_options : Mapping [str , Any ] = getattr (upath , "storage_options" , {})
42+ return ObjectStore (obstore_from_url (uri , ** storage_options ))
43+ return FsspecStore .from_upath (upath )
3344
3445
35- def open_mdio (input_path : UPath | Path | str , chunks : T_Chunks = None ) -> xr_Dataset :
46+ def open_mdio (
47+ input_path : UPath | Path | str ,
48+ chunks : T_Chunks = None ,
49+ storage_backend : StorageBackendT = "fsspec" ,
50+ ) -> xr_Dataset :
3651 """Open a Zarr dataset from the specified universal file path.
3752
3853 Args:
@@ -45,18 +60,19 @@ def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dat
4560 - ``chunks={dim: chunk, ...}`` loads the data with dask using the specified chunk size for each dimension.
4661
4762 See dask chunking for more details.
63+ storage_backend: The storage backend to use for reading the dataset. Defaults to "fsspec".
64+ For faster reads use "obstore". However, it is not as broadly compatible as "fsspec".
4865
4966 Returns:
5067 An Xarray dataset opened from the input path.
5168 """
5269 input_path = _normalize_path (input_path )
53- storage_options = _normalize_storage_options (input_path )
5470 zarr_format = zarr .config .get ("default_zarr_format" )
5571
72+ input_store = _get_store (input_path , storage_backend )
5673 return xr_open_zarr (
57- input_path . as_posix () ,
74+ input_store ,
5875 chunks = chunks ,
59- storage_options = storage_options ,
6076 mask_and_scale = zarr_format == ZarrFormat .V3 , # off for v2, on for v3
6177 consolidated = zarr_format == ZarrFormat .V2 , # on for v2, off for v3
6278 )
@@ -69,6 +85,7 @@ def to_mdio( # noqa: PLR0913
6985 * ,
7086 compute : bool = True ,
7187 region : Mapping [str , slice | Literal ["auto" ]] | Literal ["auto" ] | None = None ,
88+ storage_backend : StorageBackendT = "fsspec" ,
7289) -> None :
7390 """Write dataset contents to an MDIO output_path.
7491
@@ -81,23 +98,24 @@ def to_mdio( # noqa: PLR0913
8198 "a-" means only append those variables that have ``append_dim``.
8299 "r+" means modify existing array *values* only (raise an error if any metadata or shapes would change).
83100 The default mode is "r+" if ``region`` is set and ``w-`` otherwise.
84- compute: If True write array data immediately; otherwise return a ``dask.delayed.Delayed`` object that
101+ compute: If True writes array data immediately; otherwise return a ``dask.delayed.Delayed`` object that
85102 can be computed to write array data later. Metadata is always updated eagerly.
86103 region: Optional mapping from dimension names to either a) ``"auto"``, or b) integer slices, indicating
87104 the region of existing MDIO array(s) in which to write this dataset's data.
105+ storage_backend: The storage backend to use for reading the dataset. Defaults to "fsspec".
106+ For faster reads use "obstore". However, it is not as broadly compatible as "fsspec".
88107 """
89108 output_path = _normalize_path (output_path )
90- storage_options = _normalize_storage_options (output_path )
91109 zarr_format = zarr .config .get ("default_zarr_format" )
92110
111+ output_store = _get_store (output_path , storage_backend )
93112 with zarr_warnings_suppress_unstable_structs_v3 ():
94113 xr_to_zarr (
95114 dataset ,
96- store = output_path . as_posix (), # xarray doesn't like URI when file:// is protocol
115+ store = output_store ,
97116 mode = mode ,
98117 compute = compute ,
99118 consolidated = zarr_format == ZarrFormat .V2 , # on for v2, off for v3
100119 region = region ,
101- storage_options = storage_options ,
102120 write_empty_chunks = False ,
103121 )
0 commit comments