Skip to content

Commit 3df974f

Browse files
committed
solve bug: unify chunking
1 parent e10cb20 commit 3df974f

6 files changed

Lines changed: 56 additions & 19 deletions

File tree

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Added support for ERA5 time series extraction via the [xcube-cds](https://github.com/xcube-dev/xcube-cds)
66
data store plugin.
77
* Improved documentation by adding a new Data Availability page.
8+
* Fixed a bug by unifying the chunking of concatenated datasets before writing.
89

910
## Changes in 0.3.0
1011

environment.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,19 @@ dependencies:
1212
- pyproj
1313
- tabulate
1414
- xarray
15-
- xcube >=1.9.0
16-
- xcube-resampling >=0.2.4
15+
- xcube >=1.13.1
16+
- xcube-resampling >=0.3.0
1717
- yaml
1818
# data store plugins
1919
- xcube-cci >=0.12.1
2020
- xcube-cds >=1.2.0
2121
- xcube-clms >=0.2.2
2222
- xcube-stac >=1.1.2
23-
- xcube-zenodo >=1.1.0
23+
- xcube-zenodo >=1.1.1
24+
# Pip section
25+
- pip
26+
- pip:
27+
- xcube-icosdp >=0.1.1
2428
# Development Dependencies - Tools
2529
- black
2630
- isort

xcube_multistore/accessors/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,17 @@
2828
from .base import BaseAccessor
2929
from .cds import CdsAccessor
3030
from .clms import ClmsAccessor
31+
from .icosdp import IcosDpAccessor
3132
from .stac import StacAccessor
3233
from .zenodo import ZenodoAccessor
3334

3435
ACCESSOR_MAPPING = {
3536
"cds": CdsAccessor,
36-
"zenodo": ZenodoAccessor,
3737
"clms": ClmsAccessor,
38+
"icosdp": IcosDpAccessor,
3839
"stac-cdse-ardc": StacAccessor,
3940
"stac-pc-ardc": StacAccessor,
41+
"zenodo": ZenodoAccessor,
4042
}
4143

4244

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# MIT License
2+
#
3+
# Copyright (c) 2025 Brockmann Consult GmbH
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
import xarray as xr
24+
25+
from xcube_multistore.accessor import Accessor
26+
27+
28+
class IcosDpAccessor(Accessor):
29+
"""Provides methods for accessing datasets from the xcube-icosdp data store"""
30+
31+
def open_data(self, data_id: str, **open_params) -> xr.Dataset:
32+
if data_id.endswith(".zarr"):
33+
ds = self.store.cache_store.open_data(data_id, **open_params)
34+
else:
35+
ds = self.store.open_data(data_id, **open_params)
36+
return ds

xcube_multistore/multistore.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -675,22 +675,16 @@ def _write_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset | Exception
675675
ds = prepare_dataset_for_netcdf(ds)
676676
ds = clean_dataset(ds)
677677

678-
# unify chunksize
679-
ds = ds.unify_chunks()
680678
chunksize = config.get("chunksize")
681-
if chunksize is None:
682-
chunksize = {
683-
dim: sizes[0] for dim, sizes in getattr(ds, "chunksizes", {}).items()
684-
}
679+
if not chunksize:
680+
if hasattr(ds, "chunks") and ds.chunks is not None:
681+
chunksize = {dim: chunks[0] for dim, chunks in ds.chunks.items()}
685682
if chunksize:
686-
# Select format name for chunking
687-
format_name = "zarr" if format_id in ["zarr", "levels"] else format_id
688-
ds = chunk_dataset(
689-
ds, format_name=format_name, chunk_sizes=chunksize, data_vars_only=True
690-
)
691-
# Remove "chunks" from encoding to avoid serialization issues
692-
for var in ds.data_vars:
693-
ds[var].encoding.pop("chunks", None)
683+
ds = ds.chunk(chunks=chunksize)
684+
# Remove "chunks" from encoding to avoid serialization issues
685+
for var in ds.data_vars:
686+
ds[var].encoding.pop("chunks", None)
687+
ds[var].encoding.pop("chunksizes", None)
694688

695689
data_id = _get_data_id(config)
696690
store.write_data(ds, data_id, replace=True)

xcube_multistore/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def clean_dataset(ds: xr.Dataset, gm: GridMapping | None = None) -> xr.Dataset:
133133
A cleaned version of the dataset with boundary variables removed and grid
134134
mapping normalized.
135135
"""
136-
check_vars = ["x_bnds", "y_bnds", "lat_bnds", "lon_bnds", "time_bnds"]
136+
check_vars = ["x_bnds", "y_bnds", "lat_bnds", "lon_bnds", "time_bnds", "hour_bnds"]
137137
sel_vars = []
138138
for var in check_vars:
139139
if var in ds:

0 commit comments

Comments
 (0)