Fix bug: unify chunking

konstntokas · konstntokas · commit 3df974f9e012 · 2026-02-20T11:29:26.000+01:00
diff --git a/CHANGES.md b/CHANGES.md
@@ -5,6 +5,7 @@
 * Added support for ERA5 time series extraction via the [xcube-cds](github.com/xcube-dev/xcube-cds)
   data store plugin.
 * Improved documentation by adding a new Data Availability page.
+* Fixed a bug by unifying the chunking of concatenated datasets before writing.
 
 ## Changes in 0.3.0
 
diff --git a/environment.yml b/environment.yml
@@ -12,15 +12,19 @@ dependencies:
   - pyproj
   - tabulate
   - xarray
-  - xcube >=1.9.0
-  - xcube-resampling >=0.2.4
+  - xcube >=1.13.1
+  - xcube-resampling >=0.3.0
   - yaml
   # data store plugins
   - xcube-cci >=0.12.1
   - xcube-cds >=1.2.0
   - xcube-clms >=0.2.2
   - xcube-stac >=1.1.2
-  - xcube-zenodo >=1.1.0
+  - xcube-zenodo >=1.1.1
+  # Pip section
+  - pip
+  - pip:
+      - xcube-icosdp >=0.1.1
   # Development Dependencies - Tools
   - black
   - isort
diff --git a/xcube_multistore/accessors/__init__.py b/xcube_multistore/accessors/__init__.py
@@ -28,15 +28,17 @@
 from .base import BaseAccessor
 from .cds import CdsAccessor
 from .clms import ClmsAccessor
+from .icosdp import IcosDpAccessor
 from .stac import StacAccessor
 from .zenodo import ZenodoAccessor
 
 ACCESSOR_MAPPING = {
     "cds": CdsAccessor,
-    "zenodo": ZenodoAccessor,
     "clms": ClmsAccessor,
+    "icosdp": IcosDpAccessor,
     "stac-cdse-ardc": StacAccessor,
     "stac-pc-ardc": StacAccessor,
+    "zenodo": ZenodoAccessor,
 }
 
 
diff --git a/xcube_multistore/accessors/icosdp.py b/xcube_multistore/accessors/icosdp.py
@@ -0,0 +1,36 @@
+# MIT License
+#
+# Copyright (c) 2025 Brockmann Consult GmbH
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import xarray as xr
+
+from xcube_multistore.accessor import Accessor
+
+
+class IcosDpAccessor(Accessor):
+    """Provides methods for accessing datasets from the xcube-icosdp data store."""
+
+    def open_data(self, data_id: str, **open_params) -> xr.Dataset:
+        if data_id.endswith(".zarr"):
+            ds = self.store.cache_store.open_data(data_id, **open_params)
+        else:
+            ds = self.store.open_data(data_id, **open_params)
+        return ds
diff --git a/xcube_multistore/multistore.py b/xcube_multistore/multistore.py
@@ -675,22 +675,16 @@ def _write_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset | Exception
             ds = prepare_dataset_for_netcdf(ds)
         ds = clean_dataset(ds)
 
-        # unify chunksize
-        ds = ds.unify_chunks()
         chunksize = config.get("chunksize")
-        if chunksize is None:
-            chunksize = {
-                dim: sizes[0] for dim, sizes in getattr(ds, "chunksizes", {}).items()
-            }
+        if not chunksize:
+            if hasattr(ds, "chunks") and ds.chunks is not None:
+                chunksize = {dim: chunks[0] for dim, chunks in ds.chunks.items()}
         if chunksize:
-            # Select format name for chunking
-            format_name = "zarr" if format_id in ["zarr", "levels"] else format_id
-            ds = chunk_dataset(
-                ds, format_name=format_name, chunk_sizes=chunksize, data_vars_only=True
-            )
-            # Remove "chunks" from encoding to avoid serialization issues
-            for var in ds.data_vars:
-                ds[var].encoding.pop("chunks", None)
+            ds = ds.chunk(chunks=chunksize)
+        # Remove "chunks" from encoding to avoid serialization issues
+        for var in ds.data_vars:
+            ds[var].encoding.pop("chunks", None)
+            ds[var].encoding.pop("chunksizes", None)
 
         data_id = _get_data_id(config)
         store.write_data(ds, data_id, replace=True)
diff --git a/xcube_multistore/utils.py b/xcube_multistore/utils.py
@@ -133,7 +133,7 @@ def clean_dataset(ds: xr.Dataset, gm: GridMapping | None = None) -> xr.Dataset:
         A cleaned version of the dataset with boundary variables removed and grid
         mapping normalized.
     """
-    check_vars = ["x_bnds", "y_bnds", "lat_bnds", "lon_bnds", "time_bnds"]
+    check_vars = ["x_bnds", "y_bnds", "lat_bnds", "lon_bnds", "time_bnds", "hour_bnds"]
     sel_vars = []
     for var in check_vars:
         if var in ds: