aodn
diff --git a/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_delayed_qc.json b/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_delayed_qc.json
Lines changed: 3 additions & 3 deletions
diff --git a/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_derived_product.json b/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_derived_product.json
Lines changed: 7 additions & 7 deletions
diff --git a/aodn_cloud_optimised/lib/CommonHandler.py b/aodn_cloud_optimised/lib/CommonHandler.py
Lines changed: 4 additions & 1 deletion
diff --git a/aodn_cloud_optimised/lib/DataQuery.py b/aodn_cloud_optimised/lib/DataQuery.py
Lines changed: 3 additions & 0 deletions
@@ -276,19 +276,19 @@
     "_COMMENT": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
     "coiled_cluster_options": {
       "n_workers": [
-        1,
+        5,
         20
       ],
       "scheduler_vm_types": "m7i.xlarge",
-      "worker_vm_types": "m7i.large",
+      "worker_vm_types": "m7i.2xlarge",
       "allow_ingress_from": "me",
       "compute_purchase_option": "spot_with_fallback",
       "worker_options": {
         "nthreads": 4,
         "memory_limit": "16GB"
       }
     },
-    "batch_size": 1,
+    "batch_size": 2,
     "cluster": {
       "mode": "coiled",
       "restart_every_path": false
 
@@ -19,20 +19,20 @@
     "clear_existing_data": true,
     "coiled_cluster_options": {
       "n_workers": [
-        1,
-        20
+        5,
+        9
       ],
-      "scheduler_vm_types": "m7i.2xlarge",
-      "worker_vm_types": "m7i.xlarge",
+      "scheduler_vm_types": "m7i.xlarge",
+      "worker_vm_types": "m7i.2xlarge",
       "allow_ingress_from": "me",
       "compute_purchase_option": "spot_with_fallback",
       "worker_options": {
         "nthreads": 4,
-        "memory_limit": "16GB"
+        "memory_limit": "160GB"
       }
     },
-    "_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
-    "batch_size": 1,
+    "_comment": "2025-07-01: running this dataset with a batch size greater than 1 resulted in silent data corruption with dask v2. The cause is unclear; it may be similar to xarray/issues/8882, or a race condition. The investigation was time-consuming and inconclusive, but it indicates that the xarray/zarr write path is not reliable in this case. See https://github.com/pydata/xarray/issues/10501",
+    "batch_size": 25,
     "raise_error": false
   },
   "metadata_uuid": "28f8bfed-ca6a-472a-84e4-42563ce4df3f",
 
@@ -129,7 +129,10 @@ def __init__(self, **kwargs):
         self.s3_client_opts = kwargs.get("s3_client_opts", None)
 
         self.s3_fs = s3fs.S3FileSystem(
-            anon=False, default_cache_type=None, session=kwargs.get("s3fs_session")
+            anon=False,
+            default_cache_type="readahead",
+            default_fill_cache=False,
+            session=kwargs.get("s3fs_session"),
         )  # variable overwritten in unittest to use moto server
 
         self.uuid_log = None
 
@@ -1747,6 +1747,9 @@ def _open_zarr_store(self) -> xr.Dataset:
             ds = xr.open_zarr(
                 fsspec.get_mapper(self.dname, anon=True), chunks=None, consolidated=True
             )
+            # NOTE: chunk unification is left disabled for now:
+            #     ds = ds.unify_chunks()
+            # Enabling it requires removing chunks=None from the open_zarr call above.
             # Find the time variable name to sort by
             time_names = [
                 "time",