aodn · lbesnard · Oct 21, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 20, 2025
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -95,4 +95,4 @@ jobs:
 
       - name: Verify build
         run: |
-          pip install dist/*.whl
+          pip install dist/*.whl --no-deps
diff --git a/aodn_cloud_optimised/config/dataset/satellite_chlorophylla_oci_1day_aqua.json b/aodn_cloud_optimised/config/dataset/satellite_chlorophylla_oci_1day_aqua.json
@@ -126,8 +126,7 @@
           ".*chl_oci\\.nc"
         ],
         "year_range": [
-          2002,
-          2024
+          2002
         ]
       }
     ],

diff --git a/aodn_cloud_optimised/config/dataset/satellite_ocean_colour_1day_aqua_main.json b/aodn_cloud_optimised/config/dataset/satellite_ocean_colour_1day_aqua_main.json
@@ -8,7 +8,7 @@
         150
       ],
       "scheduler_vm_types": "m7i.2xlarge",
-      "worker_vm_types": "m7i.2xlarge",
+      "worker_vm_types": "m7i.4xlarge",
       "allow_ingress_from": "me",
       "compute_purchase_option": "spot_with_fallback",
       "worker_options": {

diff --git a/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_delayed_qc.json b/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_delayed_qc.json
@@ -273,29 +273,36 @@
     "Citation": "IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access]"
   },
   "run_settings": {
-    "_COMMENT": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
+    "_COMMENT": "2025-10-21: running this dataset with a batch size of more than 1 will use more than 32GB of memory on the host machine running coiled. It doesn't make sense, but this is an ongoing issue with fsspec, or maybe xarray/engine",
     "coiled_cluster_options": {
       "n_workers": [
         1,
         20
       ],
-      "scheduler_vm_types": "m7i.xlarge",
-      "worker_vm_types": "m7i.large",
+      "scheduler_vm_types": "m7i.large",
+      "worker_vm_types": "m7i.8xlarge",
       "allow_ingress_from": "me",
       "compute_purchase_option": "spot_with_fallback",
       "worker_options": {
-        "nthreads": 4,
-        "memory_limit": "16GB"
+        "nthreads": 32,
+        "memory_limit": "128GB"
       }
     },
     "batch_size": 1,
     "cluster": {
       "mode": "coiled",
-      "restart_every_path": false
+      "restart_every_path": true
     },
     "paths": [
       {
-        "s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/",
+        "s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/VMQ9273_Solander",
+        "filter": [
+          ".*FV01.*\\.nc"
+        ],
+        "year_range": []
+      },
+      {
+        "s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/VLHJ_Southern-Surveyor",
         "filter": [
           ".*FV01.*\\.nc"
         ],
@@ -365,14 +372,6 @@
           "dimensions": "TIME"
         }
       },
-      "quality_control_version": {
-        "source": "@global_attribute:file_version",
-        "schema": {
-          "type": "<U49",
-          "units": "1",
-          "dimensions": "TIME"
-        }
-      },
       "platform_code": {
         "source": "@global_attribute:platform_code",
         "schema": {

diff --git a/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_derived_product.json b/aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_derived_product.json
@@ -19,20 +19,20 @@
     "clear_existing_data": true,
     "coiled_cluster_options": {
       "n_workers": [
-        1,
-        20
+        5,
+        9
       ],
-      "scheduler_vm_types": "m7i.2xlarge",
-      "worker_vm_types": "m7i.xlarge",
+      "scheduler_vm_types": "m7i.xlarge",
+      "worker_vm_types": "m7i.2xlarge",
       "allow_ingress_from": "me",
       "compute_purchase_option": "spot_with_fallback",
       "worker_options": {
         "nthreads": 4,
-        "memory_limit": "16GB"
+        "memory_limit": "160GB"
       }
     },
-    "_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
-    "batch_size": 1,
+    "_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable. See https://github.com/pydata/xarray/issues/10501",
+    "batch_size": 25,
     "raise_error": false
   },
   "metadata_uuid": "28f8bfed-ca6a-472a-84e4-42563ce4df3f",

diff --git a/aodn_cloud_optimised/lib/DataQuery.py b/aodn_cloud_optimised/lib/DataQuery.py
@@ -1876,15 +1876,6 @@ def _open_zarr_store(self) -> xr.Dataset:
             ValueError: If a suitable time variable cannot be found for sorting.
         """
         try:
-            # storage_opts = self.s3_fs_opts.get("storage_options", {})
-            # anon_flag = self.s3_fs_opts.get("anon", True)
-            # ds = xr.open_zarr(
-            #     fsspec.get_mapper(self.dname, anon=anon_flag, **storage_opts),
-            #     chunks=None,
-            #     consolidated=True,
-            # )
-            # mapper = fsspec.get_mapper(self.dname, storage_options={"fs": self.s3})
-
             mapper = self.s3.get_mapper(self.dname)
             ds = xr.open_zarr(mapper, chunks=None, consolidated=True)
-Original file line number
+Diff line change
@@ Expand Up / @@ -126,8 +126,7 @@ @@
               ".*chl_oci\\.nc"
             ],
             "year_range": [
-,
             ]
           }
         ],
@@ Expand Down @@