Skip to content

Commit 8dbdd36

Browse files
authored
Merge pull request #167 from aodn/XarrayFix
Fix: xarray data loss - update - use align_chunks
2 parents 3fd3e53 + 606ac3d commit 8dbdd36

15 files changed

Lines changed: 1978 additions & 2612 deletions

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,4 @@ jobs:
9595
9696
- name: Verify build
9797
run: |
98-
pip install dist/*.whl
98+
pip install dist/*.whl --no-deps

aodn_cloud_optimised/config/dataset/satellite_chlorophylla_oci_1day_aqua.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,7 @@
126126
".*chl_oci\\.nc"
127127
],
128128
"year_range": [
129-
2002,
130-
2024
129+
2002
131130
]
132131
}
133132
],

aodn_cloud_optimised/config/dataset/satellite_ocean_colour_1day_aqua_main.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
150
99
],
1010
"scheduler_vm_types": "m7i.2xlarge",
11-
"worker_vm_types": "m7i.2xlarge",
11+
"worker_vm_types": "m7i.4xlarge",
1212
"allow_ingress_from": "me",
1313
"compute_purchase_option": "spot_with_fallback",
1414
"worker_options": {

aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_delayed_qc.json

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -273,29 +273,36 @@
273273
"Citation": "IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access]"
274274
},
275275
"run_settings": {
276-
"_COMMENT": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
276+
"_COMMENT": "2025-10-21: running this dataset with a batch size of more than 1 will use more than 32GB of memory on the host machine running coiled. It doesn't make sense, but this is an ongoing issue with fsspec, or maybe xarray/engine",
277277
"coiled_cluster_options": {
278278
"n_workers": [
279279
1,
280280
20
281281
],
282-
"scheduler_vm_types": "m7i.xlarge",
283-
"worker_vm_types": "m7i.large",
282+
"scheduler_vm_types": "m7i.large",
283+
"worker_vm_types": "m7i.8xlarge",
284284
"allow_ingress_from": "me",
285285
"compute_purchase_option": "spot_with_fallback",
286286
"worker_options": {
287-
"nthreads": 4,
288-
"memory_limit": "16GB"
287+
"nthreads": 32,
288+
"memory_limit": "128GB"
289289
}
290290
},
291291
"batch_size": 1,
292292
"cluster": {
293293
"mode": "coiled",
294-
"restart_every_path": false
294+
"restart_every_path": true
295295
},
296296
"paths": [
297297
{
298-
"s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/",
298+
"s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/VMQ9273_Solander",
299+
"filter": [
300+
".*FV01.*\\.nc"
301+
],
302+
"year_range": []
303+
},
304+
{
305+
"s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/VLHJ_Southern-Surveyor",
299306
"filter": [
300307
".*FV01.*\\.nc"
301308
],
@@ -365,14 +372,6 @@
365372
"dimensions": "TIME"
366373
}
367374
},
368-
"quality_control_version": {
369-
"source": "@global_attribute:file_version",
370-
"schema": {
371-
"type": "<U49",
372-
"units": "1",
373-
"dimensions": "TIME"
374-
}
375-
},
376375
"platform_code": {
377376
"source": "@global_attribute:platform_code",
378377
"schema": {

aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_derived_product.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,20 @@
1919
"clear_existing_data": true,
2020
"coiled_cluster_options": {
2121
"n_workers": [
22-
1,
23-
20
22+
5,
23+
9
2424
],
25-
"scheduler_vm_types": "m7i.2xlarge",
26-
"worker_vm_types": "m7i.xlarge",
25+
"scheduler_vm_types": "m7i.xlarge",
26+
"worker_vm_types": "m7i.2xlarge",
2727
"allow_ingress_from": "me",
2828
"compute_purchase_option": "spot_with_fallback",
2929
"worker_options": {
3030
"nthreads": 4,
31-
"memory_limit": "16GB"
31+
"memory_limit": "160GB"
3232
}
3333
},
34-
"_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
35-
"batch_size": 1,
34+
"_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable. See https://github.com/pydata/xarray/issues/10501",
35+
"batch_size": 25,
3636
"raise_error": false
3737
},
3838
"metadata_uuid": "28f8bfed-ca6a-472a-84e4-42563ce4df3f",

aodn_cloud_optimised/lib/DataQuery.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,15 +1876,6 @@ def _open_zarr_store(self) -> xr.Dataset:
18761876
ValueError: If a suitable time variable cannot be found for sorting.
18771877
"""
18781878
try:
1879-
# storage_opts = self.s3_fs_opts.get("storage_options", {})
1880-
# anon_flag = self.s3_fs_opts.get("anon", True)
1881-
# ds = xr.open_zarr(
1882-
# fsspec.get_mapper(self.dname, anon=anon_flag, **storage_opts),
1883-
# chunks=None,
1884-
# consolidated=True,
1885-
# )
1886-
# mapper = fsspec.get_mapper(self.dname, storage_options={"fs": self.s3})
1887-
18881879
mapper = self.s3.get_mapper(self.dname)
18891880
ds = xr.open_zarr(mapper, chunks=None, consolidated=True)
18901881

0 commit comments

Comments
 (0)