Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,4 @@ jobs:

- name: Verify build
run: |
pip install dist/*.whl
pip install dist/*.whl --no-deps
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@
".*chl_oci\\.nc"
],
"year_range": [
2002,
2024
2002
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
150
],
"scheduler_vm_types": "m7i.2xlarge",
"worker_vm_types": "m7i.2xlarge",
"worker_vm_types": "m7i.4xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,29 +273,36 @@
"Citation": "IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access]"
},
"run_settings": {
"_COMMENT": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
"_COMMENT": "2025-10-21: running this dataset with a batch size of more than 1 will use more than 32GB of memory on the host machine running coiled. It doesn't make sence, but this is an on-going issue with ffspec, or maybe xarray/engine",
"coiled_cluster_options": {
"n_workers": [
1,
20
],
"scheduler_vm_types": "m7i.xlarge",
"worker_vm_types": "m7i.large",
"scheduler_vm_types": "m7i.large",
"worker_vm_types": "m7i.8xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 4,
"memory_limit": "16GB"
"nthreads": 32,
"memory_limit": "128GB"
}
},
"batch_size": 1,
"cluster": {
"mode": "coiled",
"restart_every_path": false
"restart_every_path": true
},
"paths": [
{
"s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/",
"s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/VMQ9273_Solander",
"filter": [
".*FV01.*\\.nc"
],
"year_range": []
},
{
"s3_uri": "s3://imos-data/IMOS/SRS/OC/radiometer/VLHJ_Southern-Surveyor",
"filter": [
".*FV01.*\\.nc"
],
Expand Down Expand Up @@ -365,14 +372,6 @@
"dimensions": "TIME"
}
},
"quality_control_version": {
"source": "@global_attribute:file_version",
"schema": {
"type": "<U49",
"units": "1",
"dimensions": "TIME"
}
},
"platform_code": {
"source": "@global_attribute:platform_code",
"schema": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@
"clear_existing_data": true,
"coiled_cluster_options": {
"n_workers": [
1,
20
5,
9
],
"scheduler_vm_types": "m7i.2xlarge",
"worker_vm_types": "m7i.xlarge",
"scheduler_vm_types": "m7i.xlarge",
"worker_vm_types": "m7i.2xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 4,
"memory_limit": "16GB"
"memory_limit": "160GB"
}
},
"_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
"batch_size": 1,
"_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable. See https://github.com/pydata/xarray/issues/10501",
"batch_size": 25,
"raise_error": false
},
"metadata_uuid": "28f8bfed-ca6a-472a-84e4-42563ce4df3f",
Expand Down
9 changes: 0 additions & 9 deletions aodn_cloud_optimised/lib/DataQuery.py
Original file line number Diff line number Diff line change
Expand Up @@ -1876,15 +1876,6 @@ def _open_zarr_store(self) -> xr.Dataset:
ValueError: If a suitable time variable cannot be found for sorting.
"""
try:
# storage_opts = self.s3_fs_opts.get("storage_options", {})
# anon_flag = self.s3_fs_opts.get("anon", True)
# ds = xr.open_zarr(
# fsspec.get_mapper(self.dname, anon=anon_flag, **storage_opts),
# chunks=None,
# consolidated=True,
# )
# mapper = fsspec.get_mapper(self.dname, storage_options={"fs": self.s3})

mapper = self.s3.get_mapper(self.dname)
ds = xr.open_zarr(mapper, chunks=None, consolidated=True)

Expand Down
Loading
Loading