Skip to content

Commit 4231e4e

Browse files
committed
Fix rebase
1 parent 45bddca commit 4231e4e

13 files changed

Lines changed: 2311 additions & 2516 deletions

aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_delayed_qc.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -276,19 +276,19 @@
276276
"_COMMENT": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
277277
"coiled_cluster_options": {
278278
"n_workers": [
279-
1,
279+
5,
280280
20
281281
],
282282
"scheduler_vm_types": "m7i.xlarge",
283-
"worker_vm_types": "m7i.large",
283+
"worker_vm_types": "m7i.2xlarge",
284284
"allow_ingress_from": "me",
285285
"compute_purchase_option": "spot_with_fallback",
286286
"worker_options": {
287287
"nthreads": 4,
288288
"memory_limit": "16GB"
289289
}
290290
},
291-
"batch_size": 1,
291+
"batch_size": 2,
292292
"cluster": {
293293
"mode": "coiled",
294294
"restart_every_path": false

aodn_cloud_optimised/config/dataset/vessel_satellite_radiance_derived_product.json

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,20 +19,20 @@
1919
"clear_existing_data": true,
2020
"coiled_cluster_options": {
2121
"n_workers": [
22-
1,
23-
20
22+
5,
23+
9
2424
],
25-
"scheduler_vm_types": "m7i.2xlarge",
26-
"worker_vm_types": "m7i.xlarge",
25+
"scheduler_vm_types": "m7i.xlarge",
26+
"worker_vm_types": "m7i.2xlarge",
2727
"allow_ingress_from": "me",
2828
"compute_purchase_option": "spot_with_fallback",
2929
"worker_options": {
3030
"nthreads": 4,
31-
"memory_limit": "16GB"
31+
"memory_limit": "160GB"
3232
}
3333
},
34-
"_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable",
35-
"batch_size": 1,
34+
"_comment": "2025-07-01: running this dataset with a batch size of more than 1 results in some silent data corruption with dask v2. It is unclear why. Maybe similar to xarray/issues/8882. Spent too much time on this. Maybe race conditions. But proof that xarray/zarr... is not reliable. See https://github.com/pydata/xarray/issues/10501",
35+
"batch_size": 25,
3636
"raise_error": false
3737
},
3838
"metadata_uuid": "28f8bfed-ca6a-472a-84e4-42563ce4df3f",

aodn_cloud_optimised/lib/CommonHandler.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,10 @@ def __init__(self, **kwargs):
129129
self.s3_client_opts = kwargs.get("s3_client_opts", None)
130130

131131
self.s3_fs = s3fs.S3FileSystem(
132-
anon=False, default_cache_type=None, session=kwargs.get("s3fs_session")
132+
anon=False,
133+
default_cache_type="readahead",
134+
default_fill_cache=False,
135+
session=kwargs.get("s3fs_session"),
133136
) # variable overwritten in unittest to use moto server
134137

135138
self.uuid_log = None

aodn_cloud_optimised/lib/DataQuery.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1747,6 +1747,9 @@ def _open_zarr_store(self) -> xr.Dataset:
17471747
ds = xr.open_zarr(
17481748
fsspec.get_mapper(self.dname, anon=True), chunks=None, consolidated=True
17491749
)
1750+
# ds = (
1751+
# ds.unify_chunks()
1752+
# ) # we must remove chunks=None if we want to use unify_chunks
17501753
# Find the time variable name to sort by
17511754
time_names = [
17521755
"time",

0 commit comments

Comments
 (0)