Hello, I’m using cubed==0.23.0, zarr==3.1.3, and xarray==2025.10.0 to rechunk a Zarr store (time: 394488, rlat: 778, rlon: 706; float32) from the original chunks (24, 778, 706) to (43800, 5, 5). The process finishes without any errors, but when I open the final store, some chunks were not written correctly, leaving the values empty along the innermost dimension (i.e., rlon; see figure). After trying different solutions, the way I was able to work around this problem was to create a temporary intermediate Zarr store with chunks (3650, 50, 50).
Original code
`
import ray
import xarray as xr
import cubed
len_time = 365245
len_rlat = 5
len_rlon = 5
setup_ray_cluster(
min_worker_nodes=8,
max_worker_nodes=8,
num_cpus_per_node=16,
num_gpus_worker_node=0,
num_cpus_head_node = 15,
collect_log_to_path="/dbfs/tmp/ray_logs"
)
ray.init(address="auto")
config.set({
"spec.work_dir"./casr-na-eccc-grid-reanalysis-3.2/",
"spec.allowed_mem": "5GB",
"spec.reserved_mem": "0",
"spec.executor_name": "ray",
})
ds = xr.open_zarr(
source,
chunked_array_type="cubed",
chunks={},
)
rds = ds_tmp[['CaSR_v3.2_P_FI_SFC']].chunk({'time':len_time, 'rlat':len_rlat, 'rlon':len_rlon}, chunked_array_type="cubed")
rds.to_zarr(
target,
consolidated=False,
)`
Solution
`
# Workaround: rechunk in two passes, going through an intermediate store
# written at `target + '_tmp'`.
# `ds`, `target`, `mode`, `len_time`, `len_rlat` and `len_rlon` are expected
# to be defined before this snippet runs.
len_time_tmp = 3650
len_rlat_tmp = 50
len_rlon_tmp = 50

# Pass 1: write the variable with the intermediate chunking.
tmp_chunks = {'time': len_time_tmp, 'rlat': len_rlat_tmp, 'rlon': len_rlon_tmp}
rds_tmp = ds[['CaSR_v3.2_P_FI_SFC']].chunk(tmp_chunks, chunked_array_type="cubed")
rds_tmp.to_zarr(target + '_tmp', mode=mode, consolidated=False)

# Pass 2: reopen the intermediate store and rechunk to the final chunking.
ds_tmp = xr.open_zarr(target + '_tmp', chunked_array_type="cubed", chunks={})
final_chunks = {'time': len_time, 'rlat': len_rlat, 'rlon': len_rlon}
rds = ds_tmp[['CaSR_v3.2_P_FI_SFC']].chunk(final_chunks, chunked_array_type="cubed")
rds.to_zarr(target, mode=mode, consolidated=False)
`
Hello, I’m using cubed==0.23.0, zarr==3.1.3, and xarray==2025.10.0 to rechunk a Zarr store (time: 394488, rlat: 778, rlon: 706; float32) from the original chunks (24, 778, 706) to (43800, 5, 5). The process finishes without any errors, but when I open the final store, some chunks were not written correctly, leaving the values empty along the innermost dimension (i.e., rlon; see figure). After trying different solutions, the way I was able to work around this problem was to create a temporary intermediate Zarr store with chunks (3650, 50, 50).
Original code
`
import ray
import xarray as xr
import cubed
len_time = 365245
len_rlat = 5
len_rlon = 5
setup_ray_cluster(
min_worker_nodes=8,
max_worker_nodes=8,
num_cpus_per_node=16,
num_gpus_worker_node=0,
num_cpus_head_node = 15,
collect_log_to_path="/dbfs/tmp/ray_logs"
)
ray.init(address="auto")
config.set({
"spec.work_dir"./casr-na-eccc-grid-reanalysis-3.2/",
"spec.allowed_mem": "5GB",
"spec.reserved_mem": "0",
"spec.executor_name": "ray",
})
ds = xr.open_zarr(
source,
chunked_array_type="cubed",
chunks={},
)
rds = ds_tmp[['CaSR_v3.2_P_FI_SFC']].chunk({'time':len_time, 'rlat':len_rlat, 'rlon':len_rlon}, chunked_array_type="cubed")
rds.to_zarr(
target,
consolidated=False,
)`
Solution
`
# Workaround: rechunk in two passes, going through an intermediate store
# written at `target + '_tmp'`.
# `ds`, `target`, `mode`, `len_time`, `len_rlat` and `len_rlon` are expected
# to be defined before this snippet runs.
len_time_tmp = 3650
len_rlat_tmp = 50
len_rlon_tmp = 50

# Pass 1: write the variable with the intermediate chunking.
tmp_chunks = {'time': len_time_tmp, 'rlat': len_rlat_tmp, 'rlon': len_rlon_tmp}
rds_tmp = ds[['CaSR_v3.2_P_FI_SFC']].chunk(tmp_chunks, chunked_array_type="cubed")
rds_tmp.to_zarr(target + '_tmp', mode=mode, consolidated=False)

# Pass 2: reopen the intermediate store and rechunk to the final chunking.
ds_tmp = xr.open_zarr(target + '_tmp', chunked_array_type="cubed", chunks={})
final_chunks = {'time': len_time, 'rlat': len_rlat, 'rlon': len_rlon}
rds = ds_tmp[['CaSR_v3.2_P_FI_SFC']].chunk(final_chunks, chunked_array_type="cubed")
rds.to_zarr(target, mode=mode, consolidated=False)
`