Skip to content

Commit 01bdda8

Browse files
Add sharding support for GeoZarr conversion and CLI (#38)
* feat: add sharding support for GeoZarr conversion and CLI
* update launch configurations for GeoZarr conversion with new data sources and adjusted parameters
* feat: enable sharding in GeoZarr conversion launch configuration
* fix: update sharding codec handling in _create_sharded_encoding function
* refactor: streamline sharding configuration in _create_geozarr_encoding function
* feat: enhance sharding logic in _create_geozarr_encoding and add _calculate_shard_dimension utility
* feat: improve sharding configuration and validation in _create_geozarr_encoding
* fix: refine shard dimension calculation and improve divisor check in utility functions
* Add dataset tree structure and test script for sharding fix
  - Introduced a new dataset tree structure for Sentinel-2 data, detailing conditions, quality, and measurements.
  - Added a comprehensive test script to verify the sharding fix for GeoZarr conversion.
  - Implemented tests for shard dimension calculations and encoding creation with sharding enabled/disabled.
  - Enhanced output for better debugging and validation of shard dimensions against chunk dimensions.
* feat: enable sharding in Dask cluster setup and enhance chunking logic for sharded variables
* Remove outdated dataset tree structure and test script for sharding fix
  - Deleted the `dataset_tree_simplified.txt` file as it is no longer needed.
  - Removed the `test_sharding_fix.py` script which was used to verify the sharding fix for GeoZarr conversion.
* feat: update GeoZarr encoding to include optional shards attribute in XarrayEncodingJSON
* fix: update test for calculate_aligned_chunk_size to assert exact target chunk size when no suitable divisor is found
* declare types for ambiguous variables

---------

Co-authored-by: Davis Vann Bennett <davis.v.bennett@gmail.com>
1 parent 44f206a commit 01bdda8

6 files changed

Lines changed: 162 additions & 28 deletions

File tree

.vscode/launch.json

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,16 @@
1313
"module": "eopf_geozarr",
1414
"args": [
1515
"convert",
16-
"https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:sample-data/tutorial_data/cpm_v253/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.zarr",
17-
"./tests-output/eopf_geozarr/s2b_test.zarr",
18-
"--groups", "/measurements/reflectance/r10m", "/measurements/reflectance/r20m", "/measurements/reflectance/r60m", "/quality/l1c_quicklook/r10m",
16+
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202508-s02msil2a/11/products/cpm_v256/S2C_MSIL2A_20250811T112131_N0511_R037_T29TPF_20250811T152216.zarr",
17+
"./tests-output/eopf_geozarr/s2l2_test.zarr",
18+
"--groups", "/measurements/reflectance/r10m", "/measurements/reflectance/r20m", "/measurements/reflectance/r60m", "/quality/l2a_quicklook/r10m",
1919
"--crs-groups", "/conditions/geometry",
20-
"--spatial-chunk", "4096",
20+
"--spatial-chunk", "512",
2121
"--min-dimension", "256",
2222
"--tile-width", "256",
2323
"--max-retries", "2",
24-
"--verbose"
24+
"--verbose",
25+
"--enable-sharding"
2526
],
2627
"cwd": "${workspaceFolder}",
2728
"justMyCode": false,
@@ -99,18 +100,27 @@
99100
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202507-s02msil2a/04/products/cpm_v256/S2A_MSIL2A_20250704T094051_N0511_R036_T33SWB_20250704T115824.zarr",
100101
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202508-s02msil2a/04/products/cpm_v256/S2B_MSIL2A_20250804T103629_N0511_R008_T31TDH_20250804T130722.zarr",
101102
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202508-s02msil2a/07/products/cpm_v256/S2B_MSIL2A_20250807T104619_N0511_R051_T31TDH_20250807T131144.zarr",
102-
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202508-s02msil2a/11/products/cpm_v256/S2C_MSIL2A_20250811T112131_N0511_R037_T29TPF_20250811T152216.zarr",
103-
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S2A_MSIL2A_20250704T094051_N0511_R036_T33SWB_20250704T115824.zarr",
104-
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S2B_MSIL2A_20250804T103629_N0511_R008_T31TDH_20250804T130722.zarr",
105-
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S2B_MSIL2A_20250807T104619_N0511_R051_T31TDH_20250807T131144.zarr",
106-
"s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S2C_MSIL2A_20250811T112131_N0511_R037_T29TPF_20250811T152216.zarr",
103+
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202508-s02msil2a/11/products/cpm_v256/S2C_MSIL2A_20250811T112131_N0511_R037_T29TPF_20250811T152216.zarr",
104+
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s02msil2a/13/products/cpm_v256/S2C_MSIL2A_20250913T095041_N0511_R079_T33TVF_20250913T151113.zarr",
105+
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s02msil2a/21/products/cpm_v256/S2B_MSIL2A_20250921T100029_N0511_R122_T32TQM_20250921T135752.zarr",
106+
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s02msil2a/21/products/cpm_v256/S2B_MSIL2A_20250921T100029_N0511_R122_T33TTG_20250921T135752.zarr",
107+
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s02msil2a/08/products/cpm_v256/S2A_MSIL2A_20250908T100041_N0511_R122_T32TQM_20250908T115116.zarr",
108+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2A_MSIL2A_20250704T094051_N0511_R036_T33SWB_20250704T115824.zarr",
109+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2B_MSIL2A_20250804T103629_N0511_R008_T31TDH_20250804T130722.zarr",
110+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2B_MSIL2A_20250807T104619_N0511_R051_T31TDH_20250807T131144.zarr",
111+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2C_MSIL2A_20250811T112131_N0511_R037_T29TPF_20250811T152216.zarr",
112+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2C_MSIL2A_20250913T095041_N0511_R079_T33TVF_20250913T151113.zarr",
113+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2B_MSIL2A_20250921T100029_N0511_R122_T32TQM_20250921T135752.zarr",
114+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2B_MSIL2A_20250921T100029_N0511_R122_T33TTG_20250921T135752.zarr",
115+
"s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2A_MSIL2A_20250908T100041_N0511_R122_T32TQM_20250908T115116.zarr",
107116
"--groups", "/measurements/reflectance/r10m", "/measurements/reflectance/r20m", "/measurements/reflectance/r60m", "/quality/l2a_quicklook/r10m",
108117
"--crs-groups", "/conditions/geometry",
109-
"--spatial-chunk", "512",
118+
"--spatial-chunk", "256",
110119
"--min-dimension", "256",
111120
"--tile-width", "256",
112121
"--max-retries", "2",
113122
"--dask-cluster",
123+
"--enable-sharding",
114124
"--verbose"
115125
],
116126
"cwd": "${workspaceFolder}",
@@ -156,8 +166,12 @@
156166
"module": "eopf_geozarr",
157167
"args": [
158168
"convert",
159-
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s01siwgrh/12/products/cpm_v256/S1C_IW_GRDH_1SDV_20250912T053648_20250912T053713_004087_0081FD_5AA4.zarr",
160-
"s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S1C_IW_GRDH_1SDV_20250912T053648_20250912T053713_004087_0081FD_5AA4.zarr",
169+
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:notebook-data/tutorial_data/cpm_v260/S1A_IW_GRDH_1SDV_20241124T180254_20241124T180319_056700_06F516_BA27.zarr",
170+
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:notebook-data/tutorial_data/cpm_v260/S1A_IW_GRDH_1SDV_20241218T180252_20241218T180317_057050_0702F2_0BC2.zarr",
171+
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s01siwgrh/12/products/cpm_v256/S1A_IW_GRDH_1SDV_20241230T180251_20241230T180316_057225_0709DD_15AC.zarr",
172+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S1A_IW_GRDH_1SDV_20241124T180254_20241124T180319_056700_06F516_BA27_2.zarr",
173+
"s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel1-l1-grd/S1A_IW_GRDH_1SDV_20241218T180252_20241218T180317_057050_0702F2_0BC2.zarr",
174+
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S1A_IW_GRDH_1SDV_20241230T180251_20241230T180316_057225_0709DD_15AC.zarr",
161175
"--groups", "/measurements",
162176
"--gcp-group", "/conditions/gcp",
163177
// "--crs-groups", "/conditions/geometry",
@@ -205,7 +219,7 @@
205219
"module": "eopf_geozarr",
206220
"args": [
207221
"info",
208-
"./tests-output/eopf_geozarr/s2b_test.zarr",
222+
"./tests-output/eopf_geozarr/s2l2_test.zarr",
209223
"--verbose",
210224
"--html-output", "dataset_info.html"
211225
],
@@ -224,7 +238,7 @@
224238
"module": "eopf_geozarr",
225239
"args": [
226240
"info",
227-
"s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2B_MSIL2A_20250921T100029_N0511_R122_T33TTG_20250921T135752.zarr",
241+
"s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2A_MSIL2A_20250704T094051_N0511_R036_T33SWB_20250704T115824.zarr",
228242
"--verbose",
229243
"--html-output", "dataset_info.html"
230244
],

src/eopf_geozarr/cli.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ def setup_dask_cluster(enable_dask: bool, verbose: bool = False) -> Optional[Any
5151
try:
5252
from dask.distributed import Client
5353

54-
# Set up local cluster
55-
client = Client() # set up local cluster
54+
# Set up local cluster with high memory limits
55+
client = Client(memory_limit="8GB") # set up local cluster
56+
# client = Client() # set up local cluster
5657

5758
if verbose:
5859
print(f"🚀 Dask cluster started: {client}")
@@ -175,6 +176,7 @@ def convert_command(args: argparse.Namespace) -> None:
175176
max_retries=args.max_retries,
176177
crs_groups=args.crs_groups,
177178
gcp_group=args.gcp_group,
179+
enable_sharding=args.enable_sharding,
178180
)
179181

180182
print("✅ Successfully converted EOPF dataset to GeoZarr format")
@@ -1109,6 +1111,11 @@ def create_parser() -> argparse.ArgumentParser:
11091111
action="store_true",
11101112
help="Start a local dask cluster for parallel processing of chunks",
11111113
)
1114+
convert_parser.add_argument(
1115+
"--enable-sharding",
1116+
action="store_true",
1117+
help="Enable zarr sharding for spatial dimensions of each variable",
1118+
)
11121119
convert_parser.set_defaults(func=convert_command)
11131120

11141121
# Info command

src/eopf_geozarr/conversion/geozarr.py

Lines changed: 118 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def create_geozarr_dataset(
5757
max_retries: int = 3,
5858
crs_groups: Iterable[str] | None = None,
5959
gcp_group: str | None = None,
60+
enable_sharding: bool = False,
6061
) -> xr.DataTree:
6162
"""
6263
Create a GeoZarr-spec 0.4 compliant dataset from EOPF data.
@@ -81,6 +82,8 @@ def create_geozarr_dataset(
8182
Iterable of group names that need CRS information added on best-effort basis
8283
gcp_group : str, optional
8384
Group name where GCPs (Ground Control Points) are located.
85+
enable_sharding : bool, default False
86+
Enable zarr sharding for spatial dimensions of each variable
8487
8588
Returns
8689
-------
@@ -90,6 +93,9 @@ def create_geozarr_dataset(
9093
dt = dt_input.copy()
9194
compressor = BloscCodec(cname="zstd", clevel=3, shuffle="shuffle", blocksize=0)
9295

96+
if enable_sharding:
97+
print("🔧 Zarr sharding enabled for spatial dimensions")
98+
9399
if _is_sentinel1(dt_input):
94100
if gcp_group is None:
95101
raise ValueError(
@@ -132,6 +138,7 @@ def create_geozarr_dataset(
132138
max_retries,
133139
crs_groups,
134140
gcp_group,
141+
enable_sharding,
135142
)
136143

137144
# Consolidate metadata at the root level AFTER all groups are written
@@ -230,6 +237,7 @@ def iterative_copy(
230237
max_retries: int = 3,
231238
crs_groups: Iterable[str] | None = None,
232239
gcp_group: str | None = None,
240+
enable_sharding: bool = False,
233241
) -> xr.DataTree:
234242
"""
235243
Iteratively copy groups from original DataTree to GeoZarr DataTree.
@@ -301,6 +309,7 @@ def iterative_copy(
301309
min_dimension=min_dimension,
302310
tile_width=tile_width,
303311
gcp_group=gcp_group,
312+
enable_sharding=enable_sharding,
304313
)
305314
written_groups.add(current_group_path)
306315
continue
@@ -407,6 +416,7 @@ def write_geozarr_group(
407416
min_dimension: int = 256,
408417
tile_width: int = 256,
409418
gcp_group: str | None = None,
419+
enable_sharding: bool = False,
410420
) -> xr.DataTree:
411421
"""
412422
Write a group to a GeoZarr dataset with multiscales support.
@@ -451,7 +461,7 @@ def write_geozarr_group(
451461
dt.attrs = ds.attrs.copy()
452462

453463
# Create encoding for all variables
454-
encoding = _create_geozarr_encoding(ds, compressor, spatial_chunk)
464+
encoding = _create_geozarr_encoding(ds, compressor, spatial_chunk, enable_sharding)
455465

456466
# Write native data in the group 0 (overview level 0)
457467
native_dataset_group_name = f"{group_name}/0"
@@ -492,6 +502,7 @@ def write_geozarr_group(
492502
tile_width=tile_width,
493503
spatial_chunk=spatial_chunk,
494504
ds_gcp=ds_gcp,
505+
enable_sharding=enable_sharding,
495506
)
496507
except Exception as e:
497508
print(
@@ -517,6 +528,7 @@ def create_geozarr_compliant_multiscales(
517528
tile_width: int = 256,
518529
spatial_chunk: int = 4096,
519530
ds_gcp: xr.Dataset | None = None,
531+
enable_sharding: bool = False,
520532
) -> Dict[str, Any]:
521533
"""
522534
Create GeoZarr-spec compliant multiscales following the specification exactly.
@@ -674,10 +686,13 @@ def create_geozarr_compliant_multiscales(
674686
native_bounds,
675687
data_vars,
676688
ds_gcp_overview,
689+
enable_sharding,
677690
)
678691

679692
# Create encoding for this overview level
680-
encoding = _create_geozarr_encoding(overview_ds, compressor, spatial_chunk)
693+
encoding = _create_geozarr_encoding(
694+
overview_ds, compressor, spatial_chunk, enable_sharding
695+
)
681696

682697
# Write overview level
683698
overview_path = fs_utils.normalize_path(f"{output_path}/{group_name}/{level}")
@@ -885,6 +900,7 @@ def create_overview_dataset_all_vars(
885900
native_bounds: Tuple[float, float, float, float],
886901
data_vars: Sequence[Hashable],
887902
ds_gcp: xr.Dataset | None = None,
903+
enable_sharding: bool = False,
888904
) -> xr.Dataset:
889905
"""
890906
Create an overview dataset containing all variables for a specific level.
@@ -1090,7 +1106,21 @@ def write_dataset_band_by_band_with_validation(
10901106
for attempt in range(max_retries):
10911107
try:
10921108
# Ensure the dataset is properly chunked to align with encoding
1093-
if var in var_encoding and "chunks" in var_encoding[var]:
1109+
if (
1110+
var in var_encoding
1111+
and "shards" in var_encoding[var]
1112+
and var_encoding[var]["shards"] is not None
1113+
):
1114+
# For sharded variables, use the shards dimensions
1115+
shard_dims = var_encoding[var].get("shards", None)
1116+
if shard_dims is not None:
1117+
var_dims = single_var_ds[var].dims
1118+
chunk_dict = {}
1119+
for i, dim in enumerate(var_dims):
1120+
if i < len(shard_dims):
1121+
chunk_dict[dim] = shard_dims[i]
1122+
single_var_ds[var] = single_var_ds[var].chunk(chunk_dict)
1123+
elif var in var_encoding and "chunks" in var_encoding[var]:
10941124
target_chunks = var_encoding[var]["chunks"]
10951125
# Create chunk dict using the actual dimensions of the variable
10961126
var_dims = single_var_ds[var].dims
@@ -1442,10 +1472,11 @@ def _create_encoding(
14421472

14431473

14441474
def _create_geozarr_encoding(
1445-
ds: xr.Dataset, compressor: Any, spatial_chunk: int
1475+
ds: xr.Dataset, compressor: Any, spatial_chunk: int, enable_sharding: bool = False
14461476
) -> dict[Hashable, XarrayEncodingJSON]:
14471477
"""Create encoding for GeoZarr dataset variables."""
14481478
encoding: dict[Hashable, XarrayEncodingJSON] = {}
1479+
chunks: tuple[int, ...]
14491480
for var in ds.data_vars:
14501481
if utils.is_grid_mapping_variable(ds, var):
14511482
encoding[var] = {"compressors": None}
@@ -1458,12 +1489,54 @@ def _create_geozarr_encoding(
14581489
utils.calculate_aligned_chunk_size(width, spatial_chunk),
14591490
utils.calculate_aligned_chunk_size(height, spatial_chunk),
14601491
)
1492+
1493+
if len(data_shape) == 3:
1494+
chunks = (1, spatial_chunk_aligned, spatial_chunk_aligned)
1495+
else:
1496+
chunks = (spatial_chunk_aligned, spatial_chunk_aligned)
14611497
else:
14621498
spatial_chunk_aligned = spatial_chunk
1499+
chunks = (spatial_chunk_aligned,)
1500+
1501+
shards: tuple[int, ...] | None = None
1502+
1503+
if enable_sharding:
1504+
# Calculate shard dimensions that are divisible by chunk dimensions
1505+
if len(data_shape) == 3:
1506+
# For 3D data (time, y, x), ensure shard dimensions are divisible by chunks
1507+
shard_time = data_shape[0] # Keep full time dimension
1508+
shard_y = _calculate_shard_dimension(data_shape[1], chunks[1])
1509+
shard_x = _calculate_shard_dimension(data_shape[2], chunks[2])
1510+
shards = (shard_time, shard_y, shard_x)
1511+
print(
1512+
f" 🔧 Sharding config for {var}: data_shape={data_shape}, chunks={chunks}, shards={shards}"
1513+
)
1514+
elif len(data_shape) == 2:
1515+
# For 2D data (y, x), ensure shard dimensions are divisible by chunks
1516+
shard_y = _calculate_shard_dimension(data_shape[0], chunks[0])
1517+
shard_x = _calculate_shard_dimension(data_shape[1], chunks[1])
1518+
shards = (shard_y, shard_x)
1519+
print(
1520+
f" 🔧 Sharding config for {var}: data_shape={data_shape}, chunks={chunks}, shards={shards}"
1521+
)
1522+
else:
1523+
# For 1D data, use the full dimension
1524+
shards = (data_shape[0],)
1525+
print(
1526+
f" 🔧 Sharding config for {var}: data_shape={data_shape}, chunks={chunks}, shards={shards}"
1527+
)
1528+
1529+
# Validate that shards are evenly divisible by chunks
1530+
for i, (shard_dim, chunk_dim) in enumerate(zip(shards, chunks)):
1531+
if shard_dim % chunk_dim != 0:
1532+
print(
1533+
f" ⚠️ Warning: Shard dimension {shard_dim} not evenly divisible by chunk dimension {chunk_dim} at axis {i}"
1534+
)
14631535

14641536
encoding[var] = {
1465-
"chunks": (spatial_chunk_aligned, spatial_chunk_aligned),
1537+
"chunks": chunks,
14661538
"compressors": compressor,
1539+
"shards": shards,
14671540
}
14681541

14691542
# Add coordinate encoding
@@ -1618,6 +1691,46 @@ def _add_grid_mapping_variable(
16181691
print(f" Added grid_mapping attribute to {var_name}")
16191692

16201693

1694+
def _calculate_shard_dimension(data_dim: int, chunk_dim: int) -> int:
1695+
"""
1696+
Calculate shard dimension that is evenly divisible by chunk dimension.
1697+
1698+
For Zarr v3 sharding with Dask, the shard dimension must be evenly
1699+
divisible by the chunk dimension to avoid checksum mismatches.
1700+
1701+
Parameters
1702+
----------
1703+
data_dim : int
1704+
Size of the data dimension
1705+
chunk_dim : int
1706+
Size of the chunk dimension
1707+
1708+
Returns
1709+
-------
1710+
int
1711+
Shard dimension that is evenly divisible by chunk_dim
1712+
"""
1713+
# If chunk is larger than data dimension, the effective chunk will be data_dim
1714+
# In this case, shard should also be data_dim to maintain divisibility
1715+
if chunk_dim >= data_dim:
1716+
return data_dim
1717+
1718+
# Calculate how many complete chunks fit in the data dimension
1719+
num_complete_chunks = data_dim // chunk_dim
1720+
1721+
# If we have at least 2 complete chunks, use a multiple of chunk_dim
1722+
if num_complete_chunks >= 2:
1723+
# Use a shard size that's a multiple of chunk_dim
1724+
for multiplier in range(num_complete_chunks + 1, 2, -1):
1725+
shard_size = multiplier * chunk_dim
1726+
if shard_size <= data_dim:
1727+
return shard_size
1728+
1729+
# Fallback: use the largest multiple of chunk_dim that fits
1730+
# If no complete chunks fit, use data_dim (this handles edge cases)
1731+
return num_complete_chunks * chunk_dim if num_complete_chunks > 0 else data_dim
1732+
1733+
16211734
def _is_sentinel1(dt: xr.DataTree) -> bool:
16221735
"""Return True if the input DataTree represents a Sentinel-1 product."""
16231736
stac_props = dt.attrs.get("stac_discovery", {}).get("properties", {})

src/eopf_geozarr/conversion/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ def calculate_aligned_chunk_size(dimension_size: int, target_chunk_size: int) ->
124124
return dimension_size
125125

126126
# Find the largest divisor of dimension_size that is <= target_chunk_size
127-
for chunk_size in range(target_chunk_size, 0, -1):
128-
if dimension_size % chunk_size < 0.1 * chunk_size:
127+
for chunk_size in range(target_chunk_size, int(target_chunk_size * 0.51), -1):
128+
if dimension_size % chunk_size == 0:
129129
return chunk_size
130130

131131
# If no divisor is found, return the closest value to target_chunk_size

0 commit comments

Comments
 (0)