Skip to content

Commit 9bc2976

Browse files
committed
Add test for multi-part zarr upload
1 parent 57b353f commit 9bc2976

1 file changed

Lines changed: 42 additions & 0 deletions

File tree

dandi/tests/test_files.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,48 @@ def spy_multipart_upload(**kwargs):
567567
assert remote_entries == set(called_paths)
568568

569569

570+
@pytest.mark.ai_generated
def test_upload_zarr_mixed_chunks(new_dandiset, tmp_path):
    """Chunks above ZARR_LARGE_CHUNK_THRESHOLD go multipart; smaller ones use single-part upload.

    A local Zarr group holding one small and one large array is uploaded
    while ``ZARR_LARGE_CHUNK_THRESHOLD`` is patched to a value between the
    two chunk sizes and ``_multipart_upload`` is replaced by a spy that
    records the ``asset_path`` of each call before delegating to
    ``real_multipart_upload`` (defined outside this function).

    The test then checks that the recorded paths are exactly the uploaded
    entries whose on-disk size exceeds the threshold, and that both upload
    routes were exercised by at least one entry.
    """
    filepath = tmp_path / "mixed.zarr"
    store = zarr.open_group(str(filepath), mode="w")
    # small array: 10 int64 elements, produces a ~96-byte chunk (compressed)
    store.create_dataset("small", data=np.arange(10, dtype=np.int64), chunks=(10,))
    # large array: 200 int64 elements, produces a ~329-byte chunk (compressed)
    store.create_dataset("large", data=np.arange(200, dtype=np.int64), chunks=(200,))

    zf = dandi_file(filepath)
    assert isinstance(zf, ZarrAsset)

    multipart_paths: list[str] = []

    def spy_multipart_upload(**kwargs):
        # Record which entry was routed to multipart, then defer to the real
        # implementation so the upload itself still happens.
        multipart_paths.append(kwargs["asset_path"])
        yield from real_multipart_upload(**kwargs)

    # Threshold sits between the two chunk sizes so only the large chunk goes multipart
    mixed_threshold = 200
    with (
        patch("dandi.files.zarr.ZARR_LARGE_CHUNK_THRESHOLD", mixed_threshold),
        patch("dandi.files.zarr._multipart_upload", spy_multipart_upload),
    ):
        asset = zf.upload(new_dandiset.dandiset, {})

    assert isinstance(asset, RemoteZarrAsset)

    remote_entries = {str(e) for e in asset.iterfiles()}
    # Only chunk files whose on-disk size exceeds the threshold should be multipart-uploaded
    large_chunks = {
        p
        for p in remote_entries
        if (filepath / p).stat().st_size > mixed_threshold
    }
    # Compare as sets throughout: the spy appends once per call, so a repeated
    # call for the same path must not skew the bookkeeping below.
    multipart_set = set(multipart_paths)
    assert multipart_set == large_chunks

    # At least one chunk must have gone each path so the test is meaningful
    assert multipart_set, "no entry was uploaded via multipart"
    # Metadata files are always tiny, so they are excluded here; otherwise this
    # check would pass even if no data chunk took the single-part route.
    # NOTE(review): assumes metadata files are zarr v2 dot-files (".zgroup",
    # ".zarray", ".zattrs") or zarr v3 "zarr.json" -- confirm against the
    # zarr version used by the test suite.
    single_part_chunks = {
        p
        for p in remote_entries - multipart_set
        if not p.rsplit("/", 1)[-1].startswith(".")
        and p.rsplit("/", 1)[-1] != "zarr.json"
    }
    assert single_part_chunks, "no data chunk was uploaded via single-part"
610+
611+
570612
def test_validate_deep_zarr(tmp_path: Path) -> None:
571613
zarr_path = tmp_path / "foo.zarr"
572614
zarr.save(zarr_path, np.arange(1000), np.arange(1000, 0, -1))

0 commit comments

Comments
 (0)