Skip to content

Commit f15aa1f

Browse files
committed
chore: keep BatchedCodecPipeline as default
Restore the default codec pipeline to BatchedCodecPipeline. SyncCodecPipeline remains available and is tested, but is opt-in via the codec_pipeline.path config setting. Tests that exercise SyncCodecPipeline-specific behavior (byte-range writes for partial shard updates) now skip when a different pipeline is active. Also drop a few stale # type: ignore comments in sharding.py that mypy now flags as unused.
1 parent af3b8e9 commit f15aa1f

4 files changed

Lines changed: 22 additions & 8 deletions

File tree

src/zarr/codecs/sharding.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ def _encode_partial_sync(
611611
total_data_size = n_chunks * chunk_byte_length
612612
total_shard_size = total_data_size + shard_index_size
613613

614-
existing = byte_setter.get_sync(prototype=shard_spec.prototype) # type: ignore[attr-defined]
614+
existing = byte_setter.get_sync(prototype=shard_spec.prototype)
615615
if existing is not None and len(existing) == total_shard_size:
616616
key = byte_setter.path if hasattr(byte_setter, "path") else str(byte_setter)
617617
shard_reader = self._shard_reader_from_bytes_sync(existing, chunks_per_shard)
@@ -662,7 +662,7 @@ def _byte_offset(coords: tuple[int, ...]) -> int:
662662
morton_order_iter(chunks_per_shard)
663663
)
664664
else:
665-
existing_bytes = byte_setter.get_sync(prototype=shard_spec.prototype) # type: ignore[attr-defined]
665+
existing_bytes = byte_setter.get_sync(prototype=shard_spec.prototype)
666666
if existing_bytes is not None:
667667
shard_reader_fb = self._shard_reader_from_bytes_sync(
668668
existing_bytes, chunks_per_shard
@@ -706,9 +706,9 @@ def _byte_offset(coords: tuple[int, ...]) -> int:
706706
buffer_prototype=default_buffer_prototype(),
707707
)
708708
if blob is None:
709-
byte_setter.delete_sync() # type: ignore[attr-defined]
709+
byte_setter.delete_sync()
710710
else:
711-
byte_setter.set_sync(blob) # type: ignore[attr-defined]
711+
byte_setter.set_sync(blob)
712712

713713
def _encode_shard_dict_sync(
714714
self,

src/zarr/core/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def enable_gpu(self) -> ConfigSet:
104104
"threading": {"max_workers": None},
105105
"json_indent": 2,
106106
"codec_pipeline": {
107-
"path": "zarr.core.codec_pipeline.SyncCodecPipeline",
107+
"path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
108108
"batch_size": 1,
109109
},
110110
"codecs": {

tests/test_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_config_defaults_set() -> None:
6161
"threading": {"max_workers": None},
6262
"json_indent": 2,
6363
"codec_pipeline": {
64-
"path": "zarr.core.codec_pipeline.SyncCodecPipeline",
64+
"path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
6565
"batch_size": 1,
6666
},
6767
"codecs": {

tests/test_sync_pipeline.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,11 @@ def test_partial_shard_write_roundtrip_correctness() -> None:
520520

521521

522522
def test_partial_shard_write_uses_set_range() -> None:
523-
"""Partial shard writes with fixed-size codecs should use set_range_sync."""
523+
"""Partial shard writes with fixed-size codecs should use set_range_sync.
524+
525+
Only the SyncCodecPipeline uses byte-range writes for partial shard
526+
updates; skipped under other pipelines.
527+
"""
524528
from unittest.mock import patch
525529

526530
store = zarr.storage.MemoryStore()
@@ -533,6 +537,9 @@ def test_partial_shard_write_uses_set_range() -> None:
533537
compressors=None,
534538
fill_value=0.0,
535539
)
540+
if not isinstance(arr._async_array.codec_pipeline, SyncCodecPipeline):
541+
pytest.skip("byte-range write optimization is specific to SyncCodecPipeline")
542+
536543
# Initial full write to create the shard blob
537544
arr[:] = np.arange(100, dtype="float64")
538545

@@ -552,7 +559,12 @@ def test_partial_shard_write_uses_set_range() -> None:
552559

553560

554561
def test_partial_shard_write_falls_back_for_compressed() -> None:
555-
"""Partial shard writes with compressed inner codecs should NOT use set_range."""
562+
"""Partial shard writes with compressed inner codecs should NOT use set_range.
563+
564+
Only meaningful under SyncCodecPipeline (which can use byte-range writes
565+
for fixed-size inner codecs). Other pipelines never use set_range_sync,
566+
so the assertion is trivially true and the test is uninformative.
567+
"""
556568
from unittest.mock import patch
557569

558570
store = zarr.storage.MemoryStore()
@@ -565,6 +577,8 @@ def test_partial_shard_write_falls_back_for_compressed() -> None:
565577
compressors=GzipCodec(),
566578
fill_value=0.0,
567579
)
580+
if not isinstance(arr._async_array.codec_pipeline, SyncCodecPipeline):
581+
pytest.skip("byte-range write optimization is specific to SyncCodecPipeline")
568582
arr[:] = np.arange(100, dtype="float64")
569583

570584
with patch.object(type(store), "set_range_sync", wraps=store.set_range_sync) as mock_set_range:

0 commit comments

Comments (0)