|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -from dataclasses import dataclass |
| 3 | +from dataclasses import dataclass, field |
4 | 4 | from itertools import islice, pairwise |
5 | 5 | from typing import TYPE_CHECKING, Any |
6 | 6 | from warnings import warn |
|
14 | 14 | Codec, |
15 | 15 | CodecPipeline, |
16 | 16 | GetResult, |
| 17 | + SupportsSyncCodec, |
17 | 18 | ) |
18 | 19 | from zarr.core.common import concurrent_map |
19 | 20 | from zarr.core.config import config |
@@ -66,6 +67,111 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any: |
66 | 67 | return fill_value |
67 | 68 |
|
68 | 69 |
|
@dataclass(slots=True, kw_only=True)
class ChunkTransform:
    """A synchronous codec chain bound to an ArraySpec.

    Provides `encode` and `decode` for pure-compute codec operations
    (no IO, no threading, no batching).

    All codecs must implement `SupportsSyncCodec`. Construction will
    raise `TypeError` if any codec does not.
    """

    codecs: tuple[Codec, ...]
    array_spec: ArraySpec

    # Array-to-array stage: each sync codec paired with the spec it consumes,
    # kept in pipeline order.
    _aa_codecs: tuple[tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ArraySpec], ...] = field(
        init=False, repr=False, compare=False
    )
    # The single array-to-bytes codec and the spec it operates on.
    _ab_codec: SupportsSyncCodec[NDBuffer, Buffer] = field(init=False, repr=False, compare=False)
    _ab_spec: ArraySpec = field(init=False, repr=False, compare=False)
    # Bytes-to-bytes stage, in pipeline (encode) order.
    _bb_codecs: tuple[SupportsSyncCodec[Buffer, Buffer], ...] = field(
        init=False, repr=False, compare=False
    )

    def __post_init__(self) -> None:
        # Reject the whole chain up front if any member lacks a sync interface,
        # naming every offender in one error message.
        unsupported = [c for c in self.codecs if not isinstance(c, SupportsSyncCodec)]
        if unsupported:
            names = ", ".join(type(c).__name__ for c in unsupported)
            raise TypeError(
                f"All codecs must implement SupportsSyncCodec. The following do not: {names}"
            )

        aa_part, ab_codec, bb_part = codecs_from_list(list(self.codecs))

        # Walk the array-to-array codecs, recording for each one the spec it
        # will see; resolve_metadata threads the spec forward through the chain.
        spec = self.array_spec
        bound_aa: list[tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ArraySpec]] = []
        for codec in aa_part:
            assert isinstance(codec, SupportsSyncCodec)
            bound_aa.append((codec, spec))
            spec = codec.resolve_metadata(spec)
        self._aa_codecs = tuple(bound_aa)

        assert isinstance(ab_codec, SupportsSyncCodec)
        self._ab_codec = ab_codec
        # Spec as it stands after all array-to-array transforms; the
        # array-to-bytes codec (and the bytes-to-bytes stage) operate on it.
        self._ab_spec = spec

        checked_bb: list[SupportsSyncCodec[Buffer, Buffer]] = []
        for codec in bb_part:
            assert isinstance(codec, SupportsSyncCodec)
            checked_bb.append(codec)
        self._bb_codecs = tuple(checked_bb)

    def decode(
        self,
        chunk_bytes: Buffer,
    ) -> NDBuffer:
        """Decode a single chunk through the full codec chain, synchronously.

        Pure compute -- no IO.
        """
        # Undo the encode pipeline back-to-front: bytes-to-bytes first...
        buf: Buffer = chunk_bytes
        for codec in reversed(self._bb_codecs):
            buf = codec._decode_sync(buf, self._ab_spec)

        # ...then bytes back into an array...
        arr: NDBuffer = self._ab_codec._decode_sync(buf, self._ab_spec)

        # ...then the array-to-array codecs, each with the spec it was bound to.
        for codec, spec in reversed(self._aa_codecs):
            arr = codec._decode_sync(arr, spec)

        return arr

    def encode(
        self,
        chunk_array: NDBuffer,
    ) -> Buffer | None:
        """Encode a single chunk through the full codec chain, synchronously.

        Pure compute -- no IO.

        Returns ``None`` as soon as any codec in the chain returns ``None``.
        """
        arr: NDBuffer = chunk_array
        for codec, spec in self._aa_codecs:
            step = codec._encode_sync(arr, spec)
            if step is None:
                return None
            arr = step

        encoded = self._ab_codec._encode_sync(arr, self._ab_spec)
        if encoded is None:
            return None

        buf: Buffer = encoded
        for codec in self._bb_codecs:
            step_bytes = codec._encode_sync(buf, self._ab_spec)
            if step_bytes is None:
                return None
            buf = step_bytes

        return buf

    def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int:
        """Fold each codec's size estimate through the chain, threading the
        spec forward via resolve_metadata at every step."""
        size = byte_length
        spec = array_spec
        for codec in self.codecs:
            size = codec.compute_encoded_size(size, spec)
            spec = codec.resolve_metadata(spec)
        return size
| 174 | + |
69 | 175 | @dataclass(frozen=True) |
70 | 176 | class BatchedCodecPipeline(CodecPipeline): |
71 | 177 | """Default codec pipeline. |
|
0 commit comments