Skip to content

Commit 0e61449

Browse files
authored
Add methods for getting bytes + json to store abc (#3638)
* add store routines for getting bytes and json * check for FileNotFoundError when a key is missing * remove storepath methods * changelog * rename methods * continue renaming / test refactoring * refactor new test functions * make new methods private * remove changelog entry for private API
1 parent b30ae18 commit 0e61449

File tree

6 files changed

+819
-5
lines changed

6 files changed

+819
-5
lines changed

src/zarr/abc/store.py

Lines changed: 210 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

3+
import asyncio
4+
import json
35
from abc import ABC, abstractmethod
4-
from asyncio import gather
56
from dataclasses import dataclass
67
from itertools import starmap
78
from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable
89

10+
from zarr.core.sync import sync
11+
912
if TYPE_CHECKING:
1013
from collections.abc import AsyncGenerator, AsyncIterator, Iterable
1114
from types import TracebackType
@@ -206,6 +209,211 @@ async def get(
206209
"""
207210
...
208211

212+
async def _get_bytes(
213+
self, key: str, *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
214+
) -> bytes:
215+
"""
216+
Retrieve raw bytes from the store asynchronously.
217+
218+
This is a convenience method that wraps ``get()`` and converts the result
219+
to bytes. Use this when you need the raw byte content of a stored value.
220+
221+
Parameters
222+
----------
223+
key : str
224+
The key identifying the data to retrieve.
225+
prototype : BufferPrototype
226+
The buffer prototype to use for reading the data.
227+
byte_range : ByteRequest, optional
228+
If specified, only retrieve a portion of the stored data.
229+
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
230+
231+
Returns
232+
-------
233+
bytes
234+
The raw bytes stored at the given key.
235+
236+
Raises
237+
------
238+
FileNotFoundError
239+
If the key does not exist in the store.
240+
241+
See Also
242+
--------
243+
get : Lower-level method that returns a Buffer object.
244+
get_bytes : Synchronous version of this method.
245+
get_json : Asynchronous method for retrieving and parsing JSON data.
246+
247+
Examples
248+
--------
249+
>>> store = await MemoryStore.open()
250+
>>> await store.set("data", Buffer.from_bytes(b"hello world"))
251+
>>> data = await store.get_bytes("data", prototype=default_buffer_prototype())
252+
>>> print(data)
253+
b'hello world'
254+
"""
255+
buffer = await self.get(key, prototype, byte_range)
256+
if buffer is None:
257+
raise FileNotFoundError(key)
258+
return buffer.to_bytes()
259+
260+
def _get_bytes_sync(
261+
self, key: str = "", *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
262+
) -> bytes:
263+
"""
264+
Retrieve raw bytes from the store synchronously.
265+
266+
This is a synchronous wrapper around ``get_bytes()``. It should only
267+
be called from non-async code. For async contexts, use ``get_bytes()``
268+
instead.
269+
270+
Parameters
271+
----------
272+
key : str, optional
273+
The key identifying the data to retrieve. Defaults to an empty string.
274+
prototype : BufferPrototype
275+
The buffer prototype to use for reading the data.
276+
byte_range : ByteRequest, optional
277+
If specified, only retrieve a portion of the stored data.
278+
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
279+
280+
Returns
281+
-------
282+
bytes
283+
The raw bytes stored at the given key.
284+
285+
Raises
286+
------
287+
FileNotFoundError
288+
If the key does not exist in the store.
289+
290+
Warnings
291+
--------
292+
Do not call this method from async functions. Use ``get_bytes()`` instead
293+
to avoid blocking the event loop.
294+
295+
See Also
296+
--------
297+
get_bytes : Asynchronous version of this method.
298+
get_json_sync : Synchronous method for retrieving and parsing JSON data.
299+
300+
Examples
301+
--------
302+
>>> store = MemoryStore()
303+
>>> await store.set("data", Buffer.from_bytes(b"hello world"))
304+
>>> data = store.get_bytes_sync("data", prototype=default_buffer_prototype())
305+
>>> print(data)
306+
b'hello world'
307+
"""
308+
309+
return sync(self._get_bytes(key, prototype=prototype, byte_range=byte_range))
310+
311+
async def _get_json(
312+
self, key: str, *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
313+
) -> Any:
314+
"""
315+
Retrieve and parse JSON data from the store asynchronously.
316+
317+
This is a convenience method that retrieves bytes from the store and
318+
parses them as JSON.
319+
320+
Parameters
321+
----------
322+
key : str
323+
The key identifying the JSON data to retrieve.
324+
prototype : BufferPrototype
325+
The buffer prototype to use for reading the data.
326+
byte_range : ByteRequest, optional
327+
If specified, only retrieve a portion of the stored data.
328+
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
329+
Note: Using byte ranges with JSON may result in invalid JSON.
330+
331+
Returns
332+
-------
333+
Any
334+
The parsed JSON data. This follows the behavior of ``json.loads()`` and
335+
can be any JSON-serializable type: dict, list, str, int, float, bool, or None.
336+
337+
Raises
338+
------
339+
FileNotFoundError
340+
If the key does not exist in the store.
341+
json.JSONDecodeError
342+
If the stored data is not valid JSON.
343+
344+
See Also
345+
--------
346+
get_bytes : Method for retrieving raw bytes.
347+
get_json_sync : Synchronous version of this method.
348+
349+
Examples
350+
--------
351+
>>> store = await MemoryStore.open()
352+
>>> metadata = {"zarr_format": 3, "node_type": "array"}
353+
>>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode()))
354+
>>> data = await store.get_json("zarr.json", prototype=default_buffer_prototype())
355+
>>> print(data)
356+
{'zarr_format': 3, 'node_type': 'array'}
357+
"""
358+
359+
return json.loads(await self._get_bytes(key, prototype=prototype, byte_range=byte_range))
360+
361+
def _get_json_sync(
362+
self, key: str = "", *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
363+
) -> Any:
364+
"""
365+
Retrieve and parse JSON data from the store synchronously.
366+
367+
This is a synchronous wrapper around ``get_json()``. It should only
368+
be called from non-async code. For async contexts, use ``get_json()``
369+
instead.
370+
371+
Parameters
372+
----------
373+
key : str, optional
374+
The key identifying the JSON data to retrieve. Defaults to an empty string.
375+
prototype : BufferPrototype
376+
The buffer prototype to use for reading the data.
377+
byte_range : ByteRequest, optional
378+
If specified, only retrieve a portion of the stored data.
379+
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
380+
Note: Using byte ranges with JSON may result in invalid JSON.
381+
382+
Returns
383+
-------
384+
Any
385+
The parsed JSON data. This follows the behavior of ``json.loads()`` and
386+
can be any JSON-serializable type: dict, list, str, int, float, bool, or None.
387+
388+
Raises
389+
------
390+
FileNotFoundError
391+
If the key does not exist in the store.
392+
json.JSONDecodeError
393+
If the stored data is not valid JSON.
394+
395+
Warnings
396+
--------
397+
Do not call this method from async functions. Use ``get_json()`` instead
398+
to avoid blocking the event loop.
399+
400+
See Also
401+
--------
402+
get_json : Asynchronous version of this method.
403+
get_bytes_sync : Synchronous method for retrieving raw bytes without parsing.
404+
405+
Examples
406+
--------
407+
>>> store = MemoryStore()
408+
>>> metadata = {"zarr_format": 3, "node_type": "array"}
409+
>>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode()))
410+
>>> data = store.get_json_sync("zarr.json", prototype=default_buffer_prototype())
411+
>>> print(data)
412+
{'zarr_format': 3, 'node_type': 'array'}
413+
"""
414+
415+
return sync(self._get_json(key, prototype=prototype, byte_range=byte_range))
416+
209417
@abstractmethod
210418
async def get_partial_values(
211419
self,
@@ -278,7 +486,7 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None:
278486
"""
279487
Insert multiple (key, value) pairs into storage.
280488
"""
281-
await gather(*starmap(self.set, values))
489+
await asyncio.gather(*starmap(self.set, values))
282490

283491
@property
284492
def supports_consolidated_metadata(self) -> bool:

0 commit comments

Comments
 (0)