Skip to content

Commit 2d8cd34

Browse files
committed
refactor URL parsing logic
1 parent aecbf06 commit 2d8cd34

File tree

4 files changed

+120
-87
lines changed

4 files changed

+120
-87
lines changed

src/zarr/storage/_common.py

Lines changed: 36 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
1919
from zarr.storage._local import LocalStore
2020
from zarr.storage._memory import ManagedMemoryStore, MemoryStore
21-
from zarr.storage._utils import _dereference_path, normalize_path
21+
from zarr.storage._utils import _dereference_path, normalize_path, parse_store_url
2222

2323
_has_fsspec = importlib.util.find_spec("fsspec")
2424
if _has_fsspec:
@@ -293,14 +293,18 @@ async def make_store(
293293
"""
294294
from zarr.storage._fsspec import FsspecStore # circular import
295295

296-
if (
297-
not (isinstance(store_like, str) and _is_fsspec_uri(store_like))
298-
and storage_options is not None
299-
):
300-
raise TypeError(
301-
"'storage_options' was provided but unused. "
302-
"'storage_options' is only used when the store is passed as an FSSpec URI string.",
303-
)
296+
# Check if storage_options is valid for this store_like
297+
if storage_options is not None:
298+
is_fsspec_uri = False
299+
if isinstance(store_like, str):
300+
parsed = parse_store_url(store_like)
301+
# fsspec URIs have a scheme that's not memory, file, or empty
302+
is_fsspec_uri = parsed.scheme not in ("", "memory", "file")
303+
if not is_fsspec_uri:
304+
raise TypeError(
305+
"'storage_options' was provided but unused. "
306+
"'storage_options' is only used when the store is passed as an FSSpec URI string.",
307+
)
304308

305309
assert mode in (None, "r", "r+", "a", "w", "w-")
306310
_read_only = mode == "r"
@@ -329,23 +333,19 @@ async def make_store(
329333
return await LocalStore.open(root=store_like, mode=mode, read_only=_read_only)
330334

331335
elif isinstance(store_like, str):
332-
# Check for memory:// URLs first
333-
if store_like.startswith("memory://"):
334-
# Parse the URL to extract name and path
335-
url_without_scheme = store_like[len("memory://") :]
336-
parts = url_without_scheme.split("/", 1)
337-
name = parts[0] if parts[0] else None
338-
path = parts[1] if len(parts) > 1 else ""
339-
# Create or get the store - ManagedMemoryStore handles both cases
340-
return ManagedMemoryStore(name=name, path=path, read_only=_read_only)
341-
# Either an FSSpec URI or a local filesystem path
342-
elif _is_fsspec_uri(store_like):
336+
parsed = parse_store_url(store_like)
337+
338+
if parsed.scheme == "memory":
339+
# Create or get a ManagedMemoryStore
340+
return ManagedMemoryStore(name=parsed.name, path=parsed.path, read_only=_read_only)
341+
elif parsed.scheme == "file" or not parsed.scheme:
342+
# Local filesystem path
343+
return await make_store(Path(store_like), mode=mode, storage_options=storage_options)
344+
else:
345+
# Assume fsspec can handle it (s3://, gs://, http://, etc.)
343346
return FsspecStore.from_url(
344347
store_like, storage_options=storage_options, read_only=_read_only
345348
)
346-
else:
347-
# Assume a filesystem path
348-
return await make_store(Path(store_like), mode=mode, storage_options=storage_options)
349349

350350
elif _has_fsspec and isinstance(store_like, FSMap):
351351
return FsspecStore.from_mapper(store_like, read_only=_read_only)
@@ -415,42 +415,25 @@ async def make_store_path(
415415
"'path' was provided but is not used for FSMap store_like objects. Specify the path when creating the FSMap instance instead."
416416
)
417417

418-
elif isinstance(store_like, str) and store_like.startswith("memory://"):
419-
# Handle memory:// URLs specially
420-
# Parse the URL to extract name and path
421-
_read_only = mode == "r"
422-
url_without_scheme = store_like[len("memory://") :]
423-
parts = url_without_scheme.split("/", 1)
424-
name = parts[0] if parts[0] else None
425-
url_path = parts[1] if len(parts) > 1 else ""
426-
# Create or get the store - ManagedMemoryStore handles both cases
427-
memory_store = ManagedMemoryStore(name=name, path=url_path, read_only=_read_only)
428-
return await StorePath.open(memory_store, path=path_normalized, mode=mode)
418+
elif isinstance(store_like, str):
419+
parsed = parse_store_url(store_like)
420+
if parsed.scheme == "memory":
421+
# Handle memory:// URLs specially - the path in the URL becomes part of the store
422+
_read_only = mode == "r"
423+
memory_store = ManagedMemoryStore(
424+
name=parsed.name, path=parsed.path, read_only=_read_only
425+
)
426+
return await StorePath.open(memory_store, path=path_normalized, mode=mode)
427+
else:
428+
# Fall through to make_store for other URL types
429+
store = await make_store(store_like, mode=mode, storage_options=storage_options)
430+
return await StorePath.open(store, path=path_normalized, mode=mode)
429431

430432
else:
431433
store = await make_store(store_like, mode=mode, storage_options=storage_options)
432434
return await StorePath.open(store, path=path_normalized, mode=mode)
433435

434436

435-
def _is_fsspec_uri(uri: str) -> bool:
436-
"""
437-
Check if a URI looks like a non-local fsspec URI.
438-
439-
Examples
440-
--------
441-
```python
442-
from zarr.storage._common import _is_fsspec_uri
443-
_is_fsspec_uri("s3://bucket")
444-
# True
445-
_is_fsspec_uri("my-directory")
446-
# False
447-
_is_fsspec_uri("local://my-directory")
448-
# False
449-
```
450-
"""
451-
return "://" in uri or ("::" in uri and "local://" not in uri)
452-
453-
454437
async def ensure_no_existing_node(
455438
store_path: StorePath,
456439
zarr_format: ZarrFormat,

src/zarr/storage/_memory.py

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99
from zarr.core.buffer import Buffer, gpu
1010
from zarr.core.buffer.core import default_buffer_prototype
1111
from zarr.core.common import concurrent_map
12-
from zarr.storage._utils import _dereference_path, _normalize_byte_range_index, normalize_path
12+
from zarr.storage._utils import (
13+
_dereference_path,
14+
_normalize_byte_range_index,
15+
normalize_path,
16+
parse_store_url,
17+
)
1318

1419
if TYPE_CHECKING:
1520
from collections.abc import AsyncIterator, Iterable, MutableMapping
@@ -572,33 +577,6 @@ def get(self, name: str) -> _ManagedStoreDict | None:
572577
"""
573578
return self._registry.get(name)
574579

575-
def get_from_url(self, url: str) -> tuple[_ManagedStoreDict | None, str, str]:
576-
"""
577-
Look up a managed store dict by its URL.
578-
579-
Parameters
580-
----------
581-
url : str
582-
A URL like "memory://mystore" or "memory://mystore/path/to/node"
583-
584-
Returns
585-
-------
586-
tuple[_ManagedStoreDict | None, str, str]
587-
The store dict (if found), the extracted name, and the path.
588-
"""
589-
if not url.startswith("memory://"):
590-
return None, "", ""
591-
592-
# Parse the store name and path from the URL
593-
# "memory://mystore" -> name="mystore", path=""
594-
# "memory://mystore/path/to/data" -> name="mystore", path="path/to/data"
595-
url_without_scheme = url[len("memory://") :]
596-
parts = url_without_scheme.split("/", 1)
597-
name = parts[0]
598-
path = parts[1] if len(parts) > 1 else ""
599-
600-
return self._registry.get(name), name, path
601-
602580

603581
_managed_store_dict_registry = _ManagedStoreDictRegistry()
604582

@@ -752,13 +730,20 @@ def from_url(cls, url: str, *, read_only: bool = False) -> ManagedMemoryStore:
752730
If the URL is not a valid memory:// URL or the store has been
753731
garbage collected.
754732
"""
755-
managed_dict, name, path = _managed_store_dict_registry.get_from_url(url)
733+
parsed = parse_store_url(url)
734+
if parsed.scheme != "memory":
735+
raise ValueError(
736+
f"Memory store not found for URL '{url}'. "
737+
"The store may have been garbage collected or the URL is invalid."
738+
)
739+
name = parsed.name or ""
740+
managed_dict = _managed_store_dict_registry.get(name)
756741
if managed_dict is None:
757742
raise ValueError(
758743
f"Memory store not found for URL '{url}'. "
759744
"The store may have been garbage collected or the URL is invalid."
760745
)
761-
return cls._from_managed_dict(managed_dict, name, path=path, read_only=read_only)
746+
return cls._from_managed_dict(managed_dict, name, path=parsed.path, read_only=read_only)
762747

763748
# Override MemoryStore methods to use path prefix and check process
764749

src/zarr/storage/_utils.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
import re
44
from pathlib import Path
5-
from typing import TYPE_CHECKING, TypeVar
5+
from typing import TYPE_CHECKING, NamedTuple, TypeVar
6+
from urllib.parse import urlparse
67

78
from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest
89

@@ -13,6 +14,73 @@
1314
from zarr.core.buffer import Buffer
1415

1516

17+
class ParsedStoreUrl(NamedTuple):
18+
"""
19+
Parsed components of a store URL.
20+
21+
Attributes
22+
----------
23+
scheme : str
24+
The URL scheme (e.g., "memory", "file", "s3"). Empty string for local paths.
25+
name : str | None
26+
The store name/host component. For memory:// URLs this is the store name.
27+
None if empty.
28+
path : str
29+
The path component within the store.
30+
raw : str
31+
The original URL string.
32+
"""
33+
34+
scheme: str
35+
name: str | None
36+
path: str
37+
raw: str
38+
39+
40+
def parse_store_url(url: str) -> ParsedStoreUrl:
41+
"""
42+
Parse a store URL into its components.
43+
44+
Parameters
45+
----------
46+
url : str
47+
A URL like "memory://store-name/path" or "s3://bucket/key" or a local path.
48+
49+
Returns
50+
-------
51+
ParsedStoreUrl
52+
Named tuple with scheme, name, path, and raw URL.
53+
54+
Examples
55+
--------
56+
>>> parse_store_url("memory://mystore")
57+
ParsedStoreUrl(scheme='memory', name='mystore', path='', raw='memory://mystore')
58+
59+
>>> parse_store_url("memory://mystore/path/to/data")
60+
ParsedStoreUrl(scheme='memory', name='mystore', path='path/to/data', raw='memory://mystore/path/to/data')
61+
62+
>>> parse_store_url("s3://bucket/key")
63+
ParsedStoreUrl(scheme='s3', name='bucket', path='key', raw='s3://bucket/key')
64+
65+
>>> parse_store_url("/local/path")
66+
ParsedStoreUrl(scheme='', name=None, path='/local/path', raw='/local/path')
67+
"""
68+
parsed = urlparse(url)
69+
70+
# netloc is the "host" part (store name for memory://, bucket for s3://, etc.)
71+
name = parsed.netloc or None
72+
73+
# For URLs with a scheme and netloc (like memory://store/path or s3://bucket/key),
74+
# strip the leading slash from the path component.
75+
# For local paths (no scheme), preserve the path as-is.
76+
if parsed.scheme and parsed.netloc:
77+
path = parsed.path.lstrip("/")
78+
else:
79+
path = parsed.path
80+
81+
return ParsedStoreUrl(scheme=parsed.scheme, name=name, path=path, raw=url)
82+
83+
1684
def normalize_path(path: str | bytes | Path | None) -> str:
1785
if path is None:
1886
result = ""

tests/test_store/test_memory.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,6 @@ def test_store_supports_writes(self, store: ManagedMemoryStore) -> None:
324324
def test_store_supports_listing(self, store: ManagedMemoryStore) -> None:
325325
assert store.supports_listing
326326

327-
async def test_list_prefix(self, store: MemoryStore) -> None:
328-
assert True
329-
330327
@pytest.mark.parametrize("dtype", ["uint8", "float32", "int64"])
331328
@pytest.mark.parametrize("zarr_format", [2, 3])
332329
async def test_deterministic_size(

0 commit comments

Comments
 (0)