Skip to content

Commit a0c61d6

Browse files
TomNicholas and claude committed
Fix ZarrParser when zarr store is nested inside object store root
ZarrParser.__call__ was discarding the store-relative path returned by registry.resolve(), causing two bugs with nested stores:

1. open_group_async couldn't find the zarr group within the store.
2. Chunk manifest paths were doubled (e.g. subdir/data.zarr/subdir/data.zarr/temp/0.0).

ZarrParser.__call__ now correctly uses the store-relative path for group opening and the store root URI for chunk path construction.

Note: this also requires zarr-developers/zarr-python#3657 (on zarr main, not yet released), which fixes ObjectStore.list_dir corrupting directory names.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3cb3ef7 commit a0c61d6

2 files changed

Lines changed: 38 additions & 4 deletions

File tree

virtualizarr/parsers/zarr.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -517,13 +517,21 @@ def __call__(
517517
virtualizarr.open_virtual_dataset : High-level interface for virtual datasets.
518518
virtualizarr.manifests.ManifestStore : The returned virtual store object.
519519
"""
520-
path = validate_and_normalize_path_to_uri(url, fs_root=Path.cwd().as_uri())
521-
object_store, _ = registry.resolve(path)
520+
uri = validate_and_normalize_path_to_uri(url, fs_root=Path.cwd().as_uri())
521+
object_store, store_relative_path = registry.resolve(uri)
522522
zarr_store = ObjectStore(store=object_store) # type: ignore[type-var]
523+
# Compute the store root URI by stripping the relative path from the full URI
524+
rel_path = str(store_relative_path)
525+
store_root_uri = uri.removesuffix(rel_path).rstrip("/") if rel_path else uri
526+
# Combine the store-relative path with optional group to get the full
527+
# path within the object store to the zarr group
528+
group_path = rel_path
529+
if self.group:
530+
group_path = f"{group_path}/{self.group}" if group_path else self.group
523531
coro = _construct_manifest_group(
524532
store=zarr_store,
525-
path=url,
526-
group=self.group,
533+
path=store_root_uri,
534+
group=group_path or None,
527535
skip_variables=self.skip_variables,
528536
)
529537
manifest_group = _run_async(coro)

virtualizarr/tests/test_parsers/test_zarr.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,32 @@ async def run_parser_in_loop():
495495
xr.testing.assert_identical(actual, expected)
496496

497497

498+
@zarr_versions()
499+
def test_parser_with_nested_store_path(tmpdir, zarr_format):
500+
"""Regression test for https://github.com/zarr-developers/VirtualiZarr/issues/912."""
501+
parent_dir = f"{tmpdir}/bucket_root"
502+
filepath = f"{parent_dir}/subdir/data.zarr"
503+
504+
ds = xr.Dataset(
505+
{"temp": (("x", "y"), np.arange(12, dtype="float32").reshape(3, 4))},
506+
)
507+
ds.to_zarr(filepath, consolidated=False, zarr_format=zarr_format)
508+
509+
store = LocalStore(prefix=parent_dir)
510+
registry = ObjectStoreRegistry({f"file://{parent_dir}": store})
511+
parser = ZarrParser()
512+
513+
manifeststore = parser(url=filepath, registry=registry)
514+
515+
with xr.open_dataset(
516+
filepath, engine="zarr", consolidated=False, zarr_format=zarr_format
517+
) as expected:
518+
with xr.open_dataset(
519+
manifeststore, engine="zarr", consolidated=False, zarr_format=3
520+
) as actual:
521+
xr.testing.assert_identical(actual, expected)
522+
523+
498524
def test_sharded_array_raises_error(tmpdir):
499525
"""Test that attempting to virtualize a sharded Zarr V3 array raises NotImplementedError."""
500526
filepath = f"{tmpdir}/test_sharded.zarr"

0 commit comments

Comments
 (0)