Skip to content

Commit 29c9dbb

Browse files
committed
copy raw bytes instead of decoding array chunks
1 parent 8a1e9e9 commit 29c9dbb

1 file changed

Lines changed: 52 additions & 48 deletions

File tree

src/zarr/core/group.py

Lines changed: 52 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -713,6 +713,9 @@ async def copy_to(
713713
"""
714714
Copy this group and all its contents to a new store.
715715
716+
This performs a raw byte-level copy of all data, without decoding or
717+
re-encoding array contents.
718+
716719
Parameters
717720
----------
718721
store : StoreLike
@@ -731,62 +734,63 @@ async def copy_to(
731734
AsyncGroup
732735
The new group in the target store.
733736
"""
734-
if path is not None:
735-
target_store = await make_store_path(store, path=path)
737+
target_store_path = await make_store_path(store, path=path or "")
738+
739+
if overwrite:
740+
dst_store = target_store_path.store
741+
dst_prefix = target_store_path.path + "/" if target_store_path.path else ""
742+
async for key in dst_store.list_prefix(dst_prefix):
743+
await dst_store.delete(key)
736744
else:
737-
target_store = await make_store_path(store)
745+
await ensure_no_existing_node(target_store_path, zarr_format=self.metadata.zarr_format)
738746

739-
new_group = await self.from_store(
740-
target_store,
741-
overwrite=overwrite,
742-
attributes=self.metadata.attributes,
743-
consolidated_metadata=self.metadata.consolidated_metadata,
744-
zarr_format=self.metadata.zarr_format,
745-
)
747+
src_store = self.store_path.store
748+
src_prefix = self.store_path.path + "/" if self.store_path.path else ""
749+
dst_store = target_store_path.store
750+
dst_prefix = target_store_path.path + "/" if target_store_path.path else ""
751+
752+
prototype = default_buffer_prototype()
753+
754+
# Determine the metadata keys for a group based on zarr format
755+
group_metadata_keys: tuple[str, ...]
756+
if self.metadata.zarr_format == 3:
757+
group_metadata_keys = (ZARR_JSON,)
758+
else:
759+
group_metadata_keys = (ZGROUP_JSON, ZATTRS_JSON, ZMETADATA_V2_JSON)
760+
761+
async def _copy_key(src_key: str, dst_key: str) -> None:
762+
"""Copy a single key from source to destination store."""
763+
data = await src_store.get(src_key, prototype=prototype)
764+
if data is not None:
765+
await dst_store.set(dst_key, data)
766+
767+
# Copy the root group's metadata keys
768+
for key in group_metadata_keys:
769+
await _copy_key(
770+
f"{src_prefix}{key}" if src_prefix else key,
771+
f"{dst_prefix}{key}" if dst_prefix else key,
772+
)
746773

774+
# Copy all children discovered via members()
747775
async for child_path, member in self.members(
748776
max_depth=None, use_consolidated_for_children=use_consolidated_for_children
749777
):
750778
if isinstance(member, AsyncGroup):
751-
await self.from_store(
752-
store=new_group.store_path / child_path,
753-
zarr_format=self.metadata.zarr_format,
754-
overwrite=overwrite,
755-
attributes=member.metadata.attributes,
756-
consolidated_metadata=member.metadata.consolidated_metadata,
757-
)
779+
# For groups, copy only the known metadata keys
780+
for key in group_metadata_keys:
781+
src_key = f"{src_prefix}{child_path}/{key}"
782+
dst_key = f"{dst_prefix}{child_path}/{key}"
783+
await _copy_key(src_key, dst_key)
758784
else:
759-
kwargs = {}
760-
if self.metadata.zarr_format == 3:
761-
kwargs["chunk_key_encoding"] = member.metadata.chunk_key_encoding
762-
kwargs["dimension_names"] = member.metadata.dimension_names
763-
else:
764-
kwargs["chunk_key_encoding"] = {
765-
"name": "v2",
766-
"separator": member.metadata.dimension_separator,
767-
}
768-
# Serializer done this way in case of having zarr_format 2, otherwise mypy complains.
769-
new_array = await new_group.create_array(
770-
name=child_path,
771-
shape=member.shape,
772-
dtype=member.dtype,
773-
chunks=member.chunks,
774-
shards=member.shards,
775-
filters=member.filters,
776-
compressors=member.compressors,
777-
serializer=member.serializer if member.serializer is not None else "auto",
778-
fill_value=member.metadata.fill_value,
779-
attributes=member.attrs,
780-
overwrite=overwrite,
781-
config={"order": member.order},
782-
**kwargs,
783-
)
784-
785-
for region in member._iter_shard_regions():
786-
data = await member.getitem(selection=region)
787-
await new_array.setitem(selection=region, value=data)
785+
# For arrays, list all keys under the array's prefix to get
786+
# metadata + only the chunks that actually exist in the store
787+
array_src_prefix = f"{src_prefix}{child_path}/"
788+
async for src_key in src_store.list_prefix(array_src_prefix):
789+
relative_key = src_key.removeprefix(src_prefix)
790+
dst_key = f"{dst_prefix}{relative_key}" if dst_prefix else relative_key
791+
await _copy_key(src_key, dst_key)
788792

789-
return new_group
793+
return await type(self).open(target_store_path, zarr_format=self.metadata.zarr_format)
790794

791795
async def setitem(self, key: str, value: Any) -> None:
792796
"""
@@ -2000,7 +2004,7 @@ def copy_to(
20002004
20012005
Returns
20022006
-------
2003-
AsyncGroup
2007+
Group
20042008
The new group in the target store.
20052009
"""
20062010
return Group(

0 commit comments

Comments (0)