Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions virtualizarr/manifests/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
from typing import TYPE_CHECKING, TypeAlias, cast

import numpy as np
from zarr.core.chunk_grids import RegularChunkGrid
from zarr.core.metadata.v3 import ArrayV3Metadata

from virtualizarr.manifests.array_api import expand_dims
from .manifest import ChunkManifest
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sibling imports avoid loading the parent package's init.py, which sometimes resolves circular import problems. As I write this I can't remember if this was actually necessary in this case. I'll check.

Copy link
Copy Markdown
Member

@TomNicholas TomNicholas Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't be. If needed I think you can just import ChunkManifest inside array_selection_1d instead.


# indexer with only basic selectors, no new axes or ellipsis
T_BasicIndexer_1d: TypeAlias = int | slice | np.ndarray
Expand Down Expand Up @@ -147,32 +150,49 @@ def apply_selection(
for axis, (length, indexer_1d) in enumerate(
zip(marr.shape, indexer_without_newaxes)
):
output_arr = apply_selection_1d(output_arr, indexer_1d, length)
output_arr = apply_selection_1d(output_arr, indexer_1d, axis, length)

return output_arr


def apply_selection_1d(
marr: "ManifestArray", indexer_1d: T_BasicIndexer_1d, length: int
marr: "ManifestArray", indexer_1d: T_BasicIndexer_1d, axis: int, length: int
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function signature wasn't dictated by xarray, was it? Seems like it's not implementable without the axis parameter.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, these are all implementation details, as xarray only cares about the signatures of certain methods on the ManifestArray class (for indexing __getitem__ is what matters). So feel free to alter the the functions in this file as necessary. (That is one reason I put the indexing internals in this separate indexing.py file.)

) -> "ManifestArray":
"""
Actually index the ManifestArray along 1 dimension.

Notice that none of these options actually do any indexing right now!
Notice that most of these options don't actually do any indexing right now!
"""

if isinstance(indexer_1d, slice):
if slice_is_no_op(indexer_1d, axis_length=length):
pass
else:
NotImplementedError(
raise NotImplementedError(
f"Unsupported indexer. Indexing within a ManifestArray using ints or slices is not yet supported (see GitHub issue #51), but received {indexer_1d}"
)
elif isinstance(indexer_1d, int):
# TODO cover possibility of indexing into a length-1 dimension (which just removes that dimension)?
raise NotImplementedError(
f"Unsupported indexer. Indexing within a ManifestArray using ints or slices is not yet supported (see GitHub issue #51), but received {indexer_1d}"
)
if length == 1:
shape = marr.metadata.shape
chunk_shape = marr.metadata.chunk_grid.chunk_shape
metadata = ArrayV3Metadata.from_dict(
marr.metadata.to_dict() | dict(
shape=shape[:axis] + shape[axis + 1:],
chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape[:axis] + chunk_shape[axis + 1:]),
dimension_names=None, # TODO
)
)
manifest = ChunkManifest.from_arrays(
paths=np.squeeze(marr.manifest._paths, axis=axis),
offsets=np.squeeze(marr.manifest._offsets, axis=axis),
lengths=np.squeeze(marr.manifest._lengths, axis=axis),
validate_paths=False
)
marr = marr.__class__(metadata, manifest)
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather say ManifestArray instead of marr.__class__ for clarity, but importing array.py led to a circular import. Seems to me that this function should really be a method of ManifestArray, not split out into a separate file.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The more conventional fix is to just import ManifestArray inside the function that needs it.

else:
raise NotImplementedError(
f"Unsupported indexer. Indexing within a ManifestArray using ints or slices is not yet supported (see GitHub issue #51), but received {indexer_1d}"
)
elif isinstance(indexer_1d, np.ndarray):
raise NotImplementedError(
f"Unsupported indexer. So-called 'fancy indexing' via numpy arrays is not supported, but received {indexer_1d}"
Expand Down
Loading