Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions lindi/LindiH5pyFile/LindiH5pyDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,21 @@ def compression_opts(self):
def chunks(self):
    """Return the ``chunks`` attribute of the backing ``_zarr_array``."""
    backing_array = self._zarr_array
    return backing_array.chunks

@property
def _is_empty(self):
    """Always ``False``.

    LINDI datasets backed by zarr are never HDF5 NULL-space datasets.
    """
    return False

def __array__(self, dtype=None, copy=None):
    """Materialize the dataset as a NumPy array (NumPy array protocol hook).

    Parameters
    ----------
    dtype : optional
        Forwarded unchanged to ``np.array`` as the target dtype.
    copy : bool or None, optional
        ``False`` is rejected, because the data has to be read into newly
        allocated memory. Any other value is accepted.

    Returns
    -------
    numpy.ndarray
        The dataset contents wrapped by ``np.array``.

    Raises
    ------
    ValueError
        If ``copy`` is ``False``.
    """
    if copy is False:
        raise ValueError(
            "LindiH5pyDataset.__array__ received copy=False "
            "but memory allocation cannot be avoided on read"
        )
    # Scalar datasets are read with the empty-tuple index; everything else
    # is read with a full slice (``self[:]`` is ``self[slice(None)]``).
    selection = () if self._is_scalar else slice(None)
    return np.array(self[selection], dtype=dtype)

def __repr__(self):  # type: ignore
    """Return a short ``<ClassName: name>`` description of this object."""
    class_name = self.__class__.__name__
    return "<{}: {}>".format(class_name, self.name)

Expand Down
76 changes: 76 additions & 0 deletions tests/test_numpy_array_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import tempfile
import numpy as np
import h5py
import lindi


def test_numpy_array_conversion():
    """Check that a LINDI dataset converts through np.asarray(), np.atleast_2d() and np.array().

    Regression test for https://github.com/NeurodataWithoutBorders/lindi/issues/120
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        h5_fname = f'{tmpdir}/test.h5'
        lindi_json_fname = f'{tmpdir}/test.lindi.json'

        # Source HDF5 file: one 1D float64 dataset and one scalar dataset.
        with h5py.File(h5_fname, 'w') as h5_file:
            h5_file.create_dataset('data', data=np.arange(100, dtype=np.float64))
            h5_file.create_dataset('scalar', data=42.0)

        # Write the LINDI JSON representation of that HDF5 file.
        with lindi.LindiH5pyFile.from_hdf5_file(h5_fname, url=h5_fname) as source:
            source.write_lindi_file(lindi_json_fname)

        # Re-open through the LINDI file and run the NumPy conversions.
        with lindi.LindiH5pyFile.from_lindi_file(lindi_json_fname) as client:
            dataset = client['data']

            # The dataset must not report itself as empty.
            assert dataset._is_empty is False

            # np.asarray goes through __array__.
            as_array = np.asarray(dataset)
            assert as_array.shape == (100,)
            np.testing.assert_array_equal(
                as_array, np.arange(100, dtype=np.float64)
            )

            # np.atleast_2d is the call that failed in the linked issue.
            as_2d = np.atleast_2d(dataset)
            assert as_2d.shape == (1, 100)

            # np.array with an explicit dtype conversion.
            as_int32 = np.array(dataset, dtype=np.int32)
            assert as_int32.dtype == np.int32

            # Scalar dataset converts to a zero-dimensional array.
            scalar_dataset = client['scalar']
            scalar_array = np.asarray(scalar_dataset)
            assert scalar_array.shape == ()
            assert float(scalar_array) == 42.0


def test_numpy_array_conversion_compound():
    """Check a compound-dtype dataset round-tripped through a LINDI file.

    Verifies ``_is_empty`` and that each field, read with ``ds[field][:]`` and
    passed to ``np.asarray``, matches the values originally written.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        h5_fname = f'{tmpdir}/test.h5'
        lindi_json_fname = f'{tmpdir}/test.lindi.json'

        # Three records with an int32 field 'x' and a float64 field 'y'.
        record_dtype = np.dtype([('x', np.int32), ('y', np.float64)])
        records = np.array([(1, 2.5), (3, 4.5), (5, 6.5)], dtype=record_dtype)

        with h5py.File(h5_fname, 'w') as h5_file:
            h5_file.create_dataset('compound', data=records)

        with lindi.LindiH5pyFile.from_hdf5_file(h5_fname, url=h5_fname) as source:
            source.write_lindi_file(lindi_json_fname)

        with lindi.LindiH5pyFile.from_lindi_file(lindi_json_fname) as client:
            dataset = client['compound']
            assert dataset._is_empty is False

            # Field access followed by np.asarray, one field at a time.
            x_column = np.asarray(dataset['x'][:])
            np.testing.assert_array_equal(
                x_column, np.array([1, 3, 5], dtype=np.int32)
            )

            y_column = np.asarray(dataset['y'][:])
            np.testing.assert_array_equal(
                y_column, np.array([2.5, 4.5, 6.5], dtype=np.float64)
            )