diff --git a/lindi/LindiH5pyFile/LindiH5pyDataset.py b/lindi/LindiH5pyFile/LindiH5pyDataset.py
index 1ec9da3..8e12b93 100644
--- a/lindi/LindiH5pyFile/LindiH5pyDataset.py
+++ b/lindi/LindiH5pyFile/LindiH5pyDataset.py
@@ -168,6 +168,32 @@ def compression_opts(self):
     def chunks(self):
         return self._zarr_array.chunks
 
+    @property
+    def _is_empty(self):
+        """Whether this is an HDF5 NULL-dataspace dataset; always False here."""
+        # LINDI datasets backed by zarr are never HDF5 NULL-space datasets
+        return False
+
+    def __array__(self, dtype=None, copy=None):
+        """Read the whole dataset into a new NumPy array.
+
+        Called by NumPy (np.asarray, np.array, np.atleast_2d, ...), so every
+        such conversion reads the complete dataset.
+
+        dtype: optional dtype passed through to np.array.
+        copy: NumPy's copy request. None and True behave the same because
+            reading always allocates; False raises ValueError.
+        """
+        if copy is False:
+            raise ValueError(
+                "LindiH5pyDataset.__array__ received copy=False "
+                "but memory allocation cannot be avoided on read"
+            )
+        if self._is_scalar:
+            # Scalar datasets are read with an empty-tuple index
+            return np.array(self[()], dtype=dtype)
+        return np.array(self[:], dtype=dtype)
+
     def __repr__(self):  # type: ignore
         return f"<{self.__class__.__name__}: {self.name}>"
 
diff --git a/tests/test_numpy_array_conversion.py b/tests/test_numpy_array_conversion.py
new file mode 100644
index 0000000..f2afffa
--- /dev/null
+++ b/tests/test_numpy_array_conversion.py
@@ -0,0 +1,84 @@
+import tempfile
+import numpy as np
+import h5py
+import lindi
+
+
+def test_numpy_array_conversion():
+    """Test that LindiH5pyDataset supports np.asarray() and np.atleast_2d().
+
+    Regression test for https://github.com/NeurodataWithoutBorders/lindi/issues/120
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        h5_fname = f'{tmpdir}/test.h5'
+        lindi_json_fname = f'{tmpdir}/test.lindi.json'
+
+        # Create a simple HDF5 file with a 1D dataset
+        with h5py.File(h5_fname, 'w') as f:
+            f.create_dataset('data', data=np.arange(100, dtype=np.float64))
+            f.create_dataset('scalar', data=42.0)
+
+        # Convert to lindi
+        with lindi.LindiH5pyFile.from_hdf5_file(h5_fname, url=h5_fname) as f:
+            f.write_lindi_file(lindi_json_fname)
+
+        # Open the lindi file and test numpy conversions
+        with lindi.LindiH5pyFile.from_lindi_file(lindi_json_fname) as f:
+            ds = f['data']
+
+            # Test _is_empty
+            assert ds._is_empty is False
+
+            # Test np.asarray - this triggers __array__
+            arr = np.asarray(ds)
+            assert arr.shape == (100,)
+            np.testing.assert_array_equal(arr, np.arange(100, dtype=np.float64))
+
+            # Test np.atleast_2d - this is what failed in the issue
+            arr2d = np.atleast_2d(ds)
+            assert arr2d.shape == (1, 100)
+
+            # Test np.array with dtype conversion
+            arr_int = np.array(ds, dtype=np.int32)
+            assert arr_int.dtype == np.int32
+
+            # Test that copy=False is rejected, since reading always allocates
+            try:
+                ds.__array__(copy=False)
+            except ValueError:
+                pass
+            else:
+                raise AssertionError('expected ValueError for copy=False')
+
+            # Test scalar dataset
+            sc = f['scalar']
+            arr_sc = np.asarray(sc)
+            assert arr_sc.shape == ()
+            assert float(arr_sc) == 42.0
+
+
+def test_numpy_array_conversion_compound():
+    """Test _is_empty and per-field reads on a compound dtype dataset."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        h5_fname = f'{tmpdir}/test.h5'
+        lindi_json_fname = f'{tmpdir}/test.lindi.json'
+
+        compound_dtype = np.dtype([('x', np.int32), ('y', np.float64)])
+        data = np.array([(1, 2.5), (3, 4.5), (5, 6.5)], dtype=compound_dtype)
+
+        with h5py.File(h5_fname, 'w') as f:
+            f.create_dataset('compound', data=data)
+
+        with lindi.LindiH5pyFile.from_hdf5_file(h5_fname, url=h5_fname) as f:
+            f.write_lindi_file(lindi_json_fname)
+
+        with lindi.LindiH5pyFile.from_lindi_file(lindi_json_fname) as f:
+            ds = f['compound']
+            assert ds._is_empty is False
+
+            # Select each field by name, slice it, then convert with np.asarray
+            x_vals = np.asarray(ds['x'][:])
+            np.testing.assert_array_equal(x_vals, np.array([1, 3, 5], dtype=np.int32))
+
+            y_vals = np.asarray(ds['y'][:])
+            np.testing.assert_array_equal(y_vals, np.array([2.5, 4.5, 6.5], dtype=np.float64))