Skip to content

Commit 7273d68

Browse files
HDF5: Support for opening scalar datasets (#1764)
* HDF5: Render scalar datasets into {1}
* Read from scalar datasets
* Support dataset modification
* CI integration of h5py
* Remove debugging output
* Try setting an explicit Python executable in MacOS workflow
* Ok, don't run it on MacOS if the runner does not like it
* Don't fail the test if h5py is not available
* Document the test
* CI fixes
1 parent ba895a5 commit 7273d68

7 files changed

Lines changed: 171 additions & 52 deletions

File tree

.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@ spack:
88
specs:
99
- adios2
1010
- hdf5
11+
- py-h5py
1112

1213
packages:
14+
py-h5py:
15+
variants: ~mpi
1316
hdf5:
1417
variants: ~mpi
1518
adios2:

.github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/spack.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ spack:
99
- adios2@2.10
1010
- hdf5
1111
- openmpi
12+
- py-h5py
1213

1314
packages:
1415
adios2:

.github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/spack.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ spack:
99
- adios2@2.10
1010
- hdf5
1111
- openmpi
12+
- py-h5py
1213

1314
packages:
1415
adios2:

.github/workflows/linux.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ jobs:
260260
- name: Install
261261
run: |
262262
sudo apt-get update
263-
sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas
263+
sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi
264264
# TODO ADIOS2
265265
- name: Build
266266
env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig}
@@ -286,7 +286,7 @@ jobs:
286286
run: |
287287
apk update
288288
apk add hdf5-dev
289-
python3.10 -m pip install numpy
289+
python3.10 -m pip install numpy h5py
290290
- name: Build
291291
env: {CXXFLAGS: -Werror}
292292
run: |

conda.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ dependencies:
2424
- doxygen
2525
- git
2626
- hdf5=*=mpi_openmpi_*
27+
- h5py
2728
- mamba
2829
- make
2930
- mpi4py

src/IO/HDF5/HDF5IOHandler.cpp

Lines changed: 111 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,15 +1294,24 @@ void HDF5IOHandlerImpl::openDataset(
12941294
*dtype = d;
12951295

12961296
int ndims = H5Sget_simple_extent_ndims(dataset_space);
1297-
std::vector<hsize_t> dims(ndims, 0);
1298-
std::vector<hsize_t> maxdims(ndims, 0);
1297+
if (ndims == 0)
1298+
{
1299+
// Is a scalar. Since the openPMD-api frontend supports no scalar
1300+
// datasets, return the extent as {1}
1301+
*parameters.extent = {1};
1302+
}
1303+
else
1304+
{
1305+
std::vector<hsize_t> dims(ndims, 0);
1306+
std::vector<hsize_t> maxdims(ndims, 0);
12991307

1300-
H5Sget_simple_extent_dims(dataset_space, dims.data(), maxdims.data());
1301-
Extent e;
1302-
for (auto const &val : dims)
1303-
e.push_back(val);
1304-
auto extent = parameters.extent;
1305-
*extent = e;
1308+
H5Sget_simple_extent_dims(dataset_space, dims.data(), maxdims.data());
1309+
Extent e;
1310+
for (auto const &val : dims)
1311+
e.push_back(val);
1312+
auto &extent = parameters.extent;
1313+
*extent = e;
1314+
}
13061315

13071316
herr_t status;
13081317
status = H5Sclose(dataset_space);
@@ -1555,28 +1564,54 @@ void HDF5IOHandlerImpl::writeDataset(
15551564
"[HDF5] Internal error: Failed to open HDF5 dataset during dataset "
15561565
"write");
15571566

1558-
std::vector<hsize_t> start;
1559-
for (auto const &val : parameters.offset)
1560-
start.push_back(static_cast<hsize_t>(val));
1561-
std::vector<hsize_t> stride(start.size(), 1); /* contiguous region */
1562-
std::vector<hsize_t> count(start.size(), 1); /* single region */
1563-
std::vector<hsize_t> block;
1564-
for (auto const &val : parameters.extent)
1565-
block.push_back(static_cast<hsize_t>(val));
1566-
memspace =
1567-
H5Screate_simple(static_cast<int>(block.size()), block.data(), nullptr);
15681567
filespace = H5Dget_space(dataset_id);
1569-
status = H5Sselect_hyperslab(
1570-
filespace,
1571-
H5S_SELECT_SET,
1572-
start.data(),
1573-
stride.data(),
1574-
count.data(),
1575-
block.data());
1576-
VERIFY(
1577-
status == 0,
1578-
"[HDF5] Internal error: Failed to select hyperslab during dataset "
1579-
"write");
1568+
int ndims = H5Sget_simple_extent_ndims(filespace);
1569+
1570+
if (ndims == 0)
1571+
{
1572+
if (parameters.offset != Offset{0} || parameters.extent != Extent{1})
1573+
{
1574+
std::stringstream errorMessage;
1575+
errorMessage
1576+
<< "HDF5 dataset '" << concrete_h5_file_position(writable)
1577+
<< "' is scalar (dimensionality 0) and must be accessed with "
1578+
"offset [0] and extent [1]. Was accessed with offset ";
1579+
auxiliary::write_vec_to_stream(errorMessage, parameters.offset)
1580+
<< " and extent ";
1581+
auxiliary::write_vec_to_stream(errorMessage, parameters.extent)
1582+
<< ".";
1583+
throw error::WrongAPIUsage(errorMessage.str());
1584+
}
1585+
memspace = H5Screate_simple(0, nullptr, nullptr);
1586+
VERIFY(
1587+
memspace > 0,
1588+
"[HDF5] Internal error: Failed to create memspace during dataset "
1589+
"write");
1590+
}
1591+
else
1592+
{
1593+
std::vector<hsize_t> start;
1594+
for (auto const &val : parameters.offset)
1595+
start.push_back(static_cast<hsize_t>(val));
1596+
std::vector<hsize_t> stride(start.size(), 1); /* contiguous region */
1597+
std::vector<hsize_t> count(start.size(), 1); /* single region */
1598+
std::vector<hsize_t> block;
1599+
for (auto const &val : parameters.extent)
1600+
block.push_back(static_cast<hsize_t>(val));
1601+
memspace = H5Screate_simple(
1602+
static_cast<int>(block.size()), block.data(), nullptr);
1603+
status = H5Sselect_hyperslab(
1604+
filespace,
1605+
H5S_SELECT_SET,
1606+
start.data(),
1607+
stride.data(),
1608+
count.data(),
1609+
block.data());
1610+
VERIFY(
1611+
status == 0,
1612+
"[HDF5] Internal error: Failed to select hyperslab during dataset "
1613+
"write");
1614+
}
15801615

15811616
void const *data = parameters.data.get();
15821617

@@ -2013,28 +2048,54 @@ void HDF5IOHandlerImpl::readDataset(
20132048
"[HDF5] Internal error: Failed to open HDF5 dataset during dataset "
20142049
"read");
20152050

2016-
std::vector<hsize_t> start;
2017-
for (auto const &val : parameters.offset)
2018-
start.push_back(static_cast<hsize_t>(val));
2019-
std::vector<hsize_t> stride(start.size(), 1); /* contiguous region */
2020-
std::vector<hsize_t> count(start.size(), 1); /* single region */
2021-
std::vector<hsize_t> block;
2022-
for (auto const &val : parameters.extent)
2023-
block.push_back(static_cast<hsize_t>(val));
2024-
memspace =
2025-
H5Screate_simple(static_cast<int>(block.size()), block.data(), nullptr);
20262051
filespace = H5Dget_space(dataset_id);
2027-
status = H5Sselect_hyperslab(
2028-
filespace,
2029-
H5S_SELECT_SET,
2030-
start.data(),
2031-
stride.data(),
2032-
count.data(),
2033-
block.data());
2034-
VERIFY(
2035-
status == 0,
2036-
"[HDF5] Internal error: Failed to select hyperslab during dataset "
2037-
"read");
2052+
int ndims = H5Sget_simple_extent_ndims(filespace);
2053+
2054+
if (ndims == 0)
2055+
{
2056+
if (parameters.offset != Offset{0} || parameters.extent != Extent{1})
2057+
{
2058+
std::stringstream errorMessage;
2059+
errorMessage
2060+
<< "HDF5 dataset '" << concrete_h5_file_position(writable)
2061+
<< "' is scalar (dimensionality 0) and must be accessed with "
2062+
"offset [0] and extent [1]. Was accessed with offset ";
2063+
auxiliary::write_vec_to_stream(errorMessage, parameters.offset)
2064+
<< " and extent ";
2065+
auxiliary::write_vec_to_stream(errorMessage, parameters.extent)
2066+
<< ".";
2067+
throw error::WrongAPIUsage(errorMessage.str());
2068+
}
2069+
memspace = H5Screate_simple(0, nullptr, nullptr);
2070+
VERIFY(
2071+
memspace > 0,
2072+
"[HDF5] Internal error: Failed to create memspace during dataset "
2073+
"read");
2074+
}
2075+
else
2076+
{
2077+
std::vector<hsize_t> start;
2078+
for (auto const &val : parameters.offset)
2079+
start.push_back(static_cast<hsize_t>(val));
2080+
std::vector<hsize_t> stride(start.size(), 1); /* contiguous region */
2081+
std::vector<hsize_t> count(start.size(), 1); /* single region */
2082+
std::vector<hsize_t> block;
2083+
for (auto const &val : parameters.extent)
2084+
block.push_back(static_cast<hsize_t>(val));
2085+
memspace = H5Screate_simple(
2086+
static_cast<int>(block.size()), block.data(), nullptr);
2087+
status = H5Sselect_hyperslab(
2088+
filespace,
2089+
H5S_SELECT_SET,
2090+
start.data(),
2091+
stride.data(),
2092+
count.data(),
2093+
block.data());
2094+
VERIFY(
2095+
status == 0,
2096+
"[HDF5] Internal error: Failed to select hyperslab during dataset "
2097+
"read");
2098+
}
20382099

20392100
void *data = parameters.data.get();
20402101

test/python/unittest/API/APITest.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,6 +2239,58 @@ def testSeriesConstructors(self):
22392239
s = io.Series(f, io.Access.create, c)
22402240
s.close()
22412241

2242+
def testScalarHdf5Fields(self):
2243+
if "hdf5" not in io.variants:
2244+
return
2245+
try:
2246+
import h5py
2247+
except ImportError:
2248+
return
2249+
2250+
# While the openPMD-api (currently) does not create scalar HDF5
2251+
# datasets, we should at least try reading and modifying them in files
2252+
# that were created elsewhere. Scalar here refers to a dataset without
2253+
# dimension. Interacting with them in the openPMD-api is possible by
2254+
# specifying a single element, i.e. offset=[0], extent=[1].
2255+
# For testing this, create a dataset, then use h5py to create a scalar
2256+
# dataset in the file. Then, open first for reading, then for
2257+
# modifying.
2258+
2259+
file = "../samples/scalar_hdf5.h5"
2260+
series_write = io.Series(file, io.Access.create)
2261+
E_x = series_write.write_iterations()[0].meshes["E"]["x"]
2262+
E_x.reset_dataset(io.Dataset(np.dtype(np.int_), [1]))
2263+
E_x[:] = np.array([43])
2264+
series_write.close()
2265+
2266+
# Now turn E_x into a scalar
2267+
with h5py.File(file, "r+") as f:
2268+
E = f["data"]["0"]["meshes"]["E"]
2269+
reapply_attributes = \
2270+
{key: val for key, val in E["x"].attrs.items()}
2271+
del E["x"]
2272+
E["x"] = 44
2273+
for key, val in reapply_attributes.items():
2274+
E["x"].attrs[key] = val
2275+
2276+
series_read = io.Series(file, io.Access.read_only)
2277+
loaded_from_scalar = series_read.iterations[0].meshes["E"]["x"][:]
2278+
series_read.flush()
2279+
self.assertEqual(loaded_from_scalar, np.array([44]))
2280+
series_read.close()
2281+
2282+
series_read_write = io.Series(file, io.Access.read_write)
2283+
E_x = series_read_write.iterations[0].meshes["E"]["x"]
2284+
E_x[:] = np.array([45])
2285+
series_read_write.close()
2286+
2287+
series_read_again = io.Series(file, io.Access.read_only)
2288+
loaded_from_scalar = \
2289+
series_read_again.iterations[0].meshes["E"]["x"][:]
2290+
series_read_again.flush()
2291+
self.assertEqual(loaded_from_scalar, np.array([45]))
2292+
series_read_again.close()
2293+
22422294

22432295
if __name__ == '__main__':
22442296
unittest.main()

0 commit comments

Comments (0)