Skip to content

Commit f0398b4

Browse files
committed
hdf5 tests
1 parent f21c449 commit f0398b4

1 file changed

Lines changed: 140 additions & 0 deletions

File tree

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
"""Tests for detect_hdf5_config.
2+
3+
These tests verify that the client-side detect_hdf5_config matches the
4+
backend hdf5.py detect_config 1-to-1. Any client-specific heuristics
5+
(e.g. sibling "timestamps" resolution, 2D dataset handling, "values"
6+
leaf naming) are intentionally NOT present and should not be added.
7+
"""
8+
9+
import h5py
10+
import numpy as np
11+
import pytest
12+
13+
from sift_client._internal.util.hdf5 import detect_hdf5_config
14+
from sift_client.sift_types.channel import ChannelDataType
15+
from sift_client.sift_types.data_import import TimeFormat
16+
17+
18+
@pytest.fixture
def create_hdf5_file(tmp_path):
    """Provide a factory that builds an HDF5 file under pytest's ``tmp_path``.

    The returned callable takes a single ``populate`` callback, opens
    ``test.h5`` in write mode, hands the open ``h5py.File`` to the callback,
    and returns the file's path once the file has been closed.
    """
    target = tmp_path / "test.h5"

    def _write(populate):
        # The context manager closes the file before the path is handed back,
        # so the caller can reopen it for reading.
        with h5py.File(target, "w") as handle:
            populate(handle)
        return target

    return _write
29+
30+
31+
class TestDetectHdf5Config:
    """Exercise detect_hdf5_config against small, purpose-built HDF5 files."""

    def test_compound_dataset(self, create_hdf5_file):
        """A compound dataset treats its first field as time and the rest as values."""
        record_dtype = np.dtype(
            [("timestamp_ns", "<i8"), ("voltage", "<f8"), ("current", "<f4")]
        )

        def write_contents(h5):
            h5.create_dataset("sensors", shape=(10,), dtype=record_dtype)

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        assert len(detected.data) == 2

        voltage_channel = detected.data[0]
        assert voltage_channel.time_field == "timestamp_ns"
        assert voltage_channel.value_field == "voltage"
        assert voltage_channel.data_type == ChannelDataType.DOUBLE
        assert voltage_channel.time_dataset == "sensors"
        assert voltage_channel.value_dataset == "sensors"

        current_channel = detected.data[1]
        assert current_channel.time_field == "timestamp_ns"
        assert current_channel.value_field == "current"
        assert current_channel.data_type == ChannelDataType.FLOAT

    def test_single_column_with_root_time(self, create_hdf5_file):
        """A root-level 'time' dataset is used as the time source for plain datasets."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(100, dtype="<i8"))
            h5.create_dataset("voltage", data=np.random.rand(100).astype("<f8"))
            h5.create_dataset("current", data=np.random.rand(100).astype("<f4"))

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        assert len(detected.data) == 2
        for channel in detected.data:
            assert channel.time_dataset == "time"
            # Plain (non-compound) datasets carry no field names.
            assert channel.time_field is None
            assert channel.value_field is None

    def test_single_column_without_root_time(self, create_hdf5_file):
        """With no root 'time' dataset, time_dataset is left as an empty string."""

        def write_contents(h5):
            h5.create_dataset("voltage", data=np.random.rand(10).astype("<f8"))

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        assert len(detected.data) == 1
        only_channel = detected.data[0]
        assert only_channel.time_dataset == ""
        assert only_channel.name == "voltage"

    def test_root_time_skipped_as_value_channel(self, create_hdf5_file):
        """The root 'time' dataset is never reported as a value channel itself."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(10, dtype="<i8"))
            h5.create_dataset("voltage", data=np.random.rand(10).astype("<f8"))

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        names = [channel.name for channel in detected.data]
        assert "time" not in names
        assert "voltage" in names

    def test_duplicate_name_deduplication(self, create_hdf5_file):
        """Two datasets sharing a 'Name' attribute end up with distinct channel names."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(10, dtype="<i8"))
            first_sensor = h5.create_dataset(
                "group1/sensor", data=np.random.rand(10).astype("<f8")
            )
            first_sensor.attrs["Name"] = "pressure"
            second_sensor = h5.create_dataset(
                "group2/sensor", data=np.random.rand(10).astype("<f8")
            )
            second_sensor.attrs["Name"] = "pressure"

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        names = [channel.name for channel in detected.data]
        assert len(names) == 2
        assert len(set(names)) == 2  # no two channels share a name
        assert "pressure" in names

    def test_attribute_detection(self, create_hdf5_file):
        """'Name', 'Units' and 'Description' attributes populate the channel metadata."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(5, dtype="<i8"))
            raw = h5.create_dataset("raw_voltage", data=np.random.rand(5).astype("<f8"))
            raw.attrs["Name"] = "voltage"
            raw.attrs["Units"] = "V"
            raw.attrs["Description"] = "Supply voltage"

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        assert len(detected.data) == 1
        only_channel = detected.data[0]
        assert only_channel.name == "voltage"
        assert only_channel.units == "V"
        assert only_channel.description == "Supply voltage"

    def test_returns_correct_wrapper_type(self, create_hdf5_file):
        """The returned config has an empty asset_name and a nanosecond Unix time format."""

        def write_contents(h5):
            h5.create_dataset("x", data=np.array([1.0, 2.0]))

        source_path = create_hdf5_file(write_contents)
        detected = detect_hdf5_config(source_path)

        assert detected.asset_name == ""
        assert detected.time_format == TimeFormat.ABSOLUTE_UNIX_NANOSECONDS

0 commit comments

Comments
 (0)