-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathtest_hdf5.py
More file actions
132 lines (97 loc) · 5.2 KB
/
test_hdf5.py
File metadata and controls
132 lines (97 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""Tests for detect_hdf5_config."""
import h5py
import numpy as np
import pytest
from sift_client._internal.util.hdf5 import detect_hdf5_config
from sift_client.sift_types.channel import ChannelDataType
@pytest.fixture
def create_hdf5_file(tmp_path):
    """Provide a factory that writes ``test.h5`` under ``tmp_path``.

    The factory takes a ``populate`` callback, opens the file in write mode,
    passes the open ``h5py.File`` to the callback, and returns the file path.
    Every call targets the same path inside ``tmp_path``.
    """
    target = tmp_path / "test.h5"

    def _write(populate):
        # The ``with`` block guarantees the file is closed before callers
        # receive the path and reopen it.
        with h5py.File(target, "w") as handle:
            populate(handle)
        return target

    return _write
class TestDetectHdf5Config:
    """Exercise ``detect_hdf5_config`` against small HDF5 files built per test."""

    def test_compound_dataset(self, create_hdf5_file):
        """A compound dataset: the first field is time, each later field is a value channel."""
        record_dtype = np.dtype(
            [("timestamp_ns", "<i8"), ("voltage", "<f8"), ("current", "<f4")]
        )

        def write_contents(h5):
            h5.create_dataset("sensors", shape=(10,), dtype=record_dtype)

        path = create_hdf5_file(write_contents)
        config = detect_hdf5_config(path)

        assert len(config.data) == 2

        voltage_channel = config.data[0]
        assert voltage_channel.time_field == "timestamp_ns"
        assert voltage_channel.value_field == "voltage"
        assert voltage_channel.data_type == ChannelDataType.DOUBLE
        # Time and value both live in the same compound dataset.
        assert voltage_channel.time_dataset == "sensors"
        assert voltage_channel.value_dataset == "sensors"

        current_channel = config.data[1]
        assert current_channel.time_field == "timestamp_ns"
        assert current_channel.value_field == "current"
        assert current_channel.data_type == ChannelDataType.FLOAT

    def test_single_column_with_root_time(self, create_hdf5_file):
        """Single-column datasets take the root ``time`` dataset as their time source."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(100, dtype="<i8"))
            h5.create_dataset("voltage", data=np.random.rand(100).astype("<f8"))
            h5.create_dataset("current", data=np.random.rand(100).astype("<f4"))

        path = create_hdf5_file(write_contents)
        config = detect_hdf5_config(path)

        assert len(config.data) == 2
        for channel in config.data:
            assert channel.time_dataset == "time"
            # Plain (non-compound) datasets carry no field names.
            assert channel.time_field is None
            assert channel.value_field is None

    def test_single_column_without_root_time(self, create_hdf5_file):
        """With no root ``time`` dataset, ``time_dataset`` is the empty string."""

        def write_contents(h5):
            h5.create_dataset("voltage", data=np.random.rand(10).astype("<f8"))

        path = create_hdf5_file(write_contents)
        config = detect_hdf5_config(path)

        assert len(config.data) == 1
        only_channel = config.data[0]
        assert only_channel.time_dataset == ""
        assert only_channel.name == "voltage"

    def test_root_time_skipped_as_value_channel(self, create_hdf5_file):
        """The root ``time`` dataset must not be reported as a value channel."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(10, dtype="<i8"))
            h5.create_dataset("voltage", data=np.random.rand(10).astype("<f8"))

        path = create_hdf5_file(write_contents)
        config = detect_hdf5_config(path)

        detected_names = [channel.name for channel in config.data]
        assert "time" not in detected_names
        assert "voltage" in detected_names

    def test_duplicate_name_deduplication(self, create_hdf5_file):
        """Datasets sharing a ``Name`` attribute must yield distinct channel names.

        The detector is expected to append a ``.{dataset_name}`` suffix to
        duplicates; this test only checks that the names are unique and that
        the plain name ``pressure`` is still present.
        """

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(10, dtype="<i8"))
            first_sensor = h5.create_dataset(
                "group1/sensor", data=np.random.rand(10).astype("<f8")
            )
            first_sensor.attrs["Name"] = "pressure"
            second_sensor = h5.create_dataset(
                "group2/sensor", data=np.random.rand(10).astype("<f8")
            )
            second_sensor.attrs["Name"] = "pressure"

        path = create_hdf5_file(write_contents)
        config = detect_hdf5_config(path)

        detected_names = [channel.name for channel in config.data]
        distinct_names = set(detected_names)
        assert len(detected_names) == 2
        assert len(distinct_names) == 2  # all unique
        assert "pressure" in detected_names

    def test_attribute_detection(self, create_hdf5_file):
        """Channel name, units, and description come from the dataset's HDF5 attributes."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(5, dtype="<i8"))
            raw = h5.create_dataset("raw_voltage", data=np.random.rand(5).astype("<f8"))
            raw.attrs["Name"] = "voltage"
            raw.attrs["Units"] = "V"
            raw.attrs["Description"] = "Supply voltage"

        path = create_hdf5_file(write_contents)
        config = detect_hdf5_config(path)

        assert len(config.data) == 1
        only_channel = config.data[0]
        # The ``Name`` attribute wins over the dataset's own name ("raw_voltage").
        assert only_channel.name == "voltage"
        assert only_channel.units == "V"
        assert only_channel.description == "Supply voltage"

    def test_unsupported_dtype_raises(self, create_hdf5_file):
        """An unsupported dtype (float16) raises ``ValueError`` instead of being dropped."""

        def write_contents(h5):
            h5.create_dataset("time", data=np.arange(5, dtype="<i8"))
            h5.create_dataset("data", data=np.zeros(5, dtype=np.float16))

        with pytest.raises(ValueError, match="Unsupported numpy dtype"):
            detect_hdf5_config(create_hdf5_file(write_contents))