Skip to content

Commit deb3590

Browse files
committed
updated unit tests
1 parent fca8331 commit deb3590

1 file changed

Lines changed: 88 additions & 146 deletions

File tree

python/lib/sift_client/_tests/resources/test_data_imports.py

Lines changed: 88 additions & 146 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,9 @@
44

55
import pytest
66

7+
from sift_client.resources.data_imports import _resolve_data_type_key
78
from sift_client.sift_types.channel import ChannelDataType
89
from sift_client.sift_types.data_import import (
9-
EXTENSION_TO_DATA_TYPE_KEY,
1010
Ch10ImportConfig,
1111
CsvDataColumn,
1212
CsvImportConfig,
@@ -16,6 +16,7 @@
1616
Hdf5ImportConfig,
1717
ParquetDataColumn,
1818
ParquetFlatDatasetImportConfig,
19+
ParquetSingleChannelPerRowImportConfig,
1920
ParquetTimeColumn,
2021
TdmsImportConfig,
2122
TimeFormat,
@@ -57,54 +58,6 @@ def parquet_config():
5758
)
5859

5960

60-
class TestCsvConfigMutability:
61-
def test_mutate_asset_name(self, csv_config):
62-
csv_config.asset_name = "new_asset"
63-
assert csv_config.asset_name == "new_asset"
64-
65-
def test_mutate_run_name(self, csv_config):
66-
csv_config.run_name = "new_run"
67-
assert csv_config.run_name == "new_run"
68-
69-
def test_mutate_column_data_type(self, csv_config):
70-
csv_config.data_columns[1].data_type = ChannelDataType.STRING
71-
assert csv_config.data_columns[1].data_type == ChannelDataType.STRING
72-
73-
def test_mutate_column_name(self, csv_config):
74-
csv_config.data_columns[0].name = "cpu_utilization"
75-
assert csv_config.data_columns[0].name == "cpu_utilization"
76-
77-
def test_append_column(self, csv_config):
78-
csv_config.data_columns.append(
79-
CsvDataColumn(column=5, name="pressure", data_type=ChannelDataType.DOUBLE)
80-
)
81-
assert len(csv_config.data_columns) == 4
82-
assert csv_config.data_columns[-1].name == "pressure"
83-
84-
def test_remove_column(self, csv_config):
85-
csv_config.data_columns = [
86-
dc for dc in csv_config.data_columns if dc.name != "status_flags"
87-
]
88-
assert len(csv_config.data_columns) == 2
89-
assert all(dc.name != "status_flags" for dc in csv_config.data_columns)
90-
91-
92-
class TestParquetConfigMutability:
93-
def test_mutate_asset_name(self, parquet_config):
94-
parquet_config.asset_name = "new_asset"
95-
assert parquet_config.asset_name == "new_asset"
96-
97-
def test_mutate_column_data_type(self, parquet_config):
98-
parquet_config.data_columns[1].data_type = ChannelDataType.STRING
99-
assert parquet_config.data_columns[1].data_type == ChannelDataType.STRING
100-
101-
def test_append_column(self, parquet_config):
102-
parquet_config.data_columns.append(
103-
ParquetDataColumn(path="pressure", name="pressure", data_type=ChannelDataType.DOUBLE)
104-
)
105-
assert len(parquet_config.data_columns) == 4
106-
107-
10861
class TestGetColumn:
10962
def test_csv_get_column(self, csv_config):
11063
col = csv_config.get_column("cpu_util")
@@ -169,94 +122,6 @@ def test_absolute_time_does_not_require_start_time(self):
169122
assert col.relative_start_time is None
170123

171124

172-
class TestDataTypeKey:
173-
def test_csv_extension(self):
174-
assert EXTENSION_TO_DATA_TYPE_KEY[".csv"] == DataTypeKey.CSV
175-
176-
def test_parquet_not_in_extension_map(self):
177-
assert ".parquet" not in EXTENSION_TO_DATA_TYPE_KEY
178-
179-
def test_hdf5_extensions(self):
180-
assert EXTENSION_TO_DATA_TYPE_KEY[".h5"] == DataTypeKey.HDF5
181-
assert EXTENSION_TO_DATA_TYPE_KEY[".hdf5"] == DataTypeKey.HDF5
182-
183-
184-
class TestDetectConfigValidation:
185-
"""Tests for validation checks applied after detect_config."""
186-
187-
def test_csv_no_data_columns_raises(self):
188-
"""If all columns are filtered out, detect_config should raise."""
189-
config = CsvImportConfig(
190-
asset_name="",
191-
time_column=CsvTimeColumn(column=1, format=TimeFormat.ABSOLUTE_RFC3339),
192-
data_columns=[],
193-
)
194-
assert not config.data_columns
195-
196-
def test_parquet_empty_time_column_path(self):
197-
"""An empty time column path indicates detection failed."""
198-
config = ParquetFlatDatasetImportConfig(
199-
asset_name="",
200-
time_column=ParquetTimeColumn(path=""),
201-
data_columns=[
202-
ParquetDataColumn(
203-
path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE
204-
),
205-
],
206-
)
207-
assert not config.time_column.path
208-
209-
def test_parquet_no_data_columns(self):
210-
"""A config with no data columns indicates detection found nothing useful."""
211-
config = ParquetFlatDatasetImportConfig(
212-
asset_name="",
213-
time_column=ParquetTimeColumn(path="timestamp"),
214-
data_columns=[],
215-
)
216-
assert not config.data_columns
217-
218-
def test_parquet_integer_time_column_fallback(self):
219-
"""An integer column starting with 'time' should be usable as the time column."""
220-
config = ParquetFlatDatasetImportConfig(
221-
asset_name="",
222-
time_column=ParquetTimeColumn(path=""),
223-
data_columns=[
224-
ParquetDataColumn(path="time_ns", name="time_ns", data_type=ChannelDataType.INT_64),
225-
ParquetDataColumn(
226-
path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE
227-
),
228-
],
229-
)
230-
_integer_types = {
231-
ChannelDataType.INT_32,
232-
ChannelDataType.INT_64,
233-
ChannelDataType.UINT_32,
234-
ChannelDataType.UINT_64,
235-
}
236-
match = None
237-
for dc in config.data_columns:
238-
if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
239-
match = dc
240-
break
241-
assert match is not None
242-
assert match.path == "time_ns"
243-
244-
245-
class TestRunPrecedence:
246-
def test_run_id_ignored_when_none(self, csv_config):
247-
csv_config.run_id = None
248-
csv_config.run_name = "my_run"
249-
proto = csv_config._to_proto()
250-
assert proto.run_name == "my_run"
251-
assert proto.run_id == ""
252-
253-
def test_run_id_set(self, csv_config):
254-
csv_config.run_id = "run_123"
255-
csv_config.run_name = "ignored"
256-
proto = csv_config._to_proto()
257-
assert proto.run_id == "run_123"
258-
259-
260125
class TestCh10Config:
261126
def test_to_proto(self):
262127
config = Ch10ImportConfig(asset_name="my_asset", run_name="run1", scale_values=True)
@@ -271,10 +136,6 @@ def test_to_proto_defaults(self):
271136
assert proto.run_name == ""
272137
assert proto.scale_values is False
273138

274-
def test_run_id_inherited_but_unused(self):
275-
config = Ch10ImportConfig(asset_name="my_asset")
276-
assert config.run_id is None
277-
278139

279140
class TestTdmsConfig:
280141
def test_to_proto(self):
@@ -421,9 +282,90 @@ def test_absolute_time_no_start_time_required(self):
421282
assert not proto.HasField("relative_start_time")
422283

423284

424-
class TestExtensionMap:
425-
def test_tdms_extension(self):
426-
assert EXTENSION_TO_DATA_TYPE_KEY[".tdms"] == DataTypeKey.TDMS
285+
class TestCsvToProto:
286+
def test_to_proto(self, csv_config):
287+
proto = csv_config._to_proto()
288+
assert proto.asset_name == "test_asset"
289+
assert proto.run_name == "test_run"
290+
assert proto.first_data_row == 2
291+
assert proto.time_column.column_number == 1
292+
assert len(proto.data_columns) == 3
293+
assert proto.data_columns[2].name == "cpu_util"
294+
295+
def test_from_proto_round_trip(self, csv_config):
296+
proto = csv_config._to_proto()
297+
restored = CsvImportConfig._from_proto(proto)
298+
assert restored.asset_name == csv_config.asset_name
299+
assert restored.run_name == csv_config.run_name
300+
assert restored.first_data_row == csv_config.first_data_row
301+
assert restored.time_column.column == csv_config.time_column.column
302+
assert len(restored.data_columns) == len(csv_config.data_columns)
303+
304+
305+
class TestParquetToProto:
306+
def test_flat_dataset_to_proto(self, parquet_config):
307+
proto = parquet_config._to_proto()
308+
assert proto.asset_name == "test_asset"
309+
assert proto.HasField("flat_dataset")
310+
assert proto.flat_dataset.time_column.path == "timestamp"
311+
assert len(proto.flat_dataset.data_columns) == 3
312+
313+
def test_flat_dataset_from_proto_round_trip(self, parquet_config):
314+
proto = parquet_config._to_proto()
315+
restored = ParquetFlatDatasetImportConfig._from_proto(proto)
316+
assert restored.asset_name == parquet_config.asset_name
317+
assert restored.time_column.path == parquet_config.time_column.path
318+
assert len(restored.data_columns) == len(parquet_config.data_columns)
319+
for orig, rest in zip(parquet_config.data_columns, restored.data_columns):
320+
assert orig.name == rest.name
321+
assert orig.data_type == rest.data_type
322+
323+
def test_single_channel_per_row_from_proto_round_trip(self):
324+
from sift_client.sift_types.data_import import ParquetSingleChannelConfig
325+
326+
config = ParquetSingleChannelPerRowImportConfig(
327+
asset_name="a",
328+
time_column=ParquetTimeColumn(path="ts"),
329+
single_channel=ParquetSingleChannelConfig(
330+
data_path="value",
331+
name="voltage",
332+
data_type=ChannelDataType.DOUBLE,
333+
),
334+
)
335+
proto = config._to_proto()
336+
restored = ParquetSingleChannelPerRowImportConfig._from_proto(proto)
337+
assert restored.single_channel is not None
338+
assert restored.single_channel.name == "voltage"
339+
assert restored.single_channel.data_type == ChannelDataType.DOUBLE
340+
341+
342+
class TestParquetTimeColumnToProto:
343+
def test_empty_path_raises(self):
344+
col = ParquetTimeColumn(path="")
345+
with pytest.raises(ValueError, match="path must be set"):
346+
col._to_proto()
347+
348+
349+
class TestResolveDataTypeKey:
350+
def test_parquet_requires_data_type(self):
351+
with pytest.raises(ValueError, match="data_type"):
352+
_resolve_data_type_key(".parquet", None)
353+
354+
def test_parquet_with_explicit_data_type(self):
355+
result = _resolve_data_type_key(".parquet", DataTypeKey.PARQUET_FLATDATASET)
356+
assert result == DataTypeKey.PARQUET_FLATDATASET
357+
358+
def test_pqt_requires_data_type(self):
359+
with pytest.raises(ValueError, match="data_type"):
360+
_resolve_data_type_key(".pqt", None)
361+
362+
def test_known_extension_uses_map(self):
363+
assert _resolve_data_type_key(".csv", None) == DataTypeKey.CSV
364+
365+
def test_explicit_data_type_overrides_extension(self):
366+
result = _resolve_data_type_key(".csv", DataTypeKey.TDMS)
367+
assert result == DataTypeKey.TDMS
427368

428-
def test_ch10_extension(self):
429-
assert EXTENSION_TO_DATA_TYPE_KEY[".ch10"] == DataTypeKey.CH10
369+
def test_unknown_extension_raises(self):
370+
with pytest.raises(ValueError, match="Unsupported file extension"):
371+
_resolve_data_type_key(".xyz", None)

0 commit comments

Comments
 (0)