44
55import pytest
66
7+ from sift_client .resources .data_imports import _resolve_data_type_key
78from sift_client .sift_types .channel import ChannelDataType
89from sift_client .sift_types .data_import import (
9- EXTENSION_TO_DATA_TYPE_KEY ,
1010 Ch10ImportConfig ,
1111 CsvDataColumn ,
1212 CsvImportConfig ,
1616 Hdf5ImportConfig ,
1717 ParquetDataColumn ,
1818 ParquetFlatDatasetImportConfig ,
19+ ParquetSingleChannelPerRowImportConfig ,
1920 ParquetTimeColumn ,
2021 TdmsImportConfig ,
2122 TimeFormat ,
@@ -57,54 +58,6 @@ def parquet_config():
5758 )
5859
5960
60- class TestCsvConfigMutability :
61- def test_mutate_asset_name (self , csv_config ):
62- csv_config .asset_name = "new_asset"
63- assert csv_config .asset_name == "new_asset"
64-
65- def test_mutate_run_name (self , csv_config ):
66- csv_config .run_name = "new_run"
67- assert csv_config .run_name == "new_run"
68-
69- def test_mutate_column_data_type (self , csv_config ):
70- csv_config .data_columns [1 ].data_type = ChannelDataType .STRING
71- assert csv_config .data_columns [1 ].data_type == ChannelDataType .STRING
72-
73- def test_mutate_column_name (self , csv_config ):
74- csv_config .data_columns [0 ].name = "cpu_utilization"
75- assert csv_config .data_columns [0 ].name == "cpu_utilization"
76-
77- def test_append_column (self , csv_config ):
78- csv_config .data_columns .append (
79- CsvDataColumn (column = 5 , name = "pressure" , data_type = ChannelDataType .DOUBLE )
80- )
81- assert len (csv_config .data_columns ) == 4
82- assert csv_config .data_columns [- 1 ].name == "pressure"
83-
84- def test_remove_column (self , csv_config ):
85- csv_config .data_columns = [
86- dc for dc in csv_config .data_columns if dc .name != "status_flags"
87- ]
88- assert len (csv_config .data_columns ) == 2
89- assert all (dc .name != "status_flags" for dc in csv_config .data_columns )
90-
91-
92- class TestParquetConfigMutability :
93- def test_mutate_asset_name (self , parquet_config ):
94- parquet_config .asset_name = "new_asset"
95- assert parquet_config .asset_name == "new_asset"
96-
97- def test_mutate_column_data_type (self , parquet_config ):
98- parquet_config .data_columns [1 ].data_type = ChannelDataType .STRING
99- assert parquet_config .data_columns [1 ].data_type == ChannelDataType .STRING
100-
101- def test_append_column (self , parquet_config ):
102- parquet_config .data_columns .append (
103- ParquetDataColumn (path = "pressure" , name = "pressure" , data_type = ChannelDataType .DOUBLE )
104- )
105- assert len (parquet_config .data_columns ) == 4
106-
107-
10861class TestGetColumn :
10962 def test_csv_get_column (self , csv_config ):
11063 col = csv_config .get_column ("cpu_util" )
@@ -169,94 +122,6 @@ def test_absolute_time_does_not_require_start_time(self):
169122 assert col .relative_start_time is None
170123
171124
172- class TestDataTypeKey :
173- def test_csv_extension (self ):
174- assert EXTENSION_TO_DATA_TYPE_KEY [".csv" ] == DataTypeKey .CSV
175-
176- def test_parquet_not_in_extension_map (self ):
177- assert ".parquet" not in EXTENSION_TO_DATA_TYPE_KEY
178-
179- def test_hdf5_extensions (self ):
180- assert EXTENSION_TO_DATA_TYPE_KEY [".h5" ] == DataTypeKey .HDF5
181- assert EXTENSION_TO_DATA_TYPE_KEY [".hdf5" ] == DataTypeKey .HDF5
182-
183-
184- class TestDetectConfigValidation :
185- """Tests for validation checks applied after detect_config."""
186-
187- def test_csv_no_data_columns_raises (self ):
188- """If all columns are filtered out, detect_config should raise."""
189- config = CsvImportConfig (
190- asset_name = "" ,
191- time_column = CsvTimeColumn (column = 1 , format = TimeFormat .ABSOLUTE_RFC3339 ),
192- data_columns = [],
193- )
194- assert not config .data_columns
195-
196- def test_parquet_empty_time_column_path (self ):
197- """An empty time column path indicates detection failed."""
198- config = ParquetFlatDatasetImportConfig (
199- asset_name = "" ,
200- time_column = ParquetTimeColumn (path = "" ),
201- data_columns = [
202- ParquetDataColumn (
203- path = "cpu_util" , name = "cpu_util" , data_type = ChannelDataType .DOUBLE
204- ),
205- ],
206- )
207- assert not config .time_column .path
208-
209- def test_parquet_no_data_columns (self ):
210- """A config with no data columns indicates detection found nothing useful."""
211- config = ParquetFlatDatasetImportConfig (
212- asset_name = "" ,
213- time_column = ParquetTimeColumn (path = "timestamp" ),
214- data_columns = [],
215- )
216- assert not config .data_columns
217-
218- def test_parquet_integer_time_column_fallback (self ):
219- """An integer column starting with 'time' should be usable as the time column."""
220- config = ParquetFlatDatasetImportConfig (
221- asset_name = "" ,
222- time_column = ParquetTimeColumn (path = "" ),
223- data_columns = [
224- ParquetDataColumn (path = "time_ns" , name = "time_ns" , data_type = ChannelDataType .INT_64 ),
225- ParquetDataColumn (
226- path = "cpu_util" , name = "cpu_util" , data_type = ChannelDataType .DOUBLE
227- ),
228- ],
229- )
230- _integer_types = {
231- ChannelDataType .INT_32 ,
232- ChannelDataType .INT_64 ,
233- ChannelDataType .UINT_32 ,
234- ChannelDataType .UINT_64 ,
235- }
236- match = None
237- for dc in config .data_columns :
238- if dc .data_type in _integer_types and dc .name .lower ().startswith ("time" ):
239- match = dc
240- break
241- assert match is not None
242- assert match .path == "time_ns"
243-
244-
245- class TestRunPrecedence :
246- def test_run_id_ignored_when_none (self , csv_config ):
247- csv_config .run_id = None
248- csv_config .run_name = "my_run"
249- proto = csv_config ._to_proto ()
250- assert proto .run_name == "my_run"
251- assert proto .run_id == ""
252-
253- def test_run_id_set (self , csv_config ):
254- csv_config .run_id = "run_123"
255- csv_config .run_name = "ignored"
256- proto = csv_config ._to_proto ()
257- assert proto .run_id == "run_123"
258-
259-
260125class TestCh10Config :
261126 def test_to_proto (self ):
262127 config = Ch10ImportConfig (asset_name = "my_asset" , run_name = "run1" , scale_values = True )
@@ -271,10 +136,6 @@ def test_to_proto_defaults(self):
271136 assert proto .run_name == ""
272137 assert proto .scale_values is False
273138
274- def test_run_id_inherited_but_unused (self ):
275- config = Ch10ImportConfig (asset_name = "my_asset" )
276- assert config .run_id is None
277-
278139
279140class TestTdmsConfig :
280141 def test_to_proto (self ):
@@ -421,9 +282,90 @@ def test_absolute_time_no_start_time_required(self):
421282 assert not proto .HasField ("relative_start_time" )
422283
423284
424- class TestExtensionMap :
425- def test_tdms_extension (self ):
426- assert EXTENSION_TO_DATA_TYPE_KEY [".tdms" ] == DataTypeKey .TDMS
class TestCsvToProto:
    """Proto conversion checks driven by the ``csv_config`` fixture."""

    def test_to_proto(self, csv_config):
        """The serialized message exposes the fixture's fields."""
        message = csv_config._to_proto()

        # Top-level scalar fields.
        assert message.asset_name == "test_asset"
        assert message.run_name == "test_run"
        assert message.first_data_row == 2

        # Time column and data columns.
        assert message.time_column.column_number == 1
        data_columns = message.data_columns
        assert len(data_columns) == 3
        assert data_columns[2].name == "cpu_util"

    def test_from_proto_round_trip(self, csv_config):
        """Going to proto and back keeps the compared fields equal."""
        round_tripped = CsvImportConfig._from_proto(csv_config._to_proto())

        assert round_tripped.asset_name == csv_config.asset_name
        assert round_tripped.run_name == csv_config.run_name
        assert round_tripped.first_data_row == csv_config.first_data_row
        assert round_tripped.time_column.column == csv_config.time_column.column
        # Only the number of data columns is compared here.
        assert len(round_tripped.data_columns) == len(csv_config.data_columns)
303+
304+
class TestParquetToProto:
    """Proto conversion checks for the Parquet import configs."""

    def test_flat_dataset_to_proto(self, parquet_config):
        """The fixture serializes into the ``flat_dataset`` field."""
        message = parquet_config._to_proto()

        assert message.asset_name == "test_asset"
        assert message.HasField("flat_dataset")
        flat = message.flat_dataset
        assert flat.time_column.path == "timestamp"
        assert len(flat.data_columns) == 3

    def test_flat_dataset_from_proto_round_trip(self, parquet_config):
        """Going to proto and back keeps names and data types per column."""
        round_tripped = ParquetFlatDatasetImportConfig._from_proto(parquet_config._to_proto())

        assert round_tripped.asset_name == parquet_config.asset_name
        assert round_tripped.time_column.path == parquet_config.time_column.path
        assert len(round_tripped.data_columns) == len(parquet_config.data_columns)

        # Lengths were asserted equal above, so zip pairs every column.
        column_pairs = zip(parquet_config.data_columns, round_tripped.data_columns)
        for original_col, restored_col in column_pairs:
            assert original_col.name == restored_col.name
            assert original_col.data_type == restored_col.data_type

    def test_single_channel_per_row_from_proto_round_trip(self):
        """A single-channel config keeps its channel name and type after a round trip."""
        from sift_client.sift_types.data_import import ParquetSingleChannelConfig

        channel = ParquetSingleChannelConfig(
            data_path="value",
            name="voltage",
            data_type=ChannelDataType.DOUBLE,
        )
        source_config = ParquetSingleChannelPerRowImportConfig(
            asset_name="a",
            time_column=ParquetTimeColumn(path="ts"),
            single_channel=channel,
        )

        round_tripped = ParquetSingleChannelPerRowImportConfig._from_proto(
            source_config._to_proto()
        )

        assert round_tripped.single_channel is not None
        assert round_tripped.single_channel.name == "voltage"
        assert round_tripped.single_channel.data_type == ChannelDataType.DOUBLE
340+
341+
class TestParquetTimeColumnToProto:
    """Serialization checks for ``ParquetTimeColumn``."""

    def test_empty_path_raises(self):
        """Serializing a time column built with an empty path raises ``ValueError``."""
        # Built outside the ``raises`` block so only ``_to_proto`` is under test.
        time_column = ParquetTimeColumn(path="")
        with pytest.raises(ValueError, match="path must be set"):
            time_column._to_proto()
347+
348+
class TestResolveDataTypeKey:
    """Checks for ``_resolve_data_type_key`` across extension / data-type pairs."""

    def test_parquet_requires_data_type(self):
        """``.parquet`` with no explicit data type raises ``ValueError``."""
        with pytest.raises(ValueError, match="data_type"):
            _resolve_data_type_key(".parquet", None)

    def test_parquet_with_explicit_data_type(self):
        """``.parquet`` with an explicit data type returns that data type."""
        resolved = _resolve_data_type_key(".parquet", DataTypeKey.PARQUET_FLATDATASET)
        assert resolved == DataTypeKey.PARQUET_FLATDATASET

    def test_pqt_requires_data_type(self):
        """``.pqt`` with no explicit data type raises ``ValueError``."""
        with pytest.raises(ValueError, match="data_type"):
            _resolve_data_type_key(".pqt", None)

    def test_known_extension_uses_map(self):
        """``.csv`` with no explicit data type resolves to ``DataTypeKey.CSV``."""
        resolved = _resolve_data_type_key(".csv", None)
        assert resolved == DataTypeKey.CSV

    def test_explicit_data_type_overrides_extension(self):
        """An explicit data type is returned even when the extension is ``.csv``."""
        resolved = _resolve_data_type_key(".csv", DataTypeKey.TDMS)
        assert resolved == DataTypeKey.TDMS

    def test_unknown_extension_raises(self):
        """An unrecognized extension with no data type raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unsupported file extension"):
            _resolve_data_type_key(".xyz", None)
0 commit comments