|
7 | 7 | from typing_extensions import Self |
8 | 8 |
|
9 | 9 | from sift_py._internal.channel import channel_fqn |
| 10 | +from sift_py.data_import.parquet_complex_types import ParquetComplexTypesImportModeType |
10 | 11 | from sift_py.data_import.time_format import TimeFormatType |
11 | 12 | from sift_py.error import _component_deprecation_warning |
12 | 13 | from sift_py.ingestion.channel import ChannelBitFieldElement, ChannelDataType, ChannelEnumType |
@@ -239,3 +240,69 @@ class Hdf5DataCfg(ConfigDataModel): |
239 | 240 | time_column: int = 1 |
240 | 241 | value_dataset: str |
241 | 242 | value_column: int = 1 |
| 243 | + |
| 244 | + |
class ParquetTimeColumn(ConfigTimeModel):
    """
    Time column entry of a Parquet config.

    Carries everything ``ConfigTimeModel`` declares plus the ``path`` of the
    time column.
    """

    # Path of the time column (presumably its location inside the Parquet
    # schema -- confirm against the importer).
    path: str
| 251 | + |
| 252 | + |
class ParquetDataColumn(ConfigBaseModel):
    """
    Data column entry of a Parquet config.

    Pairs the ``path`` of a data column with the channel configuration
    attached to it.
    """

    # Path of the data column (presumably its location inside the Parquet
    # schema -- confirm against the importer).
    path: str
    # Channel configuration associated with this column.
    channel_config: ConfigDataModel
| 260 | + |
| 261 | + |
class ParquetFlatDatasetConfig(ConfigBaseModel):
    """
    Flat dataset config for Parquet files.

    Consists of exactly one time column and a list of data columns.
    """

    # The single time column of the dataset.
    time_column: ParquetTimeColumn
    # Data column entries, each with its own channel configuration.
    data_columns: List[ParquetDataColumn]
| 269 | + |
| 270 | + |
class ParquetConfigImpl(ConfigBaseModel):
    """
    Parquet config spec.

    ``run_name`` and ``run_id`` are mutually exclusive: a config may set one
    of them (or neither), never both. ``complex_types_import_mode`` accepts
    either a string or a ``ParquetComplexTypesImportModeType`` and is
    normalized to a string before field validation.
    """

    asset_name: str
    # At most one of run_name / run_id may be non-empty (see validate_config).
    run_name: str = ""
    run_id: str = ""
    flat_dataset: Optional[ParquetFlatDatasetConfig] = None
    footer_offset: int
    footer_length: int
    complex_types_import_mode: Union[str, ParquetComplexTypesImportModeType]

    @model_validator(mode="after")
    def validate_config(self) -> Self:
        """
        Rejects configs in which both ``run_name`` and ``run_id`` are set.

        Raises:
            PydanticCustomError: when both values are non-empty.
        """
        has_run_name = bool(self.run_name)
        has_run_id = bool(self.run_id)
        if has_run_name and has_run_id:
            raise PydanticCustomError(
                "invalid_config_error", "Only specify run_name or run_id, not both."
            )
        return self

    @field_validator("complex_types_import_mode", mode="before")
    @classmethod
    def convert_complex_types_import_mode(cls, raw: Optional[str]) -> Optional[str]:
        """
        Normalizes the provided complex_types_import_mode value to a string.

        ``None`` is passed through unchanged. An enum member is converted with
        ``as_human_str()``. A string is first parsed with ``from_str`` and then
        converted the same way.

        Raises:
            PydanticCustomError: when the value is a string that ``from_str``
                does not recognize, or is neither a string nor an enum member.
        """
        if raw is None:
            return None

        if isinstance(raw, ParquetComplexTypesImportModeType):
            return raw.as_human_str()

        # Only strings are parsed; any other type is treated as unrecognized.
        parsed = ParquetComplexTypesImportModeType.from_str(raw) if isinstance(raw, str) else None
        if parsed is None:
            raise PydanticCustomError(
                "invalid_config_error", f"Invalid complex_types_import_mode: {raw}."
            )
        return parsed.as_human_str()
0 commit comments