Skip to content

Commit dcae808

Browse files
committed
add detect config data types
1 parent 2e0f0f5 commit dcae808

3 files changed

Lines changed: 116 additions & 18 deletions

File tree

python/lib/sift_client/resources/data_imports.py

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@
66
from pathlib import Path
77
from typing import TYPE_CHECKING
88

9-
from sift.data_imports.v2.data_imports_pb2 import DATA_TYPE_KEY_CSV
10-
119
from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
1210
from sift_client.resources._base import ResourceBase
1311
from sift_client.sift_types.data_import import (
12+
EXTENSION_TO_DATA_TYPE_KEY,
1413
CsvImportConfig,
1514
DataImport,
1615
DataImportStatus,
@@ -49,29 +48,58 @@ async def import_from_path(
4948
self,
5049
*,
5150
file_path: str | Path,
52-
config: ImportConfig,
51+
config: ImportConfig | None = None,
52+
asset_name: str | None = None,
53+
run_name: str | None = None,
54+
run_id: str | None = None,
5355
) -> DataImport:
5456
"""Import data from a local file.
5557
5658
Creates a data import on the server and uploads the file to the
5759
returned presigned URL. Returns a :class:`DataImport` that can be
5860
polled for status via ``data_import.refresh()``.
5961
62+
When ``config`` is omitted the file format is auto-detected via
63+
:meth:`detect_config` and a :class:`CsvImportConfig` is built using
64+
the provided ``asset_name`` and optional ``run_name`` / ``run_id``.
65+
6066
Args:
6167
file_path: Path to the local file to import.
6268
config: Import configuration describing the file format and column
63-
mapping.
69+
mapping. When provided, ``asset_name``, ``run_name``, and
70+
``run_id`` are ignored.
71+
asset_name: Name of the asset to import into. Required when
72+
``config`` is not provided.
73+
run_name: Optional run name. Only used when ``config`` is not
74+
provided.
75+
run_id: Optional existing run ID. Only used when ``config`` is not
76+
provided.
6477
6578
Returns:
6679
A :class:`DataImport` representing the import operation.
6780
6881
Raises:
6982
FileNotFoundError: If the file does not exist.
83+
ValueError: If neither ``config`` nor ``asset_name`` is provided.
7084
"""
7185
path = Path(file_path)
7286
if not path.is_file():
7387
raise FileNotFoundError(f"File not found: {file_path}")
7488

89+
if config is None:
90+
if asset_name is None:
91+
raise ValueError(
92+
"Either 'config' or 'asset_name' must be provided."
93+
)
94+
detected = await self.detect_config(file_path)
95+
config = detected.model_copy(
96+
update={
97+
"asset_name": asset_name,
98+
"run_name": run_name,
99+
"run_id": run_id,
100+
}
101+
)
102+
75103
data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
76104
logger.info("Created data import %s", data_import_id)
77105

@@ -168,14 +196,15 @@ async def retry(self, data_import: str | DataImport) -> None:
168196
)
169197
await self._low_level_client.retry(data_import_id)
170198

171-
async def detect_config(self, file_path: str | Path) -> CsvImportConfig:
199+
async def detect_config(self, file_path: str | Path) -> ImportConfig:
172200
"""Auto-detect import configuration from a file.
173201
174202
Reads a sample of the file, sends it to the server's DetectConfig
175-
endpoint, and returns the detected configuration. You can inspect
176-
and modify the result before passing it to :meth:`import_from_path`.
203+
endpoint, and returns the detected configuration. The file format
204+
is inferred from the file extension. You can inspect and modify the
205+
result before passing it to :meth:`import_from_path`.
177206
178-
Currently supports CSV files only.
207+
Supported extensions: .csv, .parquet, .tdms, .ch10, .ch11, .h5, .hdf5
179208
180209
Args:
181210
file_path: Path to the file to analyze.
@@ -185,19 +214,38 @@ async def detect_config(self, file_path: str | Path) -> CsvImportConfig:
185214
186215
Raises:
187216
FileNotFoundError: If the file does not exist.
188-
ValueError: If detection returns no config.
217+
ValueError: If the file extension is unsupported or detection
218+
returns no config.
189219
"""
190220
path = Path(file_path)
191221
if not path.is_file():
192222
raise FileNotFoundError(f"File not found: {file_path}")
193223

224+
ext = path.suffix.lower()
225+
data_type_key = EXTENSION_TO_DATA_TYPE_KEY.get(ext)
226+
if data_type_key is None:
227+
raise ValueError(
228+
f"Unsupported file extension '{ext}'. "
229+
f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
230+
)
231+
194232
with open(path, "rb") as f:
195233
sample = f.read(_DETECT_CONFIG_SAMPLE_SIZE)
196234

197-
response = await self._low_level_client.detect_config(sample, DATA_TYPE_KEY_CSV)
235+
response = await self._low_level_client.detect_config(sample, data_type_key.value)
198236

199237
if response.HasField("csv_config"):
200-
return CsvImportConfig._from_proto(response.csv_config)
238+
config = CsvImportConfig._from_proto(response.csv_config)
239+
# The server's DetectConfig may include the time column in
240+
# data_columns, but CreateDataImportFromUpload rejects that
241+
# overlap. Filter it out so the config is import-ready.
242+
time_col = config.time_column.column
243+
filtered = [dc for dc in config.data_columns if dc.column != time_col]
244+
if len(filtered) != len(config.data_columns):
245+
config = config.model_copy(update={"data_columns": filtered})
246+
return config
247+
248+
# TODO: Add parquet_config and hdf5_config once their config types are added.
201249

202250
raise ValueError("Server returned an empty DetectConfig response.")
203251

python/lib/sift_client/resources/sync_stubs/__init__.pyi

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -643,14 +643,15 @@ class DataImportAPI:
643643
...
644644

645645
def _run(self, coro): ...
646-
def detect_config(self, file_path: str | Path) -> CsvImportConfig:
646+
def detect_config(self, file_path: str | Path) -> ImportConfig:
647647
"""Auto-detect import configuration from a file.
648648
649649
Reads a sample of the file, sends it to the server's DetectConfig
650-
endpoint, and returns the detected configuration. You can inspect
651-
and modify the result before passing it to :meth:`import_from_path`.
650+
endpoint, and returns the detected configuration. The file format
651+
is inferred from the file extension. You can inspect and modify the
652+
result before passing it to :meth:`import_from_path`.
652653
653-
Currently supports CSV files only.
654+
Supported extensions: .csv, .parquet, .tdms, .ch10, .ch11, .h5, .hdf5
654655
655656
Args:
656657
file_path: Path to the file to analyze.
@@ -660,7 +661,8 @@ class DataImportAPI:
660661
661662
Raises:
662663
FileNotFoundError: If the file does not exist.
663-
ValueError: If detection returns no config.
664+
ValueError: If the file extension is unsupported or detection
665+
returns no config.
664666
"""
665667
...
666668

@@ -675,23 +677,43 @@ class DataImportAPI:
675677
"""
676678
...
677679

678-
def import_from_path(self, *, file_path: str | Path, config: ImportConfig) -> DataImport:
680+
def import_from_path(
681+
self,
682+
*,
683+
file_path: str | Path,
684+
config: ImportConfig | None = None,
685+
asset_name: str | None = None,
686+
run_name: str | None = None,
687+
run_id: str | None = None,
688+
) -> DataImport:
679689
"""Import data from a local file.
680690
681691
Creates a data import on the server and uploads the file to the
682692
returned presigned URL. Returns a :class:`DataImport` that can be
683693
polled for status via ``data_import.refresh()``.
684694
695+
When ``config`` is omitted the file format is auto-detected via
696+
:meth:`detect_config` and a :class:`CsvImportConfig` is built using
697+
the provided ``asset_name`` and optional ``run_name`` / ``run_id``.
698+
685699
Args:
686700
file_path: Path to the local file to import.
687701
config: Import configuration describing the file format and column
688-
mapping.
702+
mapping. When provided, ``asset_name``, ``run_name``, and
703+
``run_id`` are ignored.
704+
asset_name: Name of the asset to import into. Required when
705+
``config`` is not provided.
706+
run_name: Optional run name. Only used when ``config`` is not
707+
provided.
708+
run_id: Optional existing run ID. Only used when ``config`` is not
709+
provided.
689710
690711
Returns:
691712
A :class:`DataImport` representing the import operation.
692713
693714
Raises:
694715
FileNotFoundError: If the file does not exist.
716+
ValueError: If neither ``config`` nor ``asset_name`` is provided.
695717
"""
696718
...
697719

python/lib/sift_client/sift_types/data_import.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66

77
from pydantic import BaseModel, ConfigDict
88
from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
9+
from sift.data_imports.v2.data_imports_pb2 import (
10+
DATA_TYPE_KEY_CH10,
11+
DATA_TYPE_KEY_CSV,
12+
DATA_TYPE_KEY_HDF5,
13+
DATA_TYPE_KEY_PARQUET_FLATDATASET,
14+
DATA_TYPE_KEY_TDMS,
15+
)
916
from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
1017
from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
1118
from sift.data_imports.v2.data_imports_pb2 import DataImport as DataImportProto
@@ -51,6 +58,27 @@ class DataImportStatus(Enum):
5158
FAILED = DataImportStatusProto.DATA_IMPORT_STATUS_FAILED
5259

5360

61+
class DataTypeKey(Enum):
    """File formats that data import config detection can be asked about.

    Every member's value is the corresponding ``DATA_TYPE_KEY_*`` constant
    imported from ``sift.data_imports.v2.data_imports_pb2``.
    """

    CSV = DATA_TYPE_KEY_CSV
    # Parquet is keyed by the flat-dataset variant of the proto constant.
    PARQUET = DATA_TYPE_KEY_PARQUET_FLATDATASET
    TDMS = DATA_TYPE_KEY_TDMS
    CH10 = DATA_TYPE_KEY_CH10
    HDF5 = DATA_TYPE_KEY_HDF5
69+
70+
71+
# Maps a lower-case file suffix (leading dot included) to its DataTypeKey.
# One row per format; a format may be reachable through several suffixes.
EXTENSION_TO_DATA_TYPE_KEY: dict[str, DataTypeKey] = {
    suffix: key
    for key, suffixes in (
        (DataTypeKey.CSV, (".csv",)),
        (DataTypeKey.PARQUET, (".parquet",)),
        (DataTypeKey.TDMS, (".tdms",)),
        (DataTypeKey.CH10, (".ch10", ".ch11")),
        (DataTypeKey.HDF5, (".h5", ".hdf5")),
    )
    for suffix in suffixes
}
80+
81+
5482
# ---------------------------------------------------------------------------
5583
# CSV config types
5684
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)