Skip to content

Commit a1aeb15

Browse files
committed
python(feat): Upload TDMS metadata to Sift Runs
1 parent c89851d commit a1aeb15

5 files changed

Lines changed: 244 additions & 48 deletions

File tree

python/lib/sift_py/_internal/metadata.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
1-
from typing import Dict, List, Union
1+
from typing import Any, Callable, Dict, List, Optional, Union
22

33
from sift.metadata.v1.metadata_pb2 import MetadataKey, MetadataKeyType, MetadataValue
44

55

6-
def metadata_dict_to_pb(_metadata: Dict[str, Union[str, float, bool]]) -> List[MetadataValue]:
6+
def metadata_dict_to_pb(
7+
_metadata: Dict[str, Union[str, float, bool, int]],
8+
parse: Optional[Callable[[Any], Optional[Union[str, float, bool, int]]]] = None,
9+
) -> List[MetadataValue]:
710
"""
811
Wraps metadata dictionary into a list of MetadataValue objects.
912
1013
Args:
1114
_metadata: Dictionary of metadata key-value pairs.
12-
15+
parse: Optional function to parse complex types into a compatible
16+
metadata type (i.e., str, float, int, or bool). Function should raise an
17+
Exception if it can't parse the value.
1318
Returns:
1419
List of MetadataValue objects.
1520
"""
@@ -21,6 +26,12 @@ def metadata_dict_to_pb(_metadata: Dict[str, Union[str, float, bool]]) -> List[M
2126
boolean_value = None
2227
number_value = None
2328

29+
if not isinstance(value, (str, float, bool, int)):
30+
if parse:
31+
value = parse(value)
32+
else:
33+
raise ValueError(f"Unsupported metadata value type for key '{key}': {value}")
34+
2435
if isinstance(value, str):
2536
string_value = value
2637
type = MetadataKeyType.METADATA_KEY_TYPE_STRING
@@ -46,7 +57,7 @@ def metadata_dict_to_pb(_metadata: Dict[str, Union[str, float, bool]]) -> List[M
4657
return metadata
4758

4859

49-
def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, float, bool]]:
60+
def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, float, bool, int]]:
5061
"""
5162
Unwraps a list of MetadataValue objects into a dictionary.
5263
@@ -56,7 +67,7 @@ def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, f
5667
Returns:
5768
Dictionary of metadata key-value pairs.
5869
"""
59-
unwrapped_metadata: Dict[str, Union[str, float, bool]] = {}
70+
unwrapped_metadata: Dict[str, Union[str, float, bool, int]] = {}
6071
for md in metadata:
6172
if md.key.name in unwrapped_metadata:
6273
raise ValueError(f"Key already exists: {md.key.name}")
@@ -68,3 +79,31 @@ def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, f
6879
unwrapped_metadata[md.key.name] = md.number_value
6980

7081
return unwrapped_metadata
82+
83+
84+
def metadata_pb_to_dict_api(metadata: List[MetadataValue]) -> List[Dict[str, Any]]:
    """
    Serializes a list of MetadataValue objects into API-compatible dicts,
    preserving the proto structure.

    Args:
        metadata: List of MetadataValue objects.

    Returns:
        List with one dict per MetadataValue. Each dict holds the key's name and
        type under "key", plus the single value field selected by the key's type
        ("string_value", "boolean_value", or "number_value").

    Raises:
        ValueError: If a key's type is not one of the string, boolean, or
            number key types.
    """
    # Which value field to read for each supported key type.
    value_field_by_type = {
        MetadataKeyType.METADATA_KEY_TYPE_STRING: "string_value",
        MetadataKeyType.METADATA_KEY_TYPE_BOOLEAN: "boolean_value",
        MetadataKeyType.METADATA_KEY_TYPE_NUMBER: "number_value",
    }

    serialized: List[Dict[str, Any]] = []
    for md in metadata:
        field_name = value_field_by_type.get(md.key.type)
        if field_name is None:
            # The key type matched none of the supported types above.
            raise ValueError(f"{md.key.name} has no values set")
        serialized.append(
            {
                "key": {"name": md.key.name, "type": md.key.type},
                field_name: getattr(md, field_name),
            }
        )
    return serialized

python/lib/sift_py/data_import/_tdms_test.py

Lines changed: 119 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66
from nptdms import TdmsFile, types # type: ignore
77
from pytest_mock import MockFixture
8+
from sift.metadata.v1.metadata_pb2 import MetadataKeyType
89

910
from sift_py.data_import.tdms import TdmsTimeFormat, TdmsUploadService, sanitize_string
1011
from sift_py.rest import SiftRestConfig
@@ -40,7 +41,14 @@ def channels(self) -> List[MockTdmsChannel]:
4041
class MockTdmsFile:
4142
def __init__(self, groups: List[MockTdmsGroup]):
4243
self._groups: List[MockTdmsGroup] = groups
43-
self.properties: Dict[str, str] = {}
44+
# Example properties for each type
45+
self.properties: Dict[str, Any] = {
46+
"string_prop": "example",
47+
"int_prop": 42,
48+
"float_prop": 3.14,
49+
"bool_prop": True,
50+
"datetime_prop": pd.Timestamp("2024-01-01T12:00:00"),
51+
}
4452

4553
def groups(self) -> List[MockTdmsGroup]:
4654
return self._groups
@@ -50,9 +58,9 @@ def as_dataframe(self, *_, **__):
5058

5159

5260
class MockResponse:
53-
def __init__(self):
54-
self.status_code = 200
55-
self.text = json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"})
61+
def __init__(self, status_code=None, text=None):
62+
self.status_code = status_code or 200
63+
self.text = text or json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"})
5664

5765
def json(self) -> dict:
5866
return json.loads(self.text)
@@ -730,3 +738,110 @@ def mock_tdms_file_constructor2(path):
730738
tdms_time_format=TdmsTimeFormat.TIME_CHANNEL,
731739
ignore_errors=True,
732740
)
741+
742+
743+
def test_tdms_upload_service_upload_with_metadata(
    mocker: MockFixture, mock_waveform_tdms_file: MockTdmsFile
):
    """
    Checks TdmsUploadService.upload with include_metadata=True.

    Verifies the two argument-validation errors, then inspects the first POST
    request of a successful upload and asserts that its JSON payload carries
    the run name and the expected metadata entries.
    """
    mocker.patch("sift_py.data_import.tdms.Path.is_file").return_value = True
    mocker.patch("sift_py.data_import.csv.os.path.getsize").return_value = 10

    # Every TdmsFile(...) construction yields the mock file.
    mocker.patch("sift_py.data_import.tdms.TdmsFile", return_value=mock_waveform_tdms_file)

    def fake_post(*args, **kwargs):
        url = kwargs.get("url") or (args[1] if len(args) > 1 else "")
        if "run" in url:
            # Run creation response.
            return MockResponse(
                status_code=200,
                text=json.dumps({"run": {"runId": "new_run_id"}}),
            )
        # Config upload ("data-imports:upload"), file upload ("some_url.com"),
        # and any other URL all receive the default response.
        return MockResponse()

    # A single patched Session.post serves run creation and the data import calls.
    mock_requests_post = mocker.patch("sift_py.rest.requests.Session.post")
    mock_requests_post.side_effect = fake_post

    svc = TdmsUploadService(rest_config)

    # Supplying an existing run_id together with include_metadata must be rejected.
    with pytest.raises(ValueError, match="Metadata can only be included in new runs"):
        svc.upload(
            "some_tdms.tdms",
            "asset_name",
            include_metadata=True,
            run_id="existing_run_id",
            run_name="Run Name",
        )

    # Omitting run_name together with include_metadata must be rejected.
    with pytest.raises(ValueError, match="Must provide a run_name to include metadata"):
        svc.upload(
            "some_tdms.tdms",
            "asset_name",
            include_metadata=True,
            run_name=None,
        )

    # Valid arguments: the upload goes through and posts the run creation request.
    svc.upload(
        "some_tdms.tdms",
        "asset_name",
        include_metadata=True,
        run_name="Run Name",
    )

    # The first recorded POST is expected to be the run creation request.
    first_post = mock_requests_post.call_args_list[0]
    create_run_post_data = json.loads(first_post.kwargs["data"])
    assert create_run_post_data["name"] == "Run Name"

    assert "metadata" in create_run_post_data
    entries = create_run_post_data["metadata"]

    def assert_key(entry, expected_name, expected_type):
        assert entry["key"]["name"] == expected_name
        assert entry["key"]["type"] == expected_type

    assert_key(entries[0], "string_prop", MetadataKeyType.METADATA_KEY_TYPE_STRING)
    assert entries[0]["string_value"] == "example"

    assert_key(entries[1], "int_prop", MetadataKeyType.METADATA_KEY_TYPE_NUMBER)
    assert entries[1]["number_value"] == 42

    assert_key(entries[2], "float_prop", MetadataKeyType.METADATA_KEY_TYPE_NUMBER)
    assert entries[2]["number_value"] == 3.14

    assert_key(entries[3], "bool_prop", MetadataKeyType.METADATA_KEY_TYPE_BOOLEAN)
    assert entries[3]["boolean_value"] is True

    # The datetime property is expected to arrive as a string entry.
    assert_key(entries[4], "datetime_prop", MetadataKeyType.METADATA_KEY_TYPE_STRING)
    assert entries[4]["string_value"].startswith("2024-01-01T12:00:00")

python/lib/sift_py/data_import/csv.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
import pandas as pd
99
from alive_progress import alive_bar # type: ignore
10+
from sift.metadata.v1.metadata_pb2 import MetadataValue
1011

12+
from sift_py._internal.metadata import metadata_pb_to_dict_api
1113
from sift_py.data_import.config import CsvConfig
1214
from sift_py.data_import.status import DataImportService
1315
from sift_py.data_import.time_format import TimeFormatType
@@ -18,6 +20,7 @@
1820
class CsvUploadService(_RestService):
1921
UPLOAD_PATH = "/api/v1/data-imports:upload"
2022
URL_PATH = "/api/v1/data-imports:url"
23+
RUN_PATH = "/api/v2/runs"
2124

2225
_rest_conf: SiftRestConfig
2326
_upload_uri: str
@@ -258,6 +261,50 @@ def _mime_and_content_type_from_path(path: Path) -> Tuple[str, Optional[str], Op
258261
mime, encoding = mimetypes.guess_type(path)
259262
return file_name, mime, encoding
260263

264+
def _create_run(self, run_name: str, metadata: Optional[List[MetadataValue]] = None) -> str:
    """Create a new run through the REST service and return its run id.

    Args:
        run_name: The name of the Run.
        metadata: Optional metadata values to attach to the run. When given
            (and non-empty) they are serialized with metadata_pb_to_dict_api
            and sent under the "metadata" key.

    Returns:
        The "runId" value found under "run" in the JSON response.

    Raises:
        Exception: If the response status code is not 200, the response body
            cannot be decoded as JSON, or the "run" / "runId" keys are missing.
    """
    endpoint = urljoin(self._base_uri, self.RUN_PATH)

    payload: Dict[str, Any] = {"name": run_name, "description": ""}
    if metadata:
        payload["metadata"] = metadata_pb_to_dict_api(metadata)

    # NOTE(review): the media type is sent under "Content-Encoding"; "Content-Type"
    # is the conventional header for it. Left unchanged here; confirm with the API.
    response = self._session.post(
        url=endpoint,
        headers={"Content-Encoding": "application/json"},
        data=json.dumps(payload),
    )
    if response.status_code != 200:
        raise Exception(
            f"Run creation failed with status code {response.status_code}. {response.text}"
        )

    try:
        body = response.json()
    except (json.decoder.JSONDecodeError, KeyError):
        raise Exception(f"Invalid response: {response.text}")

    # Validate the response shape one level at a time so the error names the missing key.
    if "run" not in body:
        raise Exception("Response missing key: run")
    run = body["run"]
    if "runId" not in run:
        raise Exception("Response missing key: runId")

    return run["runId"]
307+
261308

262309
class _ProgressFile:
263310
"""Displays the status with alive_bar while reading the file."""

python/lib/sift_py/data_import/hdf5.py

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
import json
21
import uuid
32
from collections import defaultdict
43
from contextlib import ExitStack
54
from pathlib import Path
65
from typing import Dict, List, Tuple, Union, cast
7-
from urllib.parse import urljoin
86

97
import numpy as np
108

@@ -37,7 +35,6 @@ class Hdf5UploadService:
3735
Service to upload HDF5 files.
3836
"""
3937

40-
_RUN_PATH = "/api/v2/runs"
4138
_csv_upload_service: CsvUploadService
4239
_prev_run_id: str
4340

@@ -96,7 +93,7 @@ def upload(
9693
# Perform now instead of before the config split to avoid creating a run if any problems arise before we are ready to upload
9794
# Active run_id copied to _prev_run_id for user reference
9895
if hdf5_config._hdf5_config.run_name != "":
99-
run_id = self._create_run(hdf5_config._hdf5_config.run_name)
96+
run_id = self._csv_upload_service._create_run(hdf5_config._hdf5_config.run_name)
10097
for _, csv_config in csv_items:
10198
csv_config._csv_config.run_name = ""
10299
csv_config._csv_config.run_id = run_id
@@ -127,40 +124,6 @@ def get_previous_upload_run_id(self) -> str:
127124
"""Return the run_id used in the previous upload"""
128125
return self._prev_run_id
129126

130-
def _create_run(self, run_name: str) -> str:
131-
"""Create a new run using the REST service, and return a run_id"""
132-
run_uri = urljoin(self._csv_upload_service._base_uri, self._RUN_PATH)
133-
134-
# Since CSVUploadService is already a RestService, we can utilize that
135-
response = self._csv_upload_service._session.post(
136-
url=run_uri,
137-
headers={
138-
"Content-Encoding": "application/json",
139-
},
140-
data=json.dumps(
141-
{
142-
"name": run_name,
143-
"description": "",
144-
}
145-
),
146-
)
147-
if response.status_code != 200:
148-
raise Exception(
149-
f"Run creation failed with status code {response.status_code}. {response.text}"
150-
)
151-
152-
try:
153-
run_info = response.json()
154-
except (json.decoder.JSONDecodeError, KeyError):
155-
raise Exception(f"Invalid response: {response.text}")
156-
157-
if "run" not in run_info:
158-
raise Exception("Response missing key: run")
159-
if "runId" not in run_info["run"]:
160-
raise Exception("Response missing key: runId")
161-
162-
return run_info["run"]["runId"]
163-
164127

165128
def _convert_to_csv_file(
166129
src_path: Union[str, Path],

0 commit comments

Comments
 (0)