Skip to content

Commit 7eb9d6a

Browse files
authored
Update data_import protos. Also update sanitize to be compatible with Linux (#313)
1 parent b6e83cb commit 7eb9d6a

2 files changed

Lines changed: 79 additions & 5 deletions

File tree

protos/sift/data_imports/v2/data_imports.proto

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ message CreateDataImportFromUrlRequest {
8484
CsvConfig csv_config = 2 [(google.api.field_behavior) = OPTIONAL];
8585
Ch10Config ch10_config = 3 [(google.api.field_behavior) = OPTIONAL];
8686
TDMSConfig tdms_config = 4 [(google.api.field_behavior) = OPTIONAL];
87+
ParquetConfig parquet_config = 5 [(google.api.field_behavior) = OPTIONAL];
8788
}
8889

8990
message CreateDataImportFromUrlResponse {
@@ -102,6 +103,7 @@ message CreateDataImportFromUploadRequest {
102103
CsvConfig csv_config = 1 [(google.api.field_behavior) = OPTIONAL];
103104
Ch10Config ch10_config = 3 [(google.api.field_behavior) = OPTIONAL];
104105
TDMSConfig tdms_config = 4 [(google.api.field_behavior) = OPTIONAL];
106+
ParquetConfig parquet_config = 5 [(google.api.field_behavior) = OPTIONAL];
105107
}
106108

107109
message CreateDataImportFromUploadResponse {
@@ -136,6 +138,8 @@ message CsvConfig {
136138
optional CsvTimeColumn time_column = 5;
137139
// A map from column number (1-indexed) to the channel configuration for that column.
138140
map<uint32, sift.common.type.v1.ChannelConfig> data_columns = 6;
141+
// This will be read on upload from the file if not set.
142+
optional uint64 num_rows = 7 [(google.api.field_behavior) = OPTIONAL];
139143
}
140144

141145
message CsvTimeColumn {
@@ -145,12 +149,24 @@ message CsvTimeColumn {
145149
optional google.protobuf.Timestamp relative_start_time = 3;
146150
}
147151

152+
153+
154+
// Identifies the file format of a piece of import data.
// Used by DetectConfigRequest.type to label the bytes being inspected.
enum DataTypeKey {
155+
// Zero value; no format given. Marked deprecated in this schema.
DATA_TYPE_KEY_UNSPECIFIED = 0 [deprecated = true];
156+
// CSV data (see CsvConfig).
DATA_TYPE_KEY_CSV = 1;
157+
// TDMS data (see TDMSConfig).
DATA_TYPE_KEY_TDMS = 2;
158+
// Ch10 data (see Ch10Config).
DATA_TYPE_KEY_CH10 = 3;
159+
// Parquet data laid out as a flat dataset (see ParquetFlatDatasetConfig).
DATA_TYPE_KEY_PARQUET_FLATDATASET = 4;
160+
}
161+
148162
// Request to detect an import configuration from raw file bytes.
message DetectConfigRequest {
149163
// Raw bytes to inspect.
// NOTE(review): presumably a leading sample of the file rather than the
// whole file — confirm any size expectations with the service.
bytes data = 1;
164+
// Format of `data`.
// NOTE(review): behavior when left as DATA_TYPE_KEY_UNSPECIFIED is not
// visible here — confirm whether the server falls back to a default format.
DataTypeKey type = 2;
150165
}
151166

152167
// Response carrying the configuration detected from the request data.
// The two configs are independent fields, not a oneof.
// NOTE(review): presumably only the one matching the requested type is
// populated — confirm.
message DetectConfigResponse {
153168
// Detected CSV configuration.
CsvConfig csv_config = 1;
169+
// Detected Parquet configuration.
ParquetConfig parquet_config = 2;
154170
}
155171

156172
message Ch10Config {
@@ -166,6 +182,52 @@ message TDMSConfig {
166182
// Override the wf_start_time metadata field for all channels.
167183
// Useful if your waveform channels have wf_increment but no wf_start_time (Veristand is guilty of this).
168184
google.protobuf.Timestamp start_time_override = 3;
185+
186+
// The file size in bytes.
187+
// If the file has truncated chunks, this will be required to pass validation.
188+
optional uint64 file_size = 4;
189+
}
190+
191+
// Describes the column of a Parquet file that holds time values.
message ParquetTimeColumn {
192+
// Path identifying the time column.
// NOTE(review): presumably the Parquet column path (dotted for nested
// fields) — confirm the expected syntax.
string path = 1;
193+
// Format of the values in the time column.
TimeFormat format = 2;
194+
// Optional start time, with explicit presence.
// NOTE(review): presumably the reference point for relative time formats,
// as with CsvTimeColumn.relative_start_time — confirm.
optional google.protobuf.Timestamp relative_start_time = 3;
195+
}
196+
197+
// Pairs one Parquet column with the channel configuration to use for it.
message ParquetDataColumn {
198+
// Path identifying the data column.
// NOTE(review): presumably the Parquet column path — confirm the expected
// syntax for nested fields.
string path = 1;
199+
// Channel configuration for the values in this column.
sift.common.type.v1.ChannelConfig channel_config = 2;
200+
}
201+
202+
// Configuration for a Parquet file treated as a flat dataset: one time
// column plus a list of data columns.
message ParquetFlatDatasetConfig {
203+
// The column that supplies time values.
ParquetTimeColumn time_column = 1;
204+
// The columns to import, each with its own channel configuration.
repeated ParquetDataColumn data_columns = 2;
205+
}
206+
207+
// Selects how complex-typed Parquet columns are handled during import.
enum ParquetComplexTypesImportMode {
208+
// Zero value; no mode given. Marked deprecated in this schema.
PARQUET_COMPLEX_TYPES_IMPORT_MODE_UNSPECIFIED = 0 [deprecated = true];
209+
// Ignore complex types and do not ingest them.
210+
PARQUET_COMPLEX_TYPES_IMPORT_MODE_IGNORE = 1;
211+
// Import complex types as both Arrow bytes and JSON strings.
212+
PARQUET_COMPLEX_TYPES_IMPORT_MODE_BOTH = 2;
213+
// Import complex types as only JSON strings.
214+
PARQUET_COMPLEX_TYPES_IMPORT_MODE_STRING = 3;
215+
// Import complex types as only Arrow bytes.
216+
PARQUET_COMPLEX_TYPES_IMPORT_MODE_BYTES = 4;
217+
}
218+
219+
// Import configuration for a Parquet file.
message ParquetConfig {
220+
// Name of the asset.
// NOTE(review): presumably the asset the imported data is attached to —
// confirm.
string asset_name = 1;
221+
// Name of the run. Ignored when `run_id` is set.
string run_name = 2;
222+
// The id of the run to add this data to. If set, `run_name` is ignored.
223+
string run_id = 3;
224+
// Layout-specific settings. `flat_dataset` is the only layout declared here.
oneof config {
225+
ParquetFlatDatasetConfig flat_dataset = 4;
226+
}
227+
// NOTE(review): presumably the byte offset of the Parquet footer within the
// file — confirm units and whether 0 means "unknown".
uint64 footer_offset = 5;
228+
// NOTE(review): presumably the length of the Parquet footer in bytes —
// confirm.
uint32 footer_length = 6;
229+
230+
// How complex-typed columns are imported; see ParquetComplexTypesImportMode.
ParquetComplexTypesImportMode complex_types_import_mode = 7;
169231
}
170232

171233
enum DataImportStatus {
@@ -186,6 +248,17 @@ message DataImport {
186248
CsvConfig csv_config = 6 [(google.api.field_behavior) = OPTIONAL];
187249
Ch10Config ch10_config = 9 [(google.api.field_behavior) = OPTIONAL];
188250
TDMSConfig tdms_config = 10 [(google.api.field_behavior) = OPTIONAL];
251+
ParquetConfig parquet_config = 16 [(google.api.field_behavior) = OPTIONAL];
252+
253+
// The run id will be set if the data import ingests to a run once the run is available.
254+
optional string run_id = 11 [(google.api.field_behavior) = OPTIONAL];
255+
256+
// The report id will be set if the data import creates a report once the report is available.
257+
optional string report_id = 12 [(google.api.field_behavior) = OPTIONAL];
258+
259+
optional string asset_id = 13 [(google.api.field_behavior) = OPTIONAL];
260+
optional google.protobuf.Timestamp data_start_time = 14 [(google.api.field_behavior) = OPTIONAL];
261+
optional google.protobuf.Timestamp data_stop_time = 15 [(google.api.field_behavior) = OPTIONAL];
189262
}
190263

191264
message ListDataImportsRequest {

scripts/sanitize.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
#!/usr/bin/env zsh
12
# remove go_package option from all proto files in the sift directory
23
pattern="^option go_package.*"
34
for file in $(grep -ERl "$pattern" --include="*.proto" protos/sift); do
4-
sed -E -i '' "s/${pattern}//g" $file
5+
sed -E -i.bak "s/${pattern}//g" "$file" && rm -f "$file.bak"
56
done
67

78
# remove unstable messages and fields from all proto files in the sift directory
@@ -12,13 +13,13 @@ for file in $(find protos/sift -name "*.proto"); do
1213
fi
1314

1415
# Remove messages marked as unstable
15-
awk '/^message/{p=$0;next} /option.*unstable_message.*true/{printf "/%s/,/^}/d\n", p}' "$file" | sed -i '' -f - "$file"
16-
16+
awk '/^message/{p=$0;next} /option.*unstable_message.*true/{printf "/%s/,/^}/d\n", p}' "$file" | sed -i.bak -f - "$file" && rm -f "$file.bak"
17+
1718
# Remove fields marked as unstable
18-
sed -i '' -e '/.*\[.*sift\.options\.v1\.unstable_field.*\].*/d' "$file"
19+
sed -i.bak -e '/.*\[.*sift\.options\.v1\.unstable_field.*\].*/d' "$file" && rm -f "$file.bak"
1920

2021
# Remove import of unstable proto
21-
sed -i '' -e '/^import.*unstable.proto.*/d' "$file"
22+
sed -i.bak -e '/^import.*unstable.proto.*/d' "$file" && rm -f "$file.bak"
2223
done
2324

2425
echo "Done."

0 commit comments

Comments
 (0)