Skip to content

Commit 2e8e0c7

Browse files
paleolimbot and Copilot authored
Support ListView/BinaryView/RunEndEncoded types in integration test JSON parser (#9888)
# Which issue does this PR close?

Supports unskipping more types in the Rust IPC/C Data tests for apache/arrow#49910 / apache/arrow#49744.

# Rationale for this change

View types and decimal 32/64 are supported in Rust but aren't supported in the integration test JSON implementation (so they fail when the integration test tries to check them).

# What changes are included in this PR?

The integration test JSON parser now supports how these values are represented.

# Are these changes tested?

Yes. I've added entries for the new types to the embedded integration.json, and I've run the apache/arrow PR against this branch with these types no longer being skipped.

# Are there any user-facing changes?

No

---------

Co-authored-by: Copilot <copilot@github.com>
1 parent f1ef71a commit 2e8e0c7

4 files changed

Lines changed: 560 additions & 39 deletions

File tree

arrow-integration-test/data/integration.json

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,89 @@
319319
"children": []
320320
}
321321
]
322+
},
323+
{
324+
"name": "utf8views",
325+
"type": {
326+
"name": "utf8view"
327+
},
328+
"nullable": true,
329+
"children": []
330+
},
331+
{
332+
"name": "binaryviews",
333+
"type": {
334+
"name": "binaryview"
335+
},
336+
"nullable": true,
337+
"children": []
338+
},
339+
{
340+
"name": "listviews",
341+
"type": {
342+
"name": "listview"
343+
},
344+
"nullable": true,
345+
"children": [
346+
{
347+
"name": "item",
348+
"type": {
349+
"name": "int",
350+
"isSigned": true,
351+
"bitWidth": 32
352+
},
353+
"nullable": true,
354+
"children": []
355+
}
356+
]
357+
},
358+
{
359+
"name": "largelistviews",
360+
"type": {
361+
"name": "largelistview"
362+
},
363+
"nullable": true,
364+
"children": [
365+
{
366+
"name": "item",
367+
"type": {
368+
"name": "int",
369+
"isSigned": true,
370+
"bitWidth": 32
371+
},
372+
"nullable": true,
373+
"children": []
374+
}
375+
]
376+
},
377+
{
378+
"name": "runendencoded",
379+
"type": {
380+
"name": "runendencoded"
381+
},
382+
"nullable": true,
383+
"children": [
384+
{
385+
"name": "run_ends",
386+
"type": {
387+
"name": "int",
388+
"isSigned": true,
389+
"bitWidth": 16
390+
},
391+
"nullable": false,
392+
"children": []
393+
},
394+
{
395+
"name": "values",
396+
"type": {
397+
"name": "int",
398+
"isSigned": true,
399+
"bitWidth": 32
400+
},
401+
"nullable": true,
402+
"children": []
403+
}
404+
]
322405
}
323406
]
324407
},
@@ -801,6 +884,162 @@
801884
]
802885
}
803886
]
887+
},
888+
{
889+
"name": "utf8views",
890+
"count": 3,
891+
"VALIDITY": [
892+
1,
893+
0,
894+
1
895+
],
896+
"VIEWS": [
897+
{
898+
"SIZE": 5,
899+
"INLINED": "hello"
900+
},
901+
{
902+
"SIZE": 0,
903+
"INLINED": ""
904+
},
905+
{
906+
"SIZE": 19,
907+
"PREFIX_HEX": "74686973",
908+
"BUFFER_INDEX": 0,
909+
"OFFSET": 0
910+
}
911+
],
912+
"VARIADIC_DATA_BUFFERS": ["74686973206973206E6F7420696E6C696E6564"]
913+
},
914+
{
915+
"name": "binaryviews",
916+
"count": 3,
917+
"VALIDITY": [
918+
1,
919+
1,
920+
0
921+
],
922+
"VIEWS": [
923+
{
924+
"SIZE": 2,
925+
"INLINED": "F34D"
926+
},
927+
{
928+
"SIZE": 16,
929+
"PREFIX_HEX": "00010203",
930+
"BUFFER_INDEX": 0,
931+
"OFFSET": 0
932+
},
933+
{
934+
"SIZE": 0,
935+
"INLINED": ""
936+
}
937+
],
938+
"VARIADIC_DATA_BUFFERS": ["000102030405060708090A0B0C0D0E0F"]
939+
},
940+
{
941+
"name": "listviews",
942+
"count": 3,
943+
"VALIDITY": [
944+
1,
945+
0,
946+
1
947+
],
948+
"OFFSET": [
949+
0,
950+
2,
951+
2
952+
],
953+
"SIZE": [
954+
2,
955+
0,
956+
3
957+
],
958+
"children": [
959+
{
960+
"name": "item",
961+
"count": 5,
962+
"VALIDITY": [
963+
1,
964+
1,
965+
1,
966+
0,
967+
1
968+
],
969+
"DATA": [
970+
1,
971+
2,
972+
3,
973+
4,
974+
5
975+
]
976+
}
977+
]
978+
},
979+
{
980+
"name": "largelistviews",
981+
"count": 3,
982+
"VALIDITY": [
983+
1,
984+
1,
985+
0
986+
],
987+
"OFFSET": [
988+
"0",
989+
"2",
990+
"3"
991+
],
992+
"SIZE": [
993+
"2",
994+
"1",
995+
"0"
996+
],
997+
"children": [
998+
{
999+
"name": "item",
1000+
"count": 3,
1001+
"VALIDITY": [
1002+
1,
1003+
0,
1004+
1
1005+
],
1006+
"DATA": [
1007+
10,
1008+
20,
1009+
30
1010+
]
1011+
}
1012+
]
1013+
},
1014+
{
1015+
"name": "runendencoded",
1016+
"count": 3,
1017+
"children": [
1018+
{
1019+
"name": "run_ends",
1020+
"count": 2,
1021+
"VALIDITY": [
1022+
1,
1023+
1
1024+
],
1025+
"DATA": [
1026+
2,
1027+
3
1028+
]
1029+
},
1030+
{
1031+
"name": "values",
1032+
"count": 2,
1033+
"VALIDITY": [
1034+
1,
1035+
0
1036+
],
1037+
"DATA": [
1038+
100,
1039+
200
1040+
]
1041+
}
1042+
]
8041043
}
8051044
]
8061045
}

arrow-integration-test/src/datatype.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result<DataType> {
2929
Some(s) if s == "bool" => Ok(DataType::Boolean),
3030
Some(s) if s == "binary" => Ok(DataType::Binary),
3131
Some(s) if s == "largebinary" => Ok(DataType::LargeBinary),
32+
Some(s) if s == "binaryview" => Ok(DataType::BinaryView),
33+
Some(s) if s == "utf8view" => Ok(DataType::Utf8View),
3234
Some(s) if s == "utf8" => Ok(DataType::Utf8),
3335
Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8),
3436
Some(s) if s == "fixedsizebinary" => {
@@ -182,6 +184,14 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result<DataType> {
182184
// return a largelist with a placeholder child field, as its child isn't defined in the map
183185
Ok(DataType::LargeList(default_field))
184186
}
187+
Some(s) if s == "listview" => {
188+
// return a listview with a placeholder child field, as its child isn't defined in the map
189+
Ok(DataType::ListView(default_field))
190+
}
191+
Some(s) if s == "largelistview" => {
192+
// return a large listview with a placeholder child field, as its child isn't defined in the map
193+
Ok(DataType::LargeListView(default_field))
194+
}
185195
Some(s) if s == "fixedsizelist" => {
186196
// return a fixed-size list with a placeholder child field, as its child isn't defined in the map
187197
if let Some(Value::Number(size)) = map.get("listSize") {
@@ -199,6 +209,13 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result<DataType> {
199209
// return an empty `struct` type as its children aren't defined in the map
200210
Ok(DataType::Struct(Fields::empty()))
201211
}
212+
Some(s) if s == "runendencoded" => {
213+
// return a run-end-encoded type with placeholder child fields, as its children aren't defined in the map
214+
Ok(DataType::RunEndEncoded(
215+
Arc::new(Field::new("run_ends", DataType::Int32, false)),
216+
default_field,
217+
))
218+
}
202219
Some(s) if s == "map" => {
203220
if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") {
204221
// Return a map with an empty type as its children aren't defined in the map
@@ -271,19 +288,17 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value {
271288
DataType::LargeUtf8 => json!({"name": "largeutf8"}),
272289
DataType::Binary => json!({"name": "binary"}),
273290
DataType::LargeBinary => json!({"name": "largebinary"}),
274-
DataType::BinaryView | DataType::Utf8View => {
275-
unimplemented!("BinaryView/Utf8View not implemented")
276-
}
291+
DataType::BinaryView => json!({"name": "binaryview"}),
292+
DataType::Utf8View => json!({"name": "utf8view"}),
277293
DataType::FixedSizeBinary(byte_width) => {
278294
json!({"name": "fixedsizebinary", "byteWidth": byte_width})
279295
}
280296
DataType::Struct(_) => json!({"name": "struct"}),
281297
DataType::Union(_, _) => json!({"name": "union"}),
282298
DataType::List(_) => json!({ "name": "list"}),
283299
DataType::LargeList(_) => json!({ "name": "largelist"}),
284-
DataType::ListView(_) | DataType::LargeListView(_) => {
285-
unimplemented!("ListView/LargeListView not implemented")
286-
}
300+
DataType::ListView(_) => json!({ "name": "listview"}),
301+
DataType::LargeListView(_) => json!({ "name": "largelistview"}),
287302
DataType::FixedSizeList(_, length) => {
288303
json!({"name":"fixedsizelist", "listSize": length})
289304
}
@@ -352,7 +367,7 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value {
352367
DataType::Map(_, keys_sorted) => {
353368
json!({"name": "map", "keysSorted": keys_sorted})
354369
}
355-
DataType::RunEndEncoded(_, _) => todo!(),
370+
DataType::RunEndEncoded(_, _) => json!({"name": "runendencoded"}),
356371
}
357372
}
358373

0 commit comments

Comments
 (0)