Skip to content

Commit a03e445

Browse files
chore: bump arrow/parquet to 57.3.0
Includes fix for FixedSizeBinary LEFT JOIN bug - apache/arrow-rs#8981 Cherry-picked test and API updates from - apache#19355
1 parent fabeef1 commit a03e445

12 files changed

Lines changed: 109 additions & 78 deletions

File tree

Cargo.lock

Lines changed: 32 additions & 32 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,19 +91,19 @@ ahash = { version = "0.8", default-features = false, features = [
9191
"runtime-rng",
9292
] }
9393
apache-avro = { version = "0.21", default-features = false }
94-
arrow = { version = "57.1.0", features = [
94+
arrow = { version = "57.2.0", features = [
9595
"prettyprint",
9696
"chrono-tz",
9797
] }
98-
arrow-buffer = { version = "57.1.0", default-features = false }
99-
arrow-flight = { version = "57.1.0", features = [
98+
arrow-buffer = { version = "57.2.0", default-features = false }
99+
arrow-flight = { version = "57.2.0", features = [
100100
"flight-sql-experimental",
101101
] }
102-
arrow-ipc = { version = "57.1.0", default-features = false, features = [
102+
arrow-ipc = { version = "57.2.0", default-features = false, features = [
103103
"lz4",
104104
] }
105-
arrow-ord = { version = "57.1.0", default-features = false }
106-
arrow-schema = { version = "57.1.0", default-features = false }
105+
arrow-ord = { version = "57.2.0", default-features = false }
106+
arrow-schema = { version = "57.2.0", default-features = false }
107107
async-trait = "0.1.89"
108108
bigdecimal = "0.4.8"
109109
bytes = "1.11"
@@ -166,7 +166,7 @@ log = "^0.4"
166166
num-traits = { version = "0.2" }
167167
object_store = { version = "0.12.4", default-features = false }
168168
parking_lot = "0.12"
169-
parquet = { version = "57.1.0", default-features = false, features = [
169+
parquet = { version = "57.2.0", default-features = false, features = [
170170
"arrow",
171171
"async",
172172
"object_store",

datafusion/common/src/scalar/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8868,7 +8868,7 @@ mod tests {
88688868
.unwrap(),
88698869
ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
88708870
ScalarValue::try_new_null(&DataType::Union(
8871-
UnionFields::new(vec![42], vec![field_ref]),
8871+
UnionFields::try_new(vec![42], vec![field_ref]).unwrap(),
88728872
UnionMode::Dense,
88738873
))
88748874
.unwrap(),
@@ -8971,13 +8971,14 @@ mod tests {
89718971
}
89728972

89738973
// Test union type
8974-
let union_fields = UnionFields::new(
8974+
let union_fields = UnionFields::try_new(
89758975
vec![0, 1],
89768976
vec![
89778977
Field::new("i32", DataType::Int32, false),
89788978
Field::new("f64", DataType::Float64, false),
89798979
],
8980-
);
8980+
)
8981+
.unwrap();
89818982
let union_result = ScalarValue::new_default(&DataType::Union(
89828983
union_fields.clone(),
89838984
UnionMode::Sparse,

datafusion/datasource-avro/src/avro_to_arrow/schema.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ fn schema_to_field_with_props(
117117
.iter()
118118
.map(|s| schema_to_field_with_props(s, None, has_nullable, None))
119119
.collect::<Result<Vec<Field>>>()?;
120-
let type_ids = 0_i8..fields.len() as i8;
121-
DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
120+
// Assign type_ids based on the order in which they appear
121+
DataType::Union(UnionFields::from_fields(fields), UnionMode::Dense)
122122
}
123123
}
124124
AvroSchema::Record(RecordSchema { fields, .. }) => {

datafusion/functions/src/core/union_extract.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,13 +189,14 @@ mod tests {
189189
fn test_scalar_value() -> Result<()> {
190190
let fun = UnionExtractFun::new();
191191

192-
let fields = UnionFields::new(
192+
let fields = UnionFields::try_new(
193193
vec![1, 3],
194194
vec![
195195
Field::new("str", DataType::Utf8, false),
196196
Field::new("int", DataType::Int32, false),
197197
],
198-
);
198+
)
199+
.unwrap();
199200

200201
let args = vec![
201202
ColumnarValue::Scalar(ScalarValue::Union(

datafusion/physical-plan/src/filter.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1550,13 +1550,14 @@ mod tests {
15501550
#[test]
15511551
fn test_equivalence_properties_union_type() -> Result<()> {
15521552
let union_type = DataType::Union(
1553-
UnionFields::new(
1553+
UnionFields::try_new(
15541554
vec![0, 1],
15551555
vec![
15561556
Field::new("f1", DataType::Int32, true),
15571557
Field::new("f2", DataType::Utf8, true),
15581558
],
1559-
),
1559+
)
1560+
.unwrap(),
15601561
UnionMode::Sparse,
15611562
);
15621563

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -304,13 +304,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType {
304304
};
305305
let union_fields = parse_proto_fields_to_fields(&union.union_types)?;
306306

307-
// Default to index based type ids if not provided
308-
let type_ids: Vec<_> = match union.type_ids.is_empty() {
309-
true => (0..union_fields.len() as i8).collect(),
310-
false => union.type_ids.iter().map(|i| *i as i8).collect(),
307+
// Default to index based type ids if not explicitly provided
308+
let union_fields = if union.type_ids.is_empty() {
309+
UnionFields::from_fields(union_fields)
310+
} else {
311+
let type_ids = union.type_ids.iter().map(|i| *i as i8);
312+
UnionFields::try_new(type_ids, union_fields).map_err(|e| {
313+
DataFusionError::from(e).context("Deserializing Union DataType")
314+
})?
311315
};
312-
313-
DataType::Union(UnionFields::new(type_ids, union_fields), union_mode)
316+
DataType::Union(union_fields, union_mode)
314317
}
315318
arrow_type::ArrowTypeEnum::Dictionary(dict) => {
316319
let key_datatype = dict.as_ref().key.as_deref().required("key")?;
@@ -602,7 +605,9 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
602605
.collect::<Option<Vec<_>>>();
603606
let fields = fields.ok_or_else(|| Error::required("UnionField"))?;
604607
let fields = parse_proto_fields_to_fields(&fields)?;
605-
let fields = UnionFields::new(ids, fields);
608+
let union_fields = UnionFields::try_new(ids, fields).map_err(|e| {
609+
DataFusionError::from(e).context("Deserializing Union ScalarValue")
610+
})?;
606611
let v_id = val.value_id as i8;
607612
let val = match &val.value {
608613
None => None,
@@ -614,7 +619,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
614619
Some((v_id, Box::new(val)))
615620
}
616621
};
617-
Self::Union(val, fields, mode)
622+
Self::Union(val, union_fields, mode)
618623
}
619624
Value::FixedSizeBinaryValue(v) => {
620625
Self::FixedSizeBinary(v.length, Some(v.clone().values))

datafusion/proto/tests/cases/roundtrip_logical_plan.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1780,19 +1780,20 @@ fn round_trip_datatype() {
17801780
),
17811781
])),
17821782
DataType::Union(
1783-
UnionFields::new(
1783+
UnionFields::try_new(
17841784
vec![7, 5, 3],
17851785
vec![
17861786
Field::new("nullable", DataType::Boolean, false),
17871787
Field::new("name", DataType::Utf8, false),
17881788
Field::new("datatype", DataType::Binary, false),
17891789
],
1790-
),
1790+
)
1791+
.unwrap(),
17911792
UnionMode::Sparse,
17921793
),
17931794
DataType::Union(
1794-
UnionFields::new(
1795-
vec![5, 8, 1],
1795+
UnionFields::try_new(
1796+
vec![5, 8, 1, 100],
17961797
vec![
17971798
Field::new("nullable", DataType::Boolean, false),
17981799
Field::new("name", DataType::Utf8, false),
@@ -1807,7 +1808,8 @@ fn round_trip_datatype() {
18071808
true,
18081809
),
18091810
],
1810-
),
1811+
)
1812+
.unwrap(),
18111813
UnionMode::Dense,
18121814
),
18131815
DataType::Dictionary(

datafusion/sqllogictest/src/test_context.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,14 +436,15 @@ fn create_example_udf() -> ScalarUDF {
436436

437437
fn register_union_table(ctx: &SessionContext) {
438438
let union = UnionArray::try_new(
439-
UnionFields::new(
439+
UnionFields::try_new(
440440
// typeids: 3 for int, 1 for string
441441
vec![3, 1],
442442
vec![
443443
Field::new("int", DataType::Int32, false),
444444
Field::new("string", DataType::Utf8, false),
445445
],
446-
),
446+
)
447+
.unwrap(),
447448
ScalarBuffer::from(vec![3, 1, 3]),
448449
None,
449450
vec![

0 commit comments

Comments
 (0)