Skip to content

Commit 8dba654

Browse files
add test showing list predicate wrapping
1 parent 156b5dc commit 8dba654

File tree

1 file changed

+42
-6
lines changed

1 file changed

+42
-6
lines changed

datafusion/datasource-parquet/src/row_filter.rs

Lines changed: 42 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -935,6 +935,7 @@ mod test {
935935
};
936936
use arrow::datatypes::{Field, TimeUnit::Nanosecond};
937937
use datafusion_expr::{Expr, col};
938+
use datafusion_functions::core::get_field;
938939
use datafusion_functions_nested::array_has::{
939940
array_has_all_udf, array_has_any_udf, array_has_udf,
940941
};
@@ -1458,7 +1459,7 @@ mod test {
14581459
)]));
14591460

14601461
// get_field(struct_col, 'a') > 5
1461-
let get_field_expr = datafusion_functions::core::get_field().call(vec![
1462+
let get_field_expr = get_field().call(vec![
14621463
col("struct_col"),
14631464
Expr::Literal(ScalarValue::Utf8(Some("a".to_string())), None),
14641465
]);
@@ -1483,7 +1484,7 @@ mod test {
14831484
)]));
14841485

14851486
// get_field(struct_col, 'nested') IS NOT NULL — the leaf is still a struct
1486-
let get_field_expr = datafusion_functions::core::get_field().call(vec![
1487+
let get_field_expr = get_field().call(vec![
14871488
col("struct_col"),
14881489
Expr::Literal(ScalarValue::Utf8(Some("nested".to_string())), None),
14891490
]);
@@ -1493,6 +1494,41 @@ mod test {
14931494
assert!(!can_expr_be_pushed_down_with_schemas(&expr, &table_schema));
14941495
}
14951496

1497+
/// `get_field` resolving to a list nested inside a struct column should still allow
1498+
/// pushdown when the access is wrapped in a supported list predicate such as `array_has_any`,
1499+
/// e.g. `array_has_any(get_field(s, 'items'), make_array('x'))` with `s: Struct<id: Int32, items: List<Utf8>>`.
1500+
#[test]
1501+
fn get_field_list_leaf_with_array_predicate_allows_pushdown() {
1502+
let item_field = Arc::new(Field::new("item", DataType::Utf8, true));
1503+
let table_schema = Arc::new(Schema::new(vec![Field::new(
1504+
"s",
1505+
DataType::Struct(
1506+
vec![
1507+
Arc::new(Field::new("id", DataType::Int32, true)),
1508+
Arc::new(Field::new("items", DataType::List(item_field), true)),
1509+
]
1510+
.into(),
1511+
),
1512+
true,
1513+
)]));
1514+
1515+
// Build array_has_any(get_field(s, 'items'), make_array('x')): the list predicate wraps the struct field access.
1516+
let get_field_expr = get_field().call(vec![
1517+
col("s"),
1518+
Expr::Literal(ScalarValue::Utf8(Some("items".to_string())), None),
1519+
]);
1520+
let expr = array_has_any(
1521+
get_field_expr,
1522+
make_array(vec![Expr::Literal(
1523+
ScalarValue::Utf8(Some("x".to_string())),
1524+
None,
1525+
)]),
1526+
);
1527+
let expr = logical2physical(&expr, &table_schema);
1528+
1529+
assert!(can_expr_be_pushed_down_with_schemas(&expr, &table_schema));
1530+
}
1531+
14961532
/// get_field on a struct produces correct Parquet leaf indices.
14971533
#[test]
14981534
fn get_field_filter_candidate_has_correct_leaf_indices() {
@@ -1540,7 +1576,7 @@ mod test {
15401576
let file_schema = builder.schema().clone();
15411577

15421578
// get_field(s, 'value') > 5
1543-
let get_field_expr = datafusion_functions::core::get_field().call(vec![
1579+
let get_field_expr = get_field().call(vec![
15441580
col("s"),
15451581
Expr::Literal(ScalarValue::Utf8(Some("value".to_string())), None),
15461582
]);
@@ -1582,7 +1618,7 @@ mod test {
15821618
)]));
15831619

15841620
// s['outer']['inner'] > 5
1585-
let get_field_expr = datafusion_functions::core::get_field().call(vec![
1621+
let get_field_expr = get_field().call(vec![
15861622
col("s"),
15871623
Expr::Literal(ScalarValue::Utf8(Some("outer".to_string())), None),
15881624
Expr::Literal(ScalarValue::Utf8(Some("inner".to_string())), None),
@@ -1663,7 +1699,7 @@ mod test {
16631699

16641700
// get_field(s, 'outer', 'inner') > 15
16651701
// Should only need leaf 2 (s.outer.inner), not leaf 1 (s.outer.extra) or leaf 3 (s.tag).
1666-
let get_field_expr = datafusion_functions::core::get_field().call(vec![
1702+
let get_field_expr = get_field().call(vec![
16671703
col("s"),
16681704
Expr::Literal(ScalarValue::Utf8(Some("outer".to_string())), None),
16691705
Expr::Literal(ScalarValue::Utf8(Some("inner".to_string())), None),
@@ -1739,7 +1775,7 @@ mod test {
17391775
let file_schema = parquet_reader_builder.schema().clone();
17401776

17411777
// get_field(s, 'value') > 15 — should match rows with value=20 and value=30
1742-
let get_field_expr = datafusion_functions::core::get_field().call(vec![
1778+
let get_field_expr = get_field().call(vec![
17431779
col("s"),
17441780
Expr::Literal(ScalarValue::Utf8(Some("value".to_string())), None),
17451781
]);

0 commit comments

Comments
 (0)