Skip to content

Commit 506d666

Browse files
authored
Support IS NULL and IS NOT NULL in datafusion planner (#47)
1 parent e058863 commit 506d666

2 files changed

Lines changed: 141 additions & 0 deletions

File tree

rust/lance-graph/src/datafusion_planner/expression.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr {
5656
BE::Exists(prop) => Expr::IsNotNull(Box::new(to_df_value_expr(
5757
&ValueExpression::Property(prop.clone()),
5858
))),
59+
BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))),
60+
BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))),
5961
_ => lit(true),
6062
}
6163
}

rust/lance-graph/tests/test_datafusion_pipeline.rs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3234,3 +3234,142 @@ async fn test_datafusion_shared_variable_distinct() {
32343234
"DISTINCT should eliminate duplicates"
32353235
);
32363236
}
3237+
3238+
// Checks that a Cypher `IS NULL` predicate on a node property filters rows
// when the query is executed through `execute_datafusion`.
//
// The query returns `p.name` for every `Person` whose `city` is NULL; the
// assertions expect exactly one such row, named "David".
// NOTE(review): this relies on the contents of `create_person_dataset()`,
// which is defined elsewhere — presumably David is the only person without a
// city; confirm against the fixture.
#[tokio::test]
3239+
async fn test_datafusion_is_null_node_property() {
3240+
let config = create_graph_config();
3241+
let person_batch = create_person_dataset();
3242+
3243+
let query = CypherQuery::new("MATCH (p:Person) WHERE p.city IS NULL RETURN p.name")
3244+
.unwrap()
3245+
.with_config(config);
3246+
3247+
// Datasets are keyed by label; only "Person" is needed for a node-only match.
let mut datasets = HashMap::new();
3248+
datasets.insert("Person".to_string(), person_batch);
3249+
3250+
let result = query.execute_datafusion(datasets).await.unwrap();
3251+
3252+
// One matching row, one projected column (`p.name`).
assert_eq!(result.num_rows(), 1);
3253+
assert_eq!(result.num_columns(), 1);
3254+
3255+
// Column 0 is read as a `StringArray`; the `unwrap` panics if it has another type.
let names = result
3256+
.column(0)
3257+
.as_any()
3258+
.downcast_ref::<StringArray>()
3259+
.unwrap();
3260+
assert_eq!(names.value(0), "David");
3261+
}
3262+
3263+
// Checks that a Cypher `IS NOT NULL` predicate on a node property filters rows
// when the query is executed through `execute_datafusion`.
//
// The query returns `p.name` for every `Person` whose `city` is not NULL; the
// assertions expect four rows whose names are Alice, Bob, Charlie and Eve.
// NOTE(review): the expected names depend on `create_person_dataset()`, which
// is defined elsewhere — confirm against the fixture.
#[tokio::test]
3264+
async fn test_datafusion_is_not_null_node_property() {
3265+
let config = create_graph_config();
3266+
let person_batch = create_person_dataset();
3267+
3268+
let query = CypherQuery::new("MATCH (p:Person) WHERE p.city IS NOT NULL RETURN p.name")
3269+
.unwrap()
3270+
.with_config(config);
3271+
3272+
// Datasets are keyed by label; only "Person" is needed for a node-only match.
let mut datasets = HashMap::new();
3273+
datasets.insert("Person".to_string(), person_batch);
3274+
3275+
let result = query.execute_datafusion(datasets).await.unwrap();
3276+
3277+
// Four matching rows, one projected column (`p.name`).
assert_eq!(result.num_rows(), 4);
3278+
assert_eq!(result.num_columns(), 1);
3279+
3280+
// Column 0 is read as a `StringArray`; the `unwrap` panics if it has another type.
let names = result
3281+
.column(0)
3282+
.as_any()
3283+
.downcast_ref::<StringArray>()
3284+
.unwrap();
3285+
3286+
// Compare as sets so the assertion does not depend on the row order of the result.
let name_set: std::collections::HashSet<String> = (0..result.num_rows())
3287+
.map(|i| names.value(i).to_string())
3288+
.collect();
3289+
let expected: std::collections::HashSet<String> = ["Alice", "Bob", "Charlie", "Eve"]
3290+
.iter()
3291+
.map(|s| s.to_string())
3292+
.collect();
3293+
assert_eq!(name_set, expected);
3294+
}
3295+
3296+
// Checks that a Cypher `IS NULL` predicate on a relationship property filters
// rows when the query is executed through `execute_datafusion`.
//
// The query matches `(a:Person)-[r:KNOWS]->(b:Person)` and keeps only the
// relationships whose `since_year` is NULL; the assertions expect exactly one
// row, with `a.name` = "David" and `b.name` = "Eve".
// NOTE(review): the expected pair depends on `create_knows_dataset()` and
// `create_person_dataset()`, which are defined elsewhere — confirm against the fixtures.
#[tokio::test]
3297+
async fn test_datafusion_is_null_relationship_property() {
3298+
let config = create_graph_config();
3299+
let person_batch = create_person_dataset();
3300+
let knows_batch = create_knows_dataset();
3301+
3302+
let query = CypherQuery::new(
3303+
"MATCH (a:Person)-[r:KNOWS]->(b:Person) \
3304+
WHERE r.since_year IS NULL \
3305+
RETURN a.name, b.name",
3306+
)
3307+
.unwrap()
3308+
.with_config(config);
3309+
3310+
// Both the node label and the relationship type need a backing dataset.
let mut datasets = HashMap::new();
3311+
datasets.insert("Person".to_string(), person_batch);
3312+
datasets.insert("KNOWS".to_string(), knows_batch);
3313+
3314+
let result = query.execute_datafusion(datasets).await.unwrap();
3315+
3316+
// One matching relationship, two projected columns (`a.name`, `b.name`).
assert_eq!(result.num_rows(), 1);
3317+
assert_eq!(result.num_columns(), 2);
3318+
3319+
// Both columns are read as `StringArray`; each `unwrap` panics on a type mismatch.
let a_names = result
3320+
.column(0)
3321+
.as_any()
3322+
.downcast_ref::<StringArray>()
3323+
.unwrap();
3324+
let b_names = result
3325+
.column(1)
3326+
.as_any()
3327+
.downcast_ref::<StringArray>()
3328+
.unwrap();
3329+
3330+
assert_eq!(a_names.value(0), "David");
3331+
assert_eq!(b_names.value(0), "Eve");
3332+
}
3333+
3334+
// Checks that a Cypher `IS NOT NULL` predicate on a relationship property
// filters rows when the query is executed through `execute_datafusion`.
//
// The query matches `(a:Person)-[r:KNOWS]->(b:Person)` and keeps only the
// relationships whose `since_year` is not NULL; the assertions expect four
// rows, none of which is the David -> Eve pair.
// NOTE(review): the expected count depends on `create_knows_dataset()`, which
// is defined elsewhere — confirm against the fixture.
// NOTE(review): unlike the other IS NULL / IS NOT NULL tests in this file,
// this one does not assert `result.num_columns()`, and it checks only that
// David -> Eve is absent rather than pinning the four expected pairs.
#[tokio::test]
3335+
async fn test_datafusion_is_not_null_relationship_property() {
3336+
let config = create_graph_config();
3337+
let person_batch = create_person_dataset();
3338+
let knows_batch = create_knows_dataset();
3339+
3340+
let query = CypherQuery::new(
3341+
"MATCH (a:Person)-[r:KNOWS]->(b:Person) \
3342+
WHERE r.since_year IS NOT NULL \
3343+
RETURN a.name, b.name",
3344+
)
3345+
.unwrap()
3346+
.with_config(config);
3347+
3348+
// Both the node label and the relationship type need a backing dataset.
let mut datasets = HashMap::new();
3349+
datasets.insert("Person".to_string(), person_batch);
3350+
datasets.insert("KNOWS".to_string(), knows_batch);
3351+
3352+
let result = query.execute_datafusion(datasets).await.unwrap();
3353+
3354+
assert_eq!(result.num_rows(), 4);
3355+
3356+
// Both columns are read as `StringArray`; each `unwrap` panics on a type mismatch.
let a_names = result
3357+
.column(0)
3358+
.as_any()
3359+
.downcast_ref::<StringArray>()
3360+
.unwrap();
3361+
let b_names = result
3362+
.column(1)
3363+
.as_any()
3364+
.downcast_ref::<StringArray>()
3365+
.unwrap();
3366+
3367+
// Negative check: the pair that the IS NULL test expects must not survive this filter.
for i in 0..result.num_rows() {
3368+
let a = a_names.value(i);
3369+
let b = b_names.value(i);
3370+
assert!(
3371+
!(a == "David" && b == "Eve"),
3372+
"David -> Eve should be filtered out by IS NOT NULL"
3373+
);
3374+
}
3375+
}

0 commit comments

Comments
 (0)