From 2803482b753883ce4b3980c26c1e24e6d9e23106 Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Tue, 23 Dec 2025 18:34:17 -0800 Subject: [PATCH 1/2] feat: implement the LIKE pattern matching --- .../src/datafusion_planner/expression.rs | 57 +++++++- rust/lance-graph/src/parser.rs | 81 +++++++++++ .../tests/test_datafusion_pipeline.rs | 131 ++++++++++++++++++ 3 files changed, 268 insertions(+), 1 deletion(-) diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index 5414b5d8..dd88ab74 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -60,7 +60,13 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { ))), BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))), BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))), - _ => lit(true), + BE::Like { expression, pattern } => Expr::Like(datafusion::logical_expr::Like { + negated: false, + expr: Box::new(to_df_value_expr(expression)), + pattern: Box::new(lit(pattern.clone())), + escape_char: None, + case_insensitive: false, + }), } } @@ -414,6 +420,55 @@ mod tests { } } + #[test] + fn test_boolean_expr_like() { + let expr = BooleanExpression::Like { + expression: ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + }), + pattern: "A%".into(), + }; + + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + assert!(!like_expr.negated, "Should not be negated"); + assert!(!like_expr.case_insensitive, "Should be case sensitive"); + assert_eq!(like_expr.escape_char, None, "Should have no escape char"); + match *like_expr.expr { + Expr::Column(ref col_expr) => { + assert_eq!(col_expr.name(), "p__name"); + } + other => panic!("Expected column expression, got {:?}", other), + } + // Check pattern is a literal + match *like_expr.pattern { + Expr::Literal(..) => {} // Success + other => panic!("Expected literal pattern, got {:?}", other), + } + } else { + panic!("Expected Like expression"); + } + } + + #[test] + fn test_boolean_expr_like_with_wildcard() { + let expr = BooleanExpression::Like { + expression: ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "email".into(), + }), + pattern: "%@example.com".into(), + }; + + let df_expr = to_df_boolean_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("Like") || s.contains("like"), + "Should be a LIKE expression" + ); + assert!(s.contains("p__email"), "Should contain column reference"); + } + // ======================================================================== // Unit tests for to_df_value_expr() // ======================================================================== diff --git a/rust/lance-graph/src/parser.rs b/rust/lance-graph/src/parser.rs index 4d0f33ec..b33b4e18 100644 --- a/rust/lance-graph/src/parser.rs +++ b/rust/lance-graph/src/parser.rs @@ -329,6 +329,18 @@ fn comparison_expression(input: &str) -> IResult<&str, BooleanExpression> { }, )); } + // Match LIKE pattern + if let Ok((input_after_like, (_, _, pattern))) = + tuple((tag_no_case("LIKE"), multispace0, string_literal))(input) + { + return Ok(( + input_after_like, + BooleanExpression::Like { + expression: left, + pattern, + }, + )); + } // Match is null if let Ok((rest, ())) = is_null_comparison(input) { return Ok((rest, BooleanExpression::IsNull(left_clone))); @@ -1018,4 +1030,73 @@ mod tests { _ => panic!("Expected Function expression"), } } + + #[test] + fn test_parse_like_pattern() { + let query = "MATCH (n:Person) WHERE n.name LIKE 'A%' RETURN n.name"; + let result = parse_cypher_query(query); + assert!(result.is_ok(), "LIKE pattern should parse successfully"); + + let ast = result.unwrap(); + let where_clause = ast.where_clause.expect("Expected WHERE clause"); + + match where_clause.expression { + BooleanExpression::Like { expression, pattern } => { + match expression { + ValueExpression::Property(prop) => { + assert_eq!(prop.variable, "n"); + assert_eq!(prop.property, "name"); + } + _ => panic!("Expected property expression"), + } + assert_eq!(pattern, "A%"); + } + _ => panic!("Expected LIKE expression"), + } + } + + #[test] + fn test_parse_like_with_double_quotes() { + let query = r#"MATCH (n:Person) WHERE n.email LIKE "%@example.com" RETURN n.email"#; + let result = parse_cypher_query(query); + assert!(result.is_ok(), "LIKE with double quotes should parse"); + + let ast = result.unwrap(); + let where_clause = ast.where_clause.expect("Expected WHERE clause"); + + match where_clause.expression { + BooleanExpression::Like { pattern, .. } => { + assert_eq!(pattern, "%@example.com"); + } + _ => panic!("Expected LIKE expression"), + } + } + + #[test] + fn test_parse_like_in_complex_where() { + let query = "MATCH (n:Person) WHERE n.age > 20 AND n.name LIKE 'J%' RETURN n.name"; + let result = parse_cypher_query(query); + assert!(result.is_ok(), "LIKE in complex WHERE should parse"); + + let ast = result.unwrap(); + let where_clause = ast.where_clause.expect("Expected WHERE clause"); + + match where_clause.expression { + BooleanExpression::And(left, right) => { + // Left should be age > 20 + match *left { + BooleanExpression::Comparison { .. } => {} + _ => panic!("Expected comparison on left"), + } + // Right should be LIKE + match *right { + BooleanExpression::Like { pattern, .. } => { + assert_eq!(pattern, "J%"); + } + _ => panic!("Expected LIKE expression on right"), + } + } + _ => panic!("Expected AND expression"), + } + } } diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs index 1d244ea8..d67bdc04 100644 --- a/rust/lance-graph/tests/test_datafusion_pipeline.rs +++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs @@ -3776,3 +3776,134 @@ async fn test_datafusion_is_not_null_relationship_property() { ); } } + +// ============================================================================ +// LIKE Pattern Matching Tests +// ============================================================================ + +#[tokio::test] +async fn test_datafusion_like_contains_match() { + // Test LIKE with contains pattern (anywhere in string) + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + let query = CypherQuery::new( + "MATCH (p:Person) \ + WHERE p.city LIKE '%ea%' \ + RETURN p.name ORDER BY p.name", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should match: Seattle (Eve) + assert_eq!(result.num_rows(), 1); + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "Eve"); +} + +#[tokio::test] +async fn test_datafusion_like_with_and_condition() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + let query = CypherQuery::new( + "MATCH (p:Person) \ + WHERE p.age > 30 AND p.name LIKE '%e' \ + RETURN p.name", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should match: Charlie (age 30 is NOT > 30, so excluded) + // Bob (age 35), David (age 40), Eve (age 28 not > 30) + // Names ending with 'e': Alice, Charlie, Eve + // age > 30 AND name ends with 'e': None (Alice is 25, Charlie is 30, Eve is 28) + assert_eq!(result.num_rows(), 0); +} + +#[tokio::test] +async fn test_datafusion_like_in_relationship_query() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + let knows_batch = create_knows_dataset(); + + let query = CypherQuery::new( + "MATCH (a:Person)-[r:KNOWS]->(b:Person) \ + WHERE a.name LIKE 'A%' \ + RETURN a.name, b.name ORDER BY b.name", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + datasets.insert("KNOWS".to_string(), knows_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Alice knows Bob and Charlie + assert_eq!(result.num_rows(), 2); + let a_names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let b_names = result + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(a_names.value(0), "Alice"); + assert_eq!(b_names.value(0), "Bob"); + assert_eq!(a_names.value(1), "Alice"); + assert_eq!(b_names.value(1), "Charlie"); +} + +#[tokio::test] +async fn test_datafusion_like_case_sensitive() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + let query = CypherQuery::new( + "MATCH (p:Person) \ + WHERE p.name LIKE 'a%' \ + RETURN p.name", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should not match 'Alice' (lowercase 'a' vs uppercase 'A') + assert_eq!(result.num_rows(), 0); +} From 23088bda9f58eec1b0b95e51cd450ef54201fd75 Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Tue, 23 Dec 2025 21:52:27 -0800 Subject: [PATCH 2/2] format code --- rust/lance-graph/src/datafusion_planner/expression.rs | 5 ++++- rust/lance-graph/src/parser.rs | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index dd88ab74..a804e0da 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -60,7 +60,10 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { ))), BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))), BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))), - BE::Like { expression, pattern } => Expr::Like(datafusion::logical_expr::Like { + BE::Like { + expression, + pattern, + } => Expr::Like(datafusion::logical_expr::Like { negated: false, expr: Box::new(to_df_value_expr(expression)), pattern: Box::new(lit(pattern.clone())), diff --git a/rust/lance-graph/src/parser.rs b/rust/lance-graph/src/parser.rs index b33b4e18..b7f62d51 100644 --- a/rust/lance-graph/src/parser.rs +++ b/rust/lance-graph/src/parser.rs @@ -1041,7 +1041,10 @@ mod tests { let where_clause = ast.where_clause.expect("Expected WHERE clause"); match where_clause.expression { - BooleanExpression::Like { expression, pattern } => { + BooleanExpression::Like { + expression, + pattern, + } => { match expression { ValueExpression::Property(prop) => { assert_eq!(prop.variable, "n");