Skip to content

Commit 2803482

Browse files
committed
feat: implement the LIKE pattern matching
1 parent 775c258 commit 2803482

3 files changed

Lines changed: 268 additions & 1 deletion

File tree

rust/lance-graph/src/datafusion_planner/expression.rs

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr {
6060
))),
6161
BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))),
6262
BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))),
63-
_ => lit(true),
63+
BE::Like { expression, pattern } => Expr::Like(datafusion::logical_expr::Like {
64+
negated: false,
65+
expr: Box::new(to_df_value_expr(expression)),
66+
pattern: Box::new(lit(pattern.clone())),
67+
escape_char: None,
68+
case_insensitive: false,
69+
}),
6470
}
6571
}
6672

@@ -414,6 +420,55 @@ mod tests {
414420
}
415421
}
416422

423+
/// Checks how a `BooleanExpression::Like` over a property reference is
/// lowered by `to_df_boolean_expr`: the result must be a non-negated,
/// case-sensitive `Expr::Like` without an escape character, whose operand is
/// the `p__name` column and whose pattern is a literal carrying `A%`.
#[test]
fn test_boolean_expr_like() {
    let expr = BooleanExpression::Like {
        expression: ValueExpression::Property(PropertyRef {
            variable: "p".into(),
            property: "name".into(),
        }),
        pattern: "A%".into(),
    };

    let like_expr = match to_df_boolean_expr(&expr) {
        Expr::Like(like_expr) => like_expr,
        _ => panic!("Expected Like expression"),
    };

    assert!(!like_expr.negated, "Should not be negated");
    assert!(!like_expr.case_insensitive, "Should be case sensitive");
    assert_eq!(like_expr.escape_char, None, "Should have no escape char");

    // The matched operand must be the column derived from `p.name`.
    match *like_expr.expr {
        Expr::Column(ref col_expr) => {
            assert_eq!(col_expr.name(), "p__name");
        }
        other => panic!("Expected column expression, got {:?}", other),
    }

    // The pattern must be a literal AND carry the original pattern text.
    // The text is checked through the Debug rendering so the assertion does
    // not depend on the exact field layout of the literal variant.
    let pattern_debug = format!("{:?}", like_expr.pattern);
    match *like_expr.pattern {
        Expr::Literal(..) => assert!(
            pattern_debug.contains("A%"),
            "Pattern literal should carry the original pattern text, got {}",
            pattern_debug
        ),
        other => panic!("Expected literal pattern, got {:?}", other),
    }
}
452+
453+
/// Smoke test: a LIKE with a leading wildcard over `p.email` must render
/// (via `Debug`) as a LIKE expression that references the `p__email` column.
#[test]
fn test_boolean_expr_like_with_wildcard() {
    let email_property = ValueExpression::Property(PropertyRef {
        variable: "p".into(),
        property: "email".into(),
    });
    let like = BooleanExpression::Like {
        expression: email_property,
        pattern: "%@example.com".into(),
    };

    let rendered = format!("{:?}", to_df_boolean_expr(&like));

    // Accept either capitalisation of the variant name in the Debug output.
    let mentions_like = ["Like", "like"]
        .iter()
        .any(|needle| rendered.contains(*needle));
    assert!(mentions_like, "Should be a LIKE expression");
    assert!(
        rendered.contains("p__email"),
        "Should contain column reference"
    );
}
471+
417472
// ========================================================================
418473
// Unit tests for to_df_value_expr()
419474
// ========================================================================

rust/lance-graph/src/parser.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,18 @@ fn comparison_expression(input: &str) -> IResult<&str, BooleanExpression> {
329329
},
330330
));
331331
}
332+
// Match LIKE pattern
333+
if let Ok((input_after_like, (_, _, pattern))) =
334+
tuple((tag_no_case("LIKE"), multispace0, string_literal))(input)
335+
{
336+
return Ok((
337+
input_after_like,
338+
BooleanExpression::Like {
339+
expression: left,
340+
pattern,
341+
},
342+
));
343+
}
332344
// Match is null
333345
if let Ok((rest, ())) = is_null_comparison(input) {
334346
return Ok((rest, BooleanExpression::IsNull(left_clone)));
@@ -1018,4 +1030,73 @@ mod tests {
10181030
_ => panic!("Expected Function expression"),
10191031
}
10201032
}
1033+
1034+
/// Parses a single-quoted LIKE predicate and checks that the WHERE clause
/// becomes `BooleanExpression::Like` over property `n.name` with pattern `A%`.
#[test]
fn test_parse_like_pattern() {
    let parsed = parse_cypher_query("MATCH (n:Person) WHERE n.name LIKE 'A%' RETURN n.name");
    assert!(parsed.is_ok(), "LIKE pattern should parse successfully");

    let filter = parsed
        .unwrap()
        .where_clause
        .expect("Expected WHERE clause");

    // Pull the two LIKE components out first, then inspect each separately.
    let (expression, pattern) = match filter.expression {
        BooleanExpression::Like {
            expression,
            pattern,
        } => (expression, pattern),
        _ => panic!("Expected LIKE expression"),
    };

    match expression {
        ValueExpression::Property(prop) => {
            assert_eq!(prop.variable, "n");
            assert_eq!(prop.property, "name");
        }
        _ => panic!("Expected property expression"),
    }
    assert_eq!(pattern, "A%");
}
1057+
1058+
/// A LIKE pattern written with double quotes must parse and yield the
/// pattern text without the surrounding quotes.
#[test]
fn test_parse_like_with_double_quotes() {
    let parsed =
        parse_cypher_query(r#"MATCH (n:Person) WHERE n.email LIKE "%@example.com" RETURN n.email"#);
    assert!(parsed.is_ok(), "LIKE with double quotes should parse");

    let filter = parsed
        .unwrap()
        .where_clause
        .expect("Expected WHERE clause");

    let pattern = match filter.expression {
        BooleanExpression::Like { pattern, .. } => pattern,
        _ => panic!("Expected LIKE expression"),
    };
    assert_eq!(pattern, "%@example.com");
}
1074+
1075+
/// A LIKE predicate combined with another predicate through AND must parse
/// into `And(<comparison>, <like>)`, with the LIKE keeping its pattern `J%`.
#[test]
fn test_parse_like_in_complex_where() {
    let parsed = parse_cypher_query(
        "MATCH (n:Person) WHERE n.age > 20 AND n.name LIKE 'J%' RETURN n.name",
    );
    assert!(parsed.is_ok(), "LIKE in complex WHERE should parse");

    let filter = parsed
        .unwrap()
        .where_clause
        .expect("Expected WHERE clause");

    let (lhs, rhs) = match filter.expression {
        BooleanExpression::And(lhs, rhs) => (lhs, rhs),
        _ => panic!("Expected AND expression"),
    };

    // Left operand: the `n.age > 20` comparison.
    if !matches!(*lhs, BooleanExpression::Comparison { .. }) {
        panic!("Expected comparison on left");
    }

    // Right operand: the LIKE predicate.
    match *rhs {
        BooleanExpression::Like { pattern, .. } => assert_eq!(pattern, "J%"),
        _ => panic!("Expected LIKE expression on right"),
    }
}
10211102
}

rust/lance-graph/tests/test_datafusion_pipeline.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3776,3 +3776,134 @@ async fn test_datafusion_is_not_null_relationship_property() {
37763776
);
37773777
}
37783778
}
3779+
3780+
// ============================================================================
3781+
// LIKE Pattern Matching Tests
3782+
// ============================================================================
3783+
3784+
#[tokio::test]
3785+
async fn test_datafusion_like_contains_match() {
3786+
// Test LIKE with contains pattern (anywhere in string)
3787+
let config = create_graph_config();
3788+
let person_batch = create_person_dataset();
3789+
3790+
let query = CypherQuery::new(
3791+
"MATCH (p:Person) \
3792+
WHERE p.city LIKE '%ea%' \
3793+
RETURN p.name ORDER BY p.name",
3794+
)
3795+
.unwrap()
3796+
.with_config(config);
3797+
3798+
let mut datasets = HashMap::new();
3799+
datasets.insert("Person".to_string(), person_batch);
3800+
3801+
let result = query
3802+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
3803+
.await
3804+
.unwrap();
3805+
3806+
// Should match: Seattle (Eve)
3807+
assert_eq!(result.num_rows(), 1);
3808+
let names = result
3809+
.column(0)
3810+
.as_any()
3811+
.downcast_ref::<StringArray>()
3812+
.unwrap();
3813+
assert_eq!(names.value(0), "Eve");
3814+
}
3815+
3816+
#[tokio::test]
3817+
async fn test_datafusion_like_with_and_condition() {
3818+
let config = create_graph_config();
3819+
let person_batch = create_person_dataset();
3820+
3821+
let query = CypherQuery::new(
3822+
"MATCH (p:Person) \
3823+
WHERE p.age > 30 AND p.name LIKE '%e' \
3824+
RETURN p.name",
3825+
)
3826+
.unwrap()
3827+
.with_config(config);
3828+
3829+
let mut datasets = HashMap::new();
3830+
datasets.insert("Person".to_string(), person_batch);
3831+
3832+
let result = query
3833+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
3834+
.await
3835+
.unwrap();
3836+
3837+
// Should match: Charlie (age 30 is NOT > 30, so excluded)
3838+
// Bob (age 35), David (age 40), Eve (age 28 not > 30)
3839+
// Names ending with 'e': Alice, Charlie, Eve
3840+
// age > 30 AND name ends with 'e': None (Alice is 25, Charlie is 30, Eve is 28)
3841+
assert_eq!(result.num_rows(), 0);
3842+
}
3843+
3844+
#[tokio::test]
3845+
async fn test_datafusion_like_in_relationship_query() {
3846+
let config = create_graph_config();
3847+
let person_batch = create_person_dataset();
3848+
let knows_batch = create_knows_dataset();
3849+
3850+
let query = CypherQuery::new(
3851+
"MATCH (a:Person)-[r:KNOWS]->(b:Person) \
3852+
WHERE a.name LIKE 'A%' \
3853+
RETURN a.name, b.name ORDER BY b.name",
3854+
)
3855+
.unwrap()
3856+
.with_config(config);
3857+
3858+
let mut datasets = HashMap::new();
3859+
datasets.insert("Person".to_string(), person_batch);
3860+
datasets.insert("KNOWS".to_string(), knows_batch);
3861+
3862+
let result = query
3863+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
3864+
.await
3865+
.unwrap();
3866+
3867+
// Alice knows Bob and Charlie
3868+
assert_eq!(result.num_rows(), 2);
3869+
let a_names = result
3870+
.column(0)
3871+
.as_any()
3872+
.downcast_ref::<StringArray>()
3873+
.unwrap();
3874+
let b_names = result
3875+
.column(1)
3876+
.as_any()
3877+
.downcast_ref::<StringArray>()
3878+
.unwrap();
3879+
3880+
assert_eq!(a_names.value(0), "Alice");
3881+
assert_eq!(b_names.value(0), "Bob");
3882+
assert_eq!(a_names.value(1), "Alice");
3883+
assert_eq!(b_names.value(1), "Charlie");
3884+
}
3885+
3886+
#[tokio::test]
3887+
async fn test_datafusion_like_case_sensitive() {
3888+
let config = create_graph_config();
3889+
let person_batch = create_person_dataset();
3890+
3891+
let query = CypherQuery::new(
3892+
"MATCH (p:Person) \
3893+
WHERE p.name LIKE 'a%' \
3894+
RETURN p.name",
3895+
)
3896+
.unwrap()
3897+
.with_config(config);
3898+
3899+
let mut datasets = HashMap::new();
3900+
datasets.insert("Person".to_string(), person_batch);
3901+
3902+
let result = query
3903+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
3904+
.await
3905+
.unwrap();
3906+
3907+
// Should not match 'Alice' (lowercase 'a' vs uppercase 'A')
3908+
assert_eq!(result.num_rows(), 0);
3909+
}

0 commit comments

Comments
 (0)