Skip to content

Commit 357c8aa

Browse files
feat: add toLower/toUpper support, fix type coercion error (#82)
* feat: add toLower/toUpper support, fix type coercion error - Add support for toLower/toUpper (Cypher) and lower/upper (SQL) string functions - Change fallback for unsupported functions from lit(0) to ScalarValue::Null to prevent type coercion errors in any context (string or numeric) - Add unit tests for new string functions - Add integration tests including exact bug reproduction Fixes type coercion error when using toLower(s.name) CONTAINS 'x' with integer columns in RETURN clause. * chore: remove redundant test section headers * fix: move string function tests to correct section * style: apply cargo fmt formatting Co-Authored-By: Claude <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent c49c050 commit 357c8aa

2 files changed

Lines changed: 273 additions & 2 deletions

File tree

rust/lance-graph/src/datafusion_planner/expression.rs

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
88
use crate::ast::{BooleanExpression, PropertyValue, ValueExpression};
99
use crate::datafusion_planner::udf;
10+
use datafusion::functions::string::lower;
11+
use datafusion::functions::string::upper;
1012
use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator};
1113
use datafusion_functions_aggregate::average::avg;
1214
use datafusion_functions_aggregate::count::count;
@@ -186,9 +188,29 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr {
186188
lit(0)
187189
}
188190
}
191+
// String functions
192+
"tolower" | "lower" => {
193+
if args.len() == 1 {
194+
let arg_expr = to_df_value_expr(&args[0]);
195+
lower().call(vec![arg_expr])
196+
} else {
197+
// Invalid argument count - return NULL
198+
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
199+
}
200+
}
201+
"toupper" | "upper" => {
202+
if args.len() == 1 {
203+
let arg_expr = to_df_value_expr(&args[0]);
204+
upper().call(vec![arg_expr])
205+
} else {
206+
// Invalid argument count - return NULL
207+
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
208+
}
209+
}
189210
_ => {
190-
// Unsupported function - return placeholder for now
191-
lit(0)
211+
// Unsupported function - return NULL which coerces to any type
212+
// This prevents type coercion errors in both string and numeric contexts
213+
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
192214
}
193215
}
194216
}
@@ -1032,6 +1054,126 @@ mod tests {
10321054
assert!(s.contains("p__amount"), "Should contain column reference");
10331055
}
10341056

1057+
#[test]
1058+
fn test_value_expr_function_tolower() {
1059+
let expr = ValueExpression::Function {
1060+
name: "toLower".into(),
1061+
args: vec![ValueExpression::Property(PropertyRef {
1062+
variable: "p".into(),
1063+
property: "name".into(),
1064+
})],
1065+
};
1066+
1067+
let df_expr = to_df_value_expr(&expr);
1068+
let s = format!("{:?}", df_expr);
1069+
// Should be a ScalarFunction with lower
1070+
assert!(
1071+
s.contains("lower") || s.contains("Lower"),
1072+
"Should use lower function, got: {}",
1073+
s
1074+
);
1075+
assert!(s.contains("p__name"), "Should contain column reference");
1076+
}
1077+
1078+
#[test]
1079+
fn test_value_expr_function_toupper() {
1080+
let expr = ValueExpression::Function {
1081+
name: "toUpper".into(),
1082+
args: vec![ValueExpression::Property(PropertyRef {
1083+
variable: "p".into(),
1084+
property: "name".into(),
1085+
})],
1086+
};
1087+
1088+
let df_expr = to_df_value_expr(&expr);
1089+
let s = format!("{:?}", df_expr);
1090+
// Should be a ScalarFunction with upper
1091+
assert!(
1092+
s.contains("upper") || s.contains("Upper"),
1093+
"Should use upper function, got: {}",
1094+
s
1095+
);
1096+
assert!(s.contains("p__name"), "Should contain column reference");
1097+
}
1098+
1099+
#[test]
1100+
fn test_value_expr_function_lower_alias() {
1101+
// Test that 'lower' also works (SQL-style alias)
1102+
let expr = ValueExpression::Function {
1103+
name: "lower".into(),
1104+
args: vec![ValueExpression::Property(PropertyRef {
1105+
variable: "p".into(),
1106+
property: "name".into(),
1107+
})],
1108+
};
1109+
1110+
let df_expr = to_df_value_expr(&expr);
1111+
let s = format!("{:?}", df_expr);
1112+
assert!(
1113+
s.contains("lower") || s.contains("Lower"),
1114+
"Should use lower function, got: {}",
1115+
s
1116+
);
1117+
}
1118+
1119+
#[test]
1120+
fn test_value_expr_function_upper_alias() {
1121+
// Test that 'upper' also works (SQL-style alias)
1122+
let expr = ValueExpression::Function {
1123+
name: "upper".into(),
1124+
args: vec![ValueExpression::Property(PropertyRef {
1125+
variable: "p".into(),
1126+
property: "name".into(),
1127+
})],
1128+
};
1129+
1130+
let df_expr = to_df_value_expr(&expr);
1131+
let s = format!("{:?}", df_expr);
1132+
assert!(
1133+
s.contains("upper") || s.contains("Upper"),
1134+
"Should use upper function, got: {}",
1135+
s
1136+
);
1137+
}
1138+
1139+
#[test]
1140+
fn test_tolower_with_contains_produces_valid_like() {
1141+
// This is the bug scenario: toLower(s.name) CONTAINS 'offer'
1142+
// Previously returned lit(0) which caused type coercion error
1143+
let tolower_expr = ValueExpression::Function {
1144+
name: "toLower".into(),
1145+
args: vec![ValueExpression::Property(PropertyRef {
1146+
variable: "s".into(),
1147+
property: "name".into(),
1148+
})],
1149+
};
1150+
1151+
let contains_expr = BooleanExpression::Contains {
1152+
expression: tolower_expr,
1153+
substring: "offer".into(),
1154+
};
1155+
1156+
let df_expr = to_df_boolean_expr(&contains_expr);
1157+
let s = format!("{:?}", df_expr);
1158+
1159+
// Should be a Like expression with lower() on the column, not lit(0)
1160+
assert!(
1161+
s.contains("Like"),
1162+
"Should be a LIKE expression, got: {}",
1163+
s
1164+
);
1165+
assert!(
1166+
s.contains("lower") || s.contains("Lower"),
1167+
"Should contain lower function, got: {}",
1168+
s
1169+
);
1170+
assert!(
1171+
!s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"),
1172+
"Should NOT contain literal 0 or empty string (placeholder bugs), got: {}",
1173+
s
1174+
);
1175+
}
1176+
10351177
// ========================================================================
10361178
// Unit tests for contains_aggregate()
10371179
// ========================================================================

rust/lance-graph/tests/test_datafusion_pipeline.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4115,3 +4115,132 @@ async fn test_datafusion_contains_in_relationship_query() {
41154115
]
41164116
);
41174117
}
4118+
4119+
#[tokio::test]
4120+
async fn test_tolower_with_contains() {
4121+
// This test verifies the fix for: toLower(p.name) CONTAINS 'ali'
4122+
// Previously this caused: "type_coercion: There isn't a common type to coerce Int32 and Utf8"
4123+
let config = create_graph_config();
4124+
let person_batch = create_person_dataset();
4125+
4126+
// Search for names containing 'ali' (case-insensitive via toLower)
4127+
let query = CypherQuery::new(
4128+
"MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'ali' RETURN p.name, p.age",
4129+
)
4130+
.unwrap()
4131+
.with_config(config);
4132+
4133+
let mut datasets = HashMap::new();
4134+
datasets.insert("Person".to_string(), person_batch);
4135+
4136+
let result = query
4137+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4138+
.await
4139+
.unwrap();
4140+
4141+
// Should find 'Alice' (toLower -> 'alice' contains 'ali')
4142+
assert_eq!(result.num_rows(), 1, "Expected 1 result for 'ali' search");
4143+
4144+
let names = result
4145+
.column(0)
4146+
.as_any()
4147+
.downcast_ref::<StringArray>()
4148+
.unwrap();
4149+
assert_eq!(names.value(0), "Alice");
4150+
}
4151+
4152+
#[tokio::test]
4153+
async fn test_toupper_with_contains() {
4154+
let config = create_graph_config();
4155+
let person_batch = create_person_dataset();
4156+
4157+
// Search for names containing 'BOB' (case-insensitive via toUpper)
4158+
let query =
4159+
CypherQuery::new("MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name")
4160+
.unwrap()
4161+
.with_config(config);
4162+
4163+
let mut datasets = HashMap::new();
4164+
datasets.insert("Person".to_string(), person_batch);
4165+
4166+
let result = query
4167+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4168+
.await
4169+
.unwrap();
4170+
4171+
// Should find 'Bob'
4172+
assert_eq!(result.num_rows(), 1);
4173+
let names = result
4174+
.column(0)
4175+
.as_any()
4176+
.downcast_ref::<StringArray>()
4177+
.unwrap();
4178+
assert_eq!(names.value(0), "Bob");
4179+
}
4180+
4181+
#[tokio::test]
4182+
async fn test_tolower_in_return_clause() {
4183+
let config = create_graph_config();
4184+
let person_batch = create_person_dataset();
4185+
4186+
// Return lowercased names
4187+
let query = CypherQuery::new("MATCH (p:Person) WHERE p.name = 'Alice' RETURN toLower(p.name)")
4188+
.unwrap()
4189+
.with_config(config);
4190+
4191+
let mut datasets = HashMap::new();
4192+
datasets.insert("Person".to_string(), person_batch);
4193+
4194+
let result = query
4195+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4196+
.await
4197+
.unwrap();
4198+
4199+
assert_eq!(result.num_rows(), 1);
4200+
let names = result
4201+
.column(0)
4202+
.as_any()
4203+
.downcast_ref::<StringArray>()
4204+
.unwrap();
4205+
assert_eq!(names.value(0), "alice");
4206+
}
4207+
4208+
#[tokio::test]
4209+
async fn test_tolower_with_integer_column_in_return() {
4210+
// This is the exact bug scenario: toLower(s.name) CONTAINS 'x' RETURN s.name, s.age
4211+
// The bug was that returning an integer column (age) alongside the toLower filter
4212+
// caused type coercion errors because unsupported functions returned lit(0)
4213+
let config = create_graph_config();
4214+
let person_batch = create_person_dataset();
4215+
4216+
let query = CypherQuery::new(
4217+
"MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'cha' RETURN p.name, p.age",
4218+
)
4219+
.unwrap()
4220+
.with_config(config);
4221+
4222+
let mut datasets = HashMap::new();
4223+
datasets.insert("Person".to_string(), person_batch);
4224+
4225+
let result = query
4226+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4227+
.await
4228+
.unwrap();
4229+
4230+
// Should find 'Charlie' (toLower -> 'charlie' contains 'cha')
4231+
assert_eq!(result.num_rows(), 1);
4232+
4233+
let names = result
4234+
.column(0)
4235+
.as_any()
4236+
.downcast_ref::<StringArray>()
4237+
.unwrap();
4238+
let ages = result
4239+
.column(1)
4240+
.as_any()
4241+
.downcast_ref::<Int64Array>()
4242+
.unwrap();
4243+
4244+
assert_eq!(names.value(0), "Charlie");
4245+
assert_eq!(ages.value(0), 30);
4246+
}

0 commit comments

Comments
 (0)