Skip to content

Commit e5969a4

Browse files
feat: add toLower/toUpper support, fix type coercion error
- Add support for toLower/toUpper (Cypher) and lower/upper (SQL) string functions - Change fallback for unsupported functions from lit(0) to ScalarValue::Null to prevent type coercion errors in any context (string or numeric) - Add unit tests for new string functions - Add integration tests including exact bug reproduction Fixes type coercion error when using toLower(s.name) CONTAINS 'x' with integer columns in RETURN clause.
1 parent cf358f4 commit e5969a4

2 files changed

Lines changed: 282 additions & 2 deletions

File tree

rust/lance-graph/src/datafusion_planner/expression.rs

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
use crate::ast::{BooleanExpression, PropertyValue, ValueExpression};
99
use crate::datafusion_planner::udf;
1010
use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator};
11+
use datafusion::functions::string::lower;
12+
use datafusion::functions::string::upper;
1113
use datafusion_functions_aggregate::average::avg;
1214
use datafusion_functions_aggregate::count::count;
1315
use datafusion_functions_aggregate::min_max::max;
@@ -186,9 +188,29 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr {
186188
lit(0)
187189
}
188190
}
191+
// String functions
192+
"tolower" | "lower" => {
193+
if args.len() == 1 {
194+
let arg_expr = to_df_value_expr(&args[0]);
195+
lower().call(vec![arg_expr])
196+
} else {
197+
// Invalid argument count - return NULL
198+
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
199+
}
200+
}
201+
"toupper" | "upper" => {
202+
if args.len() == 1 {
203+
let arg_expr = to_df_value_expr(&args[0]);
204+
upper().call(vec![arg_expr])
205+
} else {
206+
// Invalid argument count - return NULL
207+
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
208+
}
209+
}
189210
_ => {
190-
// Unsupported function - return placeholder for now
191-
lit(0)
211+
// Unsupported function - return NULL which coerces to any type
212+
// This prevents type coercion errors in both string and numeric contexts
213+
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
192214
}
193215
}
194216
}
@@ -1255,4 +1277,128 @@ mod tests {
12551277
let name = to_cypher_column_name(&expr);
12561278
assert_eq!(name, "expr", "Arithmetic should use generic name");
12571279
}
1280+
1281+
// ========================================================================
1282+
// Unit tests for string functions (toLower, toUpper)
1283+
// ========================================================================
1284+
1285+
#[test]
1286+
fn test_value_expr_function_tolower() {
1287+
let expr = ValueExpression::Function {
1288+
name: "toLower".into(),
1289+
args: vec![ValueExpression::Property(PropertyRef {
1290+
variable: "p".into(),
1291+
property: "name".into(),
1292+
})],
1293+
};
1294+
1295+
let df_expr = to_df_value_expr(&expr);
1296+
let s = format!("{:?}", df_expr);
1297+
// Should be a ScalarFunction with lower
1298+
assert!(
1299+
s.contains("lower") || s.contains("Lower"),
1300+
"Should use lower function, got: {}",
1301+
s
1302+
);
1303+
assert!(s.contains("p__name"), "Should contain column reference");
1304+
}
1305+
1306+
#[test]
1307+
fn test_value_expr_function_toupper() {
1308+
let expr = ValueExpression::Function {
1309+
name: "toUpper".into(),
1310+
args: vec![ValueExpression::Property(PropertyRef {
1311+
variable: "p".into(),
1312+
property: "name".into(),
1313+
})],
1314+
};
1315+
1316+
let df_expr = to_df_value_expr(&expr);
1317+
let s = format!("{:?}", df_expr);
1318+
// Should be a ScalarFunction with upper
1319+
assert!(
1320+
s.contains("upper") || s.contains("Upper"),
1321+
"Should use upper function, got: {}",
1322+
s
1323+
);
1324+
assert!(s.contains("p__name"), "Should contain column reference");
1325+
}
1326+
1327+
#[test]
1328+
fn test_value_expr_function_lower_alias() {
1329+
// Test that 'lower' also works (SQL-style alias)
1330+
let expr = ValueExpression::Function {
1331+
name: "lower".into(),
1332+
args: vec![ValueExpression::Property(PropertyRef {
1333+
variable: "p".into(),
1334+
property: "name".into(),
1335+
})],
1336+
};
1337+
1338+
let df_expr = to_df_value_expr(&expr);
1339+
let s = format!("{:?}", df_expr);
1340+
assert!(
1341+
s.contains("lower") || s.contains("Lower"),
1342+
"Should use lower function, got: {}",
1343+
s
1344+
);
1345+
}
1346+
1347+
#[test]
1348+
fn test_value_expr_function_upper_alias() {
1349+
// Test that 'upper' also works (SQL-style alias)
1350+
let expr = ValueExpression::Function {
1351+
name: "upper".into(),
1352+
args: vec![ValueExpression::Property(PropertyRef {
1353+
variable: "p".into(),
1354+
property: "name".into(),
1355+
})],
1356+
};
1357+
1358+
let df_expr = to_df_value_expr(&expr);
1359+
let s = format!("{:?}", df_expr);
1360+
assert!(
1361+
s.contains("upper") || s.contains("Upper"),
1362+
"Should use upper function, got: {}",
1363+
s
1364+
);
1365+
}
1366+
1367+
#[test]
1368+
fn test_tolower_with_contains_produces_valid_like() {
1369+
// This is the bug scenario: toLower(s.name) CONTAINS 'offer'
1370+
// Previously returned lit(0) which caused type coercion error
1371+
let tolower_expr = ValueExpression::Function {
1372+
name: "toLower".into(),
1373+
args: vec![ValueExpression::Property(PropertyRef {
1374+
variable: "s".into(),
1375+
property: "name".into(),
1376+
})],
1377+
};
1378+
1379+
let contains_expr = BooleanExpression::Contains {
1380+
expression: tolower_expr,
1381+
substring: "offer".into(),
1382+
};
1383+
1384+
let df_expr = to_df_boolean_expr(&contains_expr);
1385+
let s = format!("{:?}", df_expr);
1386+
1387+
// Should be a Like expression with lower() on the column, not lit(0)
1388+
assert!(
1389+
s.contains("Like"),
1390+
"Should be a LIKE expression, got: {}",
1391+
s
1392+
);
1393+
assert!(
1394+
s.contains("lower") || s.contains("Lower"),
1395+
"Should contain lower function, got: {}",
1396+
s
1397+
);
1398+
assert!(
1399+
!s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"),
1400+
"Should NOT contain literal 0 or empty string (placeholder bugs), got: {}",
1401+
s
1402+
);
1403+
}
12581404
}

rust/lance-graph/tests/test_datafusion_pipeline.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4115,3 +4115,137 @@ async fn test_datafusion_contains_in_relationship_query() {
41154115
]
41164116
);
41174117
}
4118+
4119+
// ============================================================================
4120+
// String Function Tests (toLower, toUpper)
4121+
// ============================================================================
4122+
4123+
#[tokio::test]
4124+
async fn test_tolower_with_contains() {
4125+
// This test verifies the fix for: toLower(p.name) CONTAINS 'ali'
4126+
// Previously this caused: "type_coercion: There isn't a common type to coerce Int32 and Utf8"
4127+
let config = create_graph_config();
4128+
let person_batch = create_person_dataset();
4129+
4130+
// Search for names containing 'ali' (case-insensitive via toLower)
4131+
let query = CypherQuery::new(
4132+
"MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'ali' RETURN p.name, p.age",
4133+
)
4134+
.unwrap()
4135+
.with_config(config);
4136+
4137+
let mut datasets = HashMap::new();
4138+
datasets.insert("Person".to_string(), person_batch);
4139+
4140+
let result = query
4141+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4142+
.await
4143+
.unwrap();
4144+
4145+
// Should find 'Alice' (toLower -> 'alice' contains 'ali')
4146+
assert_eq!(result.num_rows(), 1, "Expected 1 result for 'ali' search");
4147+
4148+
let names = result
4149+
.column(0)
4150+
.as_any()
4151+
.downcast_ref::<StringArray>()
4152+
.unwrap();
4153+
assert_eq!(names.value(0), "Alice");
4154+
}
4155+
4156+
#[tokio::test]
4157+
async fn test_toupper_with_contains() {
4158+
let config = create_graph_config();
4159+
let person_batch = create_person_dataset();
4160+
4161+
// Search for names containing 'BOB' (case-insensitive via toUpper)
4162+
let query = CypherQuery::new(
4163+
"MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name",
4164+
)
4165+
.unwrap()
4166+
.with_config(config);
4167+
4168+
let mut datasets = HashMap::new();
4169+
datasets.insert("Person".to_string(), person_batch);
4170+
4171+
let result = query
4172+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4173+
.await
4174+
.unwrap();
4175+
4176+
// Should find 'Bob'
4177+
assert_eq!(result.num_rows(), 1);
4178+
let names = result
4179+
.column(0)
4180+
.as_any()
4181+
.downcast_ref::<StringArray>()
4182+
.unwrap();
4183+
assert_eq!(names.value(0), "Bob");
4184+
}
4185+
4186+
#[tokio::test]
4187+
async fn test_tolower_in_return_clause() {
4188+
let config = create_graph_config();
4189+
let person_batch = create_person_dataset();
4190+
4191+
// Return lowercased names
4192+
let query = CypherQuery::new("MATCH (p:Person) WHERE p.name = 'Alice' RETURN toLower(p.name)")
4193+
.unwrap()
4194+
.with_config(config);
4195+
4196+
let mut datasets = HashMap::new();
4197+
datasets.insert("Person".to_string(), person_batch);
4198+
4199+
let result = query
4200+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4201+
.await
4202+
.unwrap();
4203+
4204+
assert_eq!(result.num_rows(), 1);
4205+
let names = result
4206+
.column(0)
4207+
.as_any()
4208+
.downcast_ref::<StringArray>()
4209+
.unwrap();
4210+
assert_eq!(names.value(0), "alice");
4211+
}
4212+
4213+
#[tokio::test]
4214+
async fn test_tolower_with_integer_column_in_return() {
4215+
// This is the exact bug scenario: toLower(s.name) CONTAINS 'x' RETURN s.name, s.age
4216+
// The bug was that returning an integer column (age) alongside the toLower filter
4217+
// caused type coercion errors because unsupported functions returned lit(0)
4218+
let config = create_graph_config();
4219+
let person_batch = create_person_dataset();
4220+
4221+
let query = CypherQuery::new(
4222+
"MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'cha' RETURN p.name, p.age",
4223+
)
4224+
.unwrap()
4225+
.with_config(config);
4226+
4227+
let mut datasets = HashMap::new();
4228+
datasets.insert("Person".to_string(), person_batch);
4229+
4230+
let result = query
4231+
.execute(datasets, Some(ExecutionStrategy::DataFusion))
4232+
.await
4233+
.unwrap();
4234+
4235+
// Should find 'Charlie' (toLower -> 'charlie' contains 'cha')
4236+
assert_eq!(result.num_rows(), 1);
4237+
4238+
let names = result
4239+
.column(0)
4240+
.as_any()
4241+
.downcast_ref::<StringArray>()
4242+
.unwrap();
4243+
let ages = result
4244+
.column(1)
4245+
.as_any()
4246+
.downcast_ref::<Int64Array>()
4247+
.unwrap();
4248+
4249+
assert_eq!(names.value(0), "Charlie");
4250+
assert_eq!(ages.value(0), 30);
4251+
}

0 commit comments

Comments
 (0)