Skip to content

Commit 60e7756

Browse files
authored
feat: expose datafusion query execution apis (#28)
* feat: expose datafusion query execution apis * fix: fix the unqualified column names * test: add integration tests * fix typo
1 parent e89fe95 commit 60e7756

4 files changed

Lines changed: 1056 additions & 62 deletions

File tree

rust/lance-graph/src/datafusion_planner.rs

Lines changed: 182 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -372,11 +372,13 @@ impl DataFusionPlanner {
372372
.iter()
373373
.map(|p| {
374374
let expr = self.to_df_value_expr(&p.expression);
375-
// Apply alias if provided
375+
// Apply alias if provided, otherwise use Cypher dot notation
376376
if let Some(alias) = &p.alias {
377377
expr.alias(alias)
378378
} else {
379-
expr
379+
// Convert to Cypher dot notation (e.g., p__name -> p.name)
380+
let cypher_name = self.to_cypher_column_name(&p.expression);
381+
expr.alias(cypher_name)
380382
}
381383
})
382384
.collect();
@@ -1433,6 +1435,33 @@ impl DataFusionPlanner {
14331435
VE::Function { .. } | VE::Arithmetic { .. } => lit(0),
14341436
}
14351437
}
1438+
1439+
/// Convert a ValueExpression to Cypher dot notation for column naming
1440+
///
1441+
/// This generates user-friendly column names following Cypher conventions:
1442+
/// - Property references: `p.name` (variable.property)
1443+
/// - Variables: the variable name as-is; any other expression: the generic name `expr`
1444+
///
1445+
/// This is used when no explicit alias is provided in RETURN clauses.
1446+
fn to_cypher_column_name(&self, expr: &crate::ast::ValueExpression) -> String {
1447+
use crate::ast::ValueExpression as VE;
1448+
match expr {
1449+
VE::Property(prop) => {
1450+
// Convert to Cypher dot notation: variable.property
1451+
format!("{}.{}", prop.variable, prop.property)
1452+
}
1453+
VE::Variable(v) => v.clone(),
1454+
VE::Literal(crate::ast::PropertyValue::Property(prop)) => {
1455+
// A property reference wrapped in a literal value is named the same way: variable.property
1456+
format!("{}.{}", prop.variable, prop.property)
1457+
}
1458+
_ => {
1459+
// Every other expression (e.g. other literals, functions) gets the same generic name
1460+
// In practice, these should always have explicit aliases; NOTE(review): several unaliased ones would all be named `expr` - confirm that cannot happen
1461+
"expr".to_string()
1462+
}
1463+
}
1464+
}
14361465
}
14371466

14381467
#[cfg(test)]
@@ -1549,7 +1578,7 @@ mod tests {
15491578
}
15501579

15511580
#[test]
1552-
fn test_df_planner_property_pushdown_filter() {
1581+
fn test_df_planner_inline_property_filter() {
15531582
let mut props = std::collections::HashMap::new();
15541583
props.insert(
15551584
"name".to_string(),
@@ -3042,6 +3071,156 @@ mod tests {
30423071
);
30433072
}
30443073

3074+
#[test]
3075+
fn test_cypher_dot_notation_simple_property() {
3076+
// A single property projection with no alias should be named in Cypher dot notation (`p.name`)
3077+
let cfg = crate::config::GraphConfig::builder()
3078+
.with_node_label("Person", "id")
3079+
.build()
3080+
.unwrap();
3081+
3082+
let planner = DataFusionPlanner::with_catalog(cfg, make_catalog());
3083+
3084+
let scan = LogicalOperator::ScanByLabel {
3085+
variable: "p".to_string(),
3086+
label: "Person".to_string(),
3087+
properties: Default::default(),
3088+
};
3089+
3090+
// Project `p.name` on top of the scan without an alias - should use Cypher dot notation
3091+
let project = LogicalOperator::Project {
3092+
input: Box::new(scan),
3093+
projections: vec![ProjectionItem {
3094+
expression: ValueExpression::Property(PropertyRef {
3095+
variable: "p".to_string(),
3096+
property: "name".to_string(),
3097+
}),
3098+
alias: None, // No explicit alias
3099+
}],
3100+
};
3101+
3102+
let df_plan = planner.plan(&project).unwrap();
3103+
let plan_str = format!("{:?}", df_plan);
3104+
3105+
// The debug-formatted plan should mention `p.name` and must not contain the text `p__name AS`
3106+
assert!(
3107+
plan_str.contains("p.name"),
3108+
"Plan should contain Cypher dot notation 'p.name': {}",
3109+
plan_str
3110+
);
3111+
assert!(
3112+
!plan_str.contains("p__name AS"),
3113+
"Plan should not contain DataFusion qualified name 'p__name AS': {}",
3114+
plan_str
3115+
);
3116+
}
3117+
3118+
#[test]
3119+
fn test_cypher_dot_notation_multiple_properties() {
3120+
// Two unaliased properties of the same variable should each be named in dot notation
3121+
let cfg = crate::config::GraphConfig::builder()
3122+
.with_node_label("Person", "id")
3123+
.build()
3124+
.unwrap();
3125+
3126+
let planner = DataFusionPlanner::with_catalog(cfg, make_catalog());
3127+
3128+
let scan = LogicalOperator::ScanByLabel {
3129+
variable: "p".to_string(),
3130+
label: "Person".to_string(),
3131+
properties: Default::default(),
3132+
};
3133+
3134+
// Project `p.name` and `p.age`, both without aliases
3135+
let project = LogicalOperator::Project {
3136+
input: Box::new(scan),
3137+
projections: vec![
3138+
ProjectionItem {
3139+
expression: ValueExpression::Property(PropertyRef {
3140+
variable: "p".to_string(),
3141+
property: "name".to_string(),
3142+
}),
3143+
alias: None,
3144+
},
3145+
ProjectionItem {
3146+
expression: ValueExpression::Property(PropertyRef {
3147+
variable: "p".to_string(),
3148+
property: "age".to_string(),
3149+
}),
3150+
alias: None,
3151+
},
3152+
],
3153+
};
3154+
3155+
let df_plan = planner.plan(&project).unwrap();
3156+
let plan_str = format!("{:?}", df_plan);
3157+
3158+
// The debug-formatted plan should mention both `p.name` and `p.age`
3159+
assert!(
3160+
plan_str.contains("p.name"),
3161+
"Plan should contain 'p.name': {}",
3162+
plan_str
3163+
);
3164+
assert!(
3165+
plan_str.contains("p.age"),
3166+
"Plan should contain 'p.age': {}",
3167+
plan_str
3168+
);
3169+
}
3170+
3171+
#[test]
3172+
fn test_cypher_dot_notation_mixed_with_and_without_alias() {
3173+
// One aliased and one unaliased projection in the same Project operator
3174+
let cfg = crate::config::GraphConfig::builder()
3175+
.with_node_label("Person", "id")
3176+
.build()
3177+
.unwrap();
3178+
3179+
let planner = DataFusionPlanner::with_catalog(cfg, make_catalog());
3180+
3181+
let scan = LogicalOperator::ScanByLabel {
3182+
variable: "p".to_string(),
3183+
label: "Person".to_string(),
3184+
properties: Default::default(),
3185+
};
3186+
3187+
let project = LogicalOperator::Project {
3188+
input: Box::new(scan),
3189+
projections: vec![
3190+
ProjectionItem {
3191+
expression: ValueExpression::Property(PropertyRef {
3192+
variable: "p".to_string(),
3193+
property: "name".to_string(),
3194+
}),
3195+
alias: Some("full_name".to_string()), // Explicit alias
3196+
},
3197+
ProjectionItem {
3198+
expression: ValueExpression::Property(PropertyRef {
3199+
variable: "p".to_string(),
3200+
property: "age".to_string(),
3201+
}),
3202+
alias: None, // No alias - should use dot notation
3203+
},
3204+
],
3205+
};
3206+
3207+
let df_plan = planner.plan(&project).unwrap();
3208+
let plan_str = format!("{:?}", df_plan);
3209+
3210+
// The debug-formatted plan should mention the explicit alias `full_name`
3211+
assert!(
3212+
plan_str.contains("full_name"),
3213+
"Plan should contain explicit alias 'full_name': {}",
3214+
plan_str
3215+
);
3216+
// The unaliased property should still appear in Cypher dot notation (`p.age`)
3217+
assert!(
3218+
plan_str.contains("p.age"),
3219+
"Plan should contain Cypher dot notation 'p.age': {}",
3220+
plan_str
3221+
);
3222+
}
3223+
30453224
// ========================================================================
30463225
// Failure Scenario Tests
30473226
// ========================================================================

0 commit comments

Comments
 (0)