Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions python/python/tests/test_to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,40 +165,6 @@ def test_collaborative_network_query(knowledge_graph_env):
assert "ORDER BY" in sql_upper


def test_parameterized_complex_query(knowledge_graph_env):
    """Translate a three-parameter Cypher query to SQL and sanity-check it.

    The query selects authors of one country whose papers exceed a citation
    threshold and were published in or after a given year. Only the presence
    of the main SQL clauses is checked, not the full SQL text.
    """
    config, datasets = knowledge_graph_env

    # The literal is kept flush-left so its contents stay exactly as written.
    cypher_text = """
MATCH (a:Author)-[:AUTHORED]->(p:Paper)
WHERE a.country = $country
AND p.citations > $min_citations
AND p.year >= $min_year
RETURN a.name, a.h_index, p.title, p.citations
ORDER BY p.citations DESC, a.h_index DESC
"""

    # Insertion order of the dict matches the order the parameters are bound.
    bindings = {
        "country": "USA",
        "min_citations": 300,
        "min_year": 2020,
    }

    query = CypherQuery(cypher_text).with_config(config)
    for param_name, param_value in bindings.items():
        query = query.with_parameter(param_name, param_value)

    sql = query.to_sql(datasets)

    assert isinstance(sql, str)
    sql_upper = sql.upper()
    # Every major clause of the generated statement must be present.
    for clause in ("SELECT", "JOIN", "WHERE", "ORDER BY"):
        assert clause in sql_upper


def test_to_sql_without_config_raises_error(knowledge_graph_env):
"""Test that to_sql fails gracefully without config."""
_, datasets = knowledge_graph_env
Expand Down
28 changes: 28 additions & 0 deletions rust/lance-graph/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,18 @@ pub enum BooleanExpression {
IsNotNull(ValueExpression),
}

/// Metric selecting how two vectors are compared.
///
/// Carried by the `VectorDistance` and `VectorSimilarity` value expressions.
/// [`DistanceMetric::Cosine`] is the value produced by `Default::default()`.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub enum DistanceMetric {
    /// Euclidean (L2) distance.
    L2,
    /// Cosine-based metric; this is the default variant.
    ///
    /// NOTE(review): the previous comment read "Cosine similarity
    /// (1 - cosine distance)"; confirm against the UDF implementation
    /// which of distance/similarity is actually computed for each expression.
    #[default]
    Cosine,
    /// Dot product.
    Dot,
}

/// Comparison operators
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ComparisonOperator {
Expand Down Expand Up @@ -280,6 +292,22 @@ pub enum ValueExpression {
operator: ArithmeticOperator,
right: Box<ValueExpression>,
},
/// Vector distance function: vector_distance(left, right, metric)
/// Returns the distance as a float (lower = more similar for L2/Cosine)
VectorDistance {
left: Box<ValueExpression>,
right: Box<ValueExpression>,
metric: DistanceMetric,
},
/// Vector similarity function: vector_similarity(left, right, metric)
/// Returns the similarity score as a float (higher = more similar)
VectorSimilarity {
left: Box<ValueExpression>,
right: Box<ValueExpression>,
metric: DistanceMetric,
},
/// Parameter reference for query parameters (e.g., $query_vector)
Parameter(String),
}

/// Arithmetic operators
Expand Down
49 changes: 49 additions & 0 deletions rust/lance-graph/src/datafusion_planner/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//! Converts AST expressions to DataFusion expressions

use crate::ast::{BooleanExpression, PropertyValue, ValueExpression};
use crate::datafusion_planner::udf;
use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator};
use datafusion_functions_aggregate::average::avg;
use datafusion_functions_aggregate::count::count;
Expand Down Expand Up @@ -212,6 +213,48 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr {
right: Box::new(r),
})
}
VE::VectorDistance {
left,
right,
metric,
} => {
// Create UDF for vector distance computation
let udf = udf::create_vector_distance_udf(metric);
let left_expr = to_df_value_expr(left);
let right_expr = to_df_value_expr(right);
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
udf,
vec![left_expr, right_expr],
))
}
VE::VectorSimilarity {
left,
right,
metric,
} => {
// Create UDF for vector similarity computation
let udf = udf::create_vector_similarity_udf(metric);
let left_expr = to_df_value_expr(left);
let right_expr = to_df_value_expr(right);
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
udf,
vec![left_expr, right_expr],
))
}
VE::Parameter(name) => {
// TODO: Implement proper parameter resolution
// Parameters ($param) should be resolved to literal values from the query's
// parameter map (CypherQuery::parameters()) before or during planning.
//
// Current limitation: This creates a column reference as a placeholder,
// which will fail at execution if the column doesn't exist.
//
// Proper fix requires one of:
// 1. Resolve parameters during semantic analysis (substitute before planning)
// 2. Pass parameter map to to_df_value_expr and resolve here
// 3. Use DataFusion's parameter binding mechanism
col(format!("${}", name))
}
}
}

Expand All @@ -229,6 +272,12 @@ pub(crate) fn contains_aggregate(expr: &ValueExpression) -> bool {
is_aggregate || args.iter().any(contains_aggregate)
}
VE::Arithmetic { left, right, .. } => contains_aggregate(left) || contains_aggregate(right),
VE::VectorDistance { left, right, .. } => {
contains_aggregate(left) || contains_aggregate(right)
}
VE::VectorSimilarity { left, right, .. } => {
contains_aggregate(left) || contains_aggregate(right)
}
_ => false,
}
}
Expand Down
2 changes: 2 additions & 0 deletions rust/lance-graph/src/datafusion_planner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ mod config_helpers;
mod expression;
mod join_ops;
mod scan_ops;
mod udf;
mod vector_ops;

#[cfg(test)]
mod test_fixtures;
Expand Down
Loading
Loading