Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions python/python/knowledge_graph/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,16 @@ def _build_query_prompt(
"Use the schema summary to craft queries that directly answer the "
"question."
),
(
" • Use the schema summary and allowed relationship_type values to "
"identify candidate relationship directions and types."
),
(
" • When the schema lists relationship_type values and the question "
"does not narrow them down, treat the list as exhaustive and include "
"every value in your filter using OR clauses or "
"WHERE rel.relationship_type IN [...]."
),
(
"Always specify node labels and relationship types in MATCH patterns "
"that introduce aliases."
Expand All @@ -405,12 +415,25 @@ def _build_query_prompt(
(" • MATCH (e:Entity) to scan entity rows (name, name_lower, entity_id)."),
(
" • MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) to traverse "
"relationships (relationship_type column)."
"relationships (relationship_type column); `src` aligns with "
"`source_entity_id` and `dst` with `target_entity_id`."
),
(
" • Decide which node should be `src` versus `dst` based on the "
"relationship meaning in the question and schema hints."
),
(
" • Map natural language roles (team, person, product, etc.) to the "
"`entity_type` column so queries filter to the expected entities."
),
" • Use WHERE e.column = 'value' for node-level filters.",
(
" • Filter relationships with WHERE rel.relationship_type = 'VALUE' "
"or by comparing rel.source_entity_id / rel.target_entity_id."
"or by comparing rel.source_entity_id / rel.target_entity_id; when the "
"question does not name a specific relationship type, include every "
"relevant value from the schema summary using OR clauses or "
"WHERE rel.relationship_type IN [...], explicitly note which values "
"you considered, and avoid emitting only a single guessed type."
),
(
" • Select columns using the aliases you define, such as e.name or "
Expand All @@ -421,8 +444,19 @@ def _build_query_prompt(
"filter rel.relationship_type instead of [:TYPE]."
),
(
"Example: MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) "
f"WHERE rel.relationship_type = '{example_rel_type}' RETURN rel."
"Example: MATCH (part:Entity)-[rel:RELATIONSHIP]->(whole:Entity) "
f"WHERE rel.relationship_type = '{example_rel_type}' "
"RETURN part.name, whole.name."
),
(
"Example: MATCH (a:Entity)-[rel:RELATIONSHIP]->(b:Entity) WHERE "
"rel.relationship_type = 'TYPE_A' OR rel.relationship_type = 'TYPE_B' "
"RETURN a.name, b.name."
),
(
"Example: MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) WHERE "
"rel.relationship_type IN ['TYPE_A', 'TYPE_B', 'TYPE_C'] "
"RETURN src.name, dst.name."
),
(
"Example: MATCH (dst:Entity) WHERE dst.name_lower = 'acme corp' "
Expand Down
38 changes: 38 additions & 0 deletions rust/lance-graph/src/datafusion_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,15 @@ impl DataFusionPlanner {
right: Box::new(r),
})
}
BE::In { expression, list } => {
use datafusion::logical_expr::expr::InList as DFInList;
let expr = self.to_df_value_expr(expression);
let list_exprs = list
.iter()
.map(|item| self.to_df_value_expr(item))
.collect::<Vec<_>>();
Expr::InList(DFInList::new(Box::new(expr), list_exprs, false))
}
BE::And(l, r) => Expr::BinaryExpr(BinaryExpr {
left: Box::new(self.to_df_boolean_expr(l)),
op: Operator::And,
Expand Down Expand Up @@ -334,6 +343,35 @@ mod tests {
)
}

// Checks that a Cypher `IN` boolean expression is lowered to a non-negated
// DataFusion `InList` whose probe expression is the referenced column.
#[test]
fn test_df_boolean_expr_in_list() {
    let planner =
        DataFusionPlanner::new(crate::config::GraphConfig::builder().build().unwrap());

    // Left-hand side of the IN: rel.relationship_type
    let probe = ValueExpression::Property(PropertyRef {
        variable: "rel".into(),
        property: "relationship_type".into(),
    });
    // Right-hand side of the IN: two string literals
    let candidates = vec![
        ValueExpression::Literal(PropertyValue::String("WORKS_FOR".into())),
        ValueExpression::Literal(PropertyValue::String("PART_OF".into())),
    ];
    let expr = BooleanExpression::In {
        expression: probe,
        list: candidates,
    };

    let in_list = match planner.to_df_boolean_expr(&expr) {
        Expr::InList(in_list) => in_list,
        _ => panic!("Expected InList expression"),
    };

    assert!(!in_list.negated);
    assert_eq!(in_list.list.len(), 2);
    match in_list.expr.as_ref() {
        Expr::Column(col_expr) => assert_eq!(col_expr.name(), "relationship_type"),
        other => panic!("Expected column expression, got {:?}", other),
    }
}

#[test]
fn test_df_planner_scan_filter_project() {
let scan = LogicalOperator::ScanByLabel {
Expand Down
62 changes: 60 additions & 2 deletions rust/lance-graph/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use nom::{
bytes::complete::{tag, tag_no_case, take_while1},
character::complete::{char, multispace0, multispace1},
combinator::{map, opt, recognize},
multi::{many0, separated_list0},
multi::{many0, separated_list0, separated_list1},
sequence::{delimited, pair, preceded, tuple},
IResult,
};
Expand Down Expand Up @@ -316,14 +316,28 @@ fn comparison_expression(input: &str) -> IResult<&str, BooleanExpression> {
let (input, _) = multispace0(input)?;
let (input, left) = value_expression(input)?;
let (input, _) = multispace0(input)?;
let left_clone = left.clone();

if let Ok((input_after_in, (_, _, list))) =
tuple((tag_no_case("IN"), multispace0, value_expression_list))(input)
{
return Ok((
input_after_in,
BooleanExpression::In {
expression: left,
list,
},
));
}

let (input, operator) = comparison_operator(input)?;
let (input, _) = multispace0(input)?;
let (input, right) = value_expression(input)?;

Ok((
input,
BooleanExpression::Comparison {
left,
left: left_clone,
operator,
right,
},
Expand Down Expand Up @@ -352,6 +366,17 @@ fn value_expression(input: &str) -> IResult<&str, ValueExpression> {
))(input)
}

fn value_expression_list(input: &str) -> IResult<&str, Vec<ValueExpression>> {
delimited(
tuple((char('['), multispace0)),
separated_list1(
tuple((multispace0, char(','), multispace0)),
value_expression,
),
tuple((multispace0, char(']'))),
)(input)
}

// Parse a property reference: variable.property
fn property_reference(input: &str) -> IResult<&str, PropertyRef> {
let (input, variable) = identifier(input)?;
Expand Down Expand Up @@ -726,6 +751,39 @@ mod tests {
}
}

// Checks that `WHERE <property> IN [<string>, <string>]` parses into a
// `BooleanExpression::In` carrying the property reference and both literals
// in source order.
#[test]
fn test_parse_query_with_in_clause() {
    let query = "MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) WHERE rel.relationship_type IN ['WORKS_FOR', 'PART_OF'] RETURN src.name";
    let parsed = parse_cypher_query(query).unwrap();

    let where_clause = parsed.where_clause.expect("Expected WHERE clause");
    let (expression, list) = match where_clause.expression {
        BooleanExpression::In { expression, list } => (expression, list),
        other => panic!("Expected IN expression, got {:?}", other),
    };

    // The left-hand side must be the `rel.relationship_type` property.
    if let ValueExpression::Property(prop_ref) = expression {
        assert_eq!(prop_ref.variable, "rel");
        assert_eq!(prop_ref.property, "relationship_type");
    } else {
        panic!("Expected property reference in IN expression");
    }

    // The right-hand side must be exactly the two string literals, in order.
    assert_eq!(list.len(), 2);
    let expectations = [
        ("WORKS_FOR", "Expected first list item to be a string literal"),
        ("PART_OF", "Expected second list item to be a string literal"),
    ];
    for (item, &(expected, failure)) in list.iter().zip(expectations.iter()) {
        match item {
            ValueExpression::Literal(PropertyValue::String(val)) => {
                assert_eq!(val, expected);
            }
            _ => panic!("{}", failure),
        }
    }
}

#[test]
fn test_parse_query_with_limit() {
let query = "MATCH (n:Person) RETURN n.name LIMIT 10";
Expand Down
Loading