Skip to content

Commit 0de4097

Browse files
committed
fix(gfql/cypher): close wave-3 alias resolution and reentry where gaps (#1072)
1 parent 087b79d commit 0de4097

5 files changed

Lines changed: 129 additions & 16 deletions

File tree

graphistry/compute/gfql/cypher/lowering.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5865,9 +5865,24 @@ def _where_clause_expr_text(where: WhereClause) -> Optional[ExpressionText]:
58655865
derived from the same ``_span_from_meta(meta)`` of the
58665866
``generic_where_clause`` rule) to preserve error-position semantics.
58675867
"""
5868-
if where.expr_tree is None:
5869-
return None
5870-
return ExpressionText(text=boolean_expr_to_text(where.expr_tree), span=where.span)
5868+
if where.expr_tree is not None:
5869+
return ExpressionText(text=boolean_expr_to_text(where.expr_tree), span=where.span)
5870+
5871+
# Structured-predicate WHERE clauses can still require downstream rewrite
5872+
# (for example, multi-alias re-entry secondary carry demotion). Synthesize
5873+
# equivalent row-expression text when all predicates are row-renderable.
5874+
if where.predicates:
5875+
predicate_texts: List[str] = []
5876+
for predicate in where.predicates:
5877+
if not isinstance(predicate, WherePredicate):
5878+
return None
5879+
row_text = _row_where_predicate_text(predicate)
5880+
if row_text is None:
5881+
return None
5882+
predicate_texts.append(row_text)
5883+
if predicate_texts:
5884+
return ExpressionText(text=" and ".join(predicate_texts), span=where.span)
5885+
return None
58715886

58725887

58735888
def _rewrite_where_clause_and_resync(
@@ -5898,7 +5913,10 @@ def _rewrite_where_clause_and_resync(
58985913
atom_text=rewritten.text,
58995914
atom_span=rewritten.span,
59005915
)
5901-
return replace(where, expr_tree=new_tree)
5916+
rewritten_where = replace(where, expr_tree=new_tree)
5917+
if where.expr_tree is None and where.predicates:
5918+
rewritten_where = replace(rewritten_where, predicates=())
5919+
return rewritten_where
59025920

59035921

59045922
def _extract_relationship_type_where(

graphistry/compute/gfql/row/entity_props.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -374,7 +374,11 @@ def format_edge_entity_text(
374374
if type_col in df.columns:
375375
type_series = cast(SeriesT, df[type_col])
376376
type_text = _object_text(cast(SeriesT, type_series.astype(str)))
377-
type_part = cast(SeriesT, (_const_text(df, alias_col, ":") + type_text).where(~_is_null_mask(type_series), ""))
377+
include_type = cast(SeriesT, ~_is_null_mask(type_series))
378+
if hasattr(type_text, "str"):
379+
non_blank = cast(SeriesT, type_text.str.strip() != "")
380+
include_type = cast(SeriesT, include_type & non_blank)
381+
type_part = cast(SeriesT, (_const_text(df, alias_col, ":") + type_text).where(include_type, ""))
378382
else:
379383
type_part = _empty_text(df, alias_col)
380384
prop_text, has_props = append_property_segments(df, alias_col, property_columns)

graphistry/compute/gfql/row/pipeline.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2297,23 +2297,19 @@ def _gfql_resolve_token(self, table_df: Any, token: str) -> Any:
22972297
# identity column (alias.{node_id_col}). This lets expressions like
22982298
# count(post) work when the table has post.id, post.name, etc. (#880)
22992299
if "." not in txt and RowPipelineMixin._gfql_has_bindings_alias_prefix(table_df, txt):
2300+
edge_aliases = getattr(self, "_gfql_rows_edge_aliases", None)
2301+
if edge_aliases is not None and txt in edge_aliases:
2302+
# Relationship aliases should render as entities (parity with
2303+
# Cypher RETURN <relAlias>) instead of collapsing to id-like
2304+
# scalar columns such as `<rel>.id`.
2305+
return self._gfql_render_relationship_alias(table_df, txt)
23002306
node_id = getattr(self, "_node", None)
23012307
id_col = f"{txt}.{node_id}" if node_id else None
23022308
if id_col is not None and id_col in table_df.columns:
23032309
return table_df[id_col]
2304-
# Relationship alias: prefer the edge-id column, else render the
2305-
# relationship as a Cypher-style string so the bare alias can flow
2306-
# through select/where/group_by like a node alias. (#1072)
2307-
edge_id = getattr(self, "_edge", None)
2308-
edge_id_col = f"{txt}.{edge_id}" if edge_id else None
2309-
if edge_id_col is not None and edge_id_col in table_df.columns:
2310-
return table_df[edge_id_col]
2311-
edge_aliases = getattr(self, "_gfql_rows_edge_aliases", None)
23122310
if edge_aliases is not None and txt not in edge_aliases:
23132311
raise ValueError(f"unsupported token in row expression: {token!r}")
2314-
if edge_aliases is None and f"{txt}.type" not in table_df.columns:
2315-
raise ValueError(f"unsupported token in row expression: {token!r}")
2316-
return self._gfql_render_relationship_alias(table_df, txt)
2312+
raise ValueError(f"unsupported token in row expression: {token!r}")
23172313
raise ValueError(f"unsupported token in row expression: {token!r}")
23182314

23192315
def _gfql_render_relationship_alias(self, table_df: Any, alias: str) -> Any:

graphistry/tests/compute/gfql/cypher/test_lowering.py

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5189,6 +5189,22 @@ def test_string_cypher_executes_or_xor_around_pattern_predicates(
51895189
assert result._nodes.to_dict(orient="records") == expected_rows
51905190

51915191

5192+
def test_string_cypher_failfast_rejects_not_over_pattern_or_expr_compound() -> None:
5193+
graph = _mk_graph(
5194+
pd.DataFrame({"id": ["a", "b", "c", "d"]}),
5195+
pd.DataFrame(
5196+
{
5197+
"s": ["a", "a", "b"],
5198+
"d": ["b", "c", "c"],
5199+
"type": ["R", "R", "R"],
5200+
}
5201+
),
5202+
)
5203+
5204+
with pytest.raises(GFQLValidationError, match="Pattern existence expressions"):
5205+
graph.gfql("MATCH (n) WHERE NOT ((n)-[:R]->() OR n.id = 'd') RETURN n.id AS id")
5206+
5207+
51925208
@pytest.mark.parametrize(
51935209
"query",
51945210
[
@@ -8039,6 +8055,47 @@ def test_string_cypher_executes_with_match_reentry_carried_scalar_where_on_cudf(
80398055
assert result._nodes.to_pandas().to_dict(orient="records") == [{"property": 1, "id": "b1"}]
80408056

80418057

8058+
def test_string_cypher_executes_with_match_reentry_secondary_alias_property_where() -> None:
8059+
query = (
8060+
"MATCH (a:A {id: 'a1'})-[:R]->(b:B) "
8061+
"WITH a, b "
8062+
"MATCH (b)-[:S]->(c:C) "
8063+
"WHERE a.id = 'a1' "
8064+
"RETURN a.id AS aid, c.id AS cid "
8065+
"ORDER BY cid"
8066+
)
8067+
8068+
result = _mk_connected_reentry_carried_scalar_graph().gfql(query)
8069+
assert result._nodes.to_dict(orient="records") == [{"aid": "a1", "cid": "c1"}]
8070+
8071+
8072+
def test_string_cypher_executes_with_match_reentry_where_or_on_carried_and_trailing_alias_props() -> None:
8073+
query = (
8074+
"MATCH (a:A)-[:R]->(b:B) "
8075+
"WITH a, b "
8076+
"MATCH (b)-[:S]->(c:C) "
8077+
"WHERE a.id = 'a1' OR c.id = 'missing' "
8078+
"RETURN a.id AS aid, c.id AS cid "
8079+
"ORDER BY cid"
8080+
)
8081+
8082+
result = _mk_connected_reentry_carried_scalar_graph().gfql(query)
8083+
assert result._nodes.to_dict(orient="records") == [{"aid": "a1", "cid": "c1"}]
8084+
8085+
8086+
def test_string_cypher_executes_with_match_reentry_where_xor_on_carried_and_trailing_alias_props() -> None:
8087+
query = (
8088+
"MATCH (a:A)-[:R]->(b:B) "
8089+
"WITH a, b "
8090+
"MATCH (b)-[:S]->(c:C) "
8091+
"WHERE a.id = 'a1' XOR c.id = 'c1' "
8092+
"RETURN a.id AS aid, c.id AS cid"
8093+
)
8094+
8095+
result = _mk_connected_reentry_carried_scalar_graph().gfql(query)
8096+
assert result._nodes.to_dict(orient="records") == []
8097+
8098+
80428099
@pytest.mark.parametrize(
80438100
("query", "match"),
80448101
[

graphistry/tests/compute/gfql/test_row_pipeline_ops.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2369,7 +2369,45 @@ def test_select_bare_relationship_alias_multiple_rows(self):
23692369
"[:WORKS_AT {workFrom: 2015}]",
23702370
]
23712371

2372+
def test_select_bare_relationship_alias_with_edge_id_property_renders_entity_not_scalar_id(self):
2373+
edges = pd.DataFrame([{"s": "p3", "d": "c1", "type": "WORKS_AT", "id": "edge-1", "workFrom": 2010}])
2374+
g = self._binding_graph(edges=edges)
2375+
select_result = g.gfql(self._binding_ops() + [select(items=[("rel", "workAt")])])
2376+
return_result = g.gfql(self._binding_ops() + [return_([("rel", "workAt")])])
2377+
assert select_result._nodes["rel"].tolist() == ["[:WORKS_AT {id: 'edge-1', workFrom: 2010}]"]
2378+
assert select_result._nodes["rel"].tolist() == return_result._nodes["rel"].tolist()
2379+
2380+
def test_select_bare_relationship_alias_with_only_id_property_parity(self):
2381+
edges = pd.DataFrame([{"s": "p3", "d": "c1", "type": "WORKS_AT", "id": "edge-1"}])
2382+
g = self._binding_graph(edges=edges)
2383+
select_result = g.gfql(self._binding_ops() + [select(items=[("rel", "workAt")])])
2384+
return_result = g.gfql(self._binding_ops() + [return_([("rel", "workAt")])])
2385+
assert select_result._nodes["rel"].tolist() == ["[:WORKS_AT {id: 'edge-1'}]"]
2386+
assert select_result._nodes["rel"].tolist() == return_result._nodes["rel"].tolist()
2387+
2388+
def test_select_bare_relationship_alias_with_empty_type_treated_as_missing_type(self):
2389+
edges = pd.DataFrame([{"s": "p3", "d": "c1", "type": "", "workFrom": 2010}])
2390+
g = self._binding_graph(edges=edges)
2391+
select_result = g.gfql(self._binding_ops(edge_match={}) + [select(items=[("rel", "workAt")])])
2392+
return_result = g.gfql(self._binding_ops(edge_match={}) + [return_([("rel", "workAt")])])
2393+
assert select_result._nodes["rel"].tolist() == ["[{workFrom: 2010}]"]
2394+
assert select_result._nodes["rel"].tolist() == return_result._nodes["rel"].tolist()
2395+
23722396
def test_select_node_alias_without_node_id_does_not_render_as_relationship(self):
23732397
g = self._binding_graph()
23742398
with pytest.raises((ValueError, GFQLTypeError), match="unsupported token in row expression"):
23752399
g.gfql(self._binding_ops() + [drop_cols(["friend", "friend.id"]), select(items=[("x", "friend")])])
2400+
2401+
def test_select_node_alias_with_type_property_without_node_id_does_not_render_as_relationship(self):
2402+
nodes = pd.DataFrame([
2403+
{"id": "p3", "label__Person": True, "label__Company": False, "name": "", "type": "PERSON"},
2404+
{"id": "c1", "label__Person": False, "label__Company": True, "name": "Acme", "type": "COMPANY"},
2405+
])
2406+
g = self._binding_graph(nodes=nodes)
2407+
with pytest.raises((ValueError, GFQLTypeError), match="unsupported token in row expression"):
2408+
g.gfql(self._binding_ops() + [drop_cols(["friend", "friend.id"]), select(items=[("x", "friend")])])
2409+
2410+
def test_select_plain_rows_alias_like_columns_do_not_render_relationship_text(self):
2411+
nodes_df = pd.DataFrame({"id": ["a"], "a.type": ["X"], "a.k": [1]})
2412+
with pytest.raises((ValueError, GFQLTypeError), match="unsupported token in row expression"):
2413+
_run_node_steps(nodes_df, [rows(), select([("x", "a")])], edges_df=_self_loop_edges(nodes_df))

0 commit comments

Comments
 (0)