|
3 | 3 | from dataclasses import dataclass, field, replace |
4 | 4 | import math |
5 | 5 | import re |
6 | | -from typing import AbstractSet, Any, Callable, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union, cast |
| 6 | +from typing import AbstractSet, Any, Callable, Dict, FrozenSet, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union, cast |
7 | 7 | from typing_extensions import Literal |
8 | 8 | import pandas as pd |
9 | 9 |
|
@@ -7468,6 +7468,285 @@ def _first_pattern_node_alias(clause: MatchClause) -> Optional[str]: |
7468 | 7468 | return pattern[0].variable |
7469 | 7469 |
|
7470 | 7470 |
|
def _secondary_reentry_hidden_column_name(alias: str, prop: str) -> str:
    """Return the hidden carry column name for a secondary alias's property access.

    Distinct from `_reentry_hidden_column_name` (which is keyed only by the
    output name) so secondary `<S>.<X>` carries cannot collide with user-named
    scalar carries on the primary alias (#1071).

    Args:
        alias: The secondary whole-row alias (``S``).
        prop: The property read from that alias (``X``).

    Returns:
        The string ``__cypher_reentry_<alias>_<prop>__``.
    """
    # NOTE(review): alias and prop are joined with "_", which can also occur
    # inside either part, so ("a_b", "c") and ("a", "b_c") yield the same
    # name. Confirm whether callers can ever produce such a pair.
    return f"__cypher_reentry_{alias}_{prop}__"
| 7479 | + |
| 7480 | + |
# Matches an expression text that is nothing but a single bare identifier.
_BARE_IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")


def _is_whole_row_with_item(item: ReturnItem, *, match_node_aliases: Set[str]) -> bool:
    """Tell whether a WITH item is a whole-row carry of a matched node alias.

    A WITH item is a whole-row carry when it is a bare identifier referencing
    a node alias bound by the prior MATCH and has no rename alias (or aliases
    to itself).

    Args:
        item: The WITH projection item; its ``expression.text`` and ``alias``
            are inspected.
        match_node_aliases: Node aliases bound by the preceding MATCH clauses.

    Returns:
        ``True`` only when all three conditions above hold.
    """
    text = item.expression.text
    if not _BARE_IDENT_RE.match(text):
        # Anything beyond a lone identifier (property access, call, ...) is a
        # scalar projection, not a whole-row carry.
        return False
    if item.alias is not None and item.alias != text:
        # ``b AS c`` renames the row, so it is not treated as a plain carry.
        return False
    return text in match_node_aliases
| 7494 | + |
| 7495 | + |
def _collect_secondary_property_refs(
    expr: ExpressionText,
    *,
    secondary_aliases: Set[str],
    field: str,
) -> Tuple[ExpressionText, Set[Tuple[str, str]], Set[str]]:
    """Rewrite secondary-alias property reads in one expression.

    Walks one ExpressionText, replacing PropertyAccessExpr(Identifier(S), X)
    with Identifier(__cypher_reentry_<S>_<X>__) for each S in
    ``secondary_aliases``. Bare Identifier(S) usages are reported too.

    Args:
        expr: The expression to inspect; ``expr.text`` is parsed and
            ``expr.span`` is reused for the rewritten expression and for
            error locations.
        secondary_aliases: Alias names whose property reads are rewritten.
        field: Field label attached to the error raised on a parse failure.

    Returns:
        ``(rewritten_expr, refs, bare_alias_uses)``. ``refs`` holds the
        ``(alias, property)`` pairs that were rewritten and
        ``bare_alias_uses`` the aliases referenced without a property. The
        original ``expr`` object is returned whenever the text is unchanged.

    Raises:
        The error built by ``_unsupported`` when ``expr.text`` mentions a
        secondary alias but cannot be parsed.
    """
    if not secondary_aliases:
        return expr, set(), set()

    # Cheap textual pre-filter: only parse when some alias occurs as a whole
    # identifier token (not as part of a longer identifier).
    mentions_alias = any(
        re.search(rf"(?<![A-Za-z0-9_]){re.escape(name)}(?![A-Za-z0-9_])", expr.text)
        for name in secondary_aliases
    )
    if not mentions_alias:
        return expr, set(), set()

    try:
        node = parse_expr(expr.text)
    except (GFQLExprParseError, ImportError) as exc:
        raise _unsupported(
            "Cypher MATCH after WITH multi-alias carry rewrite requires a locally supported scalar expression",
            field=field,
            value=expr.text,
            line=expr.span.line,
            column=expr.span.column,
        ) from exc

    refs: Set[Tuple[str, str]] = set()
    bare: Set[str] = set()
    rewritten = _rewrite_secondary_alias_property_refs(
        node,
        secondary_aliases=secondary_aliases,
        refs=refs,
        bare=bare,
    )
    if not refs and not bare:
        # The textual hit was a false positive (e.g. a shadowed binder).
        return expr, set(), set()

    new_text = _render_expr_node(rewritten)
    if new_text == expr.text:
        # Only bare uses were found; keep the caller's original object.
        return expr, refs, bare
    return ExpressionText(text=new_text, span=expr.span), refs, bare
| 7536 | + |
| 7537 | + |
def _rewrite_secondary_alias_property_refs(
    node: ExprNode,
    *,
    secondary_aliases: Set[str],
    refs: Set[Tuple[str, str]],
    bare: Set[str],
    shadowed: Optional[FrozenSet[str]] = None,
) -> ExprNode:
    """Rewrite secondary `S.X` reads to hidden identifiers across an AST.

    AST walk that rewrites secondary `S.X` to a bare hidden identifier and
    flags bare `S` references. Quantifier/ListComprehension binders shadow
    matching alias names inside their predicate and projection. The source
    list of such a node is rewritten under the *enclosing* scope, because the
    binder is not yet bound while the source is evaluated: in
    ``[b IN b.tags | ...]`` the ``b.tags`` still reads the carried alias.

    NOTE(review): this walk was written to mirror ``_rewrite_expr_identifiers``
    (not visible here); confirm that helper scopes binder sources the same way.

    Args:
        node: Expression AST node to rewrite.
        secondary_aliases: Alias names whose property reads are rewritten.
        refs: Output set; receives every rewritten ``(alias, property)`` pair.
        bare: Output set; receives every alias referenced without a property.
        shadowed: Names currently hidden by an enclosing binder, if any.

    Returns:
        The rewritten node. Identifier nodes are returned unchanged.
    """
    active_shadow = shadowed or frozenset()

    def descend(child: ExprNode, scope: FrozenSet[str]) -> ExprNode:
        # Recurse with the same accumulators but an explicit binder scope.
        return _rewrite_secondary_alias_property_refs(
            child, secondary_aliases=secondary_aliases, refs=refs, bare=bare, shadowed=scope,
        )

    if isinstance(node, PropertyAccessExpr) and isinstance(node.value, Identifier):
        alias_name = node.value.name
        if alias_name in secondary_aliases and alias_name not in active_shadow:
            refs.add((alias_name, node.property))
            return Identifier(_secondary_reentry_hidden_column_name(alias_name, node.property))
    if isinstance(node, Identifier):
        if node.name in secondary_aliases and node.name not in active_shadow:
            bare.add(node.name)
        return node
    if isinstance(node, QuantifierExpr):
        inner_scope = active_shadow | {node.var}
        return QuantifierExpr(
            node.fn,
            node.var,
            # Source is evaluated before the binder exists: enclosing scope.
            descend(node.source, active_shadow),
            descend(node.predicate, inner_scope),
        )
    if isinstance(node, ListComprehension):
        inner_scope = active_shadow | {node.var}
        return ListComprehension(
            node.var,
            # Source is evaluated before the binder exists: enclosing scope.
            descend(node.source, active_shadow),
            predicate=None if node.predicate is None else descend(node.predicate, inner_scope),
            projection=None if node.projection is None else descend(node.projection, inner_scope),
        )
    # Any other node kind: rebuild it with each child rewritten in the
    # current scope.
    return _rebuild_expr_node(
        node,
        rewrite=lambda child: descend(child, active_shadow),
        error_context="secondary alias rewrite",
    )
| 7593 | + |
| 7594 | + |
def _all_match_node_aliases(query: CypherQuery) -> Set[str]:
    """Collect every node alias bound by the MATCH clauses in ``query.matches``.

    For each clause, aliases come from two places: ``_pattern_node_aliases``
    applied to each of its patterns, and the ``variable`` of every
    ``NodePattern`` yielded by ``_match_pattern_elements``.

    Args:
        query: The parsed query; only ``query.matches`` is read.

    Returns:
        A new set of alias names (empty when there are no MATCH clauses).
    """
    out: Set[str] = set()
    for clause in query.matches:
        for pattern in clause.patterns:
            out.update(_pattern_node_aliases(pattern))
        for element in _match_pattern_elements(clause):
            if isinstance(element, NodePattern) and element.variable is not None:
                out.add(element.variable)
    return out
| 7604 | + |
| 7605 | + |
def _demote_secondary_whole_row_aliases(
    query: CypherQuery,
    *,
    prefix_stage: ProjectionStage,
    primary_alias: Optional[str],
) -> Tuple[CypherQuery, ProjectionStage, Tuple[str, ...]]:
    """Rewrite ``query`` to demote any secondary whole-row alias in the prefix
    ``WITH`` to a synthesized scalar property carry (#1071).

    Why: the existing MATCH-after-WITH machinery requires exactly one whole-row
    alias in the prefix projection. Other carried aliases need only support
    property access (``S.X``) in subsequent clauses. By rewriting ``S.X`` to a
    bare hidden identifier and synthesizing a new prefix item ``S.X AS
    __cypher_reentry_<S>_<X>__``, multi-alias carry reduces to the existing
    single-alias-plus-scalars path.

    Args:
        query: The parsed query containing the trailing (re-entry) MATCH.
        prefix_stage: The prefix ``WITH`` stage whose items are inspected.
        primary_alias: The alias the trailing MATCH starts from, if known.

    Returns:
        ``(rewritten_query, rewritten_prefix_stage, secondary_aliases)``.
        When no demotion is needed, returns the inputs unchanged with empty
        ``secondary_aliases``. Otherwise ``secondary_aliases`` is the sorted
        tuple of demoted alias names.

    Raises:
        The error built by ``_unsupported_at_span`` when a trailing MATCH
        re-binds a secondary alias as a node variable, or when a secondary
        alias is referenced bare (not through a property) after the prefix.
        Errors from ``_collect_secondary_property_refs`` propagate when an
        expression mentioning a secondary alias cannot be parsed.
    """
    if not query.reentry_matches:
        return query, prefix_stage, ()
    match_node_aliases = _all_match_node_aliases(query)
    whole_row_items: List[Tuple[int, ReturnItem]] = [
        (idx, item)
        for idx, item in enumerate(prefix_stage.clause.items)
        if _is_whole_row_with_item(item, match_node_aliases=match_node_aliases)
    ]
    if len(whole_row_items) <= 1:
        # Zero or one whole-row carry is already the supported shape.
        return query, prefix_stage, ()
    if primary_alias is None:
        # Existing downstream check at the trailing-MATCH start raises a clearer
        # error; bail out so it fires.
        return query, prefix_stage, ()

    primary_indices = {idx for idx, item in whole_row_items if item.expression.text == primary_alias}
    if not primary_indices:
        # Trailing MATCH starts from an alias not carried by the prefix: the
        # existing downstream check raises "must start from the same carried
        # node alias", so leave the query untouched for it.
        return query, prefix_stage, ()
    secondary_items = [(idx, item) for idx, item in whole_row_items if idx not in primary_indices]
    secondary_aliases: Set[str] = {item.expression.text for _idx, item in secondary_items}

    # Reject re-binding a secondary alias as a node variable in any trailing MATCH.
    for trailing in query.reentry_matches:
        trailing_aliases: Set[str] = set()
        for pattern in trailing.patterns:
            trailing_aliases.update(_pattern_node_aliases(pattern))
        rebound = sorted(trailing_aliases & secondary_aliases)
        if rebound:
            raise _unsupported_at_span(
                "Cypher MATCH after WITH does not yet support re-binding a carried secondary alias as a node variable in the trailing MATCH",
                field="match",
                value=rebound,
                span=trailing.span,
            )

    # Accumulated across every expression rewritten below.
    refs_collected: Set[Tuple[str, str]] = set()
    bare_collected: Set[str] = set()

    def rewrite_text(expr: ExpressionText, field: str) -> ExpressionText:
        # Rewrite one expression and record what it referenced.
        rewritten, refs, bare = _collect_secondary_property_refs(
            expr,
            secondary_aliases=secondary_aliases,
            field=field,
        )
        refs_collected.update(refs)
        bare_collected.update(bare)
        return rewritten

    # Rewrite trailing MATCH expressions (node/edge property maps via WHERE
    # comes through reentry_wheres / clause.where).
    rewritten_reentry_matches = tuple(
        _rewrite_reentry_match_clause(clause, rewrite_expr=rewrite_text)
        for clause in query.reentry_matches
    )
    rewritten_reentry_wheres = tuple(
        where_clause if where_clause is None else _rewrite_where_clause_and_resync(where_clause, rewrite_text, "where")
        for where_clause in query.reentry_wheres
    )
    # with_stages[0] is the prefix stage; only the later stages run after the
    # secondary aliases have been dropped.
    rewritten_with_stages_tail = tuple(
        _rewrite_reentry_projection_stage(stage, rewrite_expr=rewrite_text)
        for stage in query.with_stages[1:]
    )
    rewritten_unwinds = tuple(
        replace(unwind, expression=rewrite_text(unwind.expression, "unwind"))
        for unwind in query.reentry_unwinds
    )
    rewritten_return = _rewrite_reentry_projection_clause(query.return_, rewrite_expr=rewrite_text)
    rewritten_order_by = (
        None
        if query.order_by is None
        else replace(
            query.order_by,
            items=tuple(
                replace(item, expression=rewrite_text(item.expression, "order_by"))
                for item in query.order_by.items
            ),
        )
    )

    if bare_collected:
        # NOTE(review): the error is always attributed to the RETURN clause,
        # even if the bare use was found in another rewritten clause.
        raise _unsupported_at_span(
            "Cypher MATCH after WITH does not yet support carrying secondary whole-row aliases as whole-row outputs; reference them by property only",
            field="return",
            value=sorted(bare_collected),
            span=query.return_.span,
        )

    # Synthesize prefix WITH items: drop the secondaries, append S.X AS hidden
    # for each unique referenced (S, X) pair.
    secondary_drop_indices = {idx for idx, _item in secondary_items}
    new_items: List[ReturnItem] = [
        item
        for idx, item in enumerate(prefix_stage.clause.items)
        if idx not in secondary_drop_indices
    ]
    template_span = prefix_stage.span
    # Sorted so the synthesized column order is deterministic.
    for alias_name, prop in sorted(refs_collected):
        hidden_alias = _secondary_reentry_hidden_column_name(alias_name, prop)
        new_items.append(
            ReturnItem(
                expression=ExpressionText(text=f"{alias_name}.{prop}", span=template_span),
                alias=hidden_alias,
                span=template_span,
            )
        )
    rewritten_prefix_stage = replace(
        prefix_stage,
        clause=replace(prefix_stage.clause, items=tuple(new_items)),
    )

    rewritten_query = replace(
        query,
        with_stages=(rewritten_prefix_stage,) + rewritten_with_stages_tail,
        reentry_matches=rewritten_reentry_matches,
        reentry_wheres=rewritten_reentry_wheres,
        reentry_unwinds=rewritten_unwinds,
        return_=rewritten_return,
        order_by=rewritten_order_by,
    )
    return rewritten_query, rewritten_prefix_stage, tuple(sorted(secondary_aliases))
| 7748 | + |
| 7749 | + |
7471 | 7750 | def _map_terminal_reentry_query( |
7472 | 7751 | compiled_query: CompiledCypherQuery, |
7473 | 7752 | *, |
@@ -7673,14 +7952,20 @@ def _compile_bounded_reentry_query( |
7673 | 7952 | span=query.return_.span, |
7674 | 7953 | ) |
7675 | 7954 | prefix_stage = query.with_stages[0] |
7676 | | - projection_items = [item.expression.text for item in prefix_stage.clause.items] |
7677 | 7955 | if prefix_stage.where is not None: |
7678 | 7956 | raise _unsupported_at_span( |
7679 | 7957 | "Cypher MATCH after WITH does not yet support WITH ... WHERE in the prefix stage", |
7680 | 7958 | field="with.where", |
7681 | 7959 | value=prefix_stage.where.text, |
7682 | 7960 | span=prefix_stage.span, |
7683 | 7961 | ) |
| 7962 | + primary_alias_hint = _first_pattern_node_alias(query.reentry_matches[0]) |
| 7963 | + query, prefix_stage, _demoted_secondary_aliases = _demote_secondary_whole_row_aliases( |
| 7964 | + query, |
| 7965 | + prefix_stage=prefix_stage, |
| 7966 | + primary_alias=primary_alias_hint, |
| 7967 | + ) |
| 7968 | + projection_items = [item.expression.text for item in prefix_stage.clause.items] |
7684 | 7969 | prefix_query = replace( |
7685 | 7970 | query, |
7686 | 7971 | call=None, |
|
0 commit comments