Skip to content

Commit 49067ae

Browse files
qianheng-awsLantaoJin
authored andcommitted
Support filter params in graphlookup (opensearch-project#5134)
* Struct return array value instead of string Signed-off-by: Heng Qian <qianheng@amazon.com> * Support filter in GraphLookup Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix IT Signed-off-by: Heng Qian <qianheng@amazon.com> * Add experimental tag in doc Signed-off-by: Heng Qian <qianheng@amazon.com> --------- Signed-off-by: Heng Qian <qianheng@amazon.com> Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent c78c05a commit 49067ae

15 files changed

Lines changed: 339 additions & 63 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.opensearch.sql.ast.AbstractNodeVisitor;
1919
import org.opensearch.sql.ast.expression.Field;
2020
import org.opensearch.sql.ast.expression.Literal;
21+
import org.opensearch.sql.ast.expression.UnresolvedExpression;
2122

2223
/**
2324
* AST node for graphLookup command. Performs BFS graph traversal on a lookup table.
@@ -74,6 +75,11 @@ public enum Direction {
7475
/** Whether to use PIT (Point In Time) search for the lookup table to get complete results. */
7576
private final boolean usePIT;
7677

78+
/**
79+
* Optional filter condition to restrict which lookup table documents participate in traversal.
80+
*/
81+
private @Nullable final UnresolvedExpression filter;
82+
7783
private UnresolvedPlan child;
7884

7985
public String getDepthFieldName() {

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2606,9 +2606,16 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) {
26062606
// 3. Visit and materialize lookup table
26072607
analyze(node.getFromTable(), context);
26082608
tryToRemoveMetaFields(context, true);
2609+
2610+
// 4. Convert filter expression to RexNode against lookup table schema
2611+
RexNode filterRex = null;
2612+
if (node.getFilter() != null) {
2613+
filterRex = rexVisitor.analyze(node.getFilter(), context);
2614+
}
2615+
26092616
RelNode lookupTable = builder.build();
26102617

2611-
// 4. Create LogicalGraphLookup RelNode
2618+
// 5. Create LogicalGraphLookup RelNode
26122619
// The conversion rule will extract the OpenSearchIndex from the lookup table
26132620
RelNode graphLookup =
26142621
LogicalGraphLookup.create(
@@ -2623,7 +2630,8 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) {
26232630
bidirectional,
26242631
supportArray,
26252632
batchMode,
2626-
usePIT);
2633+
usePIT,
2634+
filterRex);
26272635

26282636
builder.push(graphLookup);
26292637
return builder.peek();

core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.apache.calcite.rel.metadata.RelMetadataQuery;
1717
import org.apache.calcite.rel.type.RelDataType;
1818
import org.apache.calcite.rel.type.RelDataTypeFactory;
19+
import org.apache.calcite.rex.RexNode;
1920
import org.apache.calcite.sql.type.SqlTypeName;
2021

2122
/**
@@ -51,6 +52,7 @@ public abstract class GraphLookup extends BiRel {
5152
protected final boolean supportArray;
5253
protected final boolean batchMode;
5354
protected final boolean usePIT;
55+
@Nullable protected final RexNode filter;
5456

5557
private RelDataType outputRowType;
5658

@@ -72,6 +74,7 @@ public abstract class GraphLookup extends BiRel {
7274
* pushdown)
7375
* @param batchMode Whether to batch all source start values into a single unified BFS
7476
* @param usePIT Whether to use PIT (Point In Time) search for complete results
77+
* @param filter Optional filter condition for lookup table documents
7578
*/
7679
protected GraphLookup(
7780
RelOptCluster cluster,
@@ -87,7 +90,8 @@ protected GraphLookup(
8790
boolean bidirectional,
8891
boolean supportArray,
8992
boolean batchMode,
90-
boolean usePIT) {
93+
boolean usePIT,
94+
@Nullable RexNode filter) {
9195
super(cluster, traitSet, source, lookup);
9296
this.startField = startField;
9397
this.fromField = fromField;
@@ -99,6 +103,7 @@ protected GraphLookup(
99103
this.supportArray = supportArray;
100104
this.batchMode = batchMode;
101105
this.usePIT = usePIT;
106+
this.filter = filter;
102107
}
103108

104109
/** Returns the source table RelNode. */
@@ -181,6 +186,7 @@ public RelWriter explainTerms(RelWriter pw) {
181186
.item("bidirectional", bidirectional)
182187
.itemIf("supportArray", supportArray, supportArray)
183188
.itemIf("batchMode", batchMode, batchMode)
184-
.itemIf("usePIT", usePIT, usePIT);
189+
.itemIf("usePIT", usePIT, usePIT)
190+
.itemIf("filter", filter, filter != null);
185191
}
186192
}

core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.calcite.plan.RelOptCluster;
1313
import org.apache.calcite.plan.RelTraitSet;
1414
import org.apache.calcite.rel.RelNode;
15+
import org.apache.calcite.rex.RexNode;
1516

1617
/**
1718
* Logical RelNode for graphLookup command. TODO: need to support trim fields and several transpose
@@ -37,6 +38,7 @@ public class LogicalGraphLookup extends GraphLookup {
3738
* @param supportArray Whether to support array-typed fields
3839
* @param batchMode Whether to batch all source start values into a single unified BFS
3940
* @param usePIT Whether to use PIT (Point In Time) search for complete results
41+
* @param filter Optional filter condition for lookup table documents
4042
*/
4143
protected LogicalGraphLookup(
4244
RelOptCluster cluster,
@@ -52,7 +54,8 @@ protected LogicalGraphLookup(
5254
boolean bidirectional,
5355
boolean supportArray,
5456
boolean batchMode,
55-
boolean usePIT) {
57+
boolean usePIT,
58+
@Nullable RexNode filter) {
5659
super(
5760
cluster,
5861
traitSet,
@@ -67,7 +70,8 @@ protected LogicalGraphLookup(
6770
bidirectional,
6871
supportArray,
6972
batchMode,
70-
usePIT);
73+
usePIT,
74+
filter);
7175
}
7276

7377
/**
@@ -85,6 +89,7 @@ protected LogicalGraphLookup(
8589
* @param supportArray Whether to support array-typed fields
8690
* @param batchMode Whether to batch all source start values into a single unified BFS
8791
* @param usePIT Whether to use PIT (Point In Time) search for complete results
92+
* @param filter Optional filter condition for lookup table documents
8893
* @return A new LogicalGraphLookup instance
8994
*/
9095
public static LogicalGraphLookup create(
@@ -99,7 +104,8 @@ public static LogicalGraphLookup create(
99104
boolean bidirectional,
100105
boolean supportArray,
101106
boolean batchMode,
102-
boolean usePIT) {
107+
boolean usePIT,
108+
@Nullable RexNode filter) {
103109
RelOptCluster cluster = source.getCluster();
104110
RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE);
105111
return new LogicalGraphLookup(
@@ -116,7 +122,8 @@ public static LogicalGraphLookup create(
116122
bidirectional,
117123
supportArray,
118124
batchMode,
119-
usePIT);
125+
usePIT,
126+
filter);
120127
}
121128

122129
@Override
@@ -135,6 +142,7 @@ public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
135142
bidirectional,
136143
supportArray,
137144
batchMode,
138-
usePIT);
145+
usePIT,
146+
filter);
139147
}
140148
}

docs/user/ppl/cmd/graphlookup.md

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
# graphLookup
2+
# graphLookup (Experimental)
33

44
The `graphLookup` command performs recursive graph traversal on a collection using a breadth-first search (BFS) algorithm. It searches for documents matching a start value and recursively traverses connections between documents based on specified fields. This is useful for hierarchical data like organizational charts, social networks, or routing graphs.
55

@@ -8,7 +8,7 @@ The `graphLookup` command performs recursive graph traversal on a collection usi
88
The `graphLookup` command has the following syntax:
99

1010
```syntax
11-
graphLookup <lookupIndex> startField=<startField> fromField=<fromField> toField=<toField> [maxDepth=<maxDepth>] [depthField=<depthField>] [direction=(uni | bi)] [supportArray=(true | false)] [batchMode=(true | false)] [usePIT=(true | false)] as <outputField>
11+
graphLookup <lookupIndex> startField=<startField> fromField=<fromField> toField=<toField> [maxDepth=<maxDepth>] [depthField=<depthField>] [direction=(uni | bi)] [supportArray=(true | false)] [batchMode=(true | false)] [usePIT=(true | false)] [filter=(<condition>)] as <outputField>
1212
```
1313

1414
The following are examples of the `graphLookup` command syntax:
@@ -20,6 +20,7 @@ source = employees | graphLookup employees startField=reportsTo fromField=report
2020
source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name direction=bi as connections
2121
source = travelers | graphLookup airports startField=nearestAirport fromField=connects toField=airport supportArray=true as reachableAirports
2222
source = airports | graphLookup airports startField=airport fromField=connects toField=airport supportArray=true as reachableAirports
23+
source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name filter=(status = 'active' AND age > 18) as reportingHierarchy
2324
```
2425

2526
## Parameters
@@ -38,6 +39,7 @@ The `graphLookup` command supports the following parameters.
3839
| `supportArray=(true \| false)` | Optional | When `true`, disables early visited-node filter pushdown to OpenSearch. Default is `false`. Set to `true` when `fromField` or `toField` contains array values to ensure correct traversal behavior. See [Array Field Handling](#array-field-handling) for details. |
3940
| `batchMode=(true \| false)` | Optional | When `true`, collects all start values from all source rows and performs a single unified BFS traversal. Default is `false`. The output changes to two arrays: `[Array<sourceRows>, Array<lookupResults>]`. See [Batch Mode](#batch-mode) for details. |
4041
| `usePIT=(true \| false)` | Optional | When `true`, enables PIT (Point In Time) search for the lookup table, allowing paginated retrieval of complete results without the `max_result_window` size limit. Default is `false`. See [PIT Search](#pit-search) for details. |
42+
| `filter=(<condition>)` | Optional | A filter condition to restrict which lookup table documents participate in the graph traversal. Only documents matching the condition are considered as candidates during BFS. Parentheses around the condition are required. Example: `filter=(status = 'active' AND age > 18)`. |
4143
| `as <outputField>` | Required | The name of the output array field that will contain all documents found during the graph traversal. |
4244

4345
## How It Works
@@ -329,6 +331,26 @@ source = employees
329331
as reportingHierarchy
330332
```
331333

334+
## Filtered Graph Traversal
335+
336+
The `filter` parameter restricts which documents in the lookup table are considered during the BFS traversal. Only documents matching the filter condition participate as candidates at each traversal level.
337+
338+
### Example
339+
340+
The following query traverses only active employees in the reporting hierarchy:
341+
342+
```ppl ignore
343+
source = employees
344+
| graphLookup employees
345+
startField=reportsTo
346+
fromField=reportsTo
347+
toField=name
348+
filter=(status = 'active')
349+
as reportingHierarchy
350+
```
351+
352+
The filter is applied at the OpenSearch query level, so it combines efficiently with the BFS traversal queries. At each BFS level, the query sent to OpenSearch is effectively: `bool { filter: [user_filter, bfs_terms_query] }`.
353+
332354
## Limitations
333355

334356
- The source input, which provides the starting point for the traversal, has a limitation of 100 documents to avoid performance issues.

docs/user/ppl/index.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ source=accounts
8282
| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. |
8383
| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. |
8484
| [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.4) | Combines values of a specified field across rows identical on all other fields. |
85+
| [graphlookup command](cmd/graphlookup.md) | 3.5 | experimental (since 3.5) | Performs recursive graph traversal on a collection using a BFS algorithm.|
86+
8587

8688
- [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting
8789
* **Functions**
@@ -101,4 +103,4 @@ source=accounts
101103
* **Optimization**
102104
- [Optimization](../../user/optimization/optimization.rst)
103105
* **Limitations**
104-
- [Limitations](limitations/limitations.md)
106+
- [Limitations](limitations/limitations.md)

0 commit comments

Comments
 (0)