From 96860cb22892f9bf5740bcebf27ae7ad58f7a9bf Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Wed, 14 Jan 2026 21:57:58 +0000 Subject: [PATCH 1/2] Add OUTPUT as an alias for REPLACE in Lookup Signed-off-by: Simeon Widdis --- docs/user/ppl/cmd/lookup.md | 33 +++++- .../calcite/remote/CalcitePPLLookupIT.java | 55 +++++++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 1 + .../sql/ppl/calcite/CalcitePPLLookupTest.java | 108 ++++++++++++++++++ 5 files changed, 195 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/cmd/lookup.md b/docs/user/ppl/cmd/lookup.md index 745a39a28bf..954e1fbb0d3 100644 --- a/docs/user/ppl/cmd/lookup.md +++ b/docs/user/ppl/cmd/lookup.md @@ -8,7 +8,7 @@ The `lookup` command enriches search data by adding or replacing values from a l The `lookup` command has the following syntax: ```syntax -lookup <lookupIndex> (<lookupMappingField> [as <sourceMappingField>])... [(replace | append) (<inputField> [as <outputField>])...] +lookup <lookupIndex> (<lookupMappingField> [as <sourceMappingField>])... [(replace | append | output) (<inputField> [as <outputField>])...] ``` The following are examples of the `lookup` command syntax: @@ -21,6 +21,8 @@ source = table1 | lookup table2 id as cid, name replace dept as department source = table1 | lookup table2 id as cid, name replace dept as department, city as location source = table1 | lookup table2 id as cid, name append dept as department source = table1 | lookup table2 id as cid, name append dept as department, city as location +source = table1 | lookup table2 id as cid, name output dept as department +source = table1 | lookup table2 id as cid, name output dept as department, city as location ``` ## Parameters @@ -34,7 +36,7 @@ The `lookup` command supports the following parameters. | `<sourceMappingField>` | Optional | A key from the source data (left side) used for matching, similar to a join key in the left table. Default is `lookupMappingField`. | | `<inputField>` | Optional | A field in the lookup index whose matched values are applied to the results (output). Specify multiple fields as a comma-separated list. 
If not specified, all fields except `lookupMappingField` from the lookup index are applied to the results. | | `<outputField>` | Optional | The name of the field in the results (output) in which matched values are placed. Specify multiple fields as a comma-separated list. If the `outputField` specifies an existing field in the source query, its values are replaced or appended with matched values from the `inputField`. If the field specified in the `outputField` is not an existing field, a new field is added to the results when using `replace`, or the operation fails when using `append`. | -| `(replace | append)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. Default is `replace`. | +| `(replace | append | output)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. `output` is a synonym for `replace` (provided for SPL compatibility). Default is `replace`. | ## Example 1: Replace existing values @@ -121,4 +123,29 @@ The query returns the following results: | 1005 | Jane | Scientist | Canada | 90000 | Engineer | | 1002 | John | Doctor | Canada | 120000 | Scientist | +------+-------+------------+---------+--------+-----------+ -``` \ No newline at end of file +``` + +## Example 5: Using OUTPUT keyword + +The `OUTPUT` keyword is a synonym for `REPLACE`. 
The following query demonstrates using `OUTPUT` to overwrite existing values: + +```ppl ignore +source = worker + | LOOKUP work_information uid AS id OUTPUT department + | fields id, name, occupation, country, salary, department +``` + +This query produces the same results as Example 1 (using `REPLACE`): + +```text ++------+-------+------------+---------+--------+------------+ +| id | name | occupation | country | salary | department | +|------+-------+------------+---------+--------+------------| +| 1000 | Jake | Engineer | England | 100000 | IT | +| 1001 | Hello | Artist | USA | 70000 | null | +| 1002 | John | Doctor | Canada | 120000 | DATA | +| 1003 | David | Doctor | null | 120000 | HR | +| 1004 | David | null | Canada | 0 | null | +| 1005 | Jane | Scientist | Canada | 90000 | DATA | ++------+-------+------------+---------+--------+------------+ +``` diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLLookupIT.java index dd1086c8cae..f039a54e39a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLLookupIT.java @@ -438,4 +438,59 @@ public void testRnameAsIdShouldnWork() throws IOException { TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION)); verifyNumOfRows(result, 6); } + + @Test + public void testUidAsIdOutputDepartment() throws IOException { + // OUTPUT is a synonym for REPLACE (SPL compatibility) + JSONObject result = + executeQuery( + String.format( + "source = %s" + + "| LOOKUP %s uid AS id OUTPUT department" + + "| fields id, name, occupation, country, salary, department", + TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION)); + verifySchema( + result, + schema("id", "int"), + schema("name", "string"), + schema("occupation", "string"), + schema("country", "string"), + schema("salary", "int"), + schema("department", "string")); + 
verifyDataRows( + result, + rows(1000, "Jake", "Engineer", "England", 100000, "IT"), + rows(1001, "Hello", "Artist", "USA", 70000, null), + rows(1002, "John", "Doctor", "Canada", 120000, "DATA"), + rows(1003, "David", "Doctor", null, 120000, "HR"), + rows(1004, "David", null, "Canada", 0, null), + rows(1005, "Jane", "Scientist", "Canada", 90000, "DATA")); + } + + @Test + public void testUidAsIdOutputDepartmentAsCountry() throws IOException { + // OUTPUT with field aliasing (SPL compatibility) + JSONObject result = + executeQuery( + String.format( + "source = %s" + + "| LOOKUP %s uid AS id OUTPUT department AS country" + + "| fields id, name, occupation, salary, country", + TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION)); + verifySchema( + result, + schema("id", "int"), + schema("name", "string"), + schema("occupation", "string"), + schema("salary", "int"), + schema("country", "string")); + verifyDataRows( + result, + rows(1000, "Jake", "Engineer", 100000, "IT"), + rows(1001, "Hello", "Artist", 70000, null), + rows(1002, "John", "Doctor", 120000, "DATA"), + rows(1003, "David", "Doctor", 120000, "HR"), + rows(1004, "David", null, 0, null), + rows(1005, "Jane", "Scientist", 90000, "DATA")); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 7045796a03c..e4500ee1e6f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -476,7 +476,7 @@ patternMode // lookup lookupCommand - : LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)? + : LOOKUP tableSource lookupMappingList ((APPEND | REPLACE | OUTPUT) outputCandidateList)? 
; lookupMappingList diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3f4f3049365..25f39c34405 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -972,6 +972,7 @@ public UnresolvedPlan visitPatternsCommand(OpenSearchPPLParser.PatternsCommandCo @Override public UnresolvedPlan visitLookupCommand(OpenSearchPPLParser.LookupCommandContext ctx) { Relation lookupRelation = new Relation(this.internalVisitExpression(ctx.tableSource())); + // OUTPUT and REPLACE are synonyms - both overwrite existing fields Lookup.OutputStrategy strategy = ctx.APPEND() != null ? Lookup.OutputStrategy.APPEND : Lookup.OutputStrategy.REPLACE; java.util.Map mappingAliasMap = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLLookupTest.java index 192170e0407..fc872192f25 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLLookupTest.java @@ -122,6 +122,114 @@ public void testReplaceAs() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testOutput() { + // OUTPUT is a synonym for REPLACE (for SPL compatibility) + String ppl = "source=EMP | lookup DEPT DEPTNO output LOC"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], LOC=[$8])\n" + + " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(LOC=[$2], DEPTNO=[$0])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; 
HIREDATE=1981-06-09; SAL=2450.00;" + + " COMM=null; DEPTNO=10; LOC=NEW YORK\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;" + + " COMM=null; DEPTNO=10; LOC=NEW YORK\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" + + " COMM=null; DEPTNO=10; LOC=NEW YORK\n" + + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00;" + + " COMM=null; DEPTNO=20; LOC=DALLAS\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;" + + " COMM=null; DEPTNO=20; LOC=DALLAS\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;" + + " COMM=null; DEPTNO=20; LOC=DALLAS\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" + + " COMM=null; DEPTNO=20; LOC=DALLAS\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;" + + " COMM=null; DEPTNO=20; LOC=DALLAS\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; LOC=CHICAGO\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; LOC=CHICAGO\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;" + + " COMM=1400.00; DEPTNO=30; LOC=CHICAGO\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;" + + " COMM=null; DEPTNO=30; LOC=CHICAGO\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;" + + " COMM=0.00; DEPTNO=30; LOC=CHICAGO\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" + + " COMM=null; DEPTNO=30; LOC=CHICAGO\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `t`.`LOC`\n" + 
+ "FROM `scott`.`EMP`\n" + + "LEFT JOIN (SELECT `LOC`, `DEPTNO`\n" + + "FROM `scott`.`DEPT`) `t` ON `EMP`.`DEPTNO` = `t`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testOutputAs() { + // OUTPUT with field aliasing (SPL compatibility) + String ppl = "source=EMP | lookup DEPT DEPTNO output LOC as JOB"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6]," + + " DEPTNO=[$7], JOB=[$8])\n" + + " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(LOC=[$2], DEPTNO=[$0])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "EMPNO=7782; ENAME=CLARK; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; COMM=null; DEPTNO=10;" + + " JOB=NEW YORK\n" + + "EMPNO=7839; ENAME=KING; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; COMM=null;" + + " DEPTNO=10; JOB=NEW YORK\n" + + "EMPNO=7934; ENAME=MILLER; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; COMM=null;" + + " DEPTNO=10; JOB=NEW YORK\n" + + "EMPNO=7369; ENAME=SMITH; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; JOB=DALLAS\n" + + "EMPNO=7566; ENAME=JONES; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; COMM=null;" + + " DEPTNO=20; JOB=DALLAS\n" + + "EMPNO=7788; ENAME=SCOTT; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; COMM=null;" + + " DEPTNO=20; JOB=DALLAS\n" + + "EMPNO=7876; ENAME=ADAMS; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; COMM=null;" + + " DEPTNO=20; JOB=DALLAS\n" + + "EMPNO=7902; ENAME=FORD; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; COMM=null;" + + " DEPTNO=20; JOB=DALLAS\n" + + "EMPNO=7499; ENAME=ALLEN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; COMM=300.00;" + + " DEPTNO=30; JOB=CHICAGO\n" + + "EMPNO=7521; ENAME=WARD; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; COMM=500.00;" + + " DEPTNO=30; JOB=CHICAGO\n" + + "EMPNO=7654; 
ENAME=MARTIN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; COMM=1400.00;" + " DEPTNO=30; JOB=CHICAGO\n" + "EMPNO=7698; ENAME=BLAKE; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; COMM=null;" + " DEPTNO=30; JOB=CHICAGO\n" + "EMPNO=7844; ENAME=TURNER; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; COMM=0.00;" + " DEPTNO=30; JOB=CHICAGO\n" + "EMPNO=7900; ENAME=JAMES; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; COMM=null;" + " DEPTNO=30; JOB=CHICAGO\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`MGR`, `EMP`.`HIREDATE`, `EMP`.`SAL`," + " `EMP`.`COMM`, `EMP`.`DEPTNO`, `t`.`LOC` `JOB`\n" + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `LOC`, `DEPTNO`\n" + "FROM `scott`.`DEPT`) `t` ON `EMP`.`DEPTNO` = `t`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Ignore public void testMultipleLookupKeysReplace() { String ppl = From 04b0699617b5181985cef617b6a5abbba5823287 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Wed, 14 Jan 2026 22:07:33 +0000 Subject: [PATCH 2/2] Escape pipes in lookup Signed-off-by: Simeon Widdis --- docs/user/ppl/cmd/lookup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/ppl/cmd/lookup.md b/docs/user/ppl/cmd/lookup.md index 954e1fbb0d3..90f747b0ee8 100644 --- a/docs/user/ppl/cmd/lookup.md +++ b/docs/user/ppl/cmd/lookup.md @@ -36,7 +36,7 @@ The `lookup` command supports the following parameters. | `<sourceMappingField>` | Optional | A key from the source data (left side) used for matching, similar to a join key in the left table. Default is `lookupMappingField`. | | `<inputField>` | Optional | A field in the lookup index whose matched values are applied to the results (output). Specify multiple fields as a comma-separated list. If not specified, all fields except `lookupMappingField` from the lookup index are applied to the results. | | `<outputField>` | Optional | The name of the field in the results (output) in which matched values are placed. 
Specify multiple fields as a comma-separated list. If the `outputField` specifies an existing field in the source query, its values are replaced or appended with matched values from the `inputField`. If the field specified in the `outputField` is not an existing field, a new field is added to the results when using `replace`, or the operation fails when using `append`. | -| `(replace | append | output)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. `output` is a synonym for `replace` (provided for SPL compatibility). Default is `replace`. | +| `(replace \| append \| output)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. `output` is a synonym for `replace` (provided for SPL compatibility). Default is `replace`. | ## Example 1: Replace existing values