Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions docs/user/ppl/cmd/lookup.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The `lookup` command enriches search data by adding or replacing values from a l
The `lookup` command has the following syntax:

```syntax
lookup <lookupIndex> (<lookupMappingField> [as <sourceMappingField>])... [(replace | append) (<inputField> [as <outputField>])...]
lookup <lookupIndex> (<lookupMappingField> [as <sourceMappingField>])... [(replace | append | output) (<inputField> [as <outputField>])...]
```

The following are examples of the `lookup` command syntax:
Expand All @@ -21,6 +21,8 @@ source = table1 | lookup table2 id as cid, name replace dept as department
source = table1 | lookup table2 id as cid, name replace dept as department, city as location
source = table1 | lookup table2 id as cid, name append dept as department
source = table1 | lookup table2 id as cid, name append dept as department, city as location
source = table1 | lookup table2 id as cid, name output dept as department
source = table1 | lookup table2 id as cid, name output dept as department, city as location
```

## Parameters
Expand All @@ -34,7 +36,7 @@ The `lookup` command supports the following parameters.
| `<sourceMappingField>` | Optional | A key from the source data (left side) used for matching, similar to a join key in the left table. Default is `lookupMappingField`. |
| `<inputField>` | Optional | A field in the lookup index whose matched values are applied to the results (output). Specify multiple fields as a comma-separated list. If not specified, all fields except `lookupMappingField` from the lookup index are applied to the results. |
| `<outputField>` | Optional | The name of the field in the results (output) in which matched values are placed. Specify multiple fields as a comma-separated list. If the `outputField` specifies an existing field in the source query, its values are replaced by, or its missing values are filled with, matched values from the `inputField`. If the field specified in the `outputField` is not an existing field, a new field is added to the results when using `replace` (or its synonym `output`), or the operation fails when using `append`. |
| `(replace | append)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. Default is `replace`. |
| `(replace \| append \| output)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. `output` is a synonym for `replace` (provided for SPL compatibility). Default is `replace`. |

## Example 1: Replace existing values

Expand Down Expand Up @@ -121,4 +123,29 @@ The query returns the following results:
| 1005 | Jane | Scientist | Canada | 90000 | Engineer |
| 1002 | John | Doctor | Canada | 120000 | Scientist |
+------+-------+------------+---------+--------+-----------+
```
```

## Example 5: Using the OUTPUT keyword

The `OUTPUT` keyword is a synonym for `REPLACE`. The following query demonstrates using `OUTPUT` to overwrite existing values:

```ppl ignore
source = worker
| LOOKUP work_information uid AS id OUTPUT department
| fields id, name, occupation, country, salary, department
```

This query produces the same results as Example 1 (using `REPLACE`):

```text
+------+-------+------------+---------+--------+------------+
| id | name | occupation | country | salary | department |
|------+-------+------------+---------+--------+------------|
| 1000 | Jake | Engineer | England | 100000 | IT |
| 1001 | Hello | Artist | USA | 70000 | null |
| 1002 | John | Doctor | Canada | 120000 | DATA |
| 1003 | David | Doctor | null | 120000 | HR |
| 1004 | David | null | Canada | 0 | null |
| 1005 | Jane | Scientist | Canada | 90000 | DATA |
+------+-------+------------+---------+--------+------------+
```
Original file line number Diff line number Diff line change
Expand Up @@ -438,4 +438,59 @@ public void testRnameAsIdShouldnWork() throws IOException {
TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION));
verifyNumOfRows(result, 6);
}

/**
 * Runs a lookup that uses the OUTPUT keyword and checks the returned schema and rows.
 *
 * <p>OUTPUT is accepted as a synonym for REPLACE (SPL compatibility).
 */
@Test
public void testUidAsIdOutputDepartment() throws IOException {
  final String query =
      String.format(
          "source = %s"
              + "| LOOKUP %s uid AS id OUTPUT department"
              + "| fields id, name, occupation, country, salary, department",
          TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION);
  final JSONObject response = executeQuery(query);

  // Column names and types, in the order requested by the trailing `fields` command.
  verifySchema(
      response,
      schema("id", "int"),
      schema("name", "string"),
      schema("occupation", "string"),
      schema("country", "string"),
      schema("salary", "int"),
      schema("department", "string"));

  // One row per worker; department is null for ids 1001 and 1004.
  verifyDataRows(
      response,
      rows(1000, "Jake", "Engineer", "England", 100000, "IT"),
      rows(1001, "Hello", "Artist", "USA", 70000, null),
      rows(1002, "John", "Doctor", "Canada", 120000, "DATA"),
      rows(1003, "David", "Doctor", null, 120000, "HR"),
      rows(1004, "David", null, "Canada", 0, null),
      rows(1005, "Jane", "Scientist", "Canada", 90000, "DATA"));
}

/**
 * Runs a lookup that uses OUTPUT with a field alias ({@code department AS country}) and checks
 * the returned schema and rows (SPL compatibility).
 */
@Test
public void testUidAsIdOutputDepartmentAsCountry() throws IOException {
  final String query =
      String.format(
          "source = %s"
              + "| LOOKUP %s uid AS id OUTPUT department AS country"
              + "| fields id, name, occupation, salary, country",
          TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION);
  final JSONObject response = executeQuery(query);

  // Column names and types, in the order requested by the trailing `fields` command.
  verifySchema(
      response,
      schema("id", "int"),
      schema("name", "string"),
      schema("occupation", "string"),
      schema("salary", "int"),
      schema("country", "string"));

  // The country column is expected to carry the looked-up department values.
  verifyDataRows(
      response,
      rows(1000, "Jake", "Engineer", 100000, "IT"),
      rows(1001, "Hello", "Artist", 70000, null),
      rows(1002, "John", "Doctor", 120000, "DATA"),
      rows(1003, "David", "Doctor", 120000, "HR"),
      rows(1004, "David", null, 0, null),
      rows(1005, "Jane", "Scientist", 90000, "DATA"));
}
}
2 changes: 1 addition & 1 deletion ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ patternMode

// lookup
lookupCommand
: LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)?
: LOOKUP tableSource lookupMappingList ((APPEND | REPLACE | OUTPUT) outputCandidateList)?
;

lookupMappingList
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ public UnresolvedPlan visitPatternsCommand(OpenSearchPPLParser.PatternsCommandCo
@Override
public UnresolvedPlan visitLookupCommand(OpenSearchPPLParser.LookupCommandContext ctx) {
Relation lookupRelation = new Relation(this.internalVisitExpression(ctx.tableSource()));
// OUTPUT and REPLACE are synonyms - both overwrite existing fields
Lookup.OutputStrategy strategy =
ctx.APPEND() != null ? Lookup.OutputStrategy.APPEND : Lookup.OutputStrategy.REPLACE;
java.util.Map<String, String> mappingAliasMap =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,114 @@ public void testReplaceAs() {
verifyPPLToSparkSQL(root, expectedSparkSql);
}

/**
 * Checks planning and execution of {@code lookup DEPT DEPTNO output LOC} over EMP.
 *
 * <p>Asserts, in order, the logical plan, the result rows, and the generated Spark SQL.
 */
@Test
public void testOutput() {
// OUTPUT is a synonym for REPLACE (for SPL compatibility)
String ppl = "source=EMP | lookup DEPT DEPTNO output LOC";
RelNode root = getRelNode(ppl);
// Expected plan: EMP left-joined on DEPTNO to a (LOC, DEPTNO) projection of DEPT; the outer
// projection keeps every EMP column and adds only LOC.
String expectedLogical =
"LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
+ " COMM=[$6], DEPTNO=[$7], LOC=[$8])\n"
+ " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n"
+ " LogicalTableScan(table=[[scott, EMP]])\n"
+ " LogicalProject(LOC=[$2], DEPTNO=[$0])\n"
+ " LogicalTableScan(table=[[scott, DEPT]])\n";
verifyLogical(root, expectedLogical);

// Expected rows: 14 EMP rows, each with the LOC for its DEPTNO
// (10 -> NEW YORK, 20 -> DALLAS, 30 -> CHICAGO).
String expectedResult =
"EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;"
+ " COMM=null; DEPTNO=10; LOC=NEW YORK\n"
+ "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;"
+ " COMM=null; DEPTNO=10; LOC=NEW YORK\n"
+ "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;"
+ " COMM=null; DEPTNO=10; LOC=NEW YORK\n"
+ "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00;"
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
+ "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;"
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
+ "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;"
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
+ "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;"
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
+ "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;"
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
+ "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;"
+ " COMM=300.00; DEPTNO=30; LOC=CHICAGO\n"
+ "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;"
+ " COMM=500.00; DEPTNO=30; LOC=CHICAGO\n"
+ "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;"
+ " COMM=1400.00; DEPTNO=30; LOC=CHICAGO\n"
+ "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;"
+ " COMM=null; DEPTNO=30; LOC=CHICAGO\n"
+ "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;"
+ " COMM=0.00; DEPTNO=30; LOC=CHICAGO\n"
+ "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;"
+ " COMM=null; DEPTNO=30; LOC=CHICAGO\n";
verifyResult(root, expectedResult);

// Expected Spark SQL: the same left join, with the DEPT projection aliased as `t`.
String expectedSparkSql =
"SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`,"
+ " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `t`.`LOC`\n"
+ "FROM `scott`.`EMP`\n"
+ "LEFT JOIN (SELECT `LOC`, `DEPTNO`\n"
+ "FROM `scott`.`DEPT`) `t` ON `EMP`.`DEPTNO` = `t`.`DEPTNO`";
verifyPPLToSparkSQL(root, expectedSparkSql);
}

/**
 * Checks planning and execution of {@code lookup DEPT DEPTNO output LOC as JOB} over EMP.
 *
 * <p>Asserts, in order, the logical plan, the result rows, and the generated Spark SQL.
 */
@Test
public void testOutputAs() {
// OUTPUT with field aliasing (SPL compatibility)
String ppl = "source=EMP | lookup DEPT DEPTNO output LOC as JOB";
RelNode root = getRelNode(ppl);
// Expected plan: EMP's own JOB column ($2) is absent from the outer projection, and the
// looked-up LOC ($8) is projected last under the name JOB.
String expectedLogical =
"LogicalProject(EMPNO=[$0], ENAME=[$1], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6],"
+ " DEPTNO=[$7], JOB=[$8])\n"
+ " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n"
+ " LogicalTableScan(table=[[scott, EMP]])\n"
+ " LogicalProject(LOC=[$2], DEPTNO=[$0])\n"
+ " LogicalTableScan(table=[[scott, DEPT]])\n";
verifyLogical(root, expectedLogical);

// Expected rows: 14 EMP rows whose JOB value is the department location
// (10 -> NEW YORK, 20 -> DALLAS, 30 -> CHICAGO).
String expectedResult =
"EMPNO=7782; ENAME=CLARK; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; COMM=null; DEPTNO=10;"
+ " JOB=NEW YORK\n"
+ "EMPNO=7839; ENAME=KING; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; COMM=null;"
+ " DEPTNO=10; JOB=NEW YORK\n"
+ "EMPNO=7934; ENAME=MILLER; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; COMM=null;"
+ " DEPTNO=10; JOB=NEW YORK\n"
+ "EMPNO=7369; ENAME=SMITH; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;"
+ " DEPTNO=20; JOB=DALLAS\n"
+ "EMPNO=7566; ENAME=JONES; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; COMM=null;"
+ " DEPTNO=20; JOB=DALLAS\n"
+ "EMPNO=7788; ENAME=SCOTT; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; COMM=null;"
+ " DEPTNO=20; JOB=DALLAS\n"
+ "EMPNO=7876; ENAME=ADAMS; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; COMM=null;"
+ " DEPTNO=20; JOB=DALLAS\n"
+ "EMPNO=7902; ENAME=FORD; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; COMM=null;"
+ " DEPTNO=20; JOB=DALLAS\n"
+ "EMPNO=7499; ENAME=ALLEN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; COMM=300.00;"
+ " DEPTNO=30; JOB=CHICAGO\n"
+ "EMPNO=7521; ENAME=WARD; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; COMM=500.00;"
+ " DEPTNO=30; JOB=CHICAGO\n"
+ "EMPNO=7654; ENAME=MARTIN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; COMM=1400.00;"
+ " DEPTNO=30; JOB=CHICAGO\n"
+ "EMPNO=7698; ENAME=BLAKE; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; COMM=null;"
+ " DEPTNO=30; JOB=CHICAGO\n"
+ "EMPNO=7844; ENAME=TURNER; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; COMM=0.00;"
+ " DEPTNO=30; JOB=CHICAGO\n"
+ "EMPNO=7900; ENAME=JAMES; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; COMM=null;"
+ " DEPTNO=30; JOB=CHICAGO\n";
verifyResult(root, expectedResult);

// Expected Spark SQL: the same left join, selecting `t`.`LOC` aliased as `JOB`.
String expectedSparkSql =
"SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`MGR`, `EMP`.`HIREDATE`, `EMP`.`SAL`,"
+ " `EMP`.`COMM`, `EMP`.`DEPTNO`, `t`.`LOC` `JOB`\n"
+ "FROM `scott`.`EMP`\n"
+ "LEFT JOIN (SELECT `LOC`, `DEPTNO`\n"
+ "FROM `scott`.`DEPT`) `t` ON `EMP`.`DEPTNO` = `t`.`DEPTNO`";
verifyPPLToSparkSQL(root, expectedSparkSql);
}

@Ignore
public void testMultipleLookupKeysReplace() {
String ppl =
Expand Down
Loading