Skip to content

Commit 37bfac3

Browse files
committed
Use left join in expand to keep documents where their expanded array field is empty
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent 4587b5d commit 37bfac3

2 files changed

Lines changed: 126 additions & 2 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,9 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) {
10301030
// array field.
10311031
// The last parameter has to refer to the array to be expanded on the left side. It will
10321032
// be used by the right side to correlate with the left side.
1033-
relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex));
1033+
// Using left join to keep the records where the array field is empty. The corresponding
1034+
// field in the result will be null.
1035+
relBuilder.correlate(JoinRelType.LEFT, correlVariable.get().id, List.of(arrayFieldRex));
10341036

10351037
// 9. Remove the original array field from the output.
10361038
// TODO: RFC: should we keep the original array field when alias is present?

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.json.JSONObject;
2020
import org.junit.Ignore;
2121
import org.junit.jupiter.api.Test;
22+
import org.opensearch.client.Request;
2223
import org.opensearch.sql.ppl.PPLIntegTestCase;
2324

2425
public class CalciteExpandCommandIT extends PPLIntegTestCase {
@@ -41,7 +42,6 @@ public void testExpandOnNested() throws Exception {
4142
schema("age", "bigint"),
4243
schema("id", "bigint"),
4344
schema("address", "struct"));
44-
verifyNumOfRows(response, 11);
4545
verifyDataRows(
4646
response,
4747
rows(
@@ -276,4 +276,126 @@ public void testExpandWithEval() throws Exception {
276276
schema("addr", "struct"));
277277
verifyNumOfRows(response, 11);
278278
}
279+
280+
@Test
281+
public void testExpandEmptyArray() throws Exception {
282+
final int docId = 6;
283+
Request insertRequest =
284+
new Request(
285+
"PUT", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId));
286+
insertRequest.setJsonEntity("{\"name\":\"ben\",\"age\":47, \"id\": 437821, \"address\":[]}\n");
287+
client().performRequest(insertRequest);
288+
289+
JSONObject response =
290+
executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE));
291+
verifySchema(
292+
response,
293+
schema("name", "string"),
294+
schema("age", "bigint"),
295+
schema("id", "bigint"),
296+
schema("address", "struct"));
297+
verifyDataRows(
298+
response,
299+
rows(
300+
"abbas",
301+
null,
302+
24,
303+
new JSONObject()
304+
.put("city", "New york city")
305+
.put("state", "NY")
306+
.put("moveInDate", new JSONObject().put("dateAndTime", "1984-04-12 09:07:42"))),
307+
rows(
308+
"abbas",
309+
null,
310+
24,
311+
new JSONObject()
312+
.put("city", "bellevue")
313+
.put("state", "WA")
314+
.put(
315+
"moveInDate",
316+
new JSONArray()
317+
.put(new JSONObject().put("dateAndTime", "2023-05-03 08:07:42"))
318+
.put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))),
319+
rows(
320+
"abbas",
321+
null,
322+
24,
323+
new JSONObject()
324+
.put("city", "seattle")
325+
.put("state", "WA")
326+
.put("moveInDate", new JSONObject().put("dateAndTime", "1966-03-19 03:04:55"))),
327+
rows(
328+
"abbas",
329+
null,
330+
24,
331+
new JSONObject()
332+
.put("city", "chicago")
333+
.put("state", "IL")
334+
.put("moveInDate", new JSONObject().put("dateAndTime", "2011-06-01 01:01:42"))),
335+
rows(
336+
"chen",
337+
null,
338+
32,
339+
new JSONObject()
340+
.put("city", "Miami")
341+
.put("state", "Florida")
342+
.put("moveInDate", new JSONObject().put("dateAndTime", "1901-08-11 04:03:33"))),
343+
rows(
344+
"chen",
345+
null,
346+
32,
347+
new JSONObject()
348+
.put("city", "los angeles")
349+
.put("state", "CA")
350+
.put("moveInDate", new JSONObject().put("dateAndTime", "2023-05-03 08:07:42"))),
351+
rows(
352+
"peng",
353+
null,
354+
26,
355+
new JSONObject()
356+
.put("city", "san diego")
357+
.put("state", "CA")
358+
.put("moveInDate", new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))),
359+
rows(
360+
"peng",
361+
null,
362+
26,
363+
new JSONObject()
364+
.put("city", "austin")
365+
.put("state", "TX")
366+
.put("moveInDate", new JSONObject().put("dateAndTime", "1977-07-13 09:04:41"))),
367+
rows(
368+
"andy",
369+
4,
370+
19,
371+
new JSONObject()
372+
.put("city", "houston")
373+
.put("state", "TX")
374+
.put("moveInDate", new JSONObject().put("dateAndTime", "1933-12-12 05:05:45"))),
375+
rows(
376+
"david",
377+
null,
378+
25,
379+
new JSONObject()
380+
.put("city", "raleigh")
381+
.put("state", "NC")
382+
.put("moveInDate", new JSONObject().put("dateAndTime", "1909-06-17 01:04:21"))),
383+
rows(
384+
"david",
385+
null,
386+
25,
387+
new JSONObject()
388+
.put("city", "charlotte")
389+
.put("state", "SC")
390+
.put(
391+
"moveInDate",
392+
new JSONArray()
393+
.put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))),
394+
rows("ben", 437821, 47, null));
395+
396+
verifyNumOfRows(response, 12);
397+
Request deleteRequest =
398+
new Request("DELETE", String.format("/%s/_doc/%d", TEST_INDEX_NESTED_SIMPLE, docId));
399+
client().performRequest(deleteRequest);
400+
}
279401
}

0 commit comments

Comments
 (0)