From d69b0299246f6237f96a2866f8ad04bb940ec866 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 19 Jun 2026 15:39:07 -0700 Subject: [PATCH 1/2] Fix NPE when rex sits inside appendcol subsearch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rex.getChild() returned ImmutableList.of(child) with no null guard. For a top-level "... | rex ..." that's fine because the parser always sets child. But the rex in "appendcol [ rex ... | ... ]" is the leaf of the subsearch as parsed — child stays null until visitAppendCol calls PlanUtils.transformPlanToAttachChild to wire the main relation in at the subsearch leaf. That walker recurses through getChild() to find the attach point, so it hits Rex.getChild() before child is set; ImmutableList.of(null) then throws NPE and the query 500s. Every sibling AST node (Project / Filter / Eval / Aggregation) already uses "child == null ? ImmutableList.of() : ImmutableList.of(child)" — Rex is the only one missing the guard. This change mirrors the same pattern. Reproduces with: source = signoff_events | stats count() as base_c by event_type | appendcol [ rex field=region "^(?[a-z]+)-.*$" | fields event_id, r1 ] Without the fix the stack is: NPE -> SingletonImmutableList. -> ImmutableList.of(ImmutableList.java:99) -> Rex.getChild(Rex.java:82) -> PlanUtils$5.visitChildren(PlanUtils.java:462) -> AbstractNodeVisitor.visitRex(AbstractNodeVisitor.java:309) -> Rex.accept New regression test CalcitePPLRexTest.testRexInsideAppendCol exercises the rex-in-appendcol pipeline and asserts planning succeeds. Verified to fail with the exact stack above on unpatched main and pass with the fix. Signed-off-by: Jialiang Liang --- .../java/org/opensearch/sql/ast/tree/Rex.java | 2 +- .../sql/ppl/calcite/CalcitePPLRexTest.java | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Rex.java b/core/src/main/java/org/opensearch/sql/ast/tree/Rex.java index c3b1d975ac1..4b0e2188617 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Rex.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Rex.java @@ -79,7 +79,7 @@ public Rex attach(UnresolvedPlan child) { @Override public List getChild() { - return ImmutableList.of(child); + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java index 619cb26b64a..a7dde4edf86 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java @@ -306,4 +306,25 @@ public void testRexWithMaxMatchAndOffsetField() { + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + /** + * Regression for the rex-inside-appendcol NPE. The subsearch starts with rex (no upstream + * source), so the parsed Rex node has a null {@code child} until {@link + * org.opensearch.sql.calcite.utils.PlanUtils#transformPlanToAttachChild} walks the subsearch tree + * to attach the main relation. That walker calls {@code getChild()} on every node it crosses. + * {@code Rex.getChild()} previously returned {@code ImmutableList.of(child)} which throws NPE + * when {@code child} is null; sibling AST nodes (Project / Filter / Eval / Aggregation) all guard + * with {@code child == null ? ImmutableList.of() : ...}. This test ensures the rex-in-appendcol + * pipeline plans without NPE. + */ + @Test + public void testRexInsideAppendCol() { + String ppl = + "source=EMP | stats count() as base_c by JOB" + + " | appendcol [ rex field=ENAME '^(?[A-Z])' | fields ENAME, first ]"; + RelNode root = getRelNode(ppl); + // Just verify planning succeeds (does not throw). Plan-shape assertions for AppendCol's join + // shape live in CalcitePPLAppendColumnsTest; this regression only covers the NPE. + org.junit.Assert.assertNotNull(root); + } } From a8dd18ccb19bdddfd556798f667ed02590d23930 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 19 Jun 2026 15:46:37 -0700 Subject: [PATCH 2/2] Add integration test for rex in subsearch + simplify unit-test javadoc Adds CalcitePPLAppendcolIT.testAppendColWithRexInSubsearch to cover the end-to-end execution path against a real cluster. Trims the existing unit-test javadoc to a single line describing what is being verified, not the historical bug shape. Signed-off-by: Jialiang Liang --- .../sql/calcite/remote/CalcitePPLAppendcolIT.java | 14 ++++++++++++++ .../sql/ppl/calcite/CalcitePPLRexTest.java | 15 ++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendcolIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendcolIT.java index 877c10947b8..c6fc62fae74 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendcolIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendcolIT.java @@ -81,4 +81,18 @@ public void testAppendColOverride() throws IOException { rows("F", "DE", 101, null), rows("F", "FL", 310, null)); } + + /** Verifies that rex can be used as the first command of an appendcol subsearch. */ + @Test + public void testAppendColWithRexInSubsearch() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats count() as cnt by gender" + + " | appendcol [ rex field=email '^(?[^@]+)@.*' | fields user ]" + + " | head 2", + TEST_INDEX_ACCOUNT)); + verifySchema( + actual, schema("gender", "string"), schema("cnt", "bigint"), schema("user", "string")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java index a7dde4edf86..2275f9352de 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java @@ -307,24 +307,13 @@ public void testRexWithMaxMatchAndOffsetField() { verifyPPLToSparkSQL(root, expectedSparkSql); } - /** - * Regression for the rex-inside-appendcol NPE. The subsearch starts with rex (no upstream - * source), so the parsed Rex node has a null {@code child} until {@link - * org.opensearch.sql.calcite.utils.PlanUtils#transformPlanToAttachChild} walks the subsearch tree - * to attach the main relation. That walker calls {@code getChild()} on every node it crosses. - * {@code Rex.getChild()} previously returned {@code ImmutableList.of(child)} which throws NPE - * when {@code child} is null; sibling AST nodes (Project / Filter / Eval / Aggregation) all guard - * with {@code child == null ? ImmutableList.of() : ...}. This test ensures the rex-in-appendcol - * pipeline plans without NPE. - */ + /** Verifies that rex plans correctly when it appears as the first command of a subsearch. */ @Test - public void testRexInsideAppendCol() { + public void testRexInsideSubsearch() { String ppl = "source=EMP | stats count() as base_c by JOB" + " | appendcol [ rex field=ENAME '^(?[A-Z])' | fields ENAME, first ]"; RelNode root = getRelNode(ppl); - // Just verify planning succeeds (does not throw). Plan-shape assertions for AppendCol's join - // shape live in CalcitePPLAppendColumnsTest; this regression only covers the NPE. org.junit.Assert.assertNotNull(root); } }