Skip to content

Commit 16a2d9b

Browse files
ahkcs authored and imRishN committed
[QA] Add FieldFormatCommandIT for the analytics-engine REST path (opensearch-project#21544)
PPL `fieldformat` is a Calcite-only command that lowers to a plain Eval node (see SQL plugin's `AstBuilder.visitFieldformatCommand`). Its expressions go through Calcite's || (CONCAT) operator and CAST, both already wired in the DataFusion backend's STANDARD_PROJECT_OPS via opensearch-project#21498. **No code changes required for the analytics route — this PR is QA-only.**

The unique surface vs plain `eval` is the prefix-{`.`} and suffix-{`.`} string-concat sugar emitted by `AstExpressionBuilder.visitFieldFormatEvalClause` for the StringDotlogicalExpression / LogicalExpressionDotString rules: fieldformat x = "prefix".CAST(y AS STRING)." suffix" expands to a chain of CONCAT calls. Both forms route through the existing CONCAT capability — no extension lookup or adapter needed since isthmus' default catalog binds the || operator natively.

Four tests against the in-process QA cluster, exercising the analytics path end-to-end via the test-ppl-frontend plugin:

| Test | Shape |
|---|---|
| `testFieldformatPlusConcat` | `'Hello ' + str0` — basic +-concat. |
| `testFieldformatPrefixDotCast` | `'Code: '.CAST(int0 AS STRING)` — StringDotlogicalExpression branch. |
| `testFieldformatCastDotSuffix` | `CAST(int0 AS STRING).' pts'` — LogicalExpressionDotString branch. |
| `testFieldformatPrefixDotCastDotSuffix` | `'Code: '.CAST(int0 AS STRING).' pts'` — combined. |

Tests filter `where isnotnull(int0)` before sorting/limiting so the deterministic-row assertions don't flap on the calcs dataset's six null int0 rows (Calcite's default ascending sort puts nulls first).

Out of scope: the v2-side `testFieldFormatStringConcatenationWithNullFieldToString` uses `tostring(age, "commas")` — a multi-mode UDF (binary / hex / commas / duration) with substantial Java logic in `ToStringFunction`. Adding it to the analytics path would need either Calcite-level rewrites or a DataFusion Rust UDF; tracked separately.
Validates: 4/4 FieldFormatCommandIT pass; full :sandbox:qa:analytics-engine-rest:integTest suite green (**132 tests across 17 ITs**, no regressions).

Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 56687e7 commit 16a2d9b

1 file changed

Lines changed: 188 additions & 0 deletions

File tree

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.analytics.qa;
10+
11+
import org.opensearch.client.Request;
12+
import org.opensearch.client.Response;
13+
14+
import java.io.IOException;
15+
import java.util.Arrays;
16+
import java.util.List;
17+
import java.util.Map;
18+
19+
/**
20+
* Self-contained integration test for PPL {@code fieldformat} on the analytics-engine route.
21+
*
22+
* <p>Mirrors {@code CalciteFieldFormatCommandIT} from the {@code opensearch-project/sql}
23+
* repository so the analytics-engine path can be verified inside core without
24+
* cross-plugin dependencies on the SQL plugin.
25+
*
26+
* <p>{@code fieldformat} is a Calcite-only command (gated on
27+
* {@code plugins.calcite.enabled}; the gate is satisfied here because
28+
* {@code test-ppl-frontend}'s {@code UnifiedQueryService} sets the cluster setting
29+
* to true on every request). It lowers to a plain {@code Eval} node — see
30+
* {@code AstBuilder.visitFieldformatCommand} in the SQL plugin. The unique surface
31+
* vs plain {@code eval} is the prefix-{@code .} and suffix-{@code .} string-concat
32+
* sugar: {@code fieldformat x = "prefix".CAST(y AS STRING)." suffix"} expands to
33+
* a chain of {@code CONCAT} calls. Both {@code +}-style concat and the dotted form
34+
* route through Calcite's {@code ||} operator and resolve to
35+
* {@link org.opensearch.analytics.spi.ScalarFunction#CONCAT}, already in
36+
* {@code STANDARD_PROJECT_OPS}.
37+
*
38+
* <p>Provisions the {@code calcs} dataset (parquet-backed) once per class via
39+
* {@link DatasetProvisioner}.
40+
*/
41+
public class FieldFormatCommandIT extends AnalyticsRestTestCase {
42+
43+
private static final Dataset DATASET = new Dataset("calcs", "calcs");
44+
45+
private static boolean dataProvisioned = false;
46+
47+
private void ensureDataProvisioned() throws IOException {
48+
if (dataProvisioned == false) {
49+
DatasetProvisioner.provision(client(), DATASET);
50+
dataProvisioned = true;
51+
}
52+
}
53+
54+
// ── basic +-concat — same expression shape as `eval x = 'lit' + field` ─────
55+
56+
public void testFieldformatPlusConcat() throws IOException {
57+
// `'Hello ' + str0` — Calcite emits || (CONCAT). calcs has 17 rows; str0 has three
58+
// distinct values: FURNITURE (×2), OFFICE SUPPLIES (×6), TECHNOLOGY (×9). After
59+
// `head 3 | sort str0`, the first three are the FURNITURE/FURNITURE pair plus the
60+
// first OFFICE SUPPLIES — but ordering inside identical str0 isn't pinned, so we
61+
// sort by both key and a deterministic int0 first.
62+
assertRows(
63+
"source=" + DATASET.indexName
64+
+ " | sort str0, int0"
65+
+ " | head 3"
66+
+ " | fieldformat greeting = \"Hello \" + str0"
67+
+ " | fields str0, greeting",
68+
row("FURNITURE", "Hello FURNITURE"),
69+
row("FURNITURE", "Hello FURNITURE"),
70+
row("OFFICE SUPPLIES", "Hello OFFICE SUPPLIES")
71+
);
72+
}
73+
74+
// ── dotted-concat: prefix.CAST(int AS STRING) ────────────────────────────────
75+
76+
public void testFieldformatPrefixDotCast() throws IOException {
77+
// `"Code: ".CAST(int0 AS STRING)` — prefix string + CAST-to-string of an integer,
78+
// chained with the `.` form unique to fieldformat. AstExpressionBuilder's
79+
// StringDotlogicalExpression branch emits a Let with prefix=literal, expression=CAST,
80+
// and the Eval's CalciteRexNodeVisitor wraps both in a CONCAT.
81+
assertRows(
82+
"source=" + DATASET.indexName
83+
+ " | where isnotnull(int0)"
84+
+ " | sort int0"
85+
+ " | head 3"
86+
+ " | fieldformat code_desc = \"Code: \".CAST(int0 AS STRING)"
87+
+ " | fields int0, code_desc",
88+
row(1, "Code: 1"),
89+
row(3, "Code: 3"),
90+
row(4, "Code: 4")
91+
);
92+
}
93+
94+
// ── dotted-concat: CAST(int AS STRING).suffix ────────────────────────────────
95+
96+
public void testFieldformatCastDotSuffix() throws IOException {
97+
// Mirror image of the prefix case — LogicalExpressionDotString branch emits a Let
98+
// with suffix=literal, expression=CAST. Output column type is string regardless of
99+
// input type because CAST coerces and CONCAT preserves string.
100+
assertRows(
101+
"source=" + DATASET.indexName
102+
+ " | where isnotnull(int0)"
103+
+ " | sort int0"
104+
+ " | head 3"
105+
+ " | fieldformat code_desc = CAST(int0 AS STRING).\" pts\""
106+
+ " | fields int0, code_desc",
107+
row(1, "1 pts"),
108+
row(3, "3 pts"),
109+
row(4, "4 pts")
110+
);
111+
}
112+
113+
// ── dotted-concat: prefix.CAST(int AS STRING).suffix ─────────────────────────
114+
115+
public void testFieldformatPrefixDotCastDotSuffix() throws IOException {
116+
// Combined prefix + middle expression + suffix. The Eval emitted has a single Let
117+
// whose expression is CONCAT(CONCAT(prefix, CAST(...)), suffix). All three operands
118+
// route through the CONCAT capability in STANDARD_PROJECT_OPS — no extension lookup
119+
// needed since isthmus' default catalog binds the || operator natively.
120+
assertRows(
121+
"source=" + DATASET.indexName
122+
+ " | where isnotnull(int0)"
123+
+ " | sort int0"
124+
+ " | head 3"
125+
+ " | fieldformat code_desc = \"Code: \".CAST(int0 AS STRING).\" pts\""
126+
+ " | fields int0, code_desc",
127+
row(1, "Code: 1 pts"),
128+
row(3, "Code: 3 pts"),
129+
row(4, "Code: 4 pts")
130+
);
131+
}
132+
133+
// ── helpers ─────────────────────────────────────────────────────────────────
134+
135+
private static List<Object> row(Object... values) {
136+
return Arrays.asList(values);
137+
}
138+
139+
@SafeVarargs
140+
@SuppressWarnings("varargs")
141+
private final void assertRows(String ppl, List<Object>... expected) throws IOException {
142+
Map<String, Object> response = executePpl(ppl);
143+
@SuppressWarnings("unchecked")
144+
List<List<Object>> actualRows = (List<List<Object>>) response.get("rows");
145+
assertNotNull("Response missing 'rows' for query: " + ppl, actualRows);
146+
assertEquals("Row count mismatch for query: " + ppl, expected.length, actualRows.size());
147+
for (int i = 0; i < expected.length; i++) {
148+
List<Object> want = expected[i];
149+
List<Object> got = actualRows.get(i);
150+
assertEquals(
151+
"Column count mismatch at row " + i + " for query: " + ppl,
152+
want.size(),
153+
got.size()
154+
);
155+
for (int j = 0; j < want.size(); j++) {
156+
assertCellEquals(
157+
"Cell mismatch at row " + i + ", col " + j + " for query: " + ppl,
158+
want.get(j),
159+
got.get(j)
160+
);
161+
}
162+
}
163+
}
164+
165+
private Map<String, Object> executePpl(String ppl) throws IOException {
166+
ensureDataProvisioned();
167+
Request request = new Request("POST", "/_analytics/ppl");
168+
request.setJsonEntity("{\"query\": \"" + escapeJson(ppl) + "\"}");
169+
Response response = client().performRequest(request);
170+
return assertOkAndParse(response, "PPL: " + ppl);
171+
}
172+
173+
private static void assertCellEquals(String message, Object expected, Object actual) {
174+
if (expected == null || actual == null) {
175+
assertEquals(message, expected, actual);
176+
return;
177+
}
178+
if (expected instanceof Number && actual instanceof Number) {
179+
double e = ((Number) expected).doubleValue();
180+
double a = ((Number) actual).doubleValue();
181+
if (Double.compare(e, a) != 0) {
182+
fail(message + ": expected <" + expected + "> but was <" + actual + ">");
183+
}
184+
return;
185+
}
186+
assertEquals(message, expected, actual);
187+
}
188+
}

0 commit comments

Comments
 (0)