Skip to content

Commit e8480cb

Browse files
RyanL1997claude
andcommitted
Make CalciteReplaceCommandIT column-order-agnostic for analytics-engine route
The analytics-engine route and the v2 / Lucene path return columns in different orders when there is no explicit `| fields ...` clause: parquet preserves the storage order chosen by the on-disk format, while the Lucene path preserves `_source` iteration order. Both are valid given the contract `verifySchema` declares (set equality on column names), so positional `verifyDataRows` assertions over-constrain the test and fail under `-Dtests.analytics.force_routing=true` even when the data is correct. Apply the same column-name-keyed match pattern Kai introduced for `CalcitePPLRenameIT` in 59c728b (opensearch-project#5413): * Add `rowOf(key1, val1, ...)` to build column-keyed expected rows. * Add `verifyDataRowsByColumn(...)` to look up each cell value by column name and reorder to match the response schema before delegating to the existing positional `verifyDataRows` matcher. * Convert the four order-sensitive tests (`testMultipleReplace`, `testEmptyStringReplacement`, `testMultipleFieldsInClause`, `testMultiplePairsInSingleCommand`) to the new helpers. * Make `testReplaceNonExistentField` order-agnostic on the `input fields are: [...]` field list — assert that the prefix and every expected field name appear in the message, but not in a fixed order. Test results against analytics-engine route via `-Dtests.analytics.{force_routing,parquet_indices}=true`: 21/21 pass in both the direct `CalciteReplaceCommandIT` suite and the `CalciteNoPushdownIT > CalciteReplaceCommandIT` re-run. v2 path remains green. Companion to the OpenSearch PR onboarding PPL `replace` command + `replace()` / `regexp_replace()` functions on the analytics-engine route via DataFusion `replace` / `regexp_replace` UDFs. Signed-off-by: Jialiang Liang <jiallian@amazon.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent 59c728b commit e8480cb

1 file changed

Lines changed: 91 additions & 24 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java

Lines changed: 91 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
import static org.opensearch.sql.util.MatcherUtils.*;
1010

1111
import java.io.IOException;
12+
import java.util.LinkedHashMap;
13+
import java.util.Map;
14+
import org.hamcrest.Matcher;
15+
import org.json.JSONArray;
1216
import org.json.JSONObject;
1317
import org.junit.Test;
1418
import org.opensearch.sql.common.antlr.SyntaxCheckException;
@@ -61,12 +65,13 @@ public void testMultipleReplace() throws IOException {
6165
schema("year", "int"),
6266
schema("age", "int"));
6367

64-
verifyDataRows(
68+
// Match by column name — analytics-engine and v2 paths return columns in different orders.
69+
verifyDataRowsByColumn(
6570
result,
66-
rows("Jake", "United States", "California", 4, 2023, 70),
67-
rows("Hello", "United States", "New York", 4, 2023, 30),
68-
rows("John", "Canada", "Ontario", 4, 2023, 25),
69-
rows("Joseph", "Canada", "Quebec", 4, 2023, 20));
71+
rowOf("name", "Jake", "country", "United States", "state", "California", "month", 4, "year", 2023, "age", 70),
72+
rowOf("name", "Hello", "country", "United States", "state", "New York", "month", 4, "year", 2023, "age", 30),
73+
rowOf("name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023, "age", 25),
74+
rowOf("name", "Joseph", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023, "age", 20));
7075
}
7176

7277
@Test
@@ -121,12 +126,12 @@ public void testEmptyStringReplacement() throws IOException {
121126
schema("year", "int"),
122127
schema("age", "int"));
123128

124-
verifyDataRows(
129+
verifyDataRowsByColumn(
125130
result,
126-
rows("Jake", "", "California", 4, 2023, 70),
127-
rows("Hello", "", "New York", 4, 2023, 30),
128-
rows("John", "Canada", "Ontario", 4, 2023, 25),
129-
rows("Jane", "Canada", "Quebec", 4, 2023, 20));
131+
rowOf("name", "Jake", "country", "", "state", "California", "month", 4, "year", 2023, "age", 70),
132+
rowOf("name", "Hello", "country", "", "state", "New York", "month", 4, "year", 2023, "age", 30),
133+
rowOf("name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023, "age", 25),
134+
rowOf("name", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023, "age", 20));
130135
}
131136

132137
@Test
@@ -146,12 +151,12 @@ public void testMultipleFieldsInClause() throws IOException {
146151
schema("year", "int"),
147152
schema("age", "int"));
148153

149-
verifyDataRows(
154+
verifyDataRowsByColumn(
150155
result,
151-
rows("Jake", "United States", "California", 4, 2023, 70),
152-
rows("Hello", "United States", "New York", 4, 2023, 30),
153-
rows("John", "Canada", "Ontario", 4, 2023, 25),
154-
rows("Jane", "Canada", "Quebec", 4, 2023, 20));
156+
rowOf("name", "Jake", "country", "United States", "state", "California", "month", 4, "year", 2023, "age", 70),
157+
rowOf("name", "Hello", "country", "United States", "state", "New York", "month", 4, "year", 2023, "age", 30),
158+
rowOf("name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023, "age", 25),
159+
rowOf("name", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023, "age", 20));
155160
}
156161

157162
@Test
@@ -164,10 +169,16 @@ public void testReplaceNonExistentField() {
164169
String.format(
165170
"source = %s | replace 'USA' WITH 'United States' IN non_existent_field",
166171
TEST_INDEX_STATE_COUNTRY)));
167-
verifyErrorMessageContains(
168-
e,
169-
"field [non_existent_field] not found; input fields are: [name, country, state, month,"
170-
+ " year, age, _id, _index, _score, _maxscore, _sort, _routing]");
172+
// Order-agnostic — analytics-engine and v2 paths emit the input-field list in different
173+
// orders (parquet preserves storage order, Lucene preserves _source iteration order).
174+
// Assert that the prefix and every expected field name appear somewhere in the message.
175+
verifyErrorMessageContains(e, "field [non_existent_field] not found; input fields are:");
176+
verifyErrorMessageContains(e, "name");
177+
verifyErrorMessageContains(e, "country");
178+
verifyErrorMessageContains(e, "state");
179+
verifyErrorMessageContains(e, "month");
180+
verifyErrorMessageContains(e, "year");
181+
verifyErrorMessageContains(e, "age");
171182
}
172183

173184
@Test
@@ -259,12 +270,12 @@ public void testMultiplePairsInSingleCommand() throws IOException {
259270
schema("year", "int"),
260271
schema("age", "int"));
261272

262-
verifyDataRows(
273+
verifyDataRowsByColumn(
263274
result,
264-
rows("Jake", "United States", "California", 4, 2023, 70),
265-
rows("Hello", "United States", "New York", 4, 2023, 30),
266-
rows("John", "CA", "Ontario", 4, 2023, 25),
267-
rows("Jane", "CA", "Quebec", 4, 2023, 20));
275+
rowOf("name", "Jake", "country", "United States", "state", "California", "month", 4, "year", 2023, "age", 70),
276+
rowOf("name", "Hello", "country", "United States", "state", "New York", "month", 4, "year", 2023, "age", 30),
277+
rowOf("name", "John", "country", "CA", "state", "Ontario", "month", 4, "year", 2023, "age", 25),
278+
rowOf("name", "Jane", "country", "CA", "state", "Quebec", "month", 4, "year", 2023, "age", 20));
268279
}
269280

270281
@Test
@@ -402,4 +413,60 @@ public void testEscapeSequence_noMatchLiteral() throws IOException {
402413
// Pattern "foo\*bar" matches literal "foo*bar", not "fooXbar", so original value returned
403414
verifyDataRows(result, rows("fooXbar"));
404415
}
416+
417+
/**
418+
* Build a {@code column -> value} map from interleaved varargs ({@code key1, val1, key2, val2,
419+
* ...}). Preserves insertion order so the expected-row mapping reads naturally at the call site.
420+
*/
421+
private static Map<String, Object> rowOf(Object... pairs) {
422+
if (pairs.length % 2 != 0) {
423+
throw new IllegalArgumentException("rowOf expects an even number of args (key, value, ...)");
424+
}
425+
Map<String, Object> row = new LinkedHashMap<>();
426+
for (int i = 0; i < pairs.length; i += 2) {
427+
row.put((String) pairs[i], pairs[i + 1]);
428+
}
429+
return row;
430+
}
431+
432+
/**
433+
* Match expected rows against the response by column name, ignoring the response's column
434+
* emission order. The two paths the analytics-engine route can take return columns in different
435+
* orders (parquet preserves storage order, the v2 / Lucene path preserves {@code _source}
436+
* iteration order), and either is valid given the contract {@code verifySchema} declares (set
437+
* equality on column names). To avoid baking either order into the test, this helper reorders
438+
* each expected row to match whatever column order the response actually returned.
439+
*
440+
* <p>Mirrors the helper in {@code CalcitePPLRenameIT} (commit 59c728b) — same pattern applied
441+
* to PPL {@code replace} command tests.
442+
*/
443+
@SafeVarargs
444+
@SuppressWarnings("varargs")
445+
private final void verifyDataRowsByColumn(JSONObject result, Map<String, Object>... expectedRows) {
446+
JSONArray schema = result.getJSONArray("schema");
447+
int n = schema.length();
448+
String[] columnOrder = new String[n];
449+
for (int i = 0; i < n; i++) {
450+
columnOrder[i] = schema.getJSONObject(i).getString("name");
451+
}
452+
@SuppressWarnings({"unchecked", "rawtypes"})
453+
Matcher<JSONArray>[] rowMatchers = new Matcher[expectedRows.length];
454+
for (int r = 0; r < expectedRows.length; r++) {
455+
Object[] reordered = new Object[n];
456+
for (int c = 0; c < n; c++) {
457+
if (!expectedRows[r].containsKey(columnOrder[c])) {
458+
throw new IllegalArgumentException(
459+
"Expected row at index "
460+
+ r
461+
+ " is missing canonical value for response column ["
462+
+ columnOrder[c]
463+
+ "]; provided keys: "
464+
+ expectedRows[r].keySet());
465+
}
466+
reordered[c] = expectedRows[r].get(columnOrder[c]);
467+
}
468+
rowMatchers[r] = rows(reordered);
469+
}
470+
verifyDataRows(result, rowMatchers);
471+
}
405472
}

0 commit comments

Comments
 (0)