Skip to content

Commit 93a646f

Browse files
authored
[BugFix] Fix transpose command name collision with 'value' field (opensearch-project#5172) (opensearch-project#5352)
* [BugFix] Fix transpose command name collision with 'value' field (opensearch-project#5172) Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix wildcard import in CalciteTransposeCommandIT Signed-off-by: Songkan Tang <songkant@amazon.com> --------- Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent 9178001 commit 93a646f

6 files changed

Lines changed: 198 additions & 39 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ public RelNode visitTranspose(
941941
// Step 2: UNPIVOT
942942
b.unpivot(
943943
false,
944-
ImmutableList.of("value"),
944+
ImmutableList.of(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE),
945945
ImmutableList.of(columnName),
946946
fieldNames.stream()
947947
.map(
@@ -963,7 +963,7 @@ public RelNode visitTranspose(
963963
// Step 4: PIVOT
964964
b.pivot(
965965
b.groupKey(trimmedColumnName),
966-
ImmutableList.of(b.max(b.field("value"))),
966+
ImmutableList.of(b.max(b.field(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE))),
967967
ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)),
968968
IntStream.rangeClosed(1, maxRows)
969969
.mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i))))

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ public interface PlanUtils {
8484
String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__";
8585
String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_";
8686
String ROW_NUMBER_COLUMN_FOR_TRANSPOSE = "_row_number_transpose_";
87+
String VALUE_COLUMN_FOR_TRANSPOSE = "_value_transpose_";
8788

8889
static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) {
8990
return switch (unit) {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
10+
import static org.junit.jupiter.api.Assertions.assertTrue;
811
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
912
import static org.opensearch.sql.util.MatcherUtils.*;
1013
import static org.opensearch.sql.util.MatcherUtils.rows;
@@ -141,6 +144,50 @@ public void testTransposeLowerLimit() throws IOException {
141144
rows("age", "32", "36", "28", "33", "36"));
142145
}
143146

147+
/**
148+
* Regression test for #5172: transpose fails when input has a field named 'value', because the
149+
* internal unpivot column was also hardcoded as 'value'.
150+
*/
151+
@Test
152+
public void testTransposeWithValueFieldNameCollision() throws IOException {
153+
var result =
154+
executeQuery(
155+
String.format(
156+
"source=%s | stats count() as value, avg(age) as avg_age | transpose",
157+
TEST_INDEX_ACCOUNT));
158+
159+
verifySchema(
160+
result,
161+
schema("column", "string"),
162+
schema("row 1", "string"),
163+
schema("row 2", "string"),
164+
schema("row 3", "string"),
165+
schema("row 4", "string"),
166+
schema("row 5", "string"));
167+
168+
var dataRows = result.getJSONArray("datarows");
169+
// Verify that each transposed row has distinct correct values
170+
// (not all duplicated from the 'value' field)
171+
assertEquals(2, dataRows.length());
172+
boolean foundValue = false;
173+
boolean foundAvgAge = false;
174+
for (int i = 0; i < dataRows.length(); i++) {
175+
var row = dataRows.getJSONArray(i);
176+
String colName = row.getString(0);
177+
if ("value".equals(colName)) {
178+
foundValue = true;
179+
// count should be 1000 (total accounts)
180+
assertEquals("1000", row.getString(1));
181+
} else if ("avg_age".equals(colName)) {
182+
foundAvgAge = true;
183+
// avg_age should not equal the count value
184+
assertNotEquals("1000", row.getString(1));
185+
}
186+
}
187+
assertTrue("Should have 'value' row in transposed result", foundValue);
188+
assertTrue("Should have 'avg_age' row in transposed result", foundAvgAge);
189+
}
190+
144191
@Test
145192
public void testTransposeColumnName() throws IOException {
146193
var result =

integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ calcite:
33
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
44
LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4])
55
LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5])
6-
LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)])
6+
LogicalProject(_value_transpose_=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)])
77
LogicalFilter(condition=[IS NOT NULL($19)])
8-
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)])
8+
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], _value_transpose_=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)])
99
LogicalJoin(condition=[true], joinType=[inner])
1010
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()])
1111
LogicalSort(fetch=[5])
@@ -14,7 +14,7 @@ calcite:
1414
physical: |
1515
EnumerableLimit(fetch=[10000])
1616
EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5])
17-
EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59])
17+
EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], _value_transpose_=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59])
1818
EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
1919
EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])
2020
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
setup:
2+
- do:
3+
query.settings:
4+
body:
5+
transient:
6+
plugins.calcite.enabled: true
7+
8+
- do:
9+
indices.create:
10+
index: issue5172
11+
body:
12+
settings:
13+
number_of_shards: 1
14+
number_of_replicas: 0
15+
mappings:
16+
properties:
17+
count:
18+
type: integer
19+
category:
20+
type: keyword
21+
subcategory:
22+
type: keyword
23+
value:
24+
type: double
25+
ts:
26+
type: date
27+
28+
- do:
29+
bulk:
30+
refresh: true
31+
body:
32+
- '{"index": {"_index": "issue5172", "_id": "1"}}'
33+
- '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}'
34+
- '{"index": {"_index": "issue5172", "_id": "2"}}'
35+
- '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}'
36+
37+
---
38+
teardown:
39+
- do:
40+
indices.delete:
41+
index: issue5172
42+
ignore_unavailable: true
43+
- do:
44+
query.settings:
45+
body:
46+
transient:
47+
plugins.calcite.enabled: false
48+
49+
---
50+
"Issue 5172: transpose with value field name collision":
51+
- skip:
52+
features:
53+
- headers
54+
- do:
55+
headers:
56+
Content-Type: 'application/json'
57+
ppl:
58+
body:
59+
query: source=issue5172 | where category = "A" | fields category, value | transpose 2
60+
61+
- match: { total: 2 }
62+
- match: { schema: [ { name: column, type: string }, { name: "row 1", type: string }, { name: "row 2", type: string } ] }
63+
- length: { datarows: 2 }
64+
65+
---
66+
"Issue 5172: transpose with stats alias named value":
67+
- skip:
68+
features:
69+
- headers
70+
- do:
71+
headers:
72+
Content-Type: 'application/json'
73+
ppl:
74+
body:
75+
query: source=issue5172 | stats count() as value, avg(value) as avg_val | transpose
76+
77+
- match: { total: 2 }
78+
- length: { datarows: 2 }

0 commit comments

Comments
 (0)