Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ public RelNode visitTranspose(
// Step 2: UNPIVOT
b.unpivot(
false,
ImmutableList.of("value"),
ImmutableList.of(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE),
ImmutableList.of(columnName),
fieldNames.stream()
.map(
Expand All @@ -945,7 +945,7 @@ public RelNode visitTranspose(
// Step 4: PIVOT
b.pivot(
b.groupKey(trimmedColumnName),
ImmutableList.of(b.max(b.field("value"))),
ImmutableList.of(b.max(b.field(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE))),
ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)),
IntStream.rangeClosed(1, maxRows)
.mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ public interface PlanUtils {
String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__";
String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_";
String ROW_NUMBER_COLUMN_FOR_TRANSPOSE = "_row_number_transpose_";
String VALUE_COLUMN_FOR_TRANSPOSE = "_value_transpose_";

static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) {
return switch (unit) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

package org.opensearch.sql.calcite.remote;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
import static org.opensearch.sql.util.MatcherUtils.*;
import static org.opensearch.sql.util.MatcherUtils.rows;
Expand Down Expand Up @@ -141,6 +144,50 @@ public void testTransposeLowerLimit() throws IOException {
rows("age", "32", "36", "28", "33", "36"));
}

/**
* Regression test for #5172: transpose fails when input has a field named 'value', because the
* internal unpivot column was also hardcoded as 'value'.
*/
@Test
public void testTransposeWithValueFieldNameCollision() throws IOException {
var result =
executeQuery(
String.format(
"source=%s | stats count() as value, avg(age) as avg_age | transpose",
TEST_INDEX_ACCOUNT));

verifySchema(
result,
schema("column", "string"),
schema("row 1", "string"),
schema("row 2", "string"),
schema("row 3", "string"),
schema("row 4", "string"),
schema("row 5", "string"));

var dataRows = result.getJSONArray("datarows");
// Verify that each transposed row has distinct correct values
// (not all duplicated from the 'value' field)
assertEquals(2, dataRows.length());
boolean foundValue = false;
boolean foundAvgAge = false;
for (int i = 0; i < dataRows.length(); i++) {
var row = dataRows.getJSONArray(i);
String colName = row.getString(0);
if ("value".equals(colName)) {
foundValue = true;
// count should be 1000 (total accounts)
assertEquals("1000", row.getString(1));
} else if ("avg_age".equals(colName)) {
foundAvgAge = true;
// avg_age should not equal the count value
assertNotEquals("1000", row.getString(1));
}
}
assertTrue("Should have 'value' row in transposed result", foundValue);
assertTrue("Should have 'avg_age' row in transposed result", foundAvgAge);
}

@Test
public void testTransposeColumnName() throws IOException {
var result =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ calcite:
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4])
LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5])
LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)])
LogicalProject(_value_transpose_=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)])
LogicalFilter(condition=[IS NOT NULL($19)])
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)])
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], _value_transpose_=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)])
LogicalJoin(condition=[true], joinType=[inner])
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()])
LogicalSort(fetch=[5])
Expand All @@ -14,7 +14,7 @@ calcite:
physical: |
EnumerableLimit(fetch=[10000])
EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5])
EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59])
EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], _value_transpose_=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59])
EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
setup:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true

- do:
indices.create:
index: issue5172
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
count:
type: integer
category:
type: keyword
subcategory:
type: keyword
value:
type: double
ts:
type: date

- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "issue5172", "_id": "1"}}'
- '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}'
- '{"index": {"_index": "issue5172", "_id": "2"}}'
- '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}'

---
teardown:
- do:
indices.delete:
index: issue5172
ignore_unavailable: true
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false

---
"Issue 5172: transpose with value field name collision":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=issue5172 | where category = "A" | fields category, value | transpose 2

- match: { total: 2 }
- match: { schema: [ { name: column, type: string }, { name: "row 1", type: string }, { name: "row 2", type: string } ] }
- length: { datarows: 2 }

---
"Issue 5172: transpose with stats alias named value":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=issue5172 | stats count() as value, avg(value) as avg_val | transpose

- match: { total: 2 }
- length: { datarows: 2 }
Loading
Loading