Skip to content

Commit 2804165

Browse files
committed
[BugFix] Fix eval overwrites MAP root field when assigning multiple dotted-path names (#5185)
Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent c26897d commit 2804165

4 files changed

Lines changed: 140 additions & 4 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,7 +1295,7 @@ private void projectPlusOverriding(
12951295
List<String> originalFieldNames = context.relBuilder.peek().getRowType().getFieldNames();
12961296
List<RexNode> toOverrideList =
12971297
originalFieldNames.stream()
1298-
.filter(originalName -> shouldOverrideField(originalName, newNames))
1298+
.filter(originalName -> shouldOverrideField(originalName, newNames, originalFieldNames))
12991299
.map(a -> (RexNode) context.relBuilder.field(a))
13001300
.toList();
13011301
// 1. add the new fields, For example "age0, country0"
@@ -1315,15 +1315,31 @@ private void projectPlusOverriding(
13151315
context.relBuilder.rename(expectedRenameFields);
13161316
}
13171317

1318-
private boolean shouldOverrideField(String originalName, List<String> newNames) {
1318+
private boolean shouldOverrideField(
1319+
String originalName, List<String> newNames, List<String> allFieldNames) {
13191320
return newNames.stream()
13201321
.anyMatch(
13211322
newName ->
13221323
// Match exact field names (e.g., "age" == "age") for flat fields
13231324
newName.equals(originalName)
13241325
// OR match nested paths (e.g., "resource.attributes..." starts with
1325-
// "resource.")
1326-
|| newName.startsWith(originalName + "."));
1326+
// "resource."), but only when the schema already contains sub-fields of
1327+
// originalName. This distinguishes real struct parents (which have flattened
1328+
// sub-field columns) from MAP columns produced by spath where dotted names
1329+
// are just flat column names, not nested sub-fields.
1330+
|| (newName.startsWith(originalName + ".")
1331+
&& hasSubFields(originalName, allFieldNames)));
1332+
}
1333+
1334+
/**
1335+
* Checks whether the schema contains any field whose name starts with {@code parentName + "."}.
1336+
* This indicates that {@code parentName} is a real struct parent with flattened sub-field
1337+
* columns, as opposed to a standalone MAP column (e.g., from spath) that has no sub-fields in the
1338+
* schema.
1339+
*/
1340+
private boolean hasSubFields(String parentName, List<String> allFieldNames) {
1341+
String prefix = parentName + ".";
1342+
return allFieldNames.stream().anyMatch(name -> name.startsWith(prefix));
13271343
}
13281344

13291345
private List<List<RexInputRef>> extractInputRefList(List<RelBuilder.AggCall> aggCalls) {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,4 +216,27 @@ public void testSpathAutoExtractWithSort() throws IOException {
216216
verifySchema(result, schema("doc.user.name", "string"));
217217
verifyDataRowsInOrder(result, rows("Alice"), rows("John"));
218218
}
219+
220+
@Test
221+
public void testSpathAutoExtractWithMultiFieldEval() throws IOException {
222+
JSONObject result =
223+
executeQuery(
224+
"source=test_spath_cmd | spath input=doc"
225+
+ " | eval doc.user.name=doc.user.name, doc.user.age=doc.user.age"
226+
+ " | fields doc.user.name, doc.user.age");
227+
verifySchema(result, schema("doc.user.name", "string"), schema("doc.user.age", "string"));
228+
verifyDataRows(result, rows("Alice", "25"), rows("John", "30"));
229+
}
230+
231+
@Test
232+
public void testSpathAutoExtractWithSeparateEvalCommands() throws IOException {
233+
JSONObject result =
234+
executeQuery(
235+
"source=test_spath_cmd | spath input=doc"
236+
+ " | eval doc.user.name=doc.user.name"
237+
+ " | eval doc.user.age=doc.user.age"
238+
+ " | fields doc.user.name, doc.user.age");
239+
verifySchema(result, schema("doc.user.name", "string"), schema("doc.user.age", "string"));
240+
verifyDataRows(result, rows("Alice", "25"), rows("John", "30"));
241+
}
219242
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
setup:
2+
- do:
3+
query.settings:
4+
body:
5+
transient:
6+
plugins.calcite.enabled: true
7+
- do:
8+
indices.create:
9+
index: issue5185
10+
body:
11+
settings:
12+
number_of_shards: 1
13+
number_of_replicas: 0
14+
mappings:
15+
properties:
16+
doc:
17+
type: text
18+
- do:
19+
bulk:
20+
refresh: true
21+
body:
22+
- '{"index": {"_index": "issue5185", "_id": "1"}}'
23+
- '{"doc": "{\"user\":{\"name\":\"John\",\"age\":30}}"}'
24+
- '{"index": {"_index": "issue5185", "_id": "2"}}'
25+
- '{"doc": "{\"user\":{\"name\":\"Alice\",\"age\":25}}"}'
26+
27+
---
28+
teardown:
29+
- do:
30+
indices.delete:
31+
index: issue5185
32+
ignore_unavailable: true
33+
- do:
34+
query.settings:
35+
body:
36+
transient:
37+
plugins.calcite.enabled: false
38+
39+
---
40+
"Issue 5185: eval with multiple dotted-path assignments from MAP column":
41+
- skip:
42+
features:
43+
- headers
44+
- allowed_warnings
45+
- do:
46+
headers:
47+
Content-Type: 'application/json'
48+
ppl:
49+
body:
50+
query: "source=issue5185 | spath input=doc | eval doc.user.name=doc.user.name, doc.user.age=doc.user.age | fields doc.user.name, doc.user.age"
51+
52+
- match: { total: 2 }
53+
- length: { datarows: 2 }
54+
55+
---
56+
"Issue 5185: separate eval commands with dotted-path from MAP column":
57+
- skip:
58+
features:
59+
- headers
60+
- allowed_warnings
61+
- do:
62+
headers:
63+
Content-Type: 'application/json'
64+
ppl:
65+
body:
66+
query: "source=issue5185 | spath input=doc | eval doc.user.name=doc.user.name | eval doc.user.age=doc.user.age | fields doc.user.name, doc.user.age"
67+
68+
- match: { total: 2 }
69+
- length: { datarows: 2 }

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,34 @@ public void testSpathAutoExtractModeWithFields() {
123123
+ "FROM `scott`.`EMP`");
124124
}
125125

126+
@Test
127+
public void testSpathAutoExtractWithMultiFieldEval() {
128+
// Issue #5185: eval with multiple dotted-path assignments from MAP column
129+
// should not remove the MAP root field
130+
withPPLQuery(
131+
"source=EMP | spath input=ENAME"
132+
+ " | eval ENAME.user.name=ENAME.user.name, ENAME.user.age=ENAME.user.age"
133+
+ " | fields ENAME.user.name, ENAME.user.age")
134+
.expectLogical(
135+
"LogicalProject(ENAME.user.name=[ITEM(JSON_EXTRACT_ALL($1), 'user.name')],"
136+
+ " ENAME.user.age=[ITEM(JSON_EXTRACT_ALL($1), 'user.age')])\n"
137+
+ " LogicalTableScan(table=[[scott, EMP]])\n");
138+
}
139+
140+
@Test
141+
public void testSpathAutoExtractWithSeparateEvalCommands() {
142+
// Issue #5185: separate eval commands with dotted-path assignments from MAP column
143+
withPPLQuery(
144+
"source=EMP | spath input=ENAME"
145+
+ " | eval ENAME.user.name=ENAME.user.name"
146+
+ " | eval ENAME.user.age=ENAME.user.age"
147+
+ " | fields ENAME.user.name, ENAME.user.age")
148+
.expectLogical(
149+
"LogicalProject(ENAME.user.name=[ITEM(JSON_EXTRACT_ALL($1), 'user.name')],"
150+
+ " ENAME.user.age=[ITEM(JSON_EXTRACT_ALL($1), 'user.age')])\n"
151+
+ " LogicalTableScan(table=[[scott, EMP]])\n");
152+
}
153+
126154
@Test
127155
public void testSpathAutoExtractModeWithSort() {
128156
withPPLQuery("source=EMP | spath input=ENAME output=result" + " | sort result.user.name")

0 commit comments

Comments
 (0)