Skip to content

Commit a7d35a0

Browse files
committed
Remove prefix-override branch in shouldOverrideField; scope struct-parent pruning to bin
PR #4606 introduced a `newName.startsWith(originalName + ".")` branch in `shouldOverrideField` to make `bin` on a nested field work without an explicit `fields` projection (issue #4482). That heuristic silently removed any column that happened to be a prefix of an eval target, which broke two real cases: * Issue #5185 — assigning multiple dotted-path fields from a `spath` MAP root dropped the root column, so subsequent evals failed with "Field not found". * Reviewer's case on PR #5351 — `source=big5 | fields agent | eval agent.name = "test"` dropped the `agent` column entirely, violating SPL1 semantics where a dotted assignment creates a literal column without touching any sibling or parent field. Rather than layer another heuristic (`hasSubFields`) on top of the prefix branch, this change removes the prefix branch entirely and returns `shouldOverrideField` to an exact-name match. The command-specific behaviour that actually needed parent-pruning — `bin` on a nested leaf inside a struct that OpenSearch exposes as both parent and flattened leaves — is now handled locally in `visitBin` via `dropStructParentsFor`, which only fires when the prefixes really exist as struct-parent columns in the current row schema. Tests: * Kept the two spath regression tests from PR #5351 (`CalcitePPLSpathTest`, `CalcitePPLSpathCommandIT`, `issues/5185.yml`). * Added `CalciteEvalCommandIT.testEvalDottedNameDoesNotDropStructParent` and `testEvalDottedNamePreservesStructParent_ImplicitProject` to cover the reviewer's `agent` case. * `CalciteBinCommandIT.testBinWithNestedFieldWithoutExplicitProjection` and `testBinWithEvalCreatedDottedFieldName` still pass via the new `dropStructParentsFor` path instead of the global prefix branch. Resolves #5185 Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent 2804165 commit a7d35a0

2 files changed

Lines changed: 109 additions & 24 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -973,10 +973,47 @@ public RelNode visitBin(Bin node, CalcitePlanContext context) {
973973

974974
String alias = node.getAlias() != null ? node.getAlias() : fieldName;
975975
projectPlusOverriding(List.of(binExpression), List.of(alias), context);
976+
dropStructParentsFor(alias, context);
976977

977978
return context.relBuilder.peek();
978979
}
979980

981+
/**
982+
* If {@code dottedName} addresses a nested leaf inside a struct that OpenSearch has exposed
983+
* through both its struct-parent columns and its flattened leaf columns (e.g. the telemetry
984+
* mapping exposes {@code resource}, {@code resource.attributes}, ..., {@code
985+
* resource.attributes.telemetry.sdk.version} side-by-side), drop the struct-parent prefixes from
986+
* the current row. This keeps a subsequent {@link #tryToRemoveNestedFields(CalcitePlanContext)}
987+
* pass from collapsing the flattened leaves back into the parents when the final implicit {@code
988+
* fields *} projection runs.
989+
*
990+
* <p>This preserves the behaviour that issue #4482 originally required for {@code bin} on a
991+
* nested field without an explicit {@code fields} projection, scoped to the command that actually
992+
* needs it instead of a global prefix-override in {@link #shouldOverrideField} (which broke #5185
993+
* and the reviewer's {@code eval agent.name = ...} case).
994+
*
995+
* <p>No-op when no such struct-parent columns exist (e.g. flat columns or MAP roots from {@code
996+
* spath}).
997+
*/
998+
private void dropStructParentsFor(String dottedName, CalcitePlanContext context) {
999+
if (dottedName == null || dottedName.indexOf('.') < 0) {
1000+
return;
1001+
}
1002+
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
1003+
List<RexNode> parentsToDrop = new ArrayList<>();
1004+
int dotIdx = dottedName.indexOf('.');
1005+
while (dotIdx >= 0) {
1006+
String prefix = dottedName.substring(0, dotIdx);
1007+
if (fieldNames.contains(prefix)) {
1008+
parentsToDrop.add(context.relBuilder.field(prefix));
1009+
}
1010+
dotIdx = dottedName.indexOf('.', dotIdx + 1);
1011+
}
1012+
if (!parentsToDrop.isEmpty()) {
1013+
context.relBuilder.projectExcept(parentsToDrop);
1014+
}
1015+
}
1016+
9801017
@Override
9811018
public RelNode visitParse(Parse node, CalcitePlanContext context) {
9821019
visitChildren(node, context);
@@ -1295,7 +1332,7 @@ private void projectPlusOverriding(
12951332
List<String> originalFieldNames = context.relBuilder.peek().getRowType().getFieldNames();
12961333
List<RexNode> toOverrideList =
12971334
originalFieldNames.stream()
1298-
.filter(originalName -> shouldOverrideField(originalName, newNames, originalFieldNames))
1335+
.filter(originalName -> shouldOverrideField(originalName, newNames))
12991336
.map(a -> (RexNode) context.relBuilder.field(a))
13001337
.toList();
13011338
// 1. add the new fields, For example "age0, country0"
@@ -1315,31 +1352,30 @@ private void projectPlusOverriding(
13151352
context.relBuilder.rename(expectedRenameFields);
13161353
}
13171354

1318-
private boolean shouldOverrideField(
1319-
String originalName, List<String> newNames, List<String> allFieldNames) {
1320-
return newNames.stream()
1321-
.anyMatch(
1322-
newName ->
1323-
// Match exact field names (e.g., "age" == "age") for flat fields
1324-
newName.equals(originalName)
1325-
// OR match nested paths (e.g., "resource.attributes..." starts with
1326-
// "resource."), but only when the schema already contains sub-fields of
1327-
// originalName. This distinguishes real struct parents (which have flattened
1328-
// sub-field columns) from MAP columns produced by spath where dotted names
1329-
// are just flat column names, not nested sub-fields.
1330-
|| (newName.startsWith(originalName + ".")
1331-
&& hasSubFields(originalName, allFieldNames)));
1332-
}
1333-
13341355
/**
1335-
* Checks whether the schema contains any field whose name starts with {@code parentName + "."}.
1336-
* This indicates that {@code parentName} is a real struct parent with flattened sub-field
1337-
* columns, as opposed to a standalone MAP column (e.g., from spath) that has no sub-fields in the
1338-
* schema.
1356+
* Determine whether the column {@code originalName} should be replaced when a batch of new
1357+
* columns named {@code newNames} is being added. Only exact-name matches count as overrides —
1358+
* {@code eval foo.bar = ...} creates a brand new field literally named {@code foo.bar} and must
1359+
* never drop sibling or parent fields. This mirrors SPL1 semantics, where assigning a dotted name
1360+
* introduces a literal column of that name without touching any other field.
1361+
*
1362+
* <p>Earlier revisions (see PR #4606 / #5351) attempted to broaden this to a {@code
1363+
* newName.startsWith(originalName + ".")} prefix match. That prefix branch silently dropped any
1364+
* column that happened to be a prefix of an eval target, which caused two regressions:
1365+
*
1366+
* <ul>
1367+
* <li>Issue #5185 — a MAP-typed root column produced by {@code spath} got dropped when eval
1368+
* introduced multiple dotted-path fields under it.
1369+
* <li>The reviewer's case on PR #5351 — {@code source=big5 | fields agent | eval agent.name =
1370+
* "test"} dropped the {@code agent} column entirely.
1371+
* </ul>
1372+
*
1373+
* Command-specific behaviour that genuinely wants to prune struct-parent columns (such as {@code
1374+
* bin} on a nested field, the original motivation for #4606) must handle that inside the command
1375+
* visitor — see {@link #dropStructParentsFor(String, CalcitePlanContext)}.
13391376
*/
1340-
private boolean hasSubFields(String parentName, List<String> allFieldNames) {
1341-
String prefix = parentName + ".";
1342-
return allFieldNames.stream().anyMatch(name -> name.startsWith(prefix));
1377+
private boolean shouldOverrideField(String originalName, List<String> newNames) {
1378+
return newNames.contains(originalName);
13431379
}
13441380

13451381
private List<List<RexInputRef>> extractInputRefList(List<RelBuilder.AggCall> aggCalls) {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEvalCommandIT.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
1212
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
1313

14+
import com.google.common.collect.ImmutableMap;
1415
import java.io.IOException;
1516
import org.json.JSONObject;
1617
import org.junit.jupiter.api.Test;
@@ -38,6 +39,21 @@ public void init() throws Exception {
3839
Request request3 = new Request("PUT", "/test_eval/_doc/3?refresh=true");
3940
request3.setJsonEntity("{\"name\": \"Charlie\", \"age\": null, \"title\": \"Analyst\"}");
4041
client().performRequest(request3);
42+
43+
// Index with a struct field `agent` to reproduce the reviewer's case from PR #5351:
44+
// source=<idx with agent struct> | fields agent | eval agent.name = "test"
45+
// Rely on dynamic mapping — OpenSearch infers `agent` as an object with string children
46+
// from the document contents. Using dynamic mapping keeps the init idempotent across
47+
// repeated `@Before` invocations in the preserved cluster.
48+
Request agentDoc1 = new Request("PUT", "/test_eval_agent/_doc/1?refresh=true");
49+
agentDoc1.setJsonEntity(
50+
"{\"agent\": {\"name\": \"winlogbeat\", \"version\": \"7.0\"}, \"message\": \"hello\"}");
51+
client().performRequest(agentDoc1);
52+
53+
Request agentDoc2 = new Request("PUT", "/test_eval_agent/_doc/2?refresh=true");
54+
agentDoc2.setJsonEntity(
55+
"{\"agent\": {\"name\": \"filebeat\", \"version\": \"8.1\"}, \"message\": \"world\"}");
56+
client().performRequest(agentDoc2);
4157
}
4258

4359
@Test
@@ -86,6 +102,39 @@ public void testEvalStringConcatenationWithLiterals() throws IOException {
86102
rows("Charlie", "Analyst", "Name: Charlie, Title: Analyst"));
87103
}
88104

105+
@Test
106+
public void testEvalDottedNameDoesNotDropStructParent() throws IOException {
107+
// Reviewer's case from PR #5351: assigning a new dotted-path column must not remove the
108+
// struct-parent column that happens to be a prefix of the eval target.
109+
// Equivalent SPL1 query:
110+
// source=<idx with agent struct> | fields agent | eval agent.name = "test"
111+
// Before the fix, the prefix-override in shouldOverrideField silently dropped the `agent`
112+
// column entirely from the result schema. With the fix, `agent` is preserved.
113+
// The newly-created literal column `agent.name` is also available (verified via an
114+
// explicit trailing `fields` projection that bypasses tryToRemoveNestedFields).
115+
JSONObject result =
116+
executeQuery(
117+
"source=test_eval_agent | fields agent | eval `agent.name` = 'test' | fields agent,"
118+
+ " `agent.name`");
119+
verifySchema(result, schema("agent", "struct"), schema("agent.name", "string"));
120+
verifyDataRows(
121+
result,
122+
rows(ImmutableMap.of("name", "winlogbeat", "version", "7.0"), "test"),
123+
rows(ImmutableMap.of("name", "filebeat", "version", "8.1"), "test"));
124+
}
125+
126+
@Test
127+
public void testEvalDottedNamePreservesStructParent_ImplicitProject() throws IOException {
128+
// Complementary coverage for the reviewer's case without the explicit trailing projection.
129+
// With the implicit `fields *` (AllFields) that the PPL parser appends, the downstream
130+
// `tryToRemoveNestedFields` pass still collapses the flattened leaf back into its struct
131+
// parent -- but the important regression guard is that the struct parent `agent` is no
132+
// longer dropped by `shouldOverrideField`'s prefix branch.
133+
JSONObject result =
134+
executeQuery("source=test_eval_agent | fields agent | eval `agent.name` = 'test'");
135+
verifySchema(result, schema("agent", "struct"));
136+
}
137+
89138
@Test
90139
public void testEvalStringConcatenationWithExistingData() throws IOException {
91140
JSONObject result =

0 commit comments

Comments
 (0)