Fix #5150: Fix dedup aggregation pushdown nullifying renamed fields

opensearchpplteam · opensearchpplteam · commit 6eb5ac5a8d8b · 2026-02-27T01:43:30.000Z
When the DedupPushdownRule converts a dedup to an aggregation-based
top_hits query, fields that were renamed (via rename or eval) would
return null values. This happened because the TopHitsParser returned
results using original OpenSearch field names, but the output schema
expected the renamed names.

Added a field name mapping to TopHitsParser so it can translate original
OS field names to their renamed output names in the LITERAL_AGG (dedup)
aggregation response path.

Signed-off-by: opensearchpplteam <opensearchpplteam@gmail.com>
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml
@@ -0,0 +1,74 @@
+setup:
+  - do:
+      indices.create:
+        index: test_5150
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled : true
+
+  - do:
+      bulk:
+        index: test_5150
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"category":"X","value":100}'
+          - '{"index": {}}'
+          - '{"category":"X","value":200}'
+          - '{"index": {}}'
+          - '{"category":"Y","value":300}'
+          - '{"index": {}}'
+          - '{"category":"Y","value":400}'
+
+---
+teardown:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled : false
+
+---
+"5150: Rename non-dedup field then dedup retains renamed values":
+  - skip:
+      features:
+        - headers
+        - allowed_warnings
+  - do:
+      allowed_warnings:
+        - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_5150 | rename value as val | dedup category | sort category | fields category, val
+
+  - match: { total: 2 }
+  - match: { schema: [{"name": "category", "type": "string"}, {"name": "val", "type": "bigint"}] }
+  - length: { datarows: 2 }
+  # Column 1 is the renamed field val; it must be non-null in every row (it was null before the fix)
+  - is_true: datarows.0.1
+  - is_true: datarows.1.1
+
+---
+"5150: Eval new field then dedup on different field retains eval values":
+  - skip:
+      features:
+        - headers
+        - allowed_warnings
+  - do:
+      allowed_warnings:
+        - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_5150 | eval doubled = value * 2 | dedup category | sort category | fields category, value, doubled
+
+  - match: { total: 2 }
+  - length: { datarows: 2 }
+  # Column 2 is the eval field doubled (after category, value); it must be non-null in every row
+  - is_true: datarows.0.2
+  - is_true: datarows.1.2
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java
@@ -38,6 +38,7 @@
 import com.google.common.collect.ImmutableList;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -601,7 +602,21 @@ yield switch (functionName) {
         TopHitsAggregationBuilder topHitsAggregationBuilder =
             createTopHitsBuilder(
                 aggCall, args, aggName, helper, dedupNumber, false, false, null, null);
-        yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggName, false, false));
+        // Build field name mapping for renamed fields (e.g., rename value as val).
+        // The top_hits response uses original OS field names, but the output schema expects
+        // the renamed names from the project.
+        Map<String, String> fieldNameMapping = new HashMap<>();
+        for (Pair<RexNode, String> arg : args) {
+          if (arg.getKey() instanceof RexInputRef) {
+            String originalName = helper.inferNamedField(arg.getKey()).getRootName();
+            String outputName = arg.getValue();
+            if (!originalName.equals(outputName)) {
+              fieldNameMapping.put(originalName, outputName);
+            }
+          }
+        }
+        yield Pair.of(
+            topHitsAggregationBuilder, new TopHitsParser(aggName, false, false, fieldNameMapping));
       }
       default ->
           throw new AggregateAnalyzer.AggregateAnalyzerException(
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java
@@ -27,10 +27,27 @@ public class TopHitsParser implements MetricParser {
   private final boolean returnSingleValue;
   private final boolean returnMergeValue;
 
+  /**
+   * Mapping from original OpenSearch field names to output field names (e.g., renamed via {@code
+   * rename} command). When a field is renamed (e.g., {@code rename value as val}), the top_hits
+   * response still contains the original field name ({@code value}), but the output schema expects
+   * the renamed name ({@code val}). This mapping enables the translation.
+   */
+  private final Map<String, String> fieldNameMapping;
+
  public TopHitsParser(String name, boolean returnSingleValue, boolean returnMergeValue) {
+    this(name, returnSingleValue, returnMergeValue, Collections.emptyMap()); // no field renames
+  }
+
+  public TopHitsParser(
+      String name,
+      boolean returnSingleValue,
+      boolean returnMergeValue,
+      Map<String, String> fieldNameMapping) { // original field name -> output field name
     this.name = name;
     this.returnSingleValue = returnSingleValue;
     this.returnMergeValue = returnMergeValue;
+    this.fieldNameMapping = fieldNameMapping; // kept by reference; no copy or null check
   }
 
   @Override
@@ -129,12 +146,28 @@ public List<Map<String, Object>> parse(Aggregation agg) {
                         ? new LinkedHashMap<>()
                         : new LinkedHashMap<>(hit.getSourceAsMap());
                 hit.getFields().values().forEach(f -> map.put(f.getName(), f.getValue()));
-                return map;
+                return applyFieldNameMapping(map);
               })
           .toList();
     }
   }
 
+  /**
+   * Renames the keys of {@code map} using {@code fieldNameMapping}; keys without a mapping are
+   * kept. Returns {@code map} itself when the mapping is empty, else a new {@link LinkedHashMap}.
+   */
+  private Map<String, Object> applyFieldNameMapping(Map<String, Object> map) {
+    if (fieldNameMapping.isEmpty()) {
+      return map;
+    }
+    Map<String, Object> result = new LinkedHashMap<>();
+    for (Map.Entry<String, Object> entry : map.entrySet()) {
+      String mappedName = fieldNameMapping.getOrDefault(entry.getKey(), entry.getKey());
+      result.put(mappedName, entry.getValue()); // on a name collision the later entry wins
+    }
+    return result;
+  }
+
   private boolean isEmptyHits(SearchHit[] hits) {
     return isFieldsEmpty(hits) && isSourceEmpty(hits);
   }
diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java
@@ -570,6 +570,57 @@ void two_bucket_percentiles_should_pass() {
                 ImmutableMap.of("percentiles", List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0))));
   }
 
+  /**
+   * Regression test for issue #5150 (dedup aggregation pushdown with a renamed field). Parses one
+   * composite bucket whose {@code top_hits#dedup} hit carries {@code category} and {@code value},
+   * using a {@link TopHitsParser} mapping {@code value} to {@code val}; the row must expose val.
+   */
+  @Test
+  void dedup_top_hits_with_field_name_mapping_should_remap_fields() {
+    String response =
+        "{\n"
+            + "  \"composite#composite_buckets\": {\n"
+            + "    \"buckets\": [\n"
+            + "      {\n"
+            + "        \"key\": {\n"
+            + "          \"category\": \"X\"\n"
+            + "        },\n"
+            + "        \"doc_count\": 2,\n"
+            + "        \"top_hits#dedup\": {\n"
+            + "          \"hits\": {\n"
+            + "            \"total\": { \"value\": 1, \"relation\": \"eq\" },\n"
+            + "            \"max_score\": 1.0,\n"
+            + "            \"hits\": [\n"
+            + "              {\n"
+            + "                \"_index\": \"test\",\n"
+            + "                \"_id\": \"1\",\n"
+            + "                \"_score\": 1.0,\n"
+            + "                \"fields\": {\n"
+            + "                  \"category\": [\"X\"],\n"
+            + "                  \"value\": [100]\n"
+            + "                }\n"
+            + "              }\n"
+            + "            ]\n"
+            + "          }\n"
+            + "        }\n"
+            + "      }\n"
+            + "    ]\n"
+            + "  }\n"
+            + "}";
+    // The hit above uses the original name "value"; the parser is told to emit it as "val".
+    // Use BucketAggregationParser as used by the dedup aggregation pushdown path.
+    OpenSearchAggregationResponseParser parser =
+        new BucketAggregationParser(
+            List.of(new TopHitsParser("dedup", false, false, Map.of("value", "val"))), List.of());
+    List<Map<String, Object>> result = parse(parser, response);
+    assertEquals(1, result.size());
+    Map<String, Object> row = result.get(0);
+    // "val" must carry the hit's value, and "value" must no longer resolve to a non-null entry
+    assertEquals(100, row.get("val"));
+    assertNull(row.get("value"));
+    assertEquals("X", row.get("category"));
+  }
+
   public List<Map<String, Object>> parse(OpenSearchAggregationResponseParser parser, String json) {
     return parser.parse(fromJson(json));
   }