opensearch-project · yuancu · Apr 22, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
@@ -116,6 +116,7 @@
   CalciteNoMvCommandIT.class,
   CalciteMvExpandCommandIT.class,
   CalcitePPLGraphLookupIT.class,
+  CalciteMixedFieldTypeIT.class,
 })
 public class CalciteNoPushdownIT {
   private static boolean wasPushdownEnabled;

@@ -0,0 +1,110 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.remote;
+
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.schema;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+import static org.opensearch.sql.util.MatcherUtils.verifySchema;
+import static org.opensearch.sql.util.TestUtils.createIndexByRestClient;
+import static org.opensearch.sql.util.TestUtils.isIndexExist;
+import static org.opensearch.sql.util.TestUtils.performRequest;
+
+import java.io.IOException;
+import org.json.JSONObject;
+import org.junit.jupiter.api.Test;
+import org.opensearch.client.Request;
+import org.opensearch.sql.ppl.PPLIntegTestCase;
+
+/**
+ * Integration tests for querying wildcard indices where a field has conflicting types (e.g., text
+ * vs keyword) across different indices. See GitHub issue #4659.
+ */
+public class CalciteMixedFieldTypeIT extends PPLIntegTestCase {
+
+  private static final String LOG_TEXT_INDEX = "test_log_text_4659";
+  private static final String LOG_KEYWORD_INDEX = "test_log_keyword_4659";
+
+  @Override
+  public void init() throws Exception {
+    super.init();
+    enableCalcite();
+    createTestIndices();
+  }
+
+  private void createTestIndices() throws IOException {
+    // Create index with msg as text type
+    if (!isIndexExist(client(), LOG_TEXT_INDEX)) {
+      String textMapping =
+          "{\"mappings\":{\"properties\":{\"msg\":{\"type\":\"text\"},"
+              + "\"idx\":{\"type\":\"integer\"}}}}";
+      createIndexByRestClient(client(), LOG_TEXT_INDEX, textMapping);
+      Request bulkReq = new Request("POST", "/" + LOG_TEXT_INDEX + "/_bulk?refresh=true");
+      bulkReq.setJsonEntity(
+          "{\"index\":{\"_id\":\"1\"}}\n" + "{\"msg\":\"status=200\",\"idx\":1}\n");
+      performRequest(client(), bulkReq);
+    }
+
+    // Create index with msg as keyword type
+    if (!isIndexExist(client(), LOG_KEYWORD_INDEX)) {
+      String keywordMapping =
+          "{\"mappings\":{\"properties\":{\"msg\":{\"type\":\"keyword\"},"
+              + "\"idx\":{\"type\":\"integer\"}}}}";
+      createIndexByRestClient(client(), LOG_KEYWORD_INDEX, keywordMapping);
+      Request bulkReq = new Request("POST", "/" + LOG_KEYWORD_INDEX + "/_bulk?refresh=true");
+      bulkReq.setJsonEntity(
+          "{\"index\":{\"_id\":\"1\"}}\n" + "{\"msg\":\"status=200\",\"idx\":2}\n");
+      performRequest(client(), bulkReq);
+    }
+  }
+
+  @Test
+  public void testWildcardQueryWithMixedTextAndKeywordField() throws IOException {
+    // Query using wildcard index pattern that matches both indices
+    // Both documents should be returned regardless of field type conflict
+    JSONObject result = executeQuery("source=test_log_*_4659 | fields msg, idx | sort idx");
+    verifySchema(result, schema("msg", "string"), schema("idx", "int"));
+    verifyDataRows(result, rows("status=200", 1), rows("status=200", 2));
+  }
+
+  @Test
+  public void testWildcardQueryWithEvalOnMixedField() throws IOException {
+    // Eval uses the Calcite script engine to compute expression on each shard.
+    // When the merged type is keyword, DOC_VALUE is used, but text shards have no doc_values
+    // which returns null and causes the eval to produce null for the text-typed shard.
+    JSONObject result =
+        executeQuery(
+            "source=test_log_*_4659 | eval upper_msg = upper(msg) | fields idx, upper_msg"
+                + " | sort idx");
+    verifySchema(result, schema("idx", "int"), schema("upper_msg", "string"));
+    verifyDataRows(result, rows(1, "STATUS=200"), rows(2, "STATUS=200"));
+  }
+
+  @Test
+  public void testWildcardQueryWithScriptFilterOnMixedField() throws IOException {
+    // Script-based filter pushed down to each shard uses DOC_VALUE retrieval.
+    // When the merged type is keyword but the field is text on some shards,
+    // doc_values are not available, causing shard failures and missing results.
+    JSONObject result =
+        executeQuery(
+            "source=test_log_*_4659 | where upper(msg) = 'STATUS=200' | fields msg, idx"
+                + " | sort idx");
+    verifySchema(result, schema("msg", "string"), schema("idx", "int"));
+    verifyDataRows(result, rows("status=200", 1), rows("status=200", 2));
+  }
+
+  @Test
+  public void testWildcardQueryWithRexOnMixedField() throws IOException {
+    // Rex command on the conflicting field should work across both indices
+    JSONObject result =
+        executeQuery(
+            "source=test_log_*_4659 | rex field=msg 'status=(?<statusCode>\\\\d+)'"
+                + " | fields msg, idx, statusCode | sort idx");
+    verifySchema(
+        result, schema("msg", "string"), schema("idx", "int"), schema("statusCode", "string"));
+    verifyDataRows(result, rows("status=200", 1, "200"), rows("status=200", 2, "200"));
+  }
+}
@@ -0,0 +1,90 @@
+setup:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled : true
+  - do:
+      indices.create:
+        index: log_text_4659
+        body:
+          mappings:
+            properties:
+              msg:
+                type: text
+              idx:
+                type: integer
+  - do:
+      indices.create:
+        index: log_keyword_4659
+        body:
+          mappings:
+            properties:
+              msg:
+                type: keyword
+              idx:
+                type: integer
+  - do:
+      bulk:
+        index: log_text_4659
+        refresh: true
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"msg": "status=200", "idx": 1}'
+  - do:
+      bulk:
+        index: log_keyword_4659
+        refresh: true
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"msg": "status=200", "idx": 2}'
+
+---
+teardown:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled : false
+  - do:
+      indices.delete:
+        index: log_text_4659
+        ignore: 404
+  - do:
+      indices.delete:
+        index: log_keyword_4659
+        ignore: 404
+
+---
+"PPL wildcard query returns all documents across indices with mixed text/keyword field types":
+  - skip:
+      features:
+        - headers
+        - allowed_warnings
+  - do:
+      allowed_warnings:
+        - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: 'source=log_*_4659 | fields msg, idx | sort idx'
+  - match: {"total": 2}
+  - match: {"datarows": [["status=200", 1], ["status=200", 2]]}
+
+---
+"PPL script filter works across indices with mixed text/keyword field types":
+  - skip:
+      features:
+        - headers
+        - allowed_warnings
+  - do:
+      allowed_warnings:
+        - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: "source=log_*_4659 | where upper(msg) = 'STATUS=200' | fields msg, idx | sort idx"
+  - match: {"total": 2}
+  - match: {"datarows": [["status=200", 1], ["status=200", 2]]}
@@ -12,7 +12,7 @@
 public class MergeRuleHelper {
   private static final List<MergeRule> RULES =
       List.of(
-          new DeepMergeRule(), new LatestRule() // must come last
+          new DeepMergeRule(), new TextKeywordConflictRule(), new LatestRule() // must come last
           );
 
   public static MergeRule selectRule(OpenSearchDataType source, OpenSearchDataType target) {

@@ -0,0 +1,72 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.util.MergeRules;
+
+import java.util.Map;
+import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
+import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType;
+import org.opensearch.sql.opensearch.data.type.OpenSearchTextType;
+
+/**
+ * Merge rule for text/keyword type conflicts across indices. When a field is text in one index and
+ * keyword in another, or text-with-keyword-subfield in one and text-without in another, we merge to
+ * text WITHOUT keyword subfields. This forces _source retrieval instead of doc_values, which works
+ * universally across all shards regardless of the actual field type.
+ *
+ * <p>See GitHub issue #4659.
+ */
+public class TextKeywordConflictRule implements MergeRule {
+
+  @Override
+  public boolean isMatch(OpenSearchDataType source, OpenSearchDataType target) {
+    if (source == null || target == null) {
+      return false;
+    }
+    MappingType sourceMapping = source.getMappingType();
+    MappingType targetMapping = target.getMappingType();
+    if (sourceMapping == null || targetMapping == null) {
+      return false;
+    }
+    // Match when one is text and the other is keyword
+    if (isTextLike(sourceMapping) && isKeyword(targetMapping)) {
+      return true;
+    }
+    if (isKeyword(sourceMapping) && isTextLike(targetMapping)) {
+      return true;
+    }
+    // Match when both are text but one has keyword subfields and the other does not
+    if (isTextLike(sourceMapping) && isTextLike(targetMapping)) {
+      boolean sourceHasKeywordSub = hasKeywordSubField(source);
+      boolean targetHasKeywordSub = hasKeywordSubField(target);
+      return sourceHasKeywordSub != targetHasKeywordSub;
+    }
+    return false;
+  }
+
+  @Override
+  public void mergeInto(
+      String key, OpenSearchDataType source, Map<String, OpenSearchDataType> target) {
+    // Always merge to text WITHOUT keyword subfields.
+    // This forces _source retrieval, which works for both text and keyword fields.
+    target.put(key, OpenSearchTextType.of());
+  }
+
+  private static boolean isTextLike(MappingType mappingType) {
+    return mappingType == MappingType.Text || mappingType == MappingType.MatchOnlyText;
+  }
+
+  private static boolean isKeyword(MappingType mappingType) {
+    return mappingType == MappingType.Keyword;
+  }
+
+  private static boolean hasKeywordSubField(OpenSearchDataType type) {
+    if (type instanceof OpenSearchTextType textType) {
+      return textType.getFields().values().stream()
+          .anyMatch(f -> f.getMappingType() == MappingType.Keyword);
+    }
+    return false;
+  }
+}