sternadsoftware
diff --git a/‎sandbox/libs/analytics-api/src/main/java/org/opensearch/analytics/schema/OpenSearchSchemaBuilder.java‎
Lines changed: 114 additions & 19 deletions b/‎sandbox/libs/analytics-api/src/main/java/org/opensearch/analytics/schema/OpenSearchSchemaBuilder.java‎
Lines changed: 114 additions & 19 deletions
diff --git a/‎sandbox/plugins/analytics-backend-datafusion/rust/src/api.rs‎
Lines changed: 44 additions & 0 deletions b/‎sandbox/plugins/analytics-backend-datafusion/rust/src/api.rs‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎sandbox/plugins/analytics-backend-datafusion/rust/src/ffm.rs‎
Lines changed: 16 additions & 1 deletion b/‎sandbox/plugins/analytics-backend-datafusion/rust/src/ffm.rs‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎sandbox/plugins/analytics-backend-datafusion/rust/src/query_executor.rs‎
Lines changed: 1 addition & 0 deletions b/‎sandbox/plugins/analytics-backend-datafusion/rust/src/query_executor.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎sandbox/plugins/analytics-backend-datafusion/rust/src/schema_coerce.rs‎
Lines changed: 58 additions & 0 deletions b/‎sandbox/plugins/analytics-backend-datafusion/rust/src/schema_coerce.rs‎
Lines changed: 58 additions & 0 deletions
@@ -11,56 +11,151 @@
 import org.apache.calcite.jdbc.CalciteSchema;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.schema.Schema;
 import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Table;
+import org.apache.calcite.schema.impl.AbstractSchema;
 import org.apache.calcite.schema.impl.AbstractTable;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.opensearch.action.support.IndicesOptions;
 import org.opensearch.cluster.ClusterState;
 import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.cluster.metadata.IndexNameExpressionResolver;
 import org.opensearch.cluster.metadata.MappingMetadata;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.concurrent.ThreadContext;
+import org.opensearch.core.common.Strings;
+import org.opensearch.index.IndexNotFoundException;
 
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
  * Builds a Calcite {@link SchemaPlus} from OpenSearch {@link ClusterState} index mappings.
  *
  * <p>One Calcite table per index. Reads field types from index mapping properties.
  * Navigates: IndexMetadata -> MappingMetadata -> sourceAsMap() -> "properties" -> per-field "type".
- * // TODO: This is for illustation - use version sql plugin has built and re-purpose to not call node-client
  */
 public class OpenSearchSchemaBuilder {
 
     private OpenSearchSchemaBuilder() {}
 
+    public static SchemaPlus buildSchema(ClusterState clusterState) {
+        return buildSchema(clusterState, new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)));
+    }
+
     /**
      * Builds a Calcite SchemaPlus from the given ClusterState.
-     * Each index becomes a table; each mapped field becomes a column.
      *
-     * @param clusterState the current cluster state to derive schema from
+     * <p>Tables are resolved lazily on first lookup, mirroring the sql-plugin
+     * {@code OpenSearchSchema}. A requested name may be a concrete index, an alias, a comma list,
+     * a wildcard, an exclusion, or date-math; it is resolved through {@code resolver} — the same
+     * canonical resolution the execution-side {@code IndexResolution} uses — and the matching
+     * indices' supported fields are unioned into one row type. No upfront enumeration of cluster
+     * indices: construction is O(1) regardless of cluster size, and each referenced name costs
+     * one {@code IndexNameExpressionResolver} call plus a single mapping union.
+     *
+     * <p>The lazy schema is wrapped in a NON-caching root: a caching root enumerates
+     * {@code getTableNames()} and would never perform the implicit {@code getTable(name)} lookup
+     * that drives lazy resolution of expressions.
      */
-    public static SchemaPlus buildSchema(ClusterState clusterState) {
-        CalciteSchema rootSchema = CalciteSchema.createRootSchema(true);
-        SchemaPlus schemaPlus = rootSchema.plus();
+    public static SchemaPlus buildSchema(ClusterState clusterState, IndexNameExpressionResolver resolver) {
+        Schema lazySchema = new AbstractSchema() {
+            // Truly lazy table map, mirroring sql-plugin's OpenSearchSchema pattern: no upfront
+            // enumeration of cluster indices. get() registers on first lookup and caches under the
+            // lower-cased name. PPL's RelBuilder.scan and Calcite's case-sensitive validator both
+            // reach the schema via getTable(name), which routes here directly — no entrySet /
+            // keySet iteration needed for production resolution. Callers that need name
+            // enumeration (Calcite's withCaseSensitive(false) parser path used in some unit tests)
+            // get only resolved names back, which is fine when the lookup name is exact-case.
+            private final Map<String, Table> tableMap = new HashMap<>() {
+                @Override
+                public Table get(Object key) {
+                    String name = ((String) key).toLowerCase(java.util.Locale.ROOT);
+                    if (!super.containsKey(name)) {
+                        Table resolved = resolveTable(clusterState, resolver, name);
+                        if (resolved != null) {
+                            super.put(name, resolved);
+                        }
+                    }
+                    return super.get(name);
+                }
+            };
 
-        for (Map.Entry<String, IndexMetadata> entry : clusterState.metadata().indices().entrySet()) {
-            String indexName = entry.getKey();
-            IndexMetadata indexMetadata = entry.getValue();
-            MappingMetadata mapping = indexMetadata.mapping();
+            @Override
+            protected Map<String, Table> getTableMap() {
+                return tableMap;
+            }
+        };
+
+        return CalciteSchema.createRootSchema(true, false, "", lazySchema).plus();
+    }
+
+    /**
+     * Resolves a source expression (concrete name, alias, comma list, wildcard, exclusion, or
+     * date-math) to a single table whose row type unions the supported fields of all matching
+     * concrete indices, or {@code null} when nothing matches (so Calcite reports a clean "table not
+     * found"). Resolution goes through {@link IndexNameExpressionResolver} so schema membership
+     * matches the execution-side {@code IndexResolution}. First-wins on field-name conflict across
+     * the union; the planner's scan rule validates cross-index mapping compatibility when the table
+     * is referenced.
+     */
+    @SuppressWarnings("unchecked")
+    private static Table resolveTable(ClusterState clusterState, IndexNameExpressionResolver resolver, String expression) {
+        // Short-circuit literal alias / data stream names so the resolver's lenientExpandOpen
+        // (which does not include hidden backings) doesn't filter out data stream backings. The
+        // alias / data-stream abstraction already carries the full backing list — use it directly.
+        java.util.SortedMap<String, org.opensearch.cluster.metadata.IndexAbstraction> lookup = clusterState.metadata().getIndicesLookup();
+        org.opensearch.cluster.metadata.IndexAbstraction abstraction = lookup == null ? null : lookup.get(expression);
+        List<IndexMetadata> backing;
+        if (abstraction != null
+            && (abstraction.getType() == org.opensearch.cluster.metadata.IndexAbstraction.Type.ALIAS
+                || abstraction.getType() == org.opensearch.cluster.metadata.IndexAbstraction.Type.DATA_STREAM)) {
+            backing = abstraction.getIndices();
+        } else {
+            String[] concrete;
+            try {
+                // Comma-split first: concreteIndexNames treats each vararg as one expression, and
+                // splitting lets the resolver honor exclusions across tokens (e.g. "test*,-test1").
+                // includeDataStreams=true so wildcards / comma-lists that match a data stream NAME
+                // expand to its backings (the resolver normally excludes data streams from
+                // wildcard expansion otherwise). Literal data stream / alias names take the
+                // abstraction short-circuit above and skip the resolver entirely.
+                concrete = resolver.concreteIndexNames(
+                    clusterState,
+                    IndicesOptions.lenientExpandOpen(),
+                    true,
+                    Strings.splitStringByCommaToArray(expression)
+                );
+            } catch (IndexNotFoundException e) {
+                return null;
+            }
+            backing = new java.util.ArrayList<>(concrete.length);
+            for (String name : concrete) {
+                IndexMetadata index = clusterState.metadata().index(name);
+                if (index != null) {
+                    backing.add(index);
+                }
+            }
+        }
+        LinkedHashMap<String, Object> merged = new LinkedHashMap<>();
+        for (IndexMetadata index : backing) {
+            MappingMetadata mapping = index.mapping();
             if (mapping == null) {
                 continue;
             }
-
-            @SuppressWarnings("unchecked")
-            Map<String, Object> sourceMap = mapping.sourceAsMap();
-            @SuppressWarnings("unchecked")
-            Map<String, Object> properties = (Map<String, Object>) sourceMap.get("properties");
+            Map<String, Object> properties = (Map<String, Object>) mapping.sourceAsMap().get("properties");
             if (properties == null) {
                 continue;
             }
-
-            schemaPlus.add(indexName, buildTable(properties));
+            properties.forEach(merged::putIfAbsent);
         }
-
-        return schemaPlus;
+        if (merged.isEmpty()) {
+            return null;
+        }
+        return buildTable(merged);
     }
 
     /**
 
@@ -987,6 +987,40 @@ fn collect_reads(rel: &substrait::proto::Rel, out: &mut Vec<substrait::proto::Re
     }
 }
 
+/// Gathers every `ReadRel` found under the plan's relations into one vector.
+///
+/// Relations for which `root_rel` yields nothing are skipped; each remaining root is walked
+/// with `collect_reads`, which appends to the shared output in relation order.
+fn collect_plan_reads(plan: &substrait::proto::Plan) -> Vec<substrait::proto::ReadRel> {
+    let mut found = Vec::new();
+    plan.relations
+        .iter()
+        .filter_map(|plan_rel| root_rel(plan_rel))
+        .for_each(|rel| collect_reads(&rel, &mut found));
+    found
+}
+
+/// Decodes `plan_bytes` as a Substrait plan and reports the table name of its first
+/// `NamedTable` read.
+///
+/// The name is the last element of that read's `names`. Yields `None` when the bytes do not
+/// decode, when no read is a `NamedTable`, or when the first such read has an empty `names`.
+pub(crate) fn first_named_table_name(plan_bytes: &[u8]) -> Option<String> {
+    use substrait::proto::read_rel::ReadType;
+    let decoded: substrait::proto::Plan = prost::Message::decode(plan_bytes).ok()?;
+    collect_plan_reads(&decoded)
+        .into_iter()
+        .find_map(|read| match read.read_type {
+            Some(ReadType::NamedTable(named)) => Some(named),
+            _ => None,
+        })
+        .and_then(|named| named.names.last().cloned())
+}
+
+/// Returns the `base_schema` of the first `NamedTable` read whose last name component equals
+/// `table_name`.
+///
+/// Only the first matching read is consulted: if it carries no `base_schema` the result is
+/// `None` even when a later read for the same table has one. `None` is also returned when no
+/// read matches.
+pub(crate) fn base_schema_for_table(plan: &substrait::proto::Plan, table_name: &str) -> Option<substrait::proto::NamedStruct> {
+    use substrait::proto::read_rel::ReadType;
+    collect_plan_reads(plan)
+        .into_iter()
+        .find(|read| {
+            matches!(
+                read.read_type.as_ref(),
+                Some(ReadType::NamedTable(nt)) if nt.names.last().map(String::as_str) == Some(table_name)
+            )
+        })
+        // The read is owned here, so move the schema out instead of deep-cloning it.
+        .and_then(|read| read.base_schema)
+}
+
 // ---------------------------------------------------------------------------
 // Coordinator-reduce local execution API
 //
@@ -1373,6 +1407,16 @@ mod tests {
         );
     }
 
+    /// An empty byte slice yields no table name.
+    #[test]
+    fn test_first_named_table_name_returns_none_on_empty() {
+        let no_bytes: &[u8] = &[];
+        assert!(super::first_named_table_name(no_bytes).is_none());
+    }
+
+    /// Bytes that are not a valid plan yield no table name rather than panicking.
+    #[test]
+    fn test_first_named_table_name_returns_none_on_garbage() {
+        let junk = [0xFF_u8, 0x00, 0x01];
+        assert!(super::first_named_table_name(&junk).is_none());
+    }
+
     #[test]
     fn view_needs_gc_detects_bloat() {
         let strings: Vec<String> = (0..10_000)
 
@@ -649,11 +649,18 @@ pub unsafe extern "C" fn df_create_session_context(
     table_name_len: i64,
     context_id: i64,
     query_config_ptr: i64,
+    plan_ptr: *const u8,
+    plan_len: i64,
 ) -> i64 {
     let table_name = str_from_raw(table_name_ptr, table_name_len)
         .map_err(|e| format!("df_create_session_context: {}", e))?;
     let query_config =
         crate::datafusion_query_config::DatafusionQueryConfig::from_ffm_ptr(query_config_ptr);
+    let plan_bytes: &[u8] = if plan_len > 0 {
+        slice::from_raw_parts(plan_ptr, plan_len as usize)
+    } else {
+        &[]
+    };
     let mgr = get_rt_manager()?;
     mgr.io_runtime
         .block_on(crate::task_monitors::plan_setup_monitor().instrument(
@@ -663,6 +670,7 @@ pub unsafe extern "C" fn df_create_session_context(
                 table_name,
                 context_id,
                 query_config,
+                plan_bytes,
             )
         ))
         .map_err(|e| e.to_string())
@@ -679,16 +687,23 @@ pub unsafe extern "C" fn df_create_session_context_indexed(
     tree_shape: i32,
     delegated_predicate_count: i32,
     query_config_ptr: i64,
+    plan_ptr: *const u8,
+    plan_len: i64,
 ) -> i64 {
     let table_name = str_from_raw(table_name_ptr, table_name_len)
         .map_err(|e| format!("df_create_session_context_indexed: {}", e))?;
     let query_config =
         crate::datafusion_query_config::DatafusionQueryConfig::from_ffm_ptr(query_config_ptr);
+    let plan_bytes: &[u8] = if plan_len > 0 {
+        slice::from_raw_parts(plan_ptr, plan_len as usize)
+    } else {
+        &[]
+    };
     let mgr = get_rt_manager()?;
     mgr.io_runtime
         .block_on(crate::task_monitors::plan_setup_monitor().instrument(
             crate::session_context::create_session_context_indexed(
-                runtime_ptr, shard_view_ptr, table_name, context_id, tree_shape, delegated_predicate_count, query_config,
+                runtime_ptr, shard_view_ptr, table_name, context_id, tree_shape, delegated_predicate_count, query_config, plan_bytes,
             )
         ))
         .map_err(|e| e.to_string())
 
@@ -217,6 +217,7 @@ pub async fn execute_with_context(
             DataFusionError::Execution(format!("Failed to decode Substrait: {}", e))
         })?;
 
+        // Union schema widening was applied at table registration (session_context::widen_to_union_schema).
         let logical_plan = from_substrait_plan(&handle.ctx.state(), &substrait_plan).await?;
         log_debug!("DataFusion logical plan:\n{}", logical_plan.display_indent());
         let dataframe = handle.ctx.execute_logical_plan(logical_plan).await?;
 
@@ -157,10 +157,68 @@ fn rewrite_data_type(dt: &DataType) -> DataType {
     }
 }
 
+/// Appends to `registered` any `expected` field whose name is absent, as a nullable column.
+/// `Some(augmented)` if anything was added, `None` if `registered` already covers `expected`.
+///
+/// The Substrait consumer binds `base_schema` to the provider BY NAME, so the registered schema
+/// only needs to *contain* every expected column — order is irrelevant and present columns keep
+/// their inferred (coerced) types. Appended columns are forced nullable; DataFusion's parquet
+/// `SchemaAdapter` null-fills them at read time.
+///
+/// Appended columns keep the data type and metadata of the `expected` field and follow the
+/// registered columns in `expected` order. A name that occurs more than once in `expected` is
+/// appended only once (first occurrence wins), so the result never gains duplicate column
+/// names. The schema-level metadata of `registered` is carried over unchanged.
+pub fn append_missing_nullable(registered: &Schema, expected: &Schema) -> Option<SchemaRef> {
+    use std::collections::HashSet;
+
+    // Names already covered: seeded from `registered`, then grown as columns are appended so
+    // each lookup is a hash probe instead of a linear scan over the registered fields.
+    let mut known: HashSet<&str> = registered.fields().iter().map(|f| f.name().as_str()).collect();
+    let mut added: Vec<Field> = Vec::new();
+    for ef in expected.fields() {
+        // `insert` returns false when the name is already known (registered or just appended).
+        if known.insert(ef.name().as_str()) {
+            added.push(
+                Field::new(ef.name(), ef.data_type().clone(), true).with_metadata(ef.metadata().clone()),
+            );
+        }
+    }
+    if added.is_empty() {
+        return None;
+    }
+    let mut fields: Vec<Field> = registered.fields().iter().map(|f| f.as_ref().clone()).collect();
+    fields.extend(added);
+    Some(Arc::new(Schema::new_with_metadata(fields, registered.metadata().clone())))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    /// A column present only in the plan schema is appended, keeps its type, and is nullable.
+    #[test]
+    fn append_missing_adds_absent_columns_as_nullable() {
+        let shard_fields = vec![
+            Field::new("name", DataType::Utf8, true),
+            Field::new("age", DataType::Int64, true),
+        ];
+        // The plan marks `alias` as non-nullable, yet the shard holds no data for it, so the
+        // appended column has to come back nullable regardless.
+        let mut plan_fields = shard_fields.clone();
+        plan_fields.push(Field::new("alias", DataType::Utf8, false));
+
+        let registered = Schema::new(shard_fields);
+        let expected = Schema::new(plan_fields);
+
+        let widened = append_missing_nullable(&registered, &expected).expect("alias missing → augmented");
+        assert_eq!(widened.fields().len(), 3);
+        let appended = widened.field_with_name("alias").unwrap();
+        assert_eq!(appended.data_type(), &DataType::Utf8);
+        assert!(appended.is_nullable(), "appended column must be nullable");
+        for kept in ["name", "age"] {
+            assert!(widened.field_with_name(kept).is_ok());
+        }
+    }
+
+    /// Nothing is appended when every expected column is already registered.
+    #[test]
+    fn append_missing_returns_none_when_registered_covers_expected() {
+        // The plan references only a subset of what is registered; the extra registered
+        // column must not cause any augmentation.
+        let expected = Schema::new(vec![Field::new("name", DataType::Utf8, true)]);
+        let registered = Schema::new(vec![
+            Field::new("name", DataType::Utf8, true),
+            Field::new("age", DataType::Int64, true),
+        ]);
+        let outcome = append_missing_nullable(&registered, &expected);
+        assert!(outcome.is_none());
+    }
+
     #[test]
     fn top_level_binary_view_gets_rewritten() {
         let schema = Arc::new(Schema::new(vec![