
Commit 4dea6c4

[Analytics Backend / DataFusion] Onboard PPL array constructor and 8 multivalue (mv) functions to analytics-engine route (opensearch-project#21554)
* [Analytics Backend / DataFusion] Wire PPL array constructor + array_length / array_slice / array_distinct / mvjoin

Onboards the PPL `array(a, b, …)` constructor and four array-consuming functions to the analytics-engine route by mapping their Calcite lowering targets through Substrait to DataFusion's native make_array / array_length / array_slice / array_distinct / array_to_string.

Same templated shape as the `replace` PR (opensearch-project#21527), with two extensions: ScalarFunction enum constants (5) + STANDARD_PROJECT_OPS / ARRAY_RETURNING_PROJECT_OPS membership + opensearch_array_functions.yaml extension entries + ADDITIONAL_SCALAR_SIGS Calcite-op→Substrait-name bridges + scalarFunctionAdapters() entries for the 3 functions that need operand normalization = onboarded to the analytics route.

Capability lookup at OpenSearchProjectRule keys on the call's return type; for array-returning functions (`array(...)`, `array_slice`, `array_distinct`) the return type resolves to `SqlTypeName.ARRAY`, which previously hit `default → null` in `FieldType.fromSqlTypeName` and emptied the viable-backend list before the registration could match.
* `FieldType.ARRAY` added to the analytics SPI enum.
* `SqlTypeName.ARRAY → FieldType.ARRAY` mapping in `fromSqlTypeName`.
* `ARRAY_RETURNING_PROJECT_OPS` registered against `Set.of(FieldType.ARRAY)` only — separate from `STANDARD_PROJECT_OPS` so `FieldType.ARRAY` doesn't pollute filter / aggregate capabilities (no meaningful semantics over array-typed values there).
* `ArrowSchemaFromCalcite.toArrowField` recurses into the component type to build the matching Arrow `List<inner>` field — without this the result schema would have a bare `List` with no element field and the backend's Arrow IPC reader would fail to bind result columns.

Substrait's standard catalog has no array_* entries, so isthmus' `RexExpressionConverter` would fail with "Unable to convert call …" on every array call. New `opensearch_array_functions.yaml` declares:
* `make_array(any1, …)` → `list<any1>` (variadic, min: 0).
* `array_length(list<any1>)` → `i64?`.
* `array_slice(list<any1>, i64, i64)` → `list<any1>` (with i32 fallback).
* `array_distinct(list<any1>)` → `list<any1>`.
* `array_to_string(list<any1>, string)` → `string?` (with varchar fallback).
Loaded via `SimpleExtension.load("/opensearch_array_functions.yaml")` and merged into the plugin's extension collection in `DataFusionPlugin.loadSubstraitExtensions()`.

Substrait's call-conversion path (and DataFusion's signature matcher) is strict about operand types in ways Calcite's PPL lowering doesn't naturally satisfy. Three adapters bridge the gap:
* `MakeArrayAdapter` — implements `ScalarFunctionAdapter` directly (not `AbstractNameMappingAdapter`). PPL's `ArrayFunctionImpl` infers `ARRAY<commonElementType>` for the call's return type but does NOT widen the individual operand types. So `array(1, 1.5)` produces a RexCall whose operands are `(INTEGER, DECIMAL(2,1))` but whose return type is `ARRAY<DOUBLE>`. Substrait's variadic-`any1` consistency validator throws an `AssertionError` in that case (not a recoverable exception — it fatally exits the search-thread JVM). The adapter extracts the call's component type and CASTs each non-matching operand to it before emission.
* `ArrayToStringAdapter` — declares a local `array_to_string` op and name-maps `SqlLibraryOperators.ARRAY_JOIN` → it.
* `ArraySliceAdapter` — passes the `ARRAY_SLICE` call through unchanged but coerces the index operands (positions 1, 2, optional 3) to `BIGINT`.
PPL's parser types positive integer literals as `DECIMAL(20,0)`; DataFusion's `array_slice` signature accepts only integer indexes and refuses to coerce decimal arguments.

Two third-party dependencies surfaced as fatal `NoClassDefFoundError` during execution of array-returning calls:
* `commons-text` to analytics-engine — Calcite's `SqlFunctions` class statically references `org.apache.commons.text.similarity.LevenshteinDistance`. Without it, any Calcite RelNode walk that touches `SqlFunctions.<clinit>` poisons the search-thread JVM.
* `jackson-datatype-jsr310` to **arrow-flight-rpc** (the parent plugin that bundles `arrow-vector`). `arrow-vector`'s `JsonStringArrayList` eagerly registers `JavaTimeModule` on its ObjectMapper in `<clinit>`, so any reader of an Arrow `ListVector` (i.e. every array-returning DataFusion call flowing through analytics-engine) hits a fatal NoClassDefFoundError. The dep belongs on arrow-flight-rpc's classpath because that plugin defines arrow-vector's classloader; bundling it in analytics-backend-datafusion (the child plugin) is invisible to arrow-vector. Marked `compileOnly` in analytics-backend-datafusion to avoid jar-hell with arrow-flight-rpc's `api` dependency.

* Before: 1/60 (testArrayWithMix only — exercises an error path that fails before the ARRAY capability lookup).
* After: 9/60. Newly passing: testArray, testArrayWithString, testArrayLength, testMvjoinWithStringArray, testMvjoinWithStringifiedNumbers, testMvjoinWithMixedStringValues, testMvjoinWithStringBooleans, testMvjoinWithSpecialDelimiters, testMvjoinWithArrayFromRealFields, testMvjoinWithMultipleRealFields.

The remaining 51 failures fall into three buckets:
* 50 — out-of-scope S1+ functions (`mvfind`, `mvzip`, `reduce`, `transform`, `forall`, `filter`, `exists`, `ITEM`). These are PPL UDFs without direct DataFusion equivalents and need either lambda-substrait wiring or custom UDF registration on the Rust side.
* 5 — the `testMvindexRange*` family. PPL's `mvindex(arr, from, to)` lowers to `ARRAY_SLICE(arr, from+1, to+1)` (1-based shift) but the lowering is missing the +1, so DataFusion's 1-based array_slice returns a window shifted by one. The fix belongs in the SQL plugin's PPL→Calcite lowering layer.
* 1 — `testMvindexRangeMixed` JSON formatting mismatch (the test expects a bare `[a,b,c]` but the response is `"[\"a\",\"b\",\"c\"]"`).

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Fix ARRAY_SLICE 0-based-(start, length) → 1-based-(start, end) for DataFusion

Calcite's `SqlLibraryOperators.ARRAY_SLICE` is the Spark / Hive flavor — 0-based start, third arg is the number of elements to take. PPL's `MVIndexFunctionImp.resolveRange` (in the SQL plugin) emits this form, e.g. `mvindex(arr=[1..5], 1, 3)` → `ARRAY_SLICE(arr, 1, 3)`, meaning "start at 0-based position 1, take 3 elements" → expected `[2, 3, 4]`. DataFusion's native `array_slice` is the Postgres / Snowflake flavor — 1-based start, third arg is the inclusive end index — so the same call `array_slice(arr, 1, 3)` returns the elements at 1-based positions 1..3 → `[1, 2, 3]`: an off-by-one across every `mvindex` range query.

Convert the operands in the adapter rather than in the SQL plugin's PPL lowering, because the lowering's existing semantics are correct for Calcite's local executor (used by every non-analytics path); the bug is only in the bridge to DataFusion.
The conversion applied in the adapter:

    start' = start + 1
    end'   = start + length   (== start + 1 + (length - 1))

`MVIndexFunctionImp` already normalizes negative indexes to non-negative 0-based positions before invoking ARRAY_SLICE (it uses `arrayLen + idx`), so the arithmetic above applies uniformly. Empirically: `mvindex(arr=[1..5], 1, 3)` now returns the correct values `[2, 3, 4]` (was `[1, 2, 3]`); the negative form `mvindex(arr, -3, -1)` returns `[3, 4, 5]` (was `[2, 3]`); the mixed form `mvindex(arr, -4, 2)` returns `[2, 3]`, matching the PPL spec.

The 5 `testMvindexRange*` tests still don't pass on the IT, but for an unrelated reason — array-typed result values are being returned as JSON-stringified scalars (`"[2,3,4]"`) instead of typed arrays. That's a response-formatting issue affecting every array-returning test (also `testArray`, `testArrayWithString`) and lives in a different code path; it'll be addressed separately.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Wire PPL ITEM (mvindex single-element) → DataFusion array_element

PPL's `mvindex(arr, N)` single-element form lowers (in `MVIndexFunctionImp.resolveSingleElement`) to Calcite's `SqlStdOperatorTable.ITEM` operator with a 1-based index (already converted from PPL's 0-based input). DataFusion's native single-element array accessor is `array_element` (also 1-based), so a name-mapping adapter + a yaml extension entry are sufficient.

Templated shape:

    ScalarFunction.ITEM (SqlKind.ITEM)
    + STANDARD_PROJECT_OPS membership (returns the array's element type, which resolves through the
      existing FieldType.fromSqlTypeName → SUPPORTED_FIELD_TYPES capability lookup for non-array
      element types — array-of-array is rare in PPL and not exercised by the current test surface)
    + scalarFunctionAdapters() entry → ArrayElementAdapter
      ↳ rewrites SqlStdOperatorTable.ITEM to a locally-declared SqlFunction named "array_element"
      ↳ coerces the index operand to BIGINT (PPL's parser produces DECIMAL for positive integer
        literals; DataFusion's array_element rejects DECIMAL indexes, same as array_slice)
    + ADDITIONAL_SCALAR_SIGS bridge for the locally-declared op
    + opensearch_array_functions.yaml extension entry: array_element(list<any1>, i64) → any1?

# Pass-rate (CalciteArrayFunctionIT, force-routed)
* Before this commit: 9/60.
* After this commit: 12/60. Newly passing: testMvindexSingleElementPositive, testMvindexSingleElementNegative, testMvindexSingleElementNegativeMiddle.

The other 3 tests that hit the ITEM rejection (testMvfindWith*) are multi-step queries where ITEM is one node in a tree that also includes unrelated S1+ functions (mvfind/mvzip/etc.); they remain blocked by the upstream functions, not by ITEM itself.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Engine] Carry array-typed cells through RowResponseCodec without JSON-stringifying

The row-oriented fragment-execution wire format (`FragmentExecutionResponse`, used when arrow-flight streaming is disabled — every single-node test cluster today) shipped each cell through OpenSearch's `writeGenericValue` / `readGenericValue`, which preserves `List` values as `ArrayList<Object>`. On the coordinator side, `RowResponseCodec.decode` then re-materialized the rows into a `VectorSchemaRoot` for `Iterable<VectorSchemaRoot>`-style consumers. Two bugs in that re-materialization were eating array values:

1. `inferArrowType` walked rows for the first non-null cell and matched against {Long, Integer, …, CharSequence, byte[], Number}.
{@code List} wasn't in the chain, so it fell through to {@code break} and the fallback {@link ArrowType.Utf8} — every array column became a VARCHAR column.
2. `setVectorValue` for {@link VarCharVector} called {@code value.toString()}. For a {@code JsonStringArrayList}, that returns the JSON form {@code "[2,3,4]"}, which then got serialized as a JSON string in the final response.

Tests like {@code testMvindexRangePositive} saw their array result come back as a string `"[2,3,4]"` instead of an array `[2, 3, 4]`.

Fix:
* Replace {@code inferArrowType} with {@code inferField}, which returns a full {@link Field}. For {@code List} cells, build a list field with the inner element type inferred from the first non-null element (with a fallback that scans later rows in case the first list is empty/all-null); a sketch of this shape follows the SHA/LICENSE entry below.
* Add a {@code ListVector} arm to {@code setVectorValue} that delegates to a new {@code writeListValue}. The writer bypasses {@link UnionListWriter} entirely — it writes directly to the list's offset / validity buffers and to the inner data vector via the inner vector's typed `setSafe`. The writer-based API requires per-element `ArrowBuf` allocations for varchar elements that are easy to leak or use-after-free; the direct path is simpler and avoids both classes of bug.

Plus a separate Arrow gotcha that surfaced once arrays started flowing through correctly:
* {@code ListVector.getObject} for a {@code VarCharVector} child returns a {@code JsonStringArrayList} whose elements are Arrow's {@link Text} class, not Java {@link String}. {@code ExprValueUtils.fromObjectValue} doesn't recognize {@code Text} and threw "unsupported object class org.apache.arrow.vector.util.Text". {@code ArrowValues.toJavaValue} now mirrors its top-level VarChar branch for list cells: when a list value comes back from a {@code ListVector}, normalize each {@code Text} element to a {@link String} before handing the list upward.

* Before: 12/60 (mvindex range tests still showed an expected-vs-actual diff because `[2,3,4]` came back as a JSON string, not an array).
* After: 26/60. Newly passing: testMvindexRangePositive, testMvindexRangeNegative, testMvindexRangeMixed, testMvindexRangeFirstThree, testMvindexRangeLastThree, testMvindexRangeSingleElement, testMvdedupWithDuplicates, testMvdedupWithAllDuplicates, testMvdedupWithNoDuplicates, testMvdedupWithStrings, testArrayWithString, testSplitWithSemicolonDelimiter, testSplitWithMultiCharDelimiter, testSplitWithEmptyDelimiter.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics] Add SHA + LICENSE files for new bundled deps; spotless

The `dependencyLicenses` precommit task scans `licenses/` for a `<jar>.sha1` sibling per bundled dependency. Two deps added in this PR were missing them:
* `commons-text-1.11.0` in analytics-engine — needs sha1 + LICENSE + NOTICE (no shared `commons-text-*` license files yet in this plugin). Apache 2.0; LICENSE and NOTICE extracted from the released jar.
* `jackson-datatype-jsr310-2.21.3` in arrow-flight-rpc — sha1 only. arrow-flight-rpc's `dependencyLicenses` already maps `jackson-.*` to the shared `jackson-LICENSE` / `jackson-NOTICE` files via `mapping from: /jackson-.*/, to: 'jackson'`, so no new license/notice files are needed.

Plus a googleJavaFormat reflow on `ArraySliceAdapter` and `DataFusionPlugin` that spotlessCheck flagged in precommit.

Verified `:plugins:arrow-flight-rpc:precommit`, `:sandbox:plugins:analytics-engine:precommit`, and `:sandbox:plugins:analytics-backend-datafusion:precommit` all succeed.

Addresses review feedback on opensearch-project#21554.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
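[Editorial illustration] A minimal sketch of the {@code inferField} list handling described in the RowResponseCodec entry above, using Arrow's Java pojo types. The helper name, element-type policy, and child-field name are assumptions for illustration, not the plugin's actual code.

import java.math.BigDecimal;
import java.util.List;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

final class ListFieldSketch {
    /** Build a List<inner> field for a column whose cells are java.util.List values. */
    static Field listField(String columnName, Object firstNonNullElement) {
        ArrowType elementType;
        if (firstNonNullElement instanceof CharSequence) {
            elementType = new ArrowType.Utf8();
        } else if (firstNonNullElement instanceof Double
                || firstNonNullElement instanceof Float
                || firstNonNullElement instanceof BigDecimal) {
            // BigDecimal promoted to FloatingPoint(DOUBLE), matching the next entry below.
            elementType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
        } else {
            elementType = new ArrowType.Int(64, true); // integral fallback
        }
        // The list field must carry its element as a child; a bare List with no element
        // field is exactly the shape the Arrow IPC reader fails to bind.
        Field element = new Field("element", FieldType.nullable(elementType), List.of());
        return new Field(columnName, FieldType.nullable(ArrowType.List.INSTANCE), List.of(element));
    }
}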
* [Analytics Engine] Map BigDecimal cells to FloatingPoint in row-codec inference

{@code RowResponseCodec.scalarArrowType} ordered its instanceof checks {Long, Integer, Short, Byte, Double, Float, Boolean, CharSequence, byte[], Number(fallback) → Int(64)}. BigDecimal extends {@link Number} but isn't any of the typed scalar arms, so it fell through to the {@code Number} fallback and got encoded as a 64-bit integer column — silently truncating fractional digits.

This bites PPL flows whose common element type is {@code DECIMAL} (e.g. {@code array(1, -1.5, 2, 1.0)} — the v2-side {@code ArrayImplementor.internalCast} explicitly maps the DECIMAL target to BigDecimal cells). The element values {@code -1.5} and {@code 1.0} round to {@code -1} and {@code 1} when forced through Int(64), so the array reads back as {@code [1, -1, 2, 1]} instead of {@code [1, -1.5, 2, 1.0]}.

Promote BigDecimal cells to FloatingPoint(DOUBLE) — the same precision the v2 engine uses for decimal-typed PPL results, so behavior matches across both execution paths. The list writer's {@code Float8Vector} arm already uses {@code ((Number) element).doubleValue()}, which correctly extracts the fractional value from a BigDecimal.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Onboard PPL mvzip via custom Rust UDF

PPL `mvzip(left, right [, sep])` element-wise zips two arrays into a list of strings, joined per pair by a separator (default `,`). DataFusion has no stdlib equivalent — `array_concat` is end-to-end concatenation, and Substrait's lambda support is too thin for a transform/zip rewrite — so this onboards a custom Rust ScalarUDF on the analytics-backend-datafusion plugin's session context and wires the Java side to route to it.

Templated shape (extends the existing pattern from convert_tz):

Rust side: udf::mvzip::MvzipUdf — Signature::user_defined; coerce_types pins the first two args to ListArray and the optional 3rd to Utf8; invoke_with_args iterates per row, takes min(len(left), len(right)) elements, stringifies each (matching `Objects.toString(elem, "")` for null elements), and builds a List<Utf8>. A defensive Null-element-type arm handles the empty-array case before the SQL-plugin VARCHAR default kicks in. Registered on each session context via udf::register_all alongside convert_tz. 7 unit tests cover the basic / custom-sep / truncation / null-element / null-array / empty-array / numeric-array shapes.

Java side: ScalarFunction.MVZIP enum entry (SqlKind.OTHER_FUNCTION; resolves through identifier-name valueOf("MVZIP") since PPL's MVZipFunctionImpl registers under the function name "mvzip"). MvzipAdapter — locally-declared SqlFunction("mvzip") + ADDITIONAL_SCALAR_SIGS bridge so isthmus emits a Substrait scalar function call with the exact name the Rust UDF is registered under. DataFusionAnalyticsBackendPlugin: ARRAY_RETURNING_PROJECT_OPS membership (returns ARRAY<VARCHAR>, registered against FieldType.ARRAY); adapter registration in scalarFunctionAdapters(). opensearch_array_functions.yaml: two impls for arity-2 and arity-3.

* Before: 28/60.
* After: 34/60. Newly passing — all 5 testMvzip* variants: testMvzipBasic, testMvzipWithCustomDelimiter, testMvzipNested, testMvzipWithEmptyArray, testMvzipWithBothEmptyArrays. (The test-count delta is +6 because the test class also exercises mvzip in 1 other test under a different name, picked up by the same fix.)
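[Editorial illustration] A plain-Java sketch of the per-row zip semantics described above — truncate to the shorter array, stringify each element with null → "", join each pair with the separator. The shipped implementation is the Rust udf::mvzip ScalarUDF; this helper class is not part of the PR.

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

final class MvzipSketch {
    static List<String> mvzip(List<?> left, List<?> right, String sep) {
        int n = Math.min(left.size(), right.size()); // truncate to the shorter array
        List<String> out = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {
            // null elements stringify to "", matching Objects.toString(elem, "")
            out.add(Objects.toString(left.get(i), "") + sep + Objects.toString(right.get(i), ""));
        }
        return out;
    }

    public static void main(String[] args) {
        // mvzip(["a","b","c"], ["1","2"], "-") → [a-1, b-2]
        System.out.println(mvzip(List.of("a", "b", "c"), List.of("1", "2"), "-"));
    }
}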
This PR's run also picks up the SQL-plugin companion #5421, which defaults empty `array()` to ARRAY<VARCHAR>. Without that companion the testMvzipWith*EmptyArray variants would still fail — substrait would reject the input ARRAY<NULL> type before reaching the UDF. The Rust UDF's Null-element arm exists as a defensive backstop in case the call ever reaches it with a null-typed list.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Onboard PPL mvfind via custom Rust UDF

PPL `mvfind(arr, regex)` finds the 0-based index of the first array element matching a regex pattern (Java `Matcher.find` substring-match semantics), or NULL if no match. DataFusion has no stdlib equivalent, and rewriting in terms of array_position requires per-element regex evaluation that's only expressible with substrait lambda support — out of scope here. Onboards a custom Rust ScalarUDF on the analytics-backend-datafusion plugin's session context, mirroring the mvzip/convert_tz pattern.

Templated shape:

Rust side: udf::mvfind::MvfindUdf — Signature::user_defined; coerce_types pins arg 0 to a list type and arg 1 to Utf8; invoke_with_args walks each row and finds the first non-null element whose stringified form matches the regex via Rust's `regex` crate (`Regex::is_match` is unanchored, same as Java's `Matcher.find`). Scalar pattern operands compile once up front and surface invalid-regex errors at plan time (mirrors the SQL plugin's plan-time `tryCompileLiteralPattern`); column-valued patterns compile per row and yield NULL for invalid patterns. Supports list element types Utf8 / Int{8,16,32,64} / UInt{8,16,32,64} / Float{32,64} / Boolean / Null. 7 unit tests cover the basic-match / no-match / null-array / empty-array / null-element / numeric-array / unanchored shapes. Registered on each session context via udf::register_all alongside convert_tz and mvzip.

Java side: ScalarFunction.MVFIND enum entry (SqlKind.OTHER_FUNCTION; resolves through identifier-name valueOf("MVFIND") since PPL's MVFindFunctionImpl registers under the function name "mvfind"). MvfindAdapter — locally-declared SqlFunction("mvfind") + ADDITIONAL_SCALAR_SIGS bridge so isthmus emits a Substrait scalar function call with the exact name the Rust UDF is registered under. DataFusionAnalyticsBackendPlugin: STANDARD_PROJECT_OPS membership (returns INTEGER, registered against the existing scalar SUPPORTED_FIELD_TYPES); adapter registration in scalarFunctionAdapters(). opensearch_array_functions.yaml: arity-2 impl returning `i32?`.

* Before: 34/60.
* After: 42/60. Newly passing — 8 of 9 testMvfind* variants: testMvfindWithMatch, testMvfindWithFirstMatch, testMvfindWithMultipleMatches, testMvfindWithNoMatch, testMvfindWithEmptyArray, testMvfindWithNumericArray, testMvfindWithCaseInsensitive, testMvfindWithComplexRegex.

Remaining mvfind failure: testMvfindWithDynamicRegex — fails with "Unable to convert call CONCAT(string, string)" because the test computes the pattern via `concat('ban', '.*')` and substrait can't bind the CONCAT call. This is a separate analytics-engine CONCAT type-conversion issue, not mvfind-specific.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Onboard PPL mvappend via custom Rust UDF

PPL `mvappend(arg1, arg2, …)` flattens a mixed list of array and scalar arguments into one array, dropping null arguments and null elements within array arguments.
DataFusion's `array_concat` is the closest stdlib match but only accepts arrays (not mixed array+scalar) and preserves nulls — different semantics. Onboards as a custom Rust ScalarUDF on the analytics-backend-datafusion plugin's session context, mirroring the mvzip / mvfind pattern.

Templated shape:

Rust side: udf::mvappend::MvappendUdf — Signature::user_defined; per-row walk over the operands, skipping NULL args and NULL elements inside array args, with explicit Arrow type arms for {Int8/16/32/64, UInt8/16/32/64, Float32/64, Boolean, Utf8/LargeUtf8/Utf8View}. The string arms output List<Utf8> or List<Utf8View> depending on the inferred element type so the result schema matches what `return_type` declared (DataFusion's execution-time schema check rejects mismatches). A defensive Null element-type arm covers the empty-array shape. 6 unit tests. Registered on each session context via udf::register_all.

Java side: ScalarFunction.MVAPPEND enum entry (SqlKind.OTHER_FUNCTION; resolves through identifier-name valueOf("MVAPPEND")). MvappendAdapter — locally-declared SqlFunction("mvappend") + ADDITIONAL_SCALAR_SIGS bridge. Casts every scalar operand to the call's array component type and every array operand to ARRAY<componentType> before substrait emission, so the UDF sees a single uniform element type across all positions. DataFusionAnalyticsBackendPlugin: ARRAY_RETURNING_PROJECT_OPS membership (returns ARRAY<commonType>); adapter registration in scalarFunctionAdapters(). opensearch_array_functions.yaml: variadic min:1 entry with `list<any1?>` return type.

* Before: 0/15.
* After: 6/15. Newly passing: testMvappendWithMultipleElements, testMvappendWithSingleElement, testMvappendWithArrayFlattening, testMvappendWithStringValues, testMvappendWithNestedArrays, testMvappendWithRealFields.

Remaining failures:
* 8 tests fail with "Unable to convert the type ANY". Root cause is PPL's MVAppendFunctionImpl.updateMostGeneralType using strict Object.equals on each pair of operand types, returning Calcite's ANY type when any two don't match — including when they only differ in nullability tag (a literal 3 is INTEGER NOT NULL but the component type of `array(1, 2)` is INTEGER NULLABLE). Substrait can't serialize ANY. The fix belongs in the SQL plugin's MVAppendFunctionImpl (use typeFactory.leastRestrictive instead of Object.equals) and isn't addressed here.
* testMvappendInWhereClause — uses `where array_length(combined) = 2`, which the analytics-engine planner rejects with "No backend can evaluate filter predicate [EQUALS] on fields [combined:ARRAY]". Filter-side capability gap unrelated to mvappend.
* testMvappendWithComplexExpression — fails substrait conversion on a nested mvappend call ("Unable to convert call mvappend(list, …)"), likely the same nullability-widening pattern flowing through nested calls. The same upstream fix applies.

CalciteArrayFunctionIT is unchanged at 43/60 — mvappend isn't exercised there.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Reshape mvappend operands as uniform lists; add Decimal128 element support

Two follow-ons to the initial mvappend onboarding (40b2161), both surfaced once the SQL companion opensearch-project#5424 (`MVAppendFunctionImpl.leastRestrictive`) let homogeneous-type calls reach substrait conversion.

# Uniform-list operand reshape

Substrait's variadic-`any1` argument shape requires every operand at the same variadic position to share a type.
PPL's `mvappend(arg, …)` accepts a mix of bare scalars and arrays, which substrait's signature matcher rejected with `Unable to convert call mvappend(list<i32?>, i32?, i32?)`. `MvappendAdapter` now wraps each scalar operand in a singleton `make_array(scalar)` call (using the locally-declared `MakeArrayAdapter.LOCAL_MAKE_ARRAY_OP`) so that by the time the substrait converter sees the operands they're uniformly `list<componentType>`. The yaml impl was correspondingly tightened from `args: [{ value: any1 }] variadic` to `args: [{ value: list<any1?> }] variadic`. The Rust UDF (`udf::mvappend`) keeps its scalar-handling branch intact as a defensive fallback, but in practice every operand it sees is now a list.

# Decimal128 element type

Calcite's leastRestrictive widening on INT + DECIMAL produces DECIMAL(p, s), which substrait converts to Decimal128(p, s); the Java adapter casts every operand's element type to that. The Rust UDF needed an explicit `DataType::Decimal128(p, s)` branch — Decimal128Builder requires `.with_precision_and_scale(p, s)` configuration before use, and Decimal128Array elements are read via the `i128`-valued `value(i)` accessor (not via the generic `build!` macro).

# Pass-rate (CalciteMVAppendFunctionIT, force-routed, with companion opensearch-project#5424 applied)
* Before this commit: 6/15 (initial mvappend onboarding).
* After this commit: 10/15. Newly passing: testMvappendWithMixedArrayAndScalar (uniform-list reshape), testMvappendWithComplexExpression (uniform-list reshape), testMvappendWithIntAndDouble (Decimal128 element), testMvappendWithNumericArrays (Decimal128 element).

Remaining 5 failures:
* testMvappendWithMixedTypes / WithFieldsAndLiterals / WithEmptyArray / WithNull — the call legitimately widens to ARRAY<ANY> because the operands contain pairs of types with no common widened type (INT + VARCHAR). The Calcite engine handles ANY via Object generic dispatch; substrait can't encode it. Out of scope without changing PPL UDF semantics.
* testMvappendInWhereClause — uses `where array_length(combined) = 2`, which the analytics-engine planner rejects with "No backend can evaluate filter predicate [EQUALS] on fields [combined:ARRAY]". Filter-side capability gap unrelated to mvappend.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [Analytics Backend / DataFusion] Register UDFs on FFM-created session contexts

create_session_context (the Rust-side builder behind df_create_session_context) built a fresh DataFusion SessionContext but never called udf::register_all on it. Every fragment query routed through df_execute_with_context reused that handle's ctx via query_executor::execute_with_context, so substrait function references to mvappend / mvfind / mvzip / convert_tz failed planning with "This feature is not implemented: Unsupported function name". The matching register_all call exists in execute_query / local_executor / indexed_executor — this just brings the FFM session-context path to parity.

Verified: CalciteMVAppendFunctionIT against the analytics-engine route now passes 10/15 (was 0/15) with the SQL companion opensearch-project#5424 widening fix applied. The remaining 5 are pre-existing ARRAY<ANY>/UNKNOWN substrait-encoding gaps (heterogeneous mvappend signatures, empty-array default, filter-on-array predicate) tracked in this PR's "What's left" section.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
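[Editorial illustration] A minimal sketch of the scalar-wrapping reshape from the mvappend reshape entry above, assuming Calcite's RexBuilder API. The helper signature is illustrative and the operator handle stands in for MakeArrayAdapter.LOCAL_MAKE_ARRAY_OP; this is not MvappendAdapter's actual code.

import java.util.ArrayList;
import java.util.List;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeName;

final class MvappendReshapeSketch {
    /** Wrap every scalar operand in a singleton make_array(CAST(scalar AS component)) call. */
    static List<RexNode> asUniformLists(
            RexBuilder rexBuilder, SqlOperator localMakeArrayOp,
            RelDataType componentType, RelDataType arrayType, List<RexNode> operands) {
        List<RexNode> reshaped = new ArrayList<>(operands.size());
        for (RexNode op : operands) {
            if (op.getType().getSqlTypeName() == SqlTypeName.ARRAY) {
                reshaped.add(rexBuilder.makeCast(arrayType, op));             // ARRAY<component>
            } else {
                RexNode widened = rexBuilder.makeCast(componentType, op);     // scalar → component
                reshaped.add(rexBuilder.makeCall(localMakeArrayOp, widened)); // list<component>
            }
        }
        return reshaped;
    }
}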
* [Analytics Backend / DataFusion] Don't let substrait AssertionError kill the cluster

Substrait's plan validators (VariadicParameterConsistencyValidator, RelOptUtil.eq via Litmus.THROW, etc.) throw AssertionError directly via explicit `throw new AssertionError(...)` rather than via the `assert` keyword, so the JVM -da flag doesn't gate them. When a malformed plan triggers one inside a search-thread call to SubstraitRelVisitor.apply, the AssertionError propagates uncaught up the analytics-engine fragment handler stack, OpenSearchUncaughtExceptionHandler classifies it as fatal, and the entire cluster JVM exits.

Wrap the visitor.apply call in a narrow try/catch that re-raises the AssertionError as IllegalStateException with the original message and cause preserved. The analytics-engine error path already buckets IllegalStateException at the fragment boundary into a normal HTTP 500 response — the cluster stays up and the failure shows in the per-query report instead.

This came up while diagnosing CalciteMVAppendFunctionIT failures: malformed ARRAY<ANY> plans were taking down the cluster mid-test instead of producing per-test failures, masking the underlying substrait conversion error.

Signed-off-by: Kai Huang <ahkcs@amazon.com>

* [QA] Add ArrayFunctionIT + MVAppendFunctionIT for analytics-engine REST path

Self-contained QA ITs in sandbox/qa/analytics-engine-rest exercising the PPL collection functions onboarded in this PR through POST /_analytics/ppl against a parquet-backed `calcs` dataset; no SQL plugin checkout required.

ArrayFunctionIT (22 tests):
- array constructor (mixed-numeric BigDecimal → Double promotion + int+string)
- array_length
- mvindex range (array_slice — 0-based-(start, length) → 1-based-(start, end))
- mvindex single (array_element via ITEM rename)
- mvdedup (array_distinct)
- mvjoin (array_to_string rename)
- mvzip (Rust UDF, default + custom delimiter + nested)
- mvfind (Rust UDF, match / no-match / dynamic regex via concat() Sig bridge)
- split (returns array)

MVAppendFunctionIT (6 tests):
- uniform-typed scalar variadic (multiple, single, string)
- array operands (flattening, nested string arrays)
- VARCHAR field references via real calcs row

Tests gated on SQL companion opensearch-project#5424 (testMvappendWith{IntAndDouble, MixedArrayAndScalar, NumericArrays, ComplexExpression}) are intentionally absent — they fail with "Unable to convert the type ANY" until MVAppendFunctionImpl's leastRestrictive widening + DECIMAL→DOUBLE promotion + operand pre-cast is published as unified-query-core. A top-of-class block lists them with a pointer back to opensearch-project#5424.

Lambda-based functions (transform, mvmap, reduce, forall, exists, filter) and empty-array operands are absent for the architectural reasons in this PR's "What's left" section: substrait extension YAML doesn't support declaring func<…> lambda-typed args, and array() defaults to ARRAY<UNKNOWN>, which substrait can't encode without #5421.

Local verification (per `docs/dev/ppl-analytics-engine-routing.md` SOP):
- :sandbox:qa:analytics-engine-rest:integTest --tests "*ArrayFunctionIT" — 22/22
- :sandbox:qa:analytics-engine-rest:integTest --tests "*MVAppendFunctionIT" — 6/6
- :check -p sandbox — all 718 tasks green

Signed-off-by: Kai Huang <ahkcs@amazon.com>

---------

Signed-off-by: Kai Huang <ahkcs@amazon.com>
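[Editorial illustration] The "don't let substrait AssertionError kill the cluster" entry above boils down to a narrow guard around the visitor call. A minimal sketch, assuming a generic conversion callable — the plugin's actual call site wraps SubstraitRelVisitor.apply and its concrete types differ.

import java.util.concurrent.Callable;

final class SubstraitConversionGuard {
    static <T> T convert(Callable<T> conversion) {
        try {
            return conversion.call();
        } catch (AssertionError e) {
            // Substrait's validators throw AssertionError explicitly, so the JVM's -da flag
            // doesn't gate them; re-raise as IllegalStateException so the analytics-engine
            // fragment boundary turns it into an HTTP 500 instead of a fatal JVM exit.
            throw new IllegalStateException(e.getMessage(), e);
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }
}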
1 parent f97a81c commit 4dea6c4

29 files changed

Lines changed: 2866 additions & 33 deletions


plugins/arrow-flight-rpc/build.gradle

Lines changed: 5 additions & 0 deletions
@@ -36,6 +36,11 @@ dependencies {
    api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
    api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
    api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson_annotations}"
+   // arrow-vector's JsonStringArrayList static-initializes a Jackson ObjectMapper that registers
+   // JavaTimeModule. Without jsr310 on arrow-flight-rpc's classpath, any reader of an Arrow
+   // ListVector (e.g. DataFusion's array-returning UDFs flowing through analytics-engine) hits
+   // a fatal NoClassDefFoundError that exits the JVM.
+   api "com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${versions.jackson}"
    api "commons-codec:commons-codec:${versions.commonscodec}"

    // arrow flight dependencies.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+a0958ebdaba836d31e5462ebc37b6349a0725ff9

sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/FieldType.java

Lines changed: 11 additions & 1 deletion
@@ -55,7 +55,16 @@ public enum FieldType {
    NESTED("nested"),
    OBJECT("object"),
    FLAT_OBJECT("flat_object"),
-   COMPLETION("completion");
+   COMPLETION("completion"),
+   /**
+    * Array-typed expression result. Used for the return-type slot of array-producing scalar
+    * functions (PPL {@code array(…)}, {@code array_slice}, {@code array_distinct}). Has no
+    * OpenSearch mapping equivalent — arrays in OpenSearch are multi-value fields with the
+    * underlying element type, not a separate type. The mapping string is {@code "array"} as a
+    * placeholder; {@link #fromMappingType} keeps working unchanged because no source
+    * advertises that mapping string.
+    */
+   ARRAY("array");

    private final String mappingType;

@@ -117,6 +126,7 @@ public static FieldType fromSqlTypeName(SqlTypeName sqlTypeName) {
            case TIME, TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE -> FieldType.DATE;
            case BOOLEAN -> FieldType.BOOLEAN;
            case BINARY, VARBINARY -> FieldType.BINARY;
+           case ARRAY -> FieldType.ARRAY;
            default -> null;
        };
    }
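[Editorial illustration] A quick check of what the new mapping buys the capability lookup described in the commit message; OpenSearchProjectRule's actual lookup code is not part of this diff, and the sketch class is hypothetical.

import org.apache.calcite.sql.type.SqlTypeName;
import org.opensearch.analytics.spi.FieldType;

final class ArrayCapabilitySketch {
    static boolean arrayReturnTypeResolves() {
        // Previously SqlTypeName.ARRAY fell through `default -> null`, emptying the
        // viable-backend list for array-returning projections before any registration
        // could match; it now resolves to the new FieldType.ARRAY constant.
        return FieldType.fromSqlTypeName(SqlTypeName.ARRAY) == FieldType.ARRAY;
    }
}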

sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java

Lines changed: 54 additions & 1 deletion
@@ -177,7 +177,60 @@ public enum ScalarFunction {
    JSON_EXTEND(Category.SCALAR, SqlKind.OTHER_FUNCTION),
    JSON_EXTRACT(Category.SCALAR, SqlKind.OTHER_FUNCTION),
    JSON_KEYS(Category.SCALAR, SqlKind.OTHER_FUNCTION),
-   JSON_SET(Category.SCALAR, SqlKind.OTHER_FUNCTION);
+   JSON_SET(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+
+   // ── Array ────────────────────────────────────────────────────────
+   /**
+    * PPL {@code array(a, b, …)} constructor — resolves through the SQL plugin's
+    * {@code ArrayFunctionImpl} UDF named {@code "array"}. DataFusion's native
+    * equivalent is {@code make_array}, so a backend that supports this needs a
+    * name-mapping adapter (see {@code MakeArrayAdapter} in the DataFusion backend).
+    */
+   ARRAY(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   ARRAY_LENGTH(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   ARRAY_SLICE(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   ARRAY_DISTINCT(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   /**
+    * Calcite's {@code ARRAY_JOIN} — joins array elements with a separator. PPL
+    * {@code mvjoin} is registered to this operator. DataFusion's native equivalent
+    * is named {@code array_to_string}, so the DataFusion backend rewrites to that
+    * via a name-mapping adapter.
+    */
+   ARRAY_JOIN(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   /**
+    * Calcite's {@code SqlStdOperatorTable.ITEM} — element access ({@code arr[N]}).
+    * PPL's {@code mvindex(arr, N)} single-element form lowers through
+    * {@code MVIndexFunctionImp.resolveSingleElement} to ITEM with a 1-based index
+    * (already converted from PPL's 0-based input). DataFusion's native equivalent
+    * is {@code array_element}, also 1-based; the DataFusion backend renames via a
+    * name-mapping adapter.
+    */
+   ITEM(Category.SCALAR, SqlKind.ITEM),
+   /**
+    * PPL {@code mvzip(left, right [, sep])} — element-wise zip of two arrays into an
+    * array of strings, joined per pair by a separator (default {@code ","}). Resolves
+    * through the SQL plugin's {@code MVZipFunctionImpl} UDF named {@code "mvzip"}.
+    * No DataFusion stdlib equivalent — the analytics-backend-datafusion plugin ships
+    * a custom Rust UDF (`udf::mvzip`) registered on its session context.
+    */
+   MVZIP(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   /**
+    * PPL {@code mvfind(arr, regex)} — find the 0-based index of the first array
+    * element matching a regex, or NULL if no match. Resolves through the SQL
+    * plugin's {@code MVFindFunctionImpl} UDF named {@code "mvfind"}. No
+    * DataFusion stdlib equivalent — the analytics-backend-datafusion plugin
+    * ships a custom Rust UDF (`udf::mvfind`) registered on its session context.
+    */
+   MVFIND(Category.SCALAR, SqlKind.OTHER_FUNCTION),
+   /**
+    * PPL {@code mvappend(arg1, arg2, …)} — flatten a mixed list of array and
+    * scalar arguments into one array, dropping null args and null elements.
+    * Resolves through the SQL plugin's {@code MVAppendFunctionImpl} UDF named
+    * {@code "mvappend"}. DataFusion's {@code array_concat} only accepts arrays
+    * and preserves nulls, so the analytics-backend-datafusion plugin ships a
+    * custom Rust UDF ({@code udf::mvappend}) registered on its session context.
+    */
+   MVAPPEND(Category.SCALAR, SqlKind.OTHER_FUNCTION);

    /**
     * Category of scalar function.

sandbox/plugins/analytics-backend-datafusion/build.gradle

Lines changed: 5 additions & 0 deletions
@@ -73,6 +73,11 @@ dependencies {
    implementation "io.substrait:isthmus:0.89.1"
    implementation "io.substrait:core:0.89.1"
    implementation "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:${versions.jackson}"
+   // jackson-datatype-jsr310 — added to arrow-flight-rpc (the parent plugin that bundles
+   // arrow-vector). arrow-vector's JsonStringArrayList eagerly registers JavaTimeModule on
+   // its ObjectMapper, so jsr310 must be visible to arrow-vector's defining classloader,
+   // not this plugin's. compileOnly here would also work; runtime is provided by parent.
+   compileOnly "com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${versions.jackson}"

    calciteCompile "com.google.guava:guava:${versions.guava}"
    calciteTestCompile "com.google.guava:guava:${versions.guava}"

sandbox/plugins/analytics-backend-datafusion/rust/Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -68,6 +68,8 @@ serde_json = { workspace = true, features = ["preserve_order"] }
# multi-path JSON-array output. Moving to 1.x is a follow-up once we can
# reproduce that distinction against the new API surface.
jsonpath-rust = "0.7"
+# mvfind UDF — regex matching against stringified array elements
+regex = "1.10"

[dev-dependencies]
criterion = { workspace = true }

sandbox/plugins/analytics-backend-datafusion/rust/src/session_context.rs

Lines changed: 4 additions & 0 deletions
@@ -111,6 +111,10 @@ pub async unsafe fn create_session_context(
        .build();

    let ctx = SessionContext::new_with_state(state);
+   // Register OpenSearch UDFs (mvappend, mvfind, mvzip, convert_tz, …) on this session
+   // so the substrait converter at execute_with_context can resolve their function names.
+   // Without this, fragment execution fails with "Unsupported function name" because
+   // df_execute_with_context reuses this handle's ctx instead of building a fresh one.
    crate::udf::register_all(&ctx);

    // Register default ListingTable for parquet scans

sandbox/plugins/analytics-backend-datafusion/rust/src/udf/mod.rs

Lines changed: 7 additions & 1 deletion
@@ -122,6 +122,9 @@ pub mod json_extend;
pub mod json_extract;
pub mod json_keys;
pub mod json_set;
+pub mod mvappend;
+pub mod mvfind;
+pub mod mvzip;
pub mod tonumber;
pub mod tostring;

@@ -141,10 +144,13 @@ pub fn register_all(ctx: &SessionContext) {
    json_extract::register_all(ctx);
    json_keys::register_all(ctx);
    json_set::register_all(ctx);
+   mvzip::register_all(ctx);
+   mvfind::register_all(ctx);
+   mvappend::register_all(ctx);
    tonumber::register_all(ctx);
    tostring::register_all(ctx);
    log::info!(
-       "OpenSearch UDF register_all: convert_tz, json_append, json_array_length, json_delete, json_extend, json_extract, json_keys, json_set, tonumber, tostring registered"
+       "OpenSearch UDF register_all: convert_tz, json_append, json_array_length, json_delete, json_extend, json_extract, json_keys, json_set, mvzip, mvfind, mvappend, tonumber, tostring registered"
    );
}