Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
## element_at

- Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
- Spark 3.5.8 (audited 2026-05-27): baseline. `ElementAt(left, right, defaultValueOutOfBound, failOnError)`; group label `map_funcs`. Comet supports only `ArrayType` input; `MapType` input falls back.
- Spark 3.5.8 (audited 2026-05-27): baseline. `ElementAt(left, right, defaultValueOutOfBound, failOnError)`; group label `map_funcs`. Comet supports `ArrayType` input through native `ListExtract` and `MapType` input through native `map_extract`.
- Spark 4.0.1 (audited 2026-05-27): `NullIntolerant` -> `nullIntolerant` field refactor; group label changes to `collection_funcs`; ANSI default flips to `true` so out-of-bound throws by default. For `ArrayType` input, Comet wires `failOnError` through to native `ListExtract`; the `MapType` path (`map_extract`) does not take `failOnError`.
- Spark 4.1.1 (audited 2026-05-27): identical to 4.0.1.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
## element_at

- Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
- Spark 3.5.8 (audited 2026-05-27): baseline. `ElementAt(left, right, defaultValueOutOfBound, failOnError) extends GetMapValueUtil`; the parser routes `element_at(<array>, ...)` to one overload and `element_at(<map>, ...)` to another. Comet `CometElementAt` only supports `ArrayType` input; `MapType` input falls back.
- Spark 3.5.8 (audited 2026-05-27): baseline. `ElementAt(left, right, defaultValueOutOfBound, failOnError) extends GetMapValueUtil`; the parser routes `element_at(<array>, ...)` to one overload and `element_at(<map>, ...)` to another. Comet `CometElementAt` lowers `ArrayType` input to native `ListExtract` and routes `MapType` input through the same native `map_extract` path used by `GetMapValue`.
- Spark 4.0.1 (audited 2026-05-27): adds `nullIntolerant: Boolean` field; semantics unchanged.
- Spark 4.1.1 (audited 2026-05-27): identical to 4.0.1.

Expand Down
6 changes: 3 additions & 3 deletions docs/source/user-guide/latest/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ The tables below list every Spark built-in expression with its current status.
| `array_union` | ✅ | NaN/signed-zero handling may differ ([details](compatibility/floating-point.md)) |
| `arrays_overlap` | ✅ | |
| `arrays_zip` | ✅ | |
| `element_at` | ✅ | MapType input falls back |
| `element_at` | ✅ | ArrayType and MapType inputs |
| `flatten` | ✅ | Binary/struct/map elements fall back |
| `get` | ✅ | |
| `sequence` | ✅ | |
Expand Down Expand Up @@ -378,7 +378,7 @@ expression-level). The `outer` variants are wired but marked `Incompatible`; the

| Function | Status | Notes |
| --- | --- | --- |
| `element_at` | ✅ | MapType input falls back |
| `element_at` | ✅ | |
| `map` | ✅ | Routed through the JVM codegen dispatcher |
| `map_concat` | ✅ | |
| `map_contains_key` | ✅ | |
Expand All @@ -388,7 +388,7 @@ expression-level). The `outer` variants are wired but marked `Incompatible`; the
| `map_keys` | ✅ | |
| `map_values` | ✅ | |
| `str_to_map` | ✅ | |
| `try_element_at` | ✅ | Lowers to `element_at`; array input (MapType falls back) |
| `try_element_at` | ✅ | Lowers to `element_at` |

---

Expand Down
59 changes: 32 additions & 27 deletions spark/src/main/scala/org/apache/comet/serde/arrays.scala
Original file line number Diff line number Diff line change
Expand Up @@ -579,14 +579,11 @@ object CometArrayReverse extends CometExpressionSerde[Reverse] with ArraysBase {

object CometElementAt extends CometExpressionSerde[ElementAt] {

override def getUnsupportedReasons(): Seq[String] = Seq(
"Input must be an array. `Map` inputs are not supported.")

override def getSupportLevel(expr: ElementAt): SupportLevel = {
if (expr.left.dataType.isInstanceOf[ArrayType]) {
Compatible()
} else {
Unsupported(Some("Input is not an array"))
expr.left.dataType match {
case _: ArrayType => Compatible()
case _: MapType => Compatible()
case _ => Unsupported(Some("Input must be an array or map"))
}
}

Expand All @@ -596,27 +593,35 @@ object CometElementAt extends CometExpressionSerde[ElementAt] {
binding: Boolean): Option[ExprOuterClass.Expr] = {
val childExpr = exprToProtoInternal(expr.left, inputs, binding)
val ordinalExpr = exprToProtoInternal(expr.right, inputs, binding)
val defaultExpr = expr.defaultValueOutOfBound.flatMap(exprToProtoInternal(_, inputs, binding))

if (childExpr.isDefined && ordinalExpr.isDefined &&
defaultExpr.isDefined == expr.defaultValueOutOfBound.isDefined) {
val arrayExtractBuilder = ExprOuterClass.ListExtract
.newBuilder()
.setChild(childExpr.get)
.setOrdinal(ordinalExpr.get)
.setOneBased(true)
.setFailOnError(expr.failOnError)

defaultExpr.foreach(arrayExtractBuilder.setDefaultValue)

Some(
ExprOuterClass.Expr
.newBuilder()
.setListExtract(arrayExtractBuilder)
.build())
} else {
withFallbackReason(expr, "unsupported arguments for ElementAt", expr.left, expr.right)
None
expr.left.dataType match {
case _: MapType =>
val mapExtractExpr = scalarFunctionExprToProto("map_extract", childExpr, ordinalExpr)
optExprWithFallbackReason(mapExtractExpr, expr, expr.left, expr.right)
case _ =>
val defaultExpr =
expr.defaultValueOutOfBound.flatMap(exprToProtoInternal(_, inputs, binding))

if (childExpr.isDefined && ordinalExpr.isDefined &&
defaultExpr.isDefined == expr.defaultValueOutOfBound.isDefined) {
val arrayExtractBuilder = ExprOuterClass.ListExtract
.newBuilder()
.setChild(childExpr.get)
.setOrdinal(ordinalExpr.get)
.setOneBased(true)
.setFailOnError(expr.failOnError)

defaultExpr.foreach(arrayExtractBuilder.setDefaultValue)

Some(
ExprOuterClass.Expr
.newBuilder()
.setListExtract(arrayExtractBuilder)
.build())
} else {
withFallbackReason(expr, "unsupported arguments for ElementAt", expr.left, expr.right)
None
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ SELECT try_element_at(CAST(NULL AS ARRAY<INT>), 1)
query ignore(Spark codegen bug with literal element_at when constant folding is disabled)
SELECT try_element_at(array(10, 20, 30), 1), try_element_at(array(10, 20, 30), 99)

-- map input falls back to Spark
query spark_answer_only
-- map input
query
SELECT try_element_at(m, 'a'), try_element_at(m, 'missing') FROM test_try_element_at
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

statement
CREATE TABLE test_element_at_map(m map<string, int>, mi map<int, string>) USING parquet

statement
INSERT INTO test_element_at_map VALUES
(map('a', 1, 'b', 2, 'c', 3), map(1, 'x', 2, 'y')),
(map('x', 10), map(99, 'z')),
(NULL, NULL)

-- key found
query
SELECT element_at(m, 'a'), element_at(m, 'b') FROM test_element_at_map

-- key not found → NULL
query
SELECT element_at(m, 'missing') FROM test_element_at_map

-- null map → NULL
query
SELECT element_at(CAST(NULL AS MAP<STRING, INT>), 'a')

-- null key → NULL
query
SELECT element_at(m, CAST(NULL AS STRING)) FROM test_element_at_map

-- integer key type
query
SELECT element_at(mi, 1), element_at(mi, 2), element_at(mi, 99) FROM test_element_at_map

-- key type coercion
query
SELECT element_at(mi, CAST(1 AS BIGINT)), element_at(mi, CAST(2 AS SMALLINT)) FROM test_element_at_map

-- literal map arguments
query
SELECT element_at(map('a', 1, 'b', 2), 'a'), element_at(map('a', 1, 'b', 2), 'missing'), element_at(map('a', 1, 'b', 2), NULL)