From 03e6d69fdfb1c5b876cc8c83dd6c541cfdc5007e Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 11 Jun 2026 08:42:49 -0700 Subject: [PATCH 1/4] chore: fallback for `spark.sql.legacy.castComplexTypesToString.enabled` = true --- .../apache/comet/expressions/CometCast.scala | 18 ++++++++ .../apache/comet/rules/CometExecRule.scala | 5 +- .../apache/comet/serde/CometSortOrder.scala | 17 ++----- .../apache/comet/serde/QueryPlanSerde.scala | 13 ------ .../org/apache/comet/serde/SupportLevel.scala | 46 +++++++++++++++---- .../org/apache/comet/serde/aggregates.scala | 18 +++----- .../scala/org/apache/comet/serde/arrays.scala | 23 ++-------- .../scala/org/apache/comet/serde/maps.scala | 13 +----- .../apache/spark/sql/comet/operators.scala | 3 +- 9 files changed, 81 insertions(+), 75 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index e4f341efa8..9aae855b8b 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -36,6 +36,18 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { private[comet] val negativeScaleDecimalToStringReason: String = "Negative-scale decimal requires spark.sql.legacy.allowNegativeScaleOfDecimal=true" + // Spark's legacy formatter wraps maps/structs with `[]` and omits NULL elements, while the + // default (Spark 3.1+) wraps with `{}` and renders NULL as "null". Comet only implements the + // default. The flag is internal in Spark 4.0 and defaults to false; fall back to Spark when + // it is enabled. 
+ private[comet] val legacyCastComplexTypesToStringReason: String = + "spark.sql.legacy.castComplexTypesToString.enabled=true is not supported" + + private def legacyCastComplexTypesToString: Boolean = + SQLConf.get + .getConfString("spark.sql.legacy.castComplexTypesToString.enabled", "false") + .toBoolean + def supportedTypes: Seq[DataType] = Seq( DataTypes.BooleanType, @@ -150,6 +162,12 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { return Compatible() } + if (toType == DataTypes.StringType && legacyCastComplexTypesToString && (fromType + .isInstanceOf[ArrayType] || fromType.isInstanceOf[StructType] || + fromType.isInstanceOf[MapType])) { + return Unsupported(Some(legacyCastComplexTypesToStringReason)) + } + (fromType, toType) match { case (dt: ArrayType, _: ArrayType) if dt.elementType == NullType => Compatible() case (ArrayType(DataTypes.DateType, _), ArrayType(toElementType, _)) diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala index e261ac45d1..50cd0927b4 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala @@ -870,7 +870,10 @@ case class CometExecRule(session: SparkSession) if (groupingExpressions.isEmpty && aggregateExpressions.isEmpty) return false - if (groupingExpressions.exists(e => QueryPlanSerde.containsMapType(e.dataType))) return false + if (groupingExpressions.exists(e => + SupportLevel.containsType(e.dataType, classOf[MapType]))) { + return false + } if (!groupingExpressions.forall(e => QueryPlanSerde.exprToProto(e, agg.child.output).isDefined)) { diff --git a/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala b/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala index 3dcd67a65d..346bb454eb 100644 --- a/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala +++ 
b/spark/src/main/scala/org/apache/comet/serde/CometSortOrder.scala @@ -32,18 +32,11 @@ object CometSortOrder extends CometExpressionSerde[SortOrder] { " floating-point types is not 100% compatible with Spark") override def getSupportLevel(expr: SortOrder): SupportLevel = { - - if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && - SupportLevel.containsFloatingPoint(expr.child.dataType)) { - // https://github.com/apache/datafusion-comet/issues/2626 - Incompatible( - Some( - "Sorting on floating-point is not 100% compatible with Spark, and Comet is running " + - s"with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. " + - s"${CometConf.COMPAT_GUIDE}")) - } else { - Compatible() - } + // https://github.com/apache/datafusion-comet/issues/2626 + SupportLevel + .strictFloatingPointReason(expr.child.dataType, "Sorting on floating-point") + .map(reason => Incompatible(Some(reason))) + .getOrElse(Compatible()) } override def convert( diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index a21d930226..a3d9944485 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -490,19 +490,6 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { false } - /** - * Returns true if the given data type is or contains a `MapType` at any nesting level. Arrow's - * row format (used by DataFusion's grouped hash aggregate for composite group keys) does not - * support `Map`, so grouping on any type that transitively contains a map would crash in native - * execution. - */ - def containsMapType(dt: DataType): Boolean = dt match { - case _: MapType => true - case a: ArrayType => containsMapType(a.elementType) - case s: StructType => s.fields.exists(f => containsMapType(f.dataType)) - case _ => false - } - /** * Serializes Spark datatype to protobuf. 
Note that, a datatype can be serialized by this method * doesn't mean it is supported by Comet native execution, i.e., `supportedDataType` may return diff --git a/spark/src/main/scala/org/apache/comet/serde/SupportLevel.scala b/spark/src/main/scala/org/apache/comet/serde/SupportLevel.scala index cb78c7d2d4..84db46923e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/SupportLevel.scala +++ b/spark/src/main/scala/org/apache/comet/serde/SupportLevel.scala @@ -21,6 +21,9 @@ package org.apache.comet.serde import org.apache.spark.sql.types._ +import org.apache.comet.CometConf +import org.apache.comet.CometConf.COMET_EXEC_STRICT_FLOATING_POINT + sealed trait SupportLevel /** @@ -46,14 +49,41 @@ case class Unsupported(notes: Option[String] = None) extends SupportLevel object SupportLevel { /** - * Returns true if the given data type contains FloatType or DoubleType at any nesting level. + * Returns true if `dt` is, or transitively contains, an instance of any of the given `DataType` + * classes. Walks `ArrayType` element, `StructType` fields, and `MapType` key/value at every + * nesting level. + */ + def containsType(dt: DataType, classes: Class[_ <: DataType]*): Boolean = { + if (classes.exists(_.isInstance(dt))) { + true + } else { + dt match { + case ArrayType(elementType, _) => containsType(elementType, classes: _*) + case StructType(fields) => fields.exists(f => containsType(f.dataType, classes: _*)) + case MapType(keyType, valueType, _) => + containsType(keyType, classes: _*) || containsType(valueType, classes: _*) + case _ => false + } + } + } + + /** + * Gate for [[CometConf.COMET_EXEC_STRICT_FLOATING_POINT]]: returns the standard incompatibility + * reason when strict mode is enabled and `dt` contains a float or double (at any nesting + * level), and `None` otherwise. Callers wrap the reason with `Incompatible` or pass it to + * `withFallbackReason` as appropriate. + * + * `what` describes the operation being gated, e.g. 
"Sorting on floating-point" or "MapSort on + * floating-point key", and is interpolated into the returned message. */ - def containsFloatingPoint(dt: DataType): Boolean = dt match { - case FloatType | DoubleType => true - case ArrayType(elementType, _) => containsFloatingPoint(elementType) - case StructType(fields) => fields.exists(f => containsFloatingPoint(f.dataType)) - case MapType(keyType, valueType, _) => - containsFloatingPoint(keyType) || containsFloatingPoint(valueType) - case _ => false + def strictFloatingPointReason(dt: DataType, what: String): Option[String] = { + if (COMET_EXEC_STRICT_FLOATING_POINT.get() && + containsType(dt, classOf[FloatType], classOf[DoubleType])) { + Some( + s"$what is not 100% compatible with Spark, and Comet is running with " + + s"${COMET_EXEC_STRICT_FLOATING_POINT.key}=true. ${CometConf.COMPAT_GUIDE}") + } else { + None + } } } diff --git a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala index bd5737b54c..cf392e4214 100644 --- a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala @@ -715,17 +715,13 @@ object CometCollectSet extends CometAggregateExpressionSerde[CollectSet] { " `spark.comet.expression.CollectSet.allowIncompatible=true` is set.") override def getSupportLevel(expr: CollectSet): SupportLevel = { - if (COMET_EXEC_STRICT_FLOATING_POINT.get() && - SupportLevel.containsFloatingPoint(expr.children.head.dataType)) { - Incompatible( - Some( - "collect_set on floating-point types is not 100% compatible with Spark " + - "(Comet deduplicates NaN values while Spark treats each NaN as distinct), " + - s"and Comet is running with ${COMET_EXEC_STRICT_FLOATING_POINT.key}=true. 
" + - s"${CometConf.COMPAT_GUIDE}")) - } else { - Compatible() - } + SupportLevel + .strictFloatingPointReason( + expr.children.head.dataType, + "collect_set on floating-point types " + + "(Comet deduplicates NaN values while Spark treats each NaN as distinct)") + .map(reason => Incompatible(Some(reason))) + .getOrElse(Compatible()) } override def convert( diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 690bc376c7..7f8dd7b40f 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -152,15 +152,11 @@ object CometSortArray extends CometExpressionSerde[SortArray] { if (!supportedSortArrayElementType(elementType)) { Unsupported(Some(s"Sort on array element type $elementType is not supported")) - } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && - SupportLevel.containsFloatingPoint(elementType)) { - Incompatible( - Some( - "Sorting on floating-point is not 100% compatible with Spark, and Comet is running " + - s"with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. 
" + - s"${CometConf.COMPAT_GUIDE}")) } else { - Compatible() + SupportLevel + .strictFloatingPointReason(elementType, "Sorting on floating-point") + .map(reason => Incompatible(Some(reason))) + .getOrElse(Compatible()) } } @@ -553,17 +549,8 @@ object CometArrayReverse extends CometExpressionSerde[Reverse] with ArraysBase { override def getIncompatibleReasons(): Seq[String] = Seq(unsupportedReason) - @tailrec - private def containsBinary(dt: DataType): Boolean = { - dt match { - case BinaryType => true - case ArrayType(elementType, _) => containsBinary(elementType) - case _ => false - } - } - override def getSupportLevel(expr: Reverse): SupportLevel = { - if (containsBinary(expr.child.dataType)) { + if (SupportLevel.containsType(expr.child.dataType, classOf[BinaryType])) { Incompatible(Some(unsupportedReason)) } else { Compatible(None) diff --git a/spark/src/main/scala/org/apache/comet/serde/maps.scala b/spark/src/main/scala/org/apache/comet/serde/maps.scala index ab388c7ad0..accf0407a7 100644 --- a/spark/src/main/scala/org/apache/comet/serde/maps.scala +++ b/spark/src/main/scala/org/apache/comet/serde/maps.scala @@ -144,20 +144,11 @@ object CometMapFromEntries override def getIncompatibleReasons(): Seq[String] = Seq(keyUnsupportedReason, valueUnsupportedReason) - private def containsBinary(dataType: DataType): Boolean = { - dataType match { - case BinaryType => true - case StructType(fields) => fields.exists(field => containsBinary(field.dataType)) - case ArrayType(elementType, _) => containsBinary(elementType) - case _ => false - } - } - override def getSupportLevel(expr: MapFromEntries): SupportLevel = { - if (containsBinary(expr.dataType.keyType)) { + if (SupportLevel.containsType(expr.dataType.keyType, classOf[BinaryType])) { return Incompatible(Some(keyUnsupportedReason)) } - if (containsBinary(expr.dataType.valueType)) { + if (SupportLevel.containsType(expr.dataType.valueType, classOf[BinaryType])) { return Incompatible(Some(valueUnsupportedReason)) } 
Compatible(None) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala index 53b09e92b3..ebb22d2361 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala @@ -1546,7 +1546,8 @@ trait CometBaseAggregate { return None } - if (groupingExpressions.exists(expr => QueryPlanSerde.containsMapType(expr.dataType))) { + if (groupingExpressions.exists(expr => + SupportLevel.containsType(expr.dataType, classOf[MapType]))) { withFallbackReason(aggregate, "Grouping on map-containing types is not supported") return None } From 07747964c665a37cb83729f5be006b5702c8e0e4 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 11 Jun 2026 17:18:07 -0700 Subject: [PATCH 2/4] chore: fallback for `spark.sql.legacy.castComplexTypesToString.enabled` = true --- .../cast/cast_struct_to_string.sql | 153 ++++++++++++++++++ .../cast/cast_struct_to_string_legacy.sql | 26 +++ 2 files changed, 179 insertions(+) create mode 100644 spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql create mode 100644 spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql diff --git a/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql b/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql new file mode 100644 index 0000000000..54eae98f7e --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql @@ -0,0 +1,153 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Default (non-legacy) struct-to-string formatting: `{f1, f2, ...}` with NULL elements +-- rendered as "null". The legacy `[...]` mode is covered separately in +-- cast_struct_to_string_legacy.sql. + +-- Config: spark.sql.legacy.castComplexTypesToString.enabled=false +-- ConfigMatrix: parquet.enable.dictionary=false,true + +statement +CREATE TABLE test_cast_struct_to_string( + id int, + s_unnamed struct<col1: int, col2: string>, + s_named struct<a: int, b: string, c: boolean>, + s_floats struct<f: float, d: double>, + s_bounds struct<b: tinyint, s: smallint, i: int, l: bigint>, + s_decimal struct<d1: decimal(10, 2), d2: decimal(38, 18)>, + s_temporal struct<dt: date, ts: timestamp>, + s_binary struct<b: binary>, + s_nested struct<inner: struct<x: int, y: string>, tag: string>, + s_with_array struct<arr: array<int>, label: string>, + s_all_null struct<a: int, b: string> +) USING parquet + +statement +INSERT INTO test_cast_struct_to_string VALUES + ( + 1, + named_struct('col1', 1, 'col2', 'hello'), + named_struct('a', 42, 'b', 'world', 'c', true), + named_struct('f', cast(1.5 as float), 'd', cast(2.5 as double)), + named_struct('b', cast(127 as tinyint), 's', cast(32767 as smallint), 'i', 2147483647, 'l', 9223372036854775807), + named_struct('d1', cast('12345678.90' as decimal(10, 2)), 'd2', cast('1.234567890123456789' as decimal(38, 18))), + named_struct('dt', date '2024-01-15', 'ts', timestamp '2024-01-15 10:30:45'), + named_struct('b', X'616263'), + named_struct('inner', named_struct('x', 10, 'y', 'inner'), 'tag', 'outer'), + named_struct('arr', array(1, 2, 3), 'label', 'three'), + named_struct('a', 1, 'b', 'present') + ), + ( + 2, + named_struct('col1', cast(null as int), 'col2', 'with-null-int'), + named_struct('a', 0, 'b', cast(null as string), 'c', false), + named_struct('f', cast('NaN' as float), 
'd', cast('NaN' as double)), + named_struct('b', cast(-128 as tinyint), 's', cast(-32768 as smallint), 'i', -2147483648, 'l', -9223372036854775808), + named_struct('d1', cast('-12345678.90' as decimal(10, 2)), 'd2', cast('-1.234567890123456789' as decimal(38, 18))), + named_struct('dt', date '1970-01-01', 'ts', timestamp '1970-01-01 00:00:00'), + named_struct('b', X''), + named_struct('inner', named_struct('x', cast(null as int), 'y', cast(null as string)), 'tag', cast(null as string)), + named_struct('arr', array(cast(null as int), 1, cast(null as int)), 'label', cast(null as string)), + named_struct('a', cast(null as int), 'b', cast(null as string)) + ), + ( + 3, + named_struct('col1', -1, 'col2', ''), + named_struct('a', cast(null as int), 'b', '', 'c', cast(null as boolean)), + named_struct('f', cast('Infinity' as float), 'd', cast('-Infinity' as double)), + named_struct('b', cast(0 as tinyint), 's', cast(0 as smallint), 'i', 0, 'l', cast(0 as bigint)), + named_struct('d1', cast(0 as decimal(10, 2)), 'd2', cast(0 as decimal(38, 18))), + named_struct('dt', date '9999-12-31', 'ts', timestamp '9999-12-31 23:59:59'), + named_struct('b', X'00FF7F80'), + named_struct('inner', named_struct('x', 0, 'y', ''), 'tag', ''), + named_struct('arr', cast(array() as array<int>), 'label', ''), + cast(null as struct<a: int, b: string>) + ), + ( + 4, + named_struct('col1', cast(null as int), 'col2', cast(null as string)), + named_struct('a', cast(null as int), 'b', cast(null as string), 'c', cast(null as boolean)), + named_struct('f', cast(-0.0 as float), 'd', cast(-0.0 as double)), + named_struct('b', cast(null as tinyint), 's', cast(null as smallint), 'i', cast(null as int), 'l', cast(null as bigint)), + named_struct('d1', cast(null as decimal(10, 2)), 'd2', cast(null as decimal(38, 18))), + named_struct('dt', cast(null as date), 'ts', cast(null as timestamp)), + named_struct('b', cast(null as binary)), + named_struct('inner', cast(null as struct<x: int, y: string>), 'tag', cast(null as string)), + named_struct('arr', 
cast(null as array<int>), 'label', cast(null as string)), + cast(null as struct<a: int, b: string>) + ) + +-- Anonymous struct fields are auto-named col1, col2, ... by `struct(...)`. +query +SELECT cast(s_unnamed as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Named struct fields propagate user-supplied names into the formatted output. +query +SELECT cast(s_named as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Floating-point: NaN, ±0, ±Infinity, NULL. +query +SELECT cast(s_floats as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Integer min/max for byte, short, int, long. +query +SELECT cast(s_bounds as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Decimal at the small and the 38-precision limit, positive / negative / zero / NULL. +query +SELECT cast(s_decimal as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Date and timestamp at common values plus the date range edges. +query +SELECT cast(s_temporal as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Binary content including empty bytes and non-printable values. +query +SELECT cast(s_binary as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Nested struct: inner struct rendered inside the outer braces. +query +SELECT cast(s_nested as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Struct containing an array field. +query +SELECT cast(s_with_array as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Whole struct is NULL vs. all inner fields NULL. +query +SELECT cast(s_all_null as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Literal anonymous struct, mixed types with NULL. +query +SELECT cast(struct(1, 'two', cast(null as double)) as string) + +-- Literal named struct, mixed types. +query +SELECT cast(named_struct('k', 'key', 'v', 100, 'flag', true) as string) + +-- Deeply nested literal struct (3 levels). 
+query +SELECT cast(named_struct('a', named_struct('b', named_struct('c', 1, 'd', 'leaf'))) as string) + +-- Empty-string and whitespace string-field rendering. +query +SELECT cast(named_struct('s1', '', 's2', ' ', 's3', cast(null as string)) as string) + +-- Map-valued field: not supported, falls back to Spark. +query expect_fallback(to StringType is not supported) +SELECT cast(named_struct('m', map('k', 1)) as string) diff --git a/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql b/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql new file mode 100644 index 0000000000..df7ddbedb8 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql @@ -0,0 +1,26 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- When spark.sql.legacy.castComplexTypesToString.enabled is true Spark uses the legacy +-- formatter (`[...]` with NULL elements omitted). Comet only implements the default +-- (`{...}` with NULL elements rendered as "null"), so the cast must fall back to Spark. +-- The flag is internal in Spark 4.0 and defaults to false. 
+ +-- Config: spark.sql.legacy.castComplexTypesToString.enabled=true + +query expect_fallback(spark.sql.legacy.castComplexTypesToString.enabled=true is not supported) +SELECT CAST(struct(1, 2, null) AS STRING) From 108f799964430e58793c94305bc453e1a9038900 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 11 Jun 2026 17:26:14 -0700 Subject: [PATCH 3/4] chore: fallback for `spark.sql.legacy.castComplexTypesToString.enabled` = true --- .../org/apache/comet/serde/CometMapSort.scala | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/spark/src/main/spark-4.x/org/apache/comet/serde/CometMapSort.scala b/spark/src/main/spark-4.x/org/apache/comet/serde/CometMapSort.scala index 4aecda4547..341c8fc24e 100644 --- a/spark/src/main/spark-4.x/org/apache/comet/serde/CometMapSort.scala +++ b/spark/src/main/spark-4.x/org/apache/comet/serde/CometMapSort.scala @@ -39,15 +39,11 @@ object CometMapSort extends CometExpressionSerde[MapSort] { val keyType = expr.dataType.asInstanceOf[MapType].keyType if (!supportedScalarSortElementType(keyType)) { Unsupported(Some(s"MapSort on map with key type $keyType is not supported")) - } else if (CometConf.COMET_EXEC_STRICT_FLOATING_POINT.get() && - SupportLevel.containsFloatingPoint(keyType)) { - Incompatible( - Some( - "MapSort on floating-point key is not 100% compatible with Spark, and Comet is " + - s"running with ${CometConf.COMET_EXEC_STRICT_FLOATING_POINT.key}=true. 
" + - s"${CometConf.COMPAT_GUIDE}")) } else { - Compatible(None) + SupportLevel + .strictFloatingPointReason(keyType, "MapSort on floating-point key") + .map(reason => Incompatible(Some(reason))) + .getOrElse(Compatible(None)) } } From 984de926565188dfbffd6f2aa8c999334b292224 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 11 Jun 2026 18:00:20 -0700 Subject: [PATCH 4/4] chore: fallback for `spark.sql.legacy.castComplexTypesToString.enabled` = true --- .../apache/comet/expressions/CometCast.scala | 9 +- .../cast/cast_complex_types_to_string.sql | 345 ++++++++++++++++++ ...> cast_complex_types_to_string_legacy.sql} | 20 +- .../cast/cast_struct_to_string.sql | 153 -------- 4 files changed, 367 insertions(+), 160 deletions(-) create mode 100644 spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string.sql rename spark/src/test/resources/sql-tests/expressions/cast/{cast_struct_to_string_legacy.sql => cast_complex_types_to_string_legacy.sql} (50%) delete mode 100644 spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index 9aae855b8b..42da809206 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -36,10 +36,11 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { private[comet] val negativeScaleDecimalToStringReason: String = "Negative-scale decimal requires spark.sql.legacy.allowNegativeScaleOfDecimal=true" - // Spark's legacy formatter wraps maps/structs with `[]` and omits NULL elements, while the - // default (Spark 3.1+) wraps with `{}` and renders NULL as "null". Comet only implements the - // default. The flag is internal in Spark 4.0 and defaults to false; fall back to Spark when - // it is enabled. 
+ // When `spark.sql.legacy.castComplexTypesToString.enabled` is true, Spark wraps maps and + // structs with `[]` (instead of `{}`) when casting to string, and omits NULL elements of + // structs/maps/arrays (instead of rendering them as the literal "null"). Comet only + // implements the default formatting, so fall back to Spark for any array/map/struct to-string + // cast when the flag is enabled. The flag is internal in Spark 4.0 and defaults to false. private[comet] val legacyCastComplexTypesToStringReason: String = "spark.sql.legacy.castComplexTypesToString.enabled=true is not supported" diff --git a/spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string.sql b/spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string.sql new file mode 100644 index 0000000000..8b1d989ae7 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string.sql @@ -0,0 +1,345 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Default (non-legacy) formatting for array / map / struct → string: +-- `{f1, f2, ...}` for structs, `[e1, e2, ...]` for arrays, `{k1 -> v1, k2 -> v2}` for maps, +-- with NULL elements rendered as the literal "null". 
The legacy `[...]`-wrapped / +-- NULL-omitting mode is covered separately in cast_complex_types_to_string_legacy.sql. + +-- Config: spark.sql.legacy.castComplexTypesToString.enabled=false +-- ConfigMatrix: parquet.enable.dictionary=false,true + +statement +CREATE TABLE test_cast_struct_to_string( + id int, + s_unnamed struct<col1: int, col2: string>, + s_named struct<a: int, b: string, c: boolean>, + s_floats struct<f: float, d: double>, + s_bounds struct<b: tinyint, s: smallint, i: int, l: bigint>, + s_decimal struct<d1: decimal(10, 2), d2: decimal(38, 18)>, + s_temporal struct<dt: date, ts: timestamp>, + s_binary struct<b: binary>, + s_nested struct<inner: struct<x: int, y: string>, tag: string>, + s_with_array struct<arr: array<int>, label: string>, + s_all_null struct<a: int, b: string> +) USING parquet + +statement +INSERT INTO test_cast_struct_to_string VALUES + ( + 1, + named_struct('col1', 1, 'col2', 'hello'), + named_struct('a', 42, 'b', 'world', 'c', true), + named_struct('f', cast(1.5 as float), 'd', cast(2.5 as double)), + named_struct('b', cast(127 as tinyint), 's', cast(32767 as smallint), 'i', 2147483647, 'l', 9223372036854775807), + named_struct('d1', cast('12345678.90' as decimal(10, 2)), 'd2', cast('1.234567890123456789' as decimal(38, 18))), + named_struct('dt', date '2024-01-15', 'ts', timestamp '2024-01-15 10:30:45'), + named_struct('b', X'616263'), + named_struct('inner', named_struct('x', 10, 'y', 'inner'), 'tag', 'outer'), + named_struct('arr', array(1, 2, 3), 'label', 'three'), + named_struct('a', 1, 'b', 'present') + ), + ( + 2, + named_struct('col1', cast(null as int), 'col2', 'with-null-int'), + named_struct('a', 0, 'b', cast(null as string), 'c', false), + named_struct('f', cast('NaN' as float), 'd', cast('NaN' as double)), + named_struct('b', cast(-128 as tinyint), 's', cast(-32768 as smallint), 'i', -2147483648, 'l', -9223372036854775808), + named_struct('d1', cast('-12345678.90' as decimal(10, 2)), 'd2', cast('-1.234567890123456789' as decimal(38, 18))), + named_struct('dt', date '1970-01-01', 'ts', timestamp '1970-01-01 00:00:00'), + named_struct('b', X''), + named_struct('inner', named_struct('x', cast(null as int), 'y', cast(null as string)), 'tag', cast(null as string)), + 
named_struct('arr', array(cast(null as int), 1, cast(null as int)), 'label', cast(null as string)), + named_struct('a', cast(null as int), 'b', cast(null as string)) + ), + ( + 3, + named_struct('col1', -1, 'col2', ''), + named_struct('a', cast(null as int), 'b', '', 'c', cast(null as boolean)), + named_struct('f', cast('Infinity' as float), 'd', cast('-Infinity' as double)), + named_struct('b', cast(0 as tinyint), 's', cast(0 as smallint), 'i', 0, 'l', cast(0 as bigint)), + named_struct('d1', cast(0 as decimal(10, 2)), 'd2', cast(0 as decimal(38, 18))), + named_struct('dt', date '9999-12-31', 'ts', timestamp '9999-12-31 23:59:59'), + named_struct('b', X'00FF7F80'), + named_struct('inner', named_struct('x', 0, 'y', ''), 'tag', ''), + named_struct('arr', cast(array() as array<int>), 'label', ''), + cast(null as struct<a: int, b: string>) + ), + ( + 4, + named_struct('col1', cast(null as int), 'col2', cast(null as string)), + named_struct('a', cast(null as int), 'b', cast(null as string), 'c', cast(null as boolean)), + named_struct('f', cast(-0.0 as float), 'd', cast(-0.0 as double)), + named_struct('b', cast(null as tinyint), 's', cast(null as smallint), 'i', cast(null as int), 'l', cast(null as bigint)), + named_struct('d1', cast(null as decimal(10, 2)), 'd2', cast(null as decimal(38, 18))), + named_struct('dt', cast(null as date), 'ts', cast(null as timestamp)), + named_struct('b', cast(null as binary)), + named_struct('inner', cast(null as struct<x: int, y: string>), 'tag', cast(null as string)), + named_struct('arr', cast(null as array<int>), 'label', cast(null as string)), + cast(null as struct<a: int, b: string>) + ) + +-- Anonymous struct fields are auto-named col1, col2, ... by `struct(...)`. +query +SELECT cast(s_unnamed as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Named struct fields propagate user-supplied names into the formatted output. +query +SELECT cast(s_named as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Floating-point: NaN, ±0, ±Infinity, NULL. 
+query +SELECT cast(s_floats as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Integer min/max for byte, short, int, long. +query +SELECT cast(s_bounds as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Decimal at the small and the 38-precision limit, positive / negative / zero / NULL. +query +SELECT cast(s_decimal as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Date and timestamp at common values plus the date range edges. +query +SELECT cast(s_temporal as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Binary content including empty bytes and non-printable values. +query +SELECT cast(s_binary as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Nested struct: inner struct rendered inside the outer braces. +query +SELECT cast(s_nested as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Struct containing an array field. +query +SELECT cast(s_with_array as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Whole struct is NULL vs. all inner fields NULL. +query +SELECT cast(s_all_null as string), id FROM test_cast_struct_to_string ORDER BY id + +-- Literal anonymous struct, mixed types with NULL. +query +SELECT cast(struct(1, 'two', cast(null as double)) as string) + +-- Literal named struct, mixed types. +query +SELECT cast(named_struct('k', 'key', 'v', 100, 'flag', true) as string) + +-- Deeply nested literal struct (3 levels). +query +SELECT cast(named_struct('a', named_struct('b', named_struct('c', 1, 'd', 'leaf'))) as string) + +-- Empty-string and whitespace string-field rendering. +query +SELECT cast(named_struct('s1', '', 's2', ' ', 's3', cast(null as string)) as string) + +-- Map-valued field: not supported, falls back to Spark. 
+query expect_fallback(to StringType is not supported) +SELECT cast(named_struct('m', map('k', 1)) as string) + +-- ---------------------------------------------------------------------------- +-- Array → string +-- ---------------------------------------------------------------------------- + +statement +CREATE TABLE test_cast_array_to_string( + id int, + a_int array<int>, + a_string array<string>, + a_bool array<boolean>, + a_bounds array<bigint>, + a_decimal array<decimal(38, 18)>, + a_date array<date>, + a_ts array<timestamp>, + a_binary array<binary>, + a_struct array<struct<x: int, y: string>>, + a_nested array<array<int>> +) USING parquet + +statement +INSERT INTO test_cast_array_to_string VALUES + ( + 1, + array(1, 2, 3), + array('a', 'b', 'c'), + array(true, false, true), + array(9223372036854775807, -9223372036854775808, 0), + array(cast('1.234567890123456789' as decimal(38, 18)), cast('-1.234567890123456789' as decimal(38, 18))), + array(date '2024-01-15', date '1970-01-01'), + array(timestamp '2024-01-15 10:30:45', timestamp '1970-01-01 00:00:00'), + array(X'616263', X'', X'00FF7F80'), + array(named_struct('x', 1, 'y', 'first'), named_struct('x', 2, 'y', 'second')), + array(array(1, 2), array(3, 4, 5)) + ), + ( + 2, + array(cast(null as int), 1, cast(null as int)), + array(cast(null as string), '', ' '), + array(cast(null as boolean), true), + array(cast(null as bigint), 0), + array(cast(null as decimal(38, 18))), + array(cast(null as date)), + array(cast(null as timestamp)), + array(cast(null as binary), X'00'), + array(named_struct('x', cast(null as int), 'y', cast(null as string)), cast(null as struct<x: int, y: string>)), + array(cast(null as array<int>), array(cast(null as int))) + ), + ( + 3, + cast(array() as array<int>), + cast(array() as array<string>), + cast(array() as array<boolean>), + cast(array() as array<bigint>), + cast(array() as array<decimal(38, 18)>), + cast(array() as array<date>), + cast(array() as array<timestamp>), + cast(array() as array<binary>), + cast(array() as array<struct<x: int, y: string>>), + cast(array() as array<array<int>>) + ), + ( + 4, + cast(null as array<int>), + cast(null as array<string>), + cast(null as array<boolean>), + cast(null as array<bigint>), + cast(null as array<decimal(38, 18)>), +
cast(null as array<date>), + cast(null as array<timestamp>), + cast(null as array<binary>), + cast(null as array<struct<x: int, y: string>>), + cast(null as array<array<int>>) + ) + +query +SELECT cast(a_int as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_string as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_bool as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_bounds as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_decimal as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_date as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_ts as string), id FROM test_cast_array_to_string ORDER BY id + +query +SELECT cast(a_binary as string), id FROM test_cast_array_to_string ORDER BY id + +-- Array of structs: each element rendered as `{f1, f2, ...}`. +query +SELECT cast(a_struct as string), id FROM test_cast_array_to_string ORDER BY id + +-- Nested array<array<int>>: outer `[...]` containing inner `[...]`. +query +SELECT cast(a_nested as string), id FROM test_cast_array_to_string ORDER BY id + +-- Array of floats / doubles with NaN / ±0 / ±Infinity / NULL. +query +SELECT cast(array(cast(1.5 as float), cast('NaN' as float), cast(-0.0 as float), cast(null as float)) as string) + +query +SELECT cast(array(cast(1.5 as double), cast('NaN' as double), cast('-Infinity' as double), cast(null as double)) as string) + +-- Deeply nested literal array (3 levels). +query +SELECT cast(array(array(array(1, 2), array(3)), array(array(cast(null as int)))) as string) + +-- Array of map: not supported, falls back to Spark.
+query expect_fallback(to StringType is not supported) +SELECT cast(array(map('k', 1)) as string) + +-- ---------------------------------------------------------------------------- +-- Map → string +-- ---------------------------------------------------------------------------- +-- Comet does not implement map-to-string casts, so every map → string falls back to Spark. +-- Note: maps materialized through parquet have nondeterministic entry order, so map column +-- tests use literal maps directly rather than reading from a parquet table. + +-- Map with string keys, int values. +query expect_fallback(Cast from MapType) +SELECT cast(map('a', 1, 'b', 2, 'c', 3) as string) + +-- Map with NULL values rendered as "null". +query expect_fallback(Cast from MapType) +SELECT cast(map('a', 1, 'b', cast(null as int), 'c', 3) as string) + +-- Map with int keys, string values. +query expect_fallback(Cast from MapType) +SELECT cast(map(1, 'one', 2, 'two', 3, 'three') as string) + +-- Map with boolean values. +query expect_fallback(Cast from MapType) +SELECT cast(map('t', true, 'f', false, 'n', cast(null as boolean)) as string) + +-- Map with bigint values at min/max. +query expect_fallback(Cast from MapType) +SELECT cast(map('max', 9223372036854775807, 'min', -9223372036854775808, 'zero', cast(0 as bigint)) as string) + +-- Map with decimal values. +query expect_fallback(Cast from MapType) +SELECT cast(map('pos', cast('1.234567890123456789' as decimal(38, 18)), 'neg', cast('-1.234567890123456789' as decimal(38, 18)), 'null', cast(null as decimal(38, 18))) as string) + +-- Map with date and timestamp values. +query expect_fallback(Cast from MapType) +SELECT cast(map('a', date '2024-01-15', 'b', date '1970-01-01', 'c', cast(null as date)) as string) + +query expect_fallback(Cast from MapType) +SELECT cast(map('a', timestamp '2024-01-15 10:30:45', 'b', cast(null as timestamp)) as string) + +-- Map with binary values. 
+query expect_fallback(Cast from MapType) +SELECT cast(map('a', X'616263', 'b', X'', 'c', cast(null as binary)) as string) + +-- Map with float / double values: NaN / ±0 / ±Infinity / NULL. +query expect_fallback(Cast from MapType) +SELECT cast(map('nan', cast('NaN' as float), 'neg0', cast(-0.0 as float), 'null', cast(null as float)) as string) + +query expect_fallback(Cast from MapType) +SELECT cast(map('nan', cast('NaN' as double), 'inf', cast('Infinity' as double), 'ninf', cast('-Infinity' as double), 'null', cast(null as double)) as string) + +-- Map with struct values: each value rendered as `{f1, f2, ...}`. +query expect_fallback(Cast from MapType) +SELECT cast(map('a', named_struct('x', 1, 'y', 'first'), 'b', cast(null as struct<x: int, y: string>)) as string) + +-- Map with array values. +query expect_fallback(Cast from MapType) +SELECT cast(map('a', array(1, 2, 3), 'b', array(cast(null as int)), 'c', cast(null as array<int>)) as string) + +-- Empty map. +query expect_fallback(Cast from MapType) +SELECT cast(map() as string) + +-- NULL map: Spark constant-folds this to a literal NULL, so the cast never reaches Comet +-- and there is no fallback. +query +SELECT cast(cast(null as map<string, int>) as string) + +-- Map of map.
+query expect_fallback(Cast from MapType) +SELECT cast(map('outer', map('inner', 1)) as string) diff --git a/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql b/spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string_legacy.sql similarity index 50% rename from spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql rename to spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string_legacy.sql index df7ddbedb8..2c0bc19b3b 100644 --- a/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string_legacy.sql +++ b/spark/src/test/resources/sql-tests/expressions/cast/cast_complex_types_to_string_legacy.sql @@ -15,12 +15,26 @@ -- specific language governing permissions and limitations -- under the License. --- When spark.sql.legacy.castComplexTypesToString.enabled is true Spark uses the legacy --- formatter (`[...]` with NULL elements omitted). Comet only implements the default --- (`{...}` with NULL elements rendered as "null"), so the cast must fall back to Spark. +-- When `spark.sql.legacy.castComplexTypesToString.enabled` is true Spark wraps maps and +-- structs with `[...]` (instead of `{...}`) and omits NULL elements of structs/maps/arrays +-- (instead of rendering them as the literal "null"). Comet only implements the default +-- formatting, so any array/map/struct → string cast must fall back to Spark. -- The flag is internal in Spark 4.0 and defaults to false. -- Config: spark.sql.legacy.castComplexTypesToString.enabled=true +-- Struct → string falls back. query expect_fallback(spark.sql.legacy.castComplexTypesToString.enabled=true is not supported) SELECT CAST(struct(1, 2, null) AS STRING) + +-- Array → string falls back (NULL elements rendered differently between modes). 
+query expect_fallback(spark.sql.legacy.castComplexTypesToString.enabled=true is not supported) +SELECT CAST(array(1, 2, null) AS STRING) + +-- Map → string falls back (`[]` vs `{}` wrapping differs between modes). +query expect_fallback(spark.sql.legacy.castComplexTypesToString.enabled=true is not supported) +SELECT CAST(map('a', 1, 'b', null) AS STRING) + +-- Nested complex types still fall back through the outer type. +query expect_fallback(spark.sql.legacy.castComplexTypesToString.enabled=true is not supported) +SELECT CAST(struct(array(1, null), map('k', null)) AS STRING) diff --git a/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql b/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql deleted file mode 100644 index 54eae98f7e..0000000000 --- a/spark/src/test/resources/sql-tests/expressions/cast/cast_struct_to_string.sql +++ /dev/null @@ -1,153 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. - --- Default (non-legacy) struct-to-string formatting: `{f1, f2, ...}` with NULL elements --- rendered as "null". The legacy `[...]` mode is covered separately in --- cast_struct_to_string_legacy.sql. 
- --- Config: spark.sql.legacy.castComplexTypesToString.enabled=false --- ConfigMatrix: parquet.enable.dictionary=false,true - -statement -CREATE TABLE test_cast_struct_to_string( - id int, - s_unnamed struct<col1: int, col2: string>, - s_named struct<a: int, b: string, c: boolean>, - s_floats struct<f: float, d: double>, - s_bounds struct<b: tinyint, s: smallint, i: int, l: bigint>, - s_decimal struct<d1: decimal(10, 2), d2: decimal(38, 18)>, - s_temporal struct<dt: date, ts: timestamp>, - s_binary struct<b: binary>, - s_nested struct<inner: struct<x: int, y: string>, tag: string>, - s_with_array struct<arr: array<int>, label: string>, - s_all_null struct<a: int, b: string> -) USING parquet - -statement -INSERT INTO test_cast_struct_to_string VALUES - ( - 1, - named_struct('col1', 1, 'col2', 'hello'), - named_struct('a', 42, 'b', 'world', 'c', true), - named_struct('f', cast(1.5 as float), 'd', cast(2.5 as double)), - named_struct('b', cast(127 as tinyint), 's', cast(32767 as smallint), 'i', 2147483647, 'l', 9223372036854775807), - named_struct('d1', cast('12345678.90' as decimal(10, 2)), 'd2', cast('1.234567890123456789' as decimal(38, 18))), - named_struct('dt', date '2024-01-15', 'ts', timestamp '2024-01-15 10:30:45'), - named_struct('b', X'616263'), - named_struct('inner', named_struct('x', 10, 'y', 'inner'), 'tag', 'outer'), - named_struct('arr', array(1, 2, 3), 'label', 'three'), - named_struct('a', 1, 'b', 'present') - ), - ( - 2, - named_struct('col1', cast(null as int), 'col2', 'with-null-int'), - named_struct('a', 0, 'b', cast(null as string), 'c', false), - named_struct('f', cast('NaN' as float), 'd', cast('NaN' as double)), - named_struct('b', cast(-128 as tinyint), 's', cast(-32768 as smallint), 'i', -2147483648, 'l', -9223372036854775808), - named_struct('d1', cast('-12345678.90' as decimal(10, 2)), 'd2', cast('-1.234567890123456789' as decimal(38, 18))), - named_struct('dt', date '1970-01-01', 'ts', timestamp '1970-01-01 00:00:00'), - named_struct('b', X''), - named_struct('inner', named_struct('x', cast(null as int), 'y', cast(null as string)), 'tag', cast(null as string)), - named_struct('arr', array(cast(null as int), 1, cast(null as int)), 'label', cast(null as string)), - named_struct('a', cast(null as
int), 'b', cast(null as string)) - ), - ( - 3, - named_struct('col1', -1, 'col2', ''), - named_struct('a', cast(null as int), 'b', '', 'c', cast(null as boolean)), - named_struct('f', cast('Infinity' as float), 'd', cast('-Infinity' as double)), - named_struct('b', cast(0 as tinyint), 's', cast(0 as smallint), 'i', 0, 'l', cast(0 as bigint)), - named_struct('d1', cast(0 as decimal(10, 2)), 'd2', cast(0 as decimal(38, 18))), - named_struct('dt', date '9999-12-31', 'ts', timestamp '9999-12-31 23:59:59'), - named_struct('b', X'00FF7F80'), - named_struct('inner', named_struct('x', 0, 'y', ''), 'tag', ''), - named_struct('arr', cast(array() as array<int>), 'label', ''), - cast(null as struct<a: int, b: string>) - ), - ( - 4, - named_struct('col1', cast(null as int), 'col2', cast(null as string)), - named_struct('a', cast(null as int), 'b', cast(null as string), 'c', cast(null as boolean)), - named_struct('f', cast(-0.0 as float), 'd', cast(-0.0 as double)), - named_struct('b', cast(null as tinyint), 's', cast(null as smallint), 'i', cast(null as int), 'l', cast(null as bigint)), - named_struct('d1', cast(null as decimal(10, 2)), 'd2', cast(null as decimal(38, 18))), - named_struct('dt', cast(null as date), 'ts', cast(null as timestamp)), - named_struct('b', cast(null as binary)), - named_struct('inner', cast(null as struct<x: int, y: string>), 'tag', cast(null as string)), - named_struct('arr', cast(null as array<int>), 'label', cast(null as string)), - cast(null as struct<a: int, b: string>) - ) - --- Anonymous struct fields are auto-named col1, col2, ... by `struct(...)`. -query -SELECT cast(s_unnamed as string), id FROM test_cast_struct_to_string ORDER BY id - --- Named struct fields propagate user-supplied names into the formatted output. -query -SELECT cast(s_named as string), id FROM test_cast_struct_to_string ORDER BY id - --- Floating-point: NaN, ±0, ±Infinity, NULL. -query -SELECT cast(s_floats as string), id FROM test_cast_struct_to_string ORDER BY id - --- Integer min/max for byte, short, int, long.
-query -SELECT cast(s_bounds as string), id FROM test_cast_struct_to_string ORDER BY id - --- Decimal at the small and the 38-precision limit, positive / negative / zero / NULL. -query -SELECT cast(s_decimal as string), id FROM test_cast_struct_to_string ORDER BY id - --- Date and timestamp at common values plus the date range edges. -query -SELECT cast(s_temporal as string), id FROM test_cast_struct_to_string ORDER BY id - --- Binary content including empty bytes and non-printable values. -query -SELECT cast(s_binary as string), id FROM test_cast_struct_to_string ORDER BY id - --- Nested struct: inner struct rendered inside the outer braces. -query -SELECT cast(s_nested as string), id FROM test_cast_struct_to_string ORDER BY id - --- Struct containing an array field. -query -SELECT cast(s_with_array as string), id FROM test_cast_struct_to_string ORDER BY id - --- Whole struct is NULL vs. all inner fields NULL. -query -SELECT cast(s_all_null as string), id FROM test_cast_struct_to_string ORDER BY id - --- Literal anonymous struct, mixed types with NULL. -query -SELECT cast(struct(1, 'two', cast(null as double)) as string) - --- Literal named struct, mixed types. -query -SELECT cast(named_struct('k', 'key', 'v', 100, 'flag', true) as string) - --- Deeply nested literal struct (3 levels). -query -SELECT cast(named_struct('a', named_struct('b', named_struct('c', 1, 'd', 'leaf'))) as string) - --- Empty-string and whitespace string-field rendering. -query -SELECT cast(named_struct('s1', '', 's2', ' ', 's3', cast(null as string)) as string) - --- Map-valued field: not supported, falls back to Spark. -query expect_fallback(to StringType is not supported) -SELECT cast(named_struct('m', map('k', 1)) as string)