diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 5761028f60234..744c472b20179 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1483,7 +1483,10 @@ nonTrivialPrimitiveType | INTERVAL (fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))?)? - | TIMESTAMP (withLocalTimeZone | withoutTimeZone)? + | TIMESTAMP (LEFT_PAREN precision=integerValue RIGHT_PAREN)? + (withLocalTimeZone | withoutTimeZone)? + | TIMESTAMP_LTZ (LEFT_PAREN precision=integerValue RIGHT_PAREN)? + | TIMESTAMP_NTZ (LEFT_PAREN precision=integerValue RIGHT_PAREN)? | TIME (LEFT_PAREN precision=integerValue RIGHT_PAREN)? (withoutTimeZone)? | GEOGRAPHY LEFT_PAREN (srid=integerValue | any=ANY) RIGHT_PAREN | GEOMETRY LEFT_PAREN (srid=integerValue | any=ANY) RIGHT_PAREN @@ -1498,7 +1501,6 @@ trivialPrimitiveType | FLOAT | REAL | DOUBLE | DATE - | TIMESTAMP_LTZ | TIMESTAMP_NTZ | BINARY | VOID | VARIANT diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 43b93f8f3d060..9de6aceb757b9 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -28,9 +28,9 @@ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin} import org.apache.spark.sql.connector.catalog.IdentityColumnSpec -import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors} +import 
org.apache.spark.sql.errors.{DataTypeErrors, DataTypeErrorsBase, QueryParsingErrors} import org.apache.spark.sql.internal.SqlApiConf -import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType} +import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampLTZNanosType, TimestampNTZNanosType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType} /** * AST builder for parsing data type definitions and table schemas. 
@@ -350,11 +350,42 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE CalendarIntervalType } case TIMESTAMP if currentCtx.withLocalTimeZone() != null => - TimestampType + if (currentCtx.precision == null) { + TimestampType + } else { + parseTimestampLtzNanosPrecision(currentCtx.precision.getText) + } case TIMESTAMP if currentCtx.withoutTimeZone() != null => - TimestampNTZType + if (currentCtx.precision == null) { + TimestampNTZType + } else { + parseTimestampNtzNanosPrecision(currentCtx.precision.getText) + } case TIMESTAMP => - SqlApiConf.get.timestampType + if (currentCtx.precision == null) { + SqlApiConf.get.timestampType + } else { + SqlApiConf.get.timestampType match { + case TimestampType => + parseTimestampLtzNanosPrecision(currentCtx.precision.getText) + case TimestampNTZType => + parseTimestampNtzNanosPrecision(currentCtx.precision.getText) + case other => + throw SparkException.internalError(s"Unexpected default timestamp type: $other") + } + } + case TIMESTAMP_LTZ => + if (currentCtx.precision == null) { + TimestampType + } else { + parseTimestampLtzNanosPrecision(currentCtx.precision.getText) + } + case TIMESTAMP_NTZ => + if (currentCtx.precision == null) { + TimestampNTZType + } else { + parseTimestampNtzNanosPrecision(currentCtx.precision.getText) + } case TIME => val precision = if (currentCtx.precision == null) { TimeType.DEFAULT_PRECISION @@ -398,8 +429,6 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE case FLOAT | REAL => FloatType case DOUBLE => DoubleType case DATE => DateType - case TIMESTAMP_LTZ => TimestampType - case TIMESTAMP_NTZ => TimestampNTZType case BINARY => BinaryType case VOID => NullType case VARIANT => VariantType @@ -448,6 +477,24 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE } } + private def parseTimestampLtzNanosPrecision(precision: String): TimestampLTZNanosType = { + DataTypeErrors.checkTimestampNanosTypesEnabled() + 
try TimestampLTZNanosType(precision.toInt) + catch { + case _: NumberFormatException => + throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_LTZ") + } + } + + private def parseTimestampNtzNanosPrecision(precision: String): TimestampNTZNanosType = { + DataTypeErrors.checkTimestampNanosTypesEnabled() + try TimestampNTZNanosType(precision.toInt) + catch { + case _: NumberFormatException => + throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_NTZ") + } + } + /** * Create a complex DataType. Arrays, Maps and Structures are supported. */ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala index 6e8cb8077be81..b89da2c246a70 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.catalyst.util.QuotingUtils import org.apache.spark.sql.catalyst.util.QuotingUtils.toSQLSchema +import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types.{DataType, Decimal, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -282,4 +283,16 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase { messageParameters = Map("precision" -> precision, "type" -> typeName), cause = null) } + + def checkTimestampNanosTypesEnabled(): Unit = { + if (!SqlApiConf.get.timestampNanosTypesEnabled) { + throw new SparkException( + errorClass = "FEATURE_NOT_ENABLED", + messageParameters = Map( + "featureName" -> "Nanosecond-precision timestamp types", + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true"), + cause = null) + } + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index bedd4afe0ed5e..6bd747c743991 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -54,6 +54,7 @@ private[sql] trait SqlApiConf { def legacyParameterSubstitutionConstantsOnly: Boolean def legacyIdentifierClauseOnly: Boolean def typesFrameworkEnabled: Boolean + def timestampNanosTypesEnabled: Boolean } private[sql] object SqlApiConf { @@ -112,4 +113,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def legacyParameterSubstitutionConstantsOnly: Boolean = false override def legacyIdentifierClauseOnly: Boolean = false override def typesFrameworkEnabled: Boolean = false + override def timestampNanosTypesEnabled: Boolean = false } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index fbd70cf8b899c..c1d1430bacee9 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -236,12 +236,14 @@ object DataType { // For backwards compatibility, previously the type name of NullType is "null" case "null" => NullType case TIMESTAMP_LTZ_NANOS_TYPE(precision) => + DataTypeErrors.checkTimestampNanosTypesEnabled() try TimestampLTZNanosType(precision.toInt) catch { case _: NumberFormatException => throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_LTZ") } case TIMESTAMP_NTZ_NANOS_TYPE(precision) => + DataTypeErrors.checkTimestampNanosTypesEnabled() try TimestampNTZNanosType(precision.toInt) catch { case _: NumberFormatException => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 77ef8bb600f9c..bdf8af0871411 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -644,6 +644,19 @@ object SQLConf { .booleanConf .createWithDefaultFunction(() => Utils.isTesting) + val TIMESTAMP_NANOS_TYPES_ENABLED = + buildConf("spark.sql.timestampNanosTypes.enabled") + .internal() + .doc("When true, the parameterized nanosecond-precision timestamp types " + + "TIMESTAMP_NTZ(p) / TIMESTAMP_LTZ(p) for p in [7, 9] are recognized as " + + "Spark SQL data types at user-facing entry points. Default is false because " + + "downstream execution paths (Cast, PhysicalDataType, AnyTimestampType, encoders, " + + "Connect proto) are not yet wired for these types. See SPARK-56822.") + .version("4.2.0") + .withBindingPolicy(ConfigBindingPolicy.SESSION) + .booleanConf + .createWithDefault(false) + val EXTENDED_EXPLAIN_PROVIDERS = buildConf("spark.sql.extendedExplainProviders") .doc("A comma-separated list of classes that implement the" + " org.apache.spark.sql.ExtendedExplainGenerator trait. 
If provided, Spark will print" + @@ -7512,6 +7525,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def typesFrameworkEnabled: Boolean = getConf(TYPES_FRAMEWORK_ENABLED) + def timestampNanosTypesEnabled: Boolean = getConf(TIMESTAMP_NANOS_TYPES_ENABLED) + def dataSourceV2JoinPushdown: Boolean = getConf(DATA_SOURCE_V2_JOIN_PUSHDOWN) def dynamicPartitionPruningEnabled: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_ENABLED) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index 03dbf0a28663a..b55ed2b9c18a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -161,14 +161,81 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { assert(parse("timestamp") === TimestampNTZType) assert(parse("timestamp with local time zone") === TimestampType) assert(parse("timestamp without time zone") === TimestampNTZType) + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + assert(parse("timestamp(9)") === TimestampNTZNanosType(9)) + // Bare TIMESTAMP(p) routes through SqlApiConf.get.timestampType, so an + // out-of-range precision must surface as the NTZ error here. 
+ Seq("6", "10").foreach { p => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(s"timestamp($p)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> "TIMESTAMP_NTZ")) + } + } } withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_LTZ.toString) { assert(parse("timestamp") === TimestampType) assert(parse("timestamp with local time zone") === TimestampType) assert(parse("timestamp without time zone") === TimestampNTZType) + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + assert(parse("timestamp(9)") === TimestampLTZNanosType(9)) + // Bare TIMESTAMP(p) under LTZ default must surface as the LTZ error. + Seq("6", "10").foreach { p => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(s"timestamp($p)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> "TIMESTAMP_LTZ")) + } + } } } + test("parse nanos timestamp types when the preview flag is enabled") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + assert(parse("TIMESTAMP_NTZ(7)") === TimestampNTZNanosType(7)) + assert(parse("TIMESTAMP_NTZ(8)") === TimestampNTZNanosType(8)) + assert(parse("TIMESTAMP_NTZ(9)") === TimestampNTZNanosType(9)) + assert(parse("TIMESTAMP_LTZ(7)") === TimestampLTZNanosType(7)) + assert(parse("TIMESTAMP_LTZ(8)") === TimestampLTZNanosType(8)) + assert(parse("TIMESTAMP_LTZ(9)") === TimestampLTZNanosType(9)) + assert(parse("Timestamp_Ntz(9)") === TimestampNTZNanosType(9)) + assert(parse("timestamp_ltz(7)") === TimestampLTZNanosType(7)) + assert(parse("TIMESTAMP(9) WITHOUT TIME ZONE") === TimestampNTZNanosType(9)) + assert(parse("TIMESTAMP(7) WITH LOCAL TIME ZONE") === TimestampLTZNanosType(7)) + assert(parse("timestamp(8) without time zone") === TimestampNTZNanosType(8)) + assert(parse("timestamp(8) with local time zone") === TimestampLTZNanosType(8)) + } + } + + 
test("nanos timestamp parser surface is gated by SQL conf, disabled by default") { + val gatedSpellings = Seq( + "TIMESTAMP_NTZ(7)", + "TIMESTAMP_LTZ(9)", + "TIMESTAMP(9) WITHOUT TIME ZONE", + "TIMESTAMP(9) WITH LOCAL TIME ZONE", + "TIMESTAMP(9)") + gatedSpellings.foreach { spelling => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(spelling) + }, + condition = "FEATURE_NOT_ENABLED", + parameters = Map( + "featureName" -> "Nanosecond-precision timestamp types", + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true")) + } + // Bare unparameterized forms remain accepted even with the gate off. + assert(parse("TIMESTAMP_NTZ") === TimestampNTZType) + assert(parse("TIMESTAMP_LTZ") === TimestampType) + assert(parse("TIMESTAMP WITHOUT TIME ZONE") === TimestampNTZType) + assert(parse("TIMESTAMP WITH LOCAL TIME ZONE") === TimestampType) + } + // DataType parser accepts certain reserved keywords. checkDataType( "Struct", @@ -241,4 +308,53 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { condition = "PARSE_SYNTAX_ERROR", parameters = Map("error" -> "'WITH'", "hint" -> "")) } + + test("invalid precision of the nanos timestamp data type") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + Seq("TIMESTAMP_NTZ" -> "TIMESTAMP_NTZ", "TIMESTAMP_LTZ" -> "TIMESTAMP_LTZ").foreach { + case (spelling, errorType) => + Seq(0, 1, 6, 10, 99).foreach { p => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(s"$spelling($p)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p.toString, "type" -> errorType)) + } + } + // Integer overflow: the grammar accepts the digits but toInt throws NumberFormatException. Original digits are preserved.
+ checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType("TIMESTAMP_NTZ(99999999999)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> "99999999999", "type" -> "TIMESTAMP_NTZ")) + // TIMESTAMP(p) with zone aliases route to the corresponding nanos type's error. + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType("TIMESTAMP(6) WITHOUT TIME ZONE") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> "6", "type" -> "TIMESTAMP_NTZ")) + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType("TIMESTAMP(10) WITH LOCAL TIME ZONE") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> "10", "type" -> "TIMESTAMP_LTZ")) + // Negative precision is rejected by the parser, not by the type constructor. + checkError( + exception = intercept[ParseException] { + CatalystSqlParser.parseDataType("TIMESTAMP_NTZ(-1)") + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'-'", "hint" -> "")) + checkError( + exception = intercept[ParseException] { + CatalystSqlParser.parseDataType("TIMESTAMP_LTZ(-100)") + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'-'", "hint" -> "")) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 1a7524dbc5a73..afa657c95ede7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -25,11 +25,13 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, 
ParseException} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.types.{DataTypeUtils, PhysicalDataType, UninitializedPhysicalType} import org.apache.spark.sql.catalyst.util.{CollationFactory, StringConcat} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes} -class DataTypeSuite extends SparkFunSuite { +class DataTypeSuite extends SparkFunSuite with SQLHelper { private val UNICODE_COLLATION_ID = CollationFactory.collationNameToId("UNICODE") private val UTF8_LCASE_COLLATION_ID = CollationFactory.collationNameToId("UTF8_LCASE") @@ -257,12 +259,19 @@ class DataTypeSuite extends SparkFunSuite { checkDataTypeFromJson(TimestampNTZType) checkDataTypeFromDDL(TimestampNTZType) - checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION)) - checkDataTypeFromJson(TimestampLTZNanosType(8)) - checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION)) - checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION)) - checkDataTypeFromJson(TimestampNTZNanosType(8)) - checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)) + test("SPARK-56876: from Json roundtrip for nanos timestamp types (preview flag enabled)") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + Seq( + TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION), + TimestampLTZNanosType(8), + TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION), + TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION), + TimestampNTZNanosType(8), + TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)).foreach { dt => + assert(DataType.fromJson(dt.json) === dt) + } + } + } checkDataTypeFromJson(StringType) checkDataTypeFromDDL(StringType) @@ -1484,64 +1493,84 @@ class DataTypeSuite extends SparkFunSuite { } test("SPARK-56876: parse timestamp with nanosecond precision from 
JSON") { - // (json-type-name, sql-type-name-in-error, factory) - val variants = Seq[(String, String, Int => DataType)]( - ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)), - ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_))) - val overflowing = "9" * 20 - - variants.foreach { case (name, sqlTypeName, factory) => - // Happy path across valid precisions, tolerant of surrounding whitespace. - TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => - assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n)) - assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n)) - assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n)) - } - - // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The overflowing - // case verifies the original digit string is preserved instead of leaking - // NumberFormatException. - Seq("0", "6", "10", overflowing).foreach { p => - checkError( - exception = intercept[SparkException] { - DataType.fromJson(s"""\"$name($p)\"""") - }, - condition = "INVALID_TIMESTAMP_PRECISION", - parameters = Map("precision" -> p, "type" -> sqlTypeName)) + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + // (json-type-name, sql-type-name-in-error, factory) + val variants = Seq[(String, String, Int => DataType)]( + ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)), + ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_))) + val overflowing = "9" * 20 + + variants.foreach { case (name, sqlTypeName, factory) => + // Happy path across valid precisions, tolerant of surrounding whitespace. + TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => + assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n)) + assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n)) + assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n)) + } + + // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. 
The overflowing + // case verifies the original digit string is preserved instead of leaking + // NumberFormatException. + Seq("0", "6", "10", overflowing).foreach { p => + checkError( + exception = intercept[SparkException] { + DataType.fromJson(s"""\"$name($p)\"""") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> sqlTypeName)) + } + + // Malformed precision forms that don't match the regex fall through to + // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase + // (JSON type-name convention is lowercase). + Seq( + s"$name(-1)", + s"$name()", + s"$name(abc)", + s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw => + checkError( + exception = intercept[SparkIllegalArgumentException] { + DataType.fromJson(s"""\"$raw\"""") + }, + condition = "INVALID_JSON_DATA_TYPE", + parameters = Map("invalidType" -> raw)) + } } - // Malformed precision forms that don't match the regex fall through to - // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase - // (JSON type-name convention is lowercase). - Seq( - s"$name(-1)", - s"$name()", - s"$name(abc)", - s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw => - checkError( - exception = intercept[SparkIllegalArgumentException] { - DataType.fromJson(s"""\"$raw\"""") - }, - condition = "INVALID_JSON_DATA_TYPE", - parameters = Map("invalidType" -> raw)) - } + // JSON round-trip for nanos timestamp types inside struct, array, and map. 
+ val structWithNanos = StructType(Seq( + StructField("ntz", TimestampNTZNanosType(7)), + StructField("ltz", TimestampLTZNanosType(8)))) + assert(DataType.fromJson(structWithNanos.json) === structWithNanos) + val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull = false) + assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos) + val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true) + assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos) } - // JSON round-trip for nanos timestamp types inside struct, array, and map. - val structWithNanos = StructType(Seq( - StructField("ntz", TimestampNTZNanosType(7)), - StructField("ltz", TimestampLTZNanosType(8)))) - assert(DataType.fromJson(structWithNanos.json) === structWithNanos) - val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull = false) - assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos) - val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true) - assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos) - - // Bare names without parens still map to the legacy single-precision types. + // Bare names without parens still map to the legacy single-precision types, regardless + // of the preview flag. 
assert(DataType.fromJson("\"timestamp_ltz\"") === TimestampType) assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType) } + test("SPARK-56965: JSON parser rejects nanos timestamp types when preview flag is off") { + Seq( + "\"timestamp_ltz(7)\"" -> "Nanosecond-precision timestamp types", + "\"timestamp_ntz(9)\"" -> "Nanosecond-precision timestamp types").foreach { + case (json, featureName) => + checkError( + exception = intercept[SparkException] { + DataType.fromJson(json) + }, + condition = "FEATURE_NOT_ENABLED", + parameters = Map( + "featureName" -> featureName, + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true")) + } + } + test("singleton DataType equality after deserialization") { // Singleton DataTypes that use `case object` pattern matching (e.g., `case BinaryType =>`). // If a non-singleton instance is created (e.g., via Kryo deserialization which doesn't call