From c2878373e547ddb7921c8862707b8e97939683c2 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Thu, 21 May 2026 12:50:54 +0000 Subject: [PATCH 1/4] Initial commit --- .../sql/catalyst/parser/SqlBaseParser.g4 | 6 +- .../catalyst/parser/DataTypeAstBuilder.scala | 74 ++++++++++++-- .../spark/sql/internal/SqlApiConf.scala | 2 + .../apache/spark/sql/internal/SQLConf.scala | 15 +++ .../catalyst/parser/DataTypeParserSuite.scala | 97 +++++++++++++++++++ 5 files changed, 185 insertions(+), 9 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 5761028f60234..744c472b20179 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1483,7 +1483,10 @@ nonTrivialPrimitiveType | INTERVAL (fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))?)? - | TIMESTAMP (withLocalTimeZone | withoutTimeZone)? + | TIMESTAMP (LEFT_PAREN precision=integerValue RIGHT_PAREN)? + (withLocalTimeZone | withoutTimeZone)? + | TIMESTAMP_LTZ (LEFT_PAREN precision=integerValue RIGHT_PAREN)? + | TIMESTAMP_NTZ (LEFT_PAREN precision=integerValue RIGHT_PAREN)? | TIME (LEFT_PAREN precision=integerValue RIGHT_PAREN)? (withoutTimeZone)? | GEOGRAPHY LEFT_PAREN (srid=integerValue | any=ANY) RIGHT_PAREN | GEOMETRY LEFT_PAREN (srid=integerValue | any=ANY) RIGHT_PAREN @@ -1498,7 +1501,6 @@ trivialPrimitiveType | FLOAT | REAL | DOUBLE | DATE - | TIMESTAMP_LTZ | TIMESTAMP_NTZ | BINARY | VOID | VARIANT diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 43b93f8f3d060..377eb0f5d7727 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -28,9 +28,9 @@ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin} import org.apache.spark.sql.connector.catalog.IdentityColumnSpec -import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors} +import org.apache.spark.sql.errors.{DataTypeErrors, DataTypeErrorsBase, QueryParsingErrors} import org.apache.spark.sql.internal.SqlApiConf -import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType} +import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampLTZNanosType, TimestampNTZNanosType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType} /** * AST builder for parsing data type definitions and table schemas. @@ -350,11 +350,43 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE CalendarIntervalType } case TIMESTAMP if currentCtx.withLocalTimeZone() != null => - TimestampType + if (currentCtx.precision == null) { + TimestampType + } else { + parseTimestampLtzNanosPrecision(currentCtx.precision.getText) + } case TIMESTAMP if currentCtx.withoutTimeZone() != null => - TimestampNTZType + if (currentCtx.precision == null) { + TimestampNTZType + } else { + parseTimestampNtzNanosPrecision(currentCtx.precision.getText) + } case TIMESTAMP => - SqlApiConf.get.timestampType + if (currentCtx.precision == null) { + SqlApiConf.get.timestampType + } else { + SqlApiConf.get.timestampType match { + case TimestampType => + parseTimestampLtzNanosPrecision(currentCtx.precision.getText) + case TimestampNTZType => + parseTimestampNtzNanosPrecision(currentCtx.precision.getText) + case other => + throw SparkException.internalError( + s"Unexpected default timestamp type: $other") + } + } + case TIMESTAMP_LTZ => + if (currentCtx.precision == null) { + TimestampType + } else { + parseTimestampLtzNanosPrecision(currentCtx.precision.getText) + } + case TIMESTAMP_NTZ => + if (currentCtx.precision == null) { + TimestampNTZType + } else { + parseTimestampNtzNanosPrecision(currentCtx.precision.getText) + } case TIME => val precision = if (currentCtx.precision == null) { TimeType.DEFAULT_PRECISION @@ -398,8 +430,6 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE case FLOAT | REAL => FloatType case DOUBLE => DoubleType case DATE => DateType - case TIMESTAMP_LTZ => TimestampType - case TIMESTAMP_NTZ => TimestampNTZType case BINARY => BinaryType case VOID => NullType case VARIANT => VariantType @@ -448,6 +478,36 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE } } + private def parseTimestampLtzNanosPrecision(precision: String): TimestampLTZNanosType = { + checkTimestampNanosTypesEnabled() + try TimestampLTZNanosType(precision.toInt) + catch { + case _: NumberFormatException => + throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_LTZ") + } + } + + private def parseTimestampNtzNanosPrecision(precision: String): TimestampNTZNanosType = { + checkTimestampNanosTypesEnabled() + try TimestampNTZNanosType(precision.toInt) + catch { + case _: NumberFormatException => + throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_NTZ") + } + } + + private def checkTimestampNanosTypesEnabled(): Unit = { + if (!SqlApiConf.get.timestampNanosTypesEnabled) { + throw new SparkException( + errorClass = "FEATURE_NOT_ENABLED", + messageParameters = Map( + "featureName" -> "Nanosecond-precision timestamp types", + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true"), + cause = null) + } + } + /** * Create a complex DataType. Arrays, Maps and Structures are supported. */ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index bedd4afe0ed5e..6bd747c743991 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -54,6 +54,7 @@ private[sql] trait SqlApiConf { def legacyParameterSubstitutionConstantsOnly: Boolean def legacyIdentifierClauseOnly: Boolean def typesFrameworkEnabled: Boolean + def timestampNanosTypesEnabled: Boolean } private[sql] object SqlApiConf { @@ -112,4 +113,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def legacyParameterSubstitutionConstantsOnly: Boolean = false override def legacyIdentifierClauseOnly: Boolean = false override def typesFrameworkEnabled: Boolean = false + override def timestampNanosTypesEnabled: Boolean = false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 77ef8bb600f9c..b70718593a5d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -644,6 +644,19 @@ object SQLConf { .booleanConf .createWithDefaultFunction(() => Utils.isTesting) + val TIMESTAMP_NANOS_TYPES_ENABLED = + buildConf("spark.sql.timestampNanosTypes.enabled") + .internal() + .doc("When true, the SQL parser accepts the parameterized nanosecond-precision " + + "timestamp types TIMESTAMP_NTZ(p), TIMESTAMP_LTZ(p), and TIMESTAMP(p) (with " + + "optional WITH LOCAL TIME ZONE / WITHOUT TIME ZONE suffix) for p in [7, 9], " + + "producing TimestampNTZNanosType / TimestampLTZNanosType. Default is false " + + "because downstream execution paths (Cast, PhysicalDataType, AnyTimestampType, " + + "encoders, Connect proto) are not yet wired for these types. See SPARK-56822.") + .version("4.2.0") + .booleanConf + .createWithDefault(false) + val EXTENDED_EXPLAIN_PROVIDERS = buildConf("spark.sql.extendedExplainProviders") .doc("A comma-separated list of classes that implement the" + " org.apache.spark.sql.ExtendedExplainGenerator trait. If provided, Spark will print" + @@ -7512,6 +7525,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def typesFrameworkEnabled: Boolean = getConf(TYPES_FRAMEWORK_ENABLED) + def timestampNanosTypesEnabled: Boolean = getConf(TIMESTAMP_NANOS_TYPES_ENABLED) + def dataSourceV2JoinPushdown: Boolean = getConf(DATA_SOURCE_V2_JOIN_PUSHDOWN) def dynamicPartitionPruningEnabled: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_ENABLED) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index 03dbf0a28663a..5c19374723f64 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -161,14 +161,62 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { assert(parse("timestamp") === TimestampNTZType) assert(parse("timestamp with local time zone") === TimestampType) assert(parse("timestamp without time zone") === TimestampNTZType) + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + assert(parse("timestamp(9)") === TimestampNTZNanosType(9)) + } } withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_LTZ.toString) { assert(parse("timestamp") === TimestampType) assert(parse("timestamp with local time zone") === TimestampType) assert(parse("timestamp without time zone") === TimestampNTZType) + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + assert(parse("timestamp(9)") === TimestampLTZNanosType(9)) + } } } + test("parse nanos timestamp types when the preview flag is enabled") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + assert(parse("TIMESTAMP_NTZ(7)") === TimestampNTZNanosType(7)) + assert(parse("TIMESTAMP_NTZ(8)") === TimestampNTZNanosType(8)) + assert(parse("TIMESTAMP_NTZ(9)") === TimestampNTZNanosType(9)) + assert(parse("TIMESTAMP_LTZ(7)") === TimestampLTZNanosType(7)) + assert(parse("TIMESTAMP_LTZ(8)") === TimestampLTZNanosType(8)) + assert(parse("TIMESTAMP_LTZ(9)") === TimestampLTZNanosType(9)) + assert(parse("Timestamp_Ntz(9)") === TimestampNTZNanosType(9)) + assert(parse("timestamp_ltz(7)") === TimestampLTZNanosType(7)) + assert(parse("TIMESTAMP(9) WITHOUT TIME ZONE") === TimestampNTZNanosType(9)) + assert(parse("TIMESTAMP(7) WITH LOCAL TIME ZONE") === TimestampLTZNanosType(7)) + assert(parse("timestamp(8) without time zone") === TimestampNTZNanosType(8)) + assert(parse("timestamp(8) with local time zone") === TimestampLTZNanosType(8)) + } + } + + test("nanos timestamp parser surface is gated by SQL conf, disabled by default") { + val gatedSpellings = Seq( + "TIMESTAMP_NTZ(7)", + "TIMESTAMP_LTZ(9)", + "TIMESTAMP(9) WITHOUT TIME ZONE", + "TIMESTAMP(9) WITH LOCAL TIME ZONE", + "TIMESTAMP(9)") + gatedSpellings.foreach { spelling => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(spelling) + }, + condition = "FEATURE_NOT_ENABLED", + parameters = Map( + "featureName" -> "Nanosecond-precision timestamp types", + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true")) + } + // Bare unparameterized forms remain accepted even with the gate off. + assert(parse("TIMESTAMP_NTZ") === TimestampNTZType) + assert(parse("TIMESTAMP_LTZ") === TimestampType) + assert(parse("TIMESTAMP WITHOUT TIME ZONE") === TimestampNTZType) + assert(parse("TIMESTAMP WITH LOCAL TIME ZONE") === TimestampType) + } + // DataType parser accepts certain reserved keywords. checkDataType( "Struct", @@ -241,4 +289,53 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { condition = "PARSE_SYNTAX_ERROR", parameters = Map("error" -> "'WITH'", "hint" -> "")) } + + test("invalid precision of the nanos timestamp data type") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + Seq("TIMESTAMP_NTZ" -> "TIMESTAMP_NTZ", "TIMESTAMP_LTZ" -> "TIMESTAMP_LTZ").foreach { + case (spelling, errorType) => + Seq(0, 1, 6, 10, 99).foreach { p => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(s"$spelling($p)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p.toString, "type" -> errorType)) + } + } + // Integer overflow: regex matches but Int.parseInt fails. Original digits are preserved. + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType("TIMESTAMP_NTZ(99999999999)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> "99999999999", "type" -> "TIMESTAMP_NTZ")) + // TIMESTAMP(p) with zone aliases route to the corresponding nanos type's error. + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType("TIMESTAMP(6) WITHOUT TIME ZONE") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> "6", "type" -> "TIMESTAMP_NTZ")) + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType("TIMESTAMP(10) WITH LOCAL TIME ZONE") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> "10", "type" -> "TIMESTAMP_LTZ")) + // Negative precision is rejected by the parser, not by the type constructor. + checkError( + exception = intercept[ParseException] { + CatalystSqlParser.parseDataType("TIMESTAMP_NTZ(-1)") + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'-'", "hint" -> "")) + checkError( + exception = intercept[ParseException] { + CatalystSqlParser.parseDataType("TIMESTAMP_LTZ(-100)") + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'-'", "hint" -> "")) + } + } } From 2f53804b336a011076f1680f31024a3fda81a3b8 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Fri, 22 May 2026 09:49:32 +0000 Subject: [PATCH 2/4] resolve timestamp doc comment --- .../catalyst/parser/DataTypeAstBuilder.scala | 16 +- .../spark/sql/errors/DataTypeErrors.scala | 13 ++ .../org/apache/spark/sql/types/DataType.scala | 2 + .../apache/spark/sql/internal/SQLConf.scala | 11 +- .../spark/sql/types/DataTypeSuite.scala | 143 +++++++++++------- 5 files changed, 108 insertions(+), 77 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 377eb0f5d7727..eabd128b594c7 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -479,7 +479,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE } private def parseTimestampLtzNanosPrecision(precision: String): TimestampLTZNanosType = { - checkTimestampNanosTypesEnabled() + DataTypeErrors.checkTimestampNanosTypesEnabled() try TimestampLTZNanosType(precision.toInt) catch { case _: NumberFormatException => @@ -488,7 +488,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE } private def parseTimestampNtzNanosPrecision(precision: String): TimestampNTZNanosType = { - checkTimestampNanosTypesEnabled() + DataTypeErrors.checkTimestampNanosTypesEnabled() try TimestampNTZNanosType(precision.toInt) catch { case _: NumberFormatException => @@ -496,18 +496,6 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE } } - private def checkTimestampNanosTypesEnabled(): Unit = { - if (!SqlApiConf.get.timestampNanosTypesEnabled) { - throw new SparkException( - errorClass = "FEATURE_NOT_ENABLED", - messageParameters = Map( - "featureName" -> "Nanosecond-precision timestamp types", - "configKey" -> "spark.sql.timestampNanosTypes.enabled", - "configValue" -> "true"), - cause = null) - } - } - /** * Create a complex DataType. Arrays, Maps and Structures are supported. */ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala index 6e8cb8077be81..b89da2c246a70 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.catalyst.util.QuotingUtils import org.apache.spark.sql.catalyst.util.QuotingUtils.toSQLSchema +import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types.{DataType, Decimal, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -282,4 +283,16 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase { messageParameters = Map("precision" -> precision, "type" -> typeName), cause = null) } + + def checkTimestampNanosTypesEnabled(): Unit = { + if (!SqlApiConf.get.timestampNanosTypesEnabled) { + throw new SparkException( + errorClass = "FEATURE_NOT_ENABLED", + messageParameters = Map( + "featureName" -> "Nanosecond-precision timestamp types", + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true"), + cause = null) + } + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index fbd70cf8b899c..c1d1430bacee9 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -236,12 +236,14 @@ object DataType { // For backwards compatibility, previously the type name of NullType is "null" case "null" => NullType case TIMESTAMP_LTZ_NANOS_TYPE(precision) => + DataTypeErrors.checkTimestampNanosTypesEnabled() try TimestampLTZNanosType(precision.toInt) catch { case _: NumberFormatException => throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_LTZ") } case TIMESTAMP_NTZ_NANOS_TYPE(precision) => + DataTypeErrors.checkTimestampNanosTypesEnabled() try TimestampNTZNanosType(precision.toInt) catch { case _: NumberFormatException => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b70718593a5d0..968c894f8b692 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -647,12 +647,11 @@ object SQLConf { val TIMESTAMP_NANOS_TYPES_ENABLED = buildConf("spark.sql.timestampNanosTypes.enabled") .internal() - .doc("When true, the SQL parser accepts the parameterized nanosecond-precision " + - "timestamp types TIMESTAMP_NTZ(p), TIMESTAMP_LTZ(p), and TIMESTAMP(p) (with " + - "optional WITH LOCAL TIME ZONE / WITHOUT TIME ZONE suffix) for p in [7, 9], " + - "producing TimestampNTZNanosType / TimestampLTZNanosType. Default is false " + - "because downstream execution paths (Cast, PhysicalDataType, AnyTimestampType, " + - "encoders, Connect proto) are not yet wired for these types. See SPARK-56822.") + .doc("When true, the parameterized nanosecond-precision timestamp types " + + "TIMESTAMP_NTZ(p) / TIMESTAMP_LTZ(p) for p in [7, 9] are recognized as " + + "Spark SQL data types at user-facing entry points. Default is false because " + + "downstream execution paths (Cast, PhysicalDataType, AnyTimestampType, encoders, " + + "Connect proto) are not yet wired for these types. See SPARK-56822.") .version("4.2.0") .booleanConf .createWithDefault(false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 1a7524dbc5a73..afa657c95ede7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -25,11 +25,13 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.types.{DataTypeUtils, PhysicalDataType, UninitializedPhysicalType} import org.apache.spark.sql.catalyst.util.{CollationFactory, StringConcat} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes} -class DataTypeSuite extends SparkFunSuite { +class DataTypeSuite extends SparkFunSuite with SQLHelper { private val UNICODE_COLLATION_ID = CollationFactory.collationNameToId("UNICODE") private val UTF8_LCASE_COLLATION_ID = CollationFactory.collationNameToId("UTF8_LCASE") @@ -257,12 +259,19 @@ class DataTypeSuite extends SparkFunSuite { checkDataTypeFromJson(TimestampNTZType) checkDataTypeFromDDL(TimestampNTZType) - checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION)) - checkDataTypeFromJson(TimestampLTZNanosType(8)) - checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION)) - checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION)) - checkDataTypeFromJson(TimestampNTZNanosType(8)) - checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)) + test("SPARK-56876: from Json roundtrip for nanos timestamp types (preview flag enabled)") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + Seq( + TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION), + TimestampLTZNanosType(8), + TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION), + TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION), + TimestampNTZNanosType(8), + TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)).foreach { dt => + assert(DataType.fromJson(dt.json) === dt) + } + } + } checkDataTypeFromJson(StringType) checkDataTypeFromDDL(StringType) @@ -1484,64 +1493,84 @@ class DataTypeSuite extends SparkFunSuite { } test("SPARK-56876: parse timestamp with nanosecond precision from JSON") { - // (json-type-name, sql-type-name-in-error, factory) - val variants = Seq[(String, String, Int => DataType)]( - ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)), - ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_))) - val overflowing = "9" * 20 - - variants.foreach { case (name, sqlTypeName, factory) => - // Happy path across valid precisions, tolerant of surrounding whitespace. - TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => - assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n)) - assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n)) - assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n)) - } - - // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The overflowing - // case verifies the original digit string is preserved instead of leaking - // NumberFormatException. - Seq("0", "6", "10", overflowing).foreach { p => - checkError( - exception = intercept[SparkException] { - DataType.fromJson(s"""\"$name($p)\"""") - }, - condition = "INVALID_TIMESTAMP_PRECISION", - parameters = Map("precision" -> p, "type" -> sqlTypeName)) + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + // (json-type-name, sql-type-name-in-error, factory) + val variants = Seq[(String, String, Int => DataType)]( + ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)), + ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_))) + val overflowing = "9" * 20 + + variants.foreach { case (name, sqlTypeName, factory) => + // Happy path across valid precisions, tolerant of surrounding whitespace. + TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => + assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n)) + assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n)) + assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n)) + } + + // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The overflowing + // case verifies the original digit string is preserved instead of leaking + // NumberFormatException. + Seq("0", "6", "10", overflowing).foreach { p => + checkError( + exception = intercept[SparkException] { + DataType.fromJson(s"""\"$name($p)\"""") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> sqlTypeName)) + } + + // Malformed precision forms that don't match the regex fall through to + // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase + // (JSON type-name convention is lowercase). + Seq( + s"$name(-1)", + s"$name()", + s"$name(abc)", + s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw => + checkError( + exception = intercept[SparkIllegalArgumentException] { + DataType.fromJson(s"""\"$raw\"""") + }, + condition = "INVALID_JSON_DATA_TYPE", + parameters = Map("invalidType" -> raw)) + } } - // Malformed precision forms that don't match the regex fall through to - // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase - // (JSON type-name convention is lowercase). - Seq( - s"$name(-1)", - s"$name()", - s"$name(abc)", - s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw => - checkError( - exception = intercept[SparkIllegalArgumentException] { - DataType.fromJson(s"""\"$raw\"""") - }, - condition = "INVALID_JSON_DATA_TYPE", - parameters = Map("invalidType" -> raw)) - } + // JSON round-trip for nanos timestamp types inside struct, array, and map. + val structWithNanos = StructType(Seq( + StructField("ntz", TimestampNTZNanosType(7)), + StructField("ltz", TimestampLTZNanosType(8)))) + assert(DataType.fromJson(structWithNanos.json) === structWithNanos) + val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull = false) + assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos) + val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true) + assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos) } - // JSON round-trip for nanos timestamp types inside struct, array, and map. - val structWithNanos = StructType(Seq( - StructField("ntz", TimestampNTZNanosType(7)), - StructField("ltz", TimestampLTZNanosType(8)))) - assert(DataType.fromJson(structWithNanos.json) === structWithNanos) - val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull = false) - assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos) - val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true) - assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos) - - // Bare names without parens still map to the legacy single-precision types. + // Bare names without parens still map to the legacy single-precision types, regardless + // of the preview flag. assert(DataType.fromJson("\"timestamp_ltz\"") === TimestampType) assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType) } + test("SPARK-56965: JSON parser rejects nanos timestamp types when preview flag is off") { + Seq( + "\"timestamp_ltz(7)\"" -> "Nanosecond-precision timestamp types", + "\"timestamp_ntz(9)\"" -> "Nanosecond-precision timestamp types").foreach { + case (json, featureName) => + checkError( + exception = intercept[SparkException] { + DataType.fromJson(json) + }, + condition = "FEATURE_NOT_ENABLED", + parameters = Map( + "featureName" -> featureName, + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true")) + } + } + test("singleton DataType equality after deserialization") { // Singleton DataTypes that use `case object` pattern matching (e.g., `case BinaryType =>`). // If a non-singleton instance is created (e.g., via Kryo deserialization which doesn't call From f4fd22c5b48ca122d77de06e5c923137a468ff35 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Fri, 22 May 2026 10:13:33 +0000 Subject: [PATCH 3/4] update tests --- .../catalyst/parser/DataTypeParserSuite.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index 5c19374723f64..b55ed2b9c18a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -163,6 +163,16 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { assert(parse("timestamp without time zone") === TimestampNTZType) withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { assert(parse("timestamp(9)") === TimestampNTZNanosType(9)) + // Bare TIMESTAMP(p) routes through SqlApiConf.get.timestampType, so an + // out-of-range precision must surface as the NTZ error here. + Seq("6", "10").foreach { p => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(s"timestamp($p)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> "TIMESTAMP_NTZ")) + } } } withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_LTZ.toString) { @@ -171,6 +181,15 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { assert(parse("timestamp without time zone") === TimestampNTZType) withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { assert(parse("timestamp(9)") === TimestampLTZNanosType(9)) + // Bare TIMESTAMP(p) under LTZ default must surface as the LTZ error. + Seq("6", "10").foreach { p => + checkError( + exception = intercept[SparkException] { + CatalystSqlParser.parseDataType(s"timestamp($p)") + }, + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> "TIMESTAMP_LTZ")) + } } } } From 14f1f909c3d3b3bebd8dd0a39fd4c915d9b9ee75 Mon Sep 17 00:00:00 2001 From: Stevo Mitric Date: Fri, 22 May 2026 15:14:35 +0000 Subject: [PATCH 4/4] fix lint --- .../apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 3 +-- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index eabd128b594c7..9de6aceb757b9 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -371,8 +371,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE case TimestampNTZType => parseTimestampNtzNanosPrecision(currentCtx.precision.getText) case other => - throw SparkException.internalError( - s"Unexpected default timestamp type: $other") + throw SparkException.internalError(s"Unexpected default timestamp type: $other") } } case TIMESTAMP_LTZ => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 968c894f8b692..bdf8af0871411 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -653,6 +653,7 @@ object SQLConf { "downstream execution paths (Cast, PhysicalDataType, AnyTimestampType, encoders, " + "Connect proto) are not yet wired for these types. See SPARK-56822.") .version("4.2.0") + .withBindingPolicy(ConfigBindingPolicy.SESSION) .booleanConf .createWithDefault(false)