Skip to content

Commit b2dbc2b

Browse files
andygroveclaude
andcommitted
perf: reduce combinatorial explosion in temporal fuzz tests
The temporal tests had 72 configurations (3 output types × 3 timezones × 2 × 2 × 2 settings), reduced to 10 configurations: - INT96: 2 timezones × 3 int96 combinations = 6 configs - TIMESTAMP_MICROS/MILLIS: 2 timezones × 1 combination = 2 configs each Key changes: - INT96-specific settings only tested for INT96 format (where they matter) - Reduced timezones from 3 to 2 (local + UTC) - Removed inferTimestampNtzEnabled loop (not relevant for this test) - Added TimestampType column to properly exercise timestamp settings This reduces temporal test time by ~5x. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 035aeff commit b2dbc2b

1 file changed

Lines changed: 40 additions & 27 deletions

File tree

spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -271,19 +271,25 @@ class CometFuzzTestSuite extends CometFuzzTestBase {
271271
}
272272

273273
test("Parquet temporal types written as INT96") {
274-
testParquetTemporalTypes(ParquetOutputTimestampType.INT96)
274+
// INT96 is the only format where int96AsTimestamp and int96TimestampConversion matter
275+
testParquetTemporalTypes(ParquetOutputTimestampType.INT96, testInt96Settings = true)
275276
}
276277

277278
test("Parquet temporal types written as TIMESTAMP_MICROS") {
278-
testParquetTemporalTypes(ParquetOutputTimestampType.TIMESTAMP_MICROS)
279+
testParquetTemporalTypes(
280+
ParquetOutputTimestampType.TIMESTAMP_MICROS,
281+
testInt96Settings = false)
279282
}
280283

281284
test("Parquet temporal types written as TIMESTAMP_MILLIS") {
282-
testParquetTemporalTypes(ParquetOutputTimestampType.TIMESTAMP_MILLIS)
285+
testParquetTemporalTypes(
286+
ParquetOutputTimestampType.TIMESTAMP_MILLIS,
287+
testInt96Settings = false)
283288
}
284289

285290
private def testParquetTemporalTypes(
286-
outputTimestampType: ParquetOutputTimestampType.Value): Unit = {
291+
outputTimestampType: ParquetOutputTimestampType.Value,
292+
testInt96Settings: Boolean): Unit = {
287293

288294
val dataGenOptions = DataGenOptions(generateNegativeZero = false)
289295

@@ -294,15 +300,17 @@ class CometFuzzTestSuite extends CometFuzzTestBase {
294300
SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outputTimestampType.toString,
295301
SQLConf.SESSION_LOCAL_TIMEZONE.key -> defaultTimezone) {
296302

303+
// Include both DateType and TimestampType to properly test timestamp configurations
297304
// TODO test with MapType
298305
// https://github.com/apache/datafusion-comet/issues/2945
299306
val schema = StructType(
300307
Seq(
301308
StructField("c0", DataTypes.DateType),
302-
StructField("c1", DataTypes.createArrayType(DataTypes.DateType)),
309+
StructField("c1", DataTypes.TimestampType),
310+
StructField("c2", DataTypes.createArrayType(DataTypes.DateType)),
303311
StructField(
304-
"c2",
305-
DataTypes.createStructType(Array(StructField("c3", DataTypes.DateType))))))
312+
"c3",
313+
DataTypes.createStructType(Array(StructField("c4", DataTypes.DateType))))))
306314

307315
ParquetGenerator.makeParquetFile(
308316
random,
@@ -313,28 +321,33 @@ class CometFuzzTestSuite extends CometFuzzTestBase {
313321
dataGenOptions)
314322
}
315323

316-
Seq(defaultTimezone, "UTC", "America/Denver").foreach { tz =>
317-
Seq(true, false).foreach { inferTimestampNtzEnabled =>
318-
Seq(true, false).foreach { int96TimestampConversion =>
319-
Seq(true, false).foreach { int96AsTimestamp =>
320-
withSQLConf(
321-
CometConf.COMET_ENABLED.key -> "true",
322-
SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz,
323-
SQLConf.PARQUET_INT96_AS_TIMESTAMP.key -> int96AsTimestamp.toString,
324-
SQLConf.PARQUET_INT96_TIMESTAMP_CONVERSION.key -> int96TimestampConversion.toString,
325-
SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.key -> inferTimestampNtzEnabled.toString) {
324+
// Test with local timezone and UTC (2 timezones instead of 3)
325+
Seq(defaultTimezone, "UTC").foreach { tz =>
326+
// INT96-specific settings only matter for INT96 format
327+
val int96Combinations = if (testInt96Settings) {
328+
// Test key combinations for INT96: both settings true, both false, and mixed
329+
Seq((true, true), (false, false), (true, false))
330+
} else {
331+
// For non-INT96 formats, these settings don't matter, so just use defaults
332+
Seq((true, false))
333+
}
334+
335+
int96Combinations.foreach { case (int96AsTimestamp, int96TimestampConversion) =>
336+
withSQLConf(
337+
CometConf.COMET_ENABLED.key -> "true",
338+
SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz,
339+
SQLConf.PARQUET_INT96_AS_TIMESTAMP.key -> int96AsTimestamp.toString,
340+
SQLConf.PARQUET_INT96_TIMESTAMP_CONVERSION.key -> int96TimestampConversion.toString) {
326341

327-
val df = spark.read.parquet(filename.toString)
328-
df.createOrReplaceTempView("t1")
329-
val columns =
330-
df.schema.fields
331-
.filter(f => DataTypeSupport.hasTemporalType(f.dataType))
332-
.map(_.name)
342+
val df = spark.read.parquet(filename.toString)
343+
df.createOrReplaceTempView("t1")
344+
val columns =
345+
df.schema.fields
346+
.filter(f => DataTypeSupport.hasTemporalType(f.dataType))
347+
.map(_.name)
333348

334-
for (col <- columns) {
335-
checkSparkAnswer(s"SELECT $col FROM t1 ORDER BY $col")
336-
}
337-
}
349+
for (col <- columns) {
350+
checkSparkAnswer(s"SELECT $col FROM t1 ORDER BY $col")
338351
}
339352
}
340353
}

0 commit comments

Comments
 (0)