Skip to content

Commit 2c904fc

Browse files
committed
test: expand array_exists coverage with additional types and fallback paths
Adapted from PR #3611. Adds coverage for: DataFrame API usage; decimal, date, and timestamp element types; literal-only lambda bodies (true / false / null); CaseWhen / If in the lambda body; fallback for binary element type; fallback for lambdas capturing outer columns; and fallback for nested lambdas.
1 parent b03eb57 commit 2c904fc

1 file changed

Lines changed: 100 additions & 0 deletions

File tree

spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala

Lines changed: 100 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1132,4 +1132,104 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
11321132
checkSparkAnswerAndOperator(sql("SELECT exists(arr, x -> x > 0) FROM t"))
11331133
}
11341134
}
1135+
1136+
// Exercises exists() through the DataFrame API (Scala lambda) instead of SQL text.
// NOTE(review): checkSparkAnswerAndOperator is defined outside this view; presumably it
// compares Comet's result with Spark's and asserts native execution — confirm.
test("array_exists - DataFrame API") {
1137+
withTable("t") {
1138+
sql("CREATE TABLE t (arr ARRAY<INT>) USING parquet")
1139+
// Rows cover a 3-element array, a 2-element array, an empty array and a NULL array.
sql("INSERT INTO t VALUES (array(1, 2, 3)), (array(1, 2)), (array()), (NULL)")
1140+
val df = spark.table("t")
1141+
// Single predicate: only array(1, 2, 3) holds an element greater than 2.
checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 2)))
1142+
// Two exists() calls in one projection; no inserted element exceeds 100,
// so "any_large" never finds a match.
checkSparkAnswerAndOperator(
1143+
df.select(
1144+
exists(col("arr"), x => x > 0).as("any_positive"),
1145+
exists(col("arr"), x => x > 100).as("any_large")))
1146+
}
1147+
}
1148+
1149+
// Runs exists() over an ARRAY<DECIMAL(10,2)> column, comparing each element
// against a BigDecimal literal.
test("array_exists - decimal element type") {
1150+
withTable("t") {
1151+
sql("CREATE TABLE t (arr ARRAY<DECIMAL(10,2)>) USING parquet")
1152+
// First row has elements above 2.00; every element of the second row is below it.
sql("INSERT INTO t VALUES (array(1.50, 2.75, 3.25)), (array(0.10, 0.20))")
1153+
checkSparkAnswerAndOperator(
1154+
spark.table("t").select(exists(col("arr"), x => x > lit(BigDecimal("2.00")))))
1155+
}
1156+
}
1157+
1158+
// Runs exists() over an ARRAY<DATE> column; the comparison threshold is a string
// literal cast to date.
test("array_exists - date element type") {
1159+
withTable("t") {
1160+
sql("CREATE TABLE t (arr ARRAY<DATE>) USING parquet")
1161+
// First row contains a date after 2024-03-01; the second row only an earlier date.
sql(
1162+
"INSERT INTO t VALUES (array(date'2024-01-01', date'2024-06-15')), (array(date'2023-01-01'))")
1163+
checkSparkAnswerAndOperator(
1164+
spark
1165+
.table("t")
1166+
.select(exists(col("arr"), x => x > lit("2024-03-01").cast("date"))))
1167+
}
1168+
}
1169+
1170+
// Runs exists() over an ARRAY<TIMESTAMP> column; the comparison threshold is a
// string literal cast to timestamp.
test("array_exists - timestamp element type") {
1171+
withTable("t") {
1172+
sql("CREATE TABLE t (arr ARRAY<TIMESTAMP>) USING parquet")
1173+
// First row contains a timestamp after 2024-03-01 00:00:00; the second row only
// an earlier one.
sql(
1174+
"INSERT INTO t VALUES " +
1175+
"(array(timestamp'2024-01-01 00:00:00', timestamp'2024-06-15 12:30:00')), " +
1176+
"(array(timestamp'2023-01-01 00:00:00'))")
1177+
checkSparkAnswerAndOperator(
1178+
spark
1179+
.table("t")
1180+
.select(exists(col("arr"), x => x > lit("2024-03-01 00:00:00").cast("timestamp"))))
1181+
}
1182+
}
1183+
1184+
// Covers lambdas whose body ignores the element and returns a constant:
// false, true, and a NULL cast to boolean.
test("array_exists - literal lambda bodies") {
1185+
withTable("t") {
1186+
sql("CREATE TABLE t (arr ARRAY<INT>) USING parquet")
1187+
// Rows cover a non-empty array, an empty array and a NULL array.
sql("INSERT INTO t VALUES (array(1, 2, 3)), (array()), (NULL)")
1188+
val df = spark.table("t")
1189+
checkSparkAnswerAndOperator(df.select(exists(col("arr"), _ => lit(false))))
1190+
checkSparkAnswerAndOperator(df.select(exists(col("arr"), _ => lit(true))))
1191+
// The cast gives the NULL literal a boolean type for use as the lambda result.
checkSparkAnswerAndOperator(df.select(exists(col("arr"), _ => lit(null).cast("boolean"))))
1192+
}
1193+
}
1194+
1195+
// Covers conditional expressions inside the lambda body, written as SQL text
// via selectExpr: a CASE WHEN form and an IF(...) form of the same predicate.
test("array_exists - CaseWhen/If in lambda") {
1196+
withTable("t") {
1197+
sql("CREATE TABLE t (arr ARRAY<INT>) USING parquet")
1198+
// Rows cover an all-positive array, a mixed negative/zero/positive array and NULL.
sql("INSERT INTO t VALUES (array(1, 2, 3)), (array(-1, 0, 1)), (NULL)")
1199+
val df = spark.table("t")
1200+
checkSparkAnswerAndOperator(
1201+
df.selectExpr("exists(arr, x -> CASE WHEN x > 0 THEN true ELSE false END)"))
1202+
checkSparkAnswerAndOperator(df.selectExpr("exists(arr, x -> IF(x > 0, true, false))"))
1203+
}
1204+
}
1205+
1206+
// Uses an ARRAY<BINARY> column and expects the query to report the fallback
// reason "Unsupported array element type".
// NOTE(review): checkSparkAnswerAndFallbackReason is defined outside this view;
// presumably it checks the answer against Spark and that the plan reports the
// given reason — confirm.
test("array_exists - fallback for unsupported element type") {
1207+
withTable("t") {
1208+
sql("CREATE TABLE t (arr ARRAY<BINARY>) USING parquet")
1209+
sql("INSERT INTO t VALUES (array(X'01', X'02'))")
1210+
checkSparkAnswerAndFallbackReason(
1211+
spark.table("t").select(exists(col("arr"), x => x.isNotNull)),
1212+
"Unsupported array element type")
1213+
}
1214+
}
1215+
1216+
// The lambda compares each element with the row's "threshold" column, i.e. it
// references a column other than the array being iterated. The expected fallback
// reason is "Lambda references columns outside the array element".
test("array_exists - fallback for lambda capturing outer column") {
1217+
withTable("t") {
1218+
sql("CREATE TABLE t (arr ARRAY<INT>, threshold INT) USING parquet")
1219+
// First row has an element above its threshold (3 > 2); the second row does not.
sql("INSERT INTO t VALUES (array(1, 2, 3), 2), (array(1, 2), 5)")
1220+
checkSparkAnswerAndFallbackReason(
1221+
spark.table("t").select(exists(col("arr"), x => x > col("threshold"))),
1222+
"Lambda references columns outside the array element")
1223+
}
1224+
}
1225+
1226+
// Nests one exists() inside another: the outer lambda over arr1 evaluates an
// inner exists() over arr2 whose predicate uses the outer variable x.
// The expected fallback reason is the same message used by the outer-column test.
test("array_exists - fallback for nested lambda") {
1227+
withTable("t") {
1228+
sql("CREATE TABLE t (arr1 ARRAY<INT>, arr2 ARRAY<INT>) USING parquet")
1229+
// First row: every arr2 element exceeds every arr1 element; second row: none does.
sql("INSERT INTO t VALUES (array(1, 2, 3), array(4, 5, 6)), (array(10), array(1))")
1230+
checkSparkAnswerAndFallbackReason(
1231+
spark.table("t").select(exists(col("arr1"), x => exists(col("arr2"), y => y > x))),
1232+
"Lambda references columns outside the array element")
1233+
}
1234+
}
11351235
}

0 commit comments

Comments
 (0)