@@ -1132,4 +1132,104 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
11321132 checkSparkAnswerAndOperator(sql(" SELECT exists(arr, x -> x > 0) FROM t" ))
11331133 }
11341134 }
1135+
// Exercises `exists` through the DataFrame API (Scala lambdas instead of SQL text) on an
// ARRAY<INT> column whose rows cover populated arrays, an empty array and a NULL array.
test("array_exists - DataFrame API") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<INT>) USING parquet")
    sql("INSERT INTO t VALUES (array(1, 2, 3)), (array(1, 2)), (array()), (NULL)")
    val df = spark.table("t")
    // Single predicate: true only for the row that contains an element greater than 2.
    checkSparkAnswerAndOperator(df.select(exists(col("arr"), x => x > 2)))
    // Two independent `exists` projections in the same select.
    checkSparkAnswerAndOperator(
      df.select(
        exists(col("arr"), x => x > 0).as("any_positive"),
        exists(col("arr"), x => x > 100).as("any_large")))
  }
}
1148+
// Runs `exists` over an ARRAY<DECIMAL(10,2)> column, comparing each element with a
// BigDecimal literal.
test("array_exists - decimal element type") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<DECIMAL(10,2)>) USING parquet")
    sql("INSERT INTO t VALUES (array(1.50, 2.75, 3.25)), (array(0.10, 0.20))")
    // The BigDecimal text must carry no surrounding whitespace: java.math.BigDecimal rejects it.
    checkSparkAnswerAndOperator(
      spark.table("t").select(exists(col("arr"), x => x > lit(BigDecimal("2.00")))))
  }
}
1157+
// Runs `exists` over an ARRAY<DATE> column, comparing each element with a string literal
// cast to date.
test("array_exists - date element type") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<DATE>) USING parquet")
    sql(
      "INSERT INTO t VALUES (array(date'2024-01-01', date'2024-06-15')), (array(date'2023-01-01'))")
    checkSparkAnswerAndOperator(
      spark
        .table("t")
        .select(exists(col("arr"), x => x > lit("2024-03-01").cast("date"))))
  }
}
1169+
// Runs `exists` over an ARRAY<TIMESTAMP> column, comparing each element with a string
// literal cast to timestamp.
test("array_exists - timestamp element type") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<TIMESTAMP>) USING parquet")
    // The statement is split across literals only to keep the source lines short.
    sql(
      "INSERT INTO t VALUES " +
        "(array(timestamp'2024-01-01 00:00:00', timestamp'2024-06-15 12:30:00')), " +
        "(array(timestamp'2023-01-01 00:00:00'))")
    checkSparkAnswerAndOperator(
      spark
        .table("t")
        .select(exists(col("arr"), x => x > lit("2024-03-01 00:00:00").cast("timestamp"))))
  }
}
1183+
// Uses lambdas that ignore the element and return a constant (false, true, or a NULL
// boolean) over rows holding a populated array, an empty array and a NULL array.
test("array_exists - literal lambda bodies") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<INT>) USING parquet")
    sql("INSERT INTO t VALUES (array(1, 2, 3)), (array()), (NULL)")
    val df = spark.table("t")
    checkSparkAnswerAndOperator(df.select(exists(col("arr"), _ => lit(false))))
    checkSparkAnswerAndOperator(df.select(exists(col("arr"), _ => lit(true))))
    // The untyped null literal is cast so that the lambda body has boolean type.
    checkSparkAnswerAndOperator(df.select(exists(col("arr"), _ => lit(null).cast("boolean"))))
  }
}
1194+
// Checks `exists` when the lambda body is a conditional expression, written once as
// CASE WHEN and once as IF, over positive, mixed-sign and NULL arrays.
test(" array_exists - CaseWhen/If in lambda") {
  withTable(" t") {
    sql(" CREATE TABLE t (arr ARRAY<INT>) USING parquet")
    sql(" INSERT INTO t VALUES (array(1, 2, 3)), (array(-1, 0, 1)), (NULL)")
    val source = spark.table(" t")
    // Both spellings of the conditional are verified in the same order as they are listed.
    Seq(
      " exists(arr, x -> CASE WHEN x > 0 THEN true ELSE false END)",
      " exists(arr, x -> IF(x > 0, true, false))").foreach { predicate =>
      checkSparkAnswerAndOperator(source.selectExpr(predicate))
    }
  }
}
1205+
// Applies `exists` to an ARRAY<BINARY> column and hands the expected fallback reason text
// to checkSparkAnswerAndFallbackReason.
test("array_exists - fallback for unsupported element type") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<BINARY>) USING parquet")
    sql("INSERT INTO t VALUES (array(X'01', X'02'))")
    // NOTE(review): how the helper matches the reason text (substring vs. exact) is not
    // visible from this test; confirm against its definition.
    checkSparkAnswerAndFallbackReason(
      spark.table("t").select(exists(col("arr"), x => x.isNotNull)),
      "Unsupported array element type")
  }
}
1215+
// Uses a lambda whose body compares the element with another column of the same row
// (`threshold`) and hands the expected fallback reason text to
// checkSparkAnswerAndFallbackReason.
test("array_exists - fallback for lambda capturing outer column") {
  withTable("t") {
    sql("CREATE TABLE t (arr ARRAY<INT>, threshold INT) USING parquet")
    sql("INSERT INTO t VALUES (array(1, 2, 3), 2), (array(1, 2), 5)")
    checkSparkAnswerAndFallbackReason(
      spark.table("t").select(exists(col("arr"), x => x > col("threshold"))),
      "Lambda references columns outside the array element")
  }
}
1225+
// Nests one `exists` inside another: the inner lambda iterates `arr2` and refers to the
// outer lambda variable `x`. The expected fallback reason text passed to
// checkSparkAnswerAndFallbackReason is the same one used for outer-column capture.
test("array_exists - fallback for nested lambda") {
  withTable("t") {
    sql("CREATE TABLE t (arr1 ARRAY<INT>, arr2 ARRAY<INT>) USING parquet")
    sql("INSERT INTO t VALUES (array(1, 2, 3), array(4, 5, 6)), (array(10), array(1))")
    checkSparkAnswerAndFallbackReason(
      spark.table("t").select(exists(col("arr1"), x => exists(col("arr2"), y => y > x))),
      "Lambda references columns outside the array element")
  }
}
11351235}
0 commit comments