@@ -27,6 +27,7 @@ import org.apache.spark.sql.CometTestBase
2727import org .apache .spark .sql .catalyst .expressions .StructsToCsv
2828import org .apache .spark .sql .execution .adaptive .AdaptiveSparkPlanHelper
2929import org .apache .spark .sql .functions ._
30+ import org .apache .spark .sql .types .StringType
3031
3132import org .apache .comet .testing .{DataGenOptions , ParquetGenerator , SchemaGenOptions }
3233
@@ -71,15 +72,44 @@ class CometCsvExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper
7172 val table = " t1"
7273 withSQLConf(CometConf .COMET_NATIVE_SCAN_IMPL .key -> CometConf .SCAN_NATIVE_ICEBERG_COMPAT ) {
7374 withTable(table) {
75+ val newLinesStr =
76+ """ abc
77+ | bcde""" .stripMargin
7478 sql(s " create table $table(col string) using parquet " )
7579 sql(s " insert into $table values('') " )
7680 sql(s " insert into $table values(cast(null as string)) " )
7781 sql(s " insert into $table values(' abc') " )
7882 sql(s " insert into $table values('abc ') " )
7983 sql(s " insert into $table values(' abc ') " )
8084 sql(s """ insert into $table values('abc \"abc\"') """ )
81- val df = sql(s " select * from $table" )
85+ sql(s """ insert into $table values(' $newLinesStr') """ )
86+ sql(s """ insert into $table values('abc,def') """ )
87+ sql(s """ insert into $table values('abc;def;ghi') """ )
88+ sql(s """ insert into $table values('abc\tdef') """ )
89+ sql(s """ insert into $table values('a"b"c') """ )
90+ sql(s """ insert into $table values('"quoted"') """ )
91+ sql(s """ insert into $table values('line1\nline2') """ )
92+ sql(s """ insert into $table values('line1\rline2') """ )
93+ sql(s """ insert into $table values('line1\r\nline2') """ )
94+ sql(s """ insert into $table values('a''b') """ )
95+ sql(s """ insert into $table values('a \\\\ b') """ )
96+
97+ val df = sql(s " select * from $table order by col " )
98+
99+ // Default options
82100 checkSparkAnswerAndOperator(df.select(to_csv(struct(col(" col" ), lit(1 )))))
101+
102+ // Custom delimiter
103+ checkSparkAnswerAndOperator(
104+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" delimiter" -> " ;" ).asJava)))
105+
106+ checkSparkAnswerAndOperator(
107+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" delimiter" -> " |" ).asJava)))
108+
109+ checkSparkAnswerAndOperator(
110+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" delimiter" -> " \t " ).asJava)))
111+
112+ // Whitespace handling
83113 checkSparkAnswerAndOperator(
84114 df.select(
85115 to_csv(
@@ -88,8 +118,78 @@ class CometCsvExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper
88118 " delimiter" -> " ;" ,
89119 " ignoreLeadingWhiteSpace" -> " false" ,
90120 " ignoreTrailingWhiteSpace" -> " false" ).asJava)))
121+
122+ checkSparkAnswerAndOperator(
123+ df.select(
124+ to_csv(
125+ struct(col(" col" ), lit(1 )),
126+ Map (
127+ " ignoreLeadingWhiteSpace" -> " true" ,
128+ " ignoreTrailingWhiteSpace" -> " false" ).asJava)))
129+
130+ checkSparkAnswerAndOperator(
131+ df.select(
132+ to_csv(
133+ struct(col(" col" ), lit(1 )),
134+ Map (
135+ " ignoreLeadingWhiteSpace" -> " false" ,
136+ " ignoreTrailingWhiteSpace" -> " true" ).asJava)))
137+
138+ checkSparkAnswerAndOperator(df.select(to_csv(
139+ struct(col(" col" ), lit(1 )),
140+ Map (" ignoreLeadingWhiteSpace" -> " true" , " ignoreTrailingWhiteSpace" -> " true" ).asJava)))
141+
142+ // Escape character
143+ checkSparkAnswerAndOperator(
144+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" escape" -> " \\ " ).asJava)))
145+
146+ checkSparkAnswerAndOperator(
147+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" escape" -> " /" ).asJava)))
148+
149+ // Quote options
91150 checkSparkAnswerAndOperator(
92151 df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" quoteAll" -> " true" ).asJava)))
152+
153+ checkSparkAnswerAndOperator(
154+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" quoteAll" -> " false" ).asJava)))
155+
156+ // Null value representation
157+ checkSparkAnswerAndOperator(
158+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" nullValue" -> " NULL" ).asJava)))
159+
160+ checkSparkAnswerAndOperator(
161+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" nullValue" -> " N/A" ).asJava)))
162+
163+ checkSparkAnswerAndOperator(
164+ df.select(to_csv(struct(col(" col" ), lit(1 )), Map (" nullValue" -> " " ).asJava)))
165+
166+ // Combined options
167+ checkSparkAnswerAndOperator(
168+ df.select(
169+ to_csv(
170+ struct(col(" col" ), lit(1 )),
171+ Map (
172+ " delimiter" -> " |" ,
173+ " quoteAll" -> " false" ,
174+ " escape" -> " \\ " ,
175+ " nullValue" -> " NULL" ).asJava)))
176+
177+ checkSparkAnswerAndOperator(
178+ df.select(to_csv(
179+ struct(col(" col" ), lit(1 )),
180+ Map (
181+ " delimiter" -> " ;" ,
182+ " quoteAll" -> " false" ,
183+ " ignoreLeadingWhiteSpace" -> " true" ,
184+ " ignoreTrailingWhiteSpace" -> " true" ,
185+ " nullValue" -> " N/A" ).asJava)))
186+
187+ // Edge cases with multiple columns
188+ checkSparkAnswerAndOperator(
189+ df.select(
190+ to_csv(
191+ struct(col(" col" ), lit(1 ), lit(" test" ), lit(null ).cast(StringType )),
192+ Map (" delimiter" -> " ," , " quoteAll" -> " true" ).asJava)))
93193 }
94194 }
95195 }
0 commit comments