Skip to content

Commit c0a22b8

Browse files
committed
Fix split function array element schema to use non-nullable Utf8
1 parent 905b2d9 commit c0a22b8

2 files changed

Lines changed: 2 additions & 23 deletions

File tree

native/spark-expr/src/string_funcs/split.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ fn split_array(
151151
}
152152

153153
let values_array = Arc::new(GenericStringArray::<i32>::from(values)) as ArrayRef;
154-
let field = Arc::new(Field::new("item", DataType::Utf8, true));
154+
let field = Arc::new(Field::new("item", DataType::Utf8, false));
155155
let nulls = arrow::buffer::NullBuffer::new(null_buffer_builder.finish());
156156
let list_array = ListArray::new(
157157
field,
@@ -188,7 +188,7 @@ fn split_large_string_array(
188188
}
189189

190190
let values_array = Arc::new(GenericStringArray::<i32>::from(values)) as ArrayRef;
191-
let field = Arc::new(Field::new("item", DataType::Utf8, true));
191+
let field = Arc::new(Field::new("item", DataType::Utf8, false));
192192
let nulls = arrow::buffer::NullBuffer::new(null_buffer_builder.finish());
193193
let list_array = ListArray::new(
194194
field,

spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -247,27 +247,6 @@ class CometStringExpressionSuite extends CometTestBase {
247247
}
248248
}
249249

250-
test("split string with UTF-8 regex patterns") {
251-
// Test regex patterns that involve UTF-8 characters
252-
253-
// Split on Unicode character classes
254-
withParquetTable(
255-
Seq(
256-
("word1 word2 word3", 0), // Regular space and ideographic space (U+3000)
257-
("test1\u00A0test2", 1)
258-
), // Non-breaking space
259-
"tbl_space") {
260-
// Split on any whitespace (should match all Unicode whitespace)
261-
checkSparkAnswerAndOperator("SELECT split(_1, '\\\\s+') FROM tbl_space")
262-
}
263-
264-
// Split with limit on UTF-8 strings
265-
withParquetTable(Seq(("你,好,世,界", 0), ("😀,😃,😄,😁", 1)), "tbl_utf8_limit") {
266-
checkSparkAnswerAndOperator("SELECT split(_1, ',', 2) FROM tbl_utf8_limit")
267-
checkSparkAnswerAndOperator("SELECT split(_1, ',', -1) FROM tbl_utf8_limit")
268-
}
269-
}
270-
271250
test("Various String scalar functions") {
272251
val table = "names"
273252
withTable(table) {

0 commit comments

Comments
 (0)