Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 72 additions & 9 deletions datafusion/functions/src/regex/regexpcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,14 @@ where
S: StringArrayType<'a>,
{
let (regex_scalar, is_regex_scalar) = if is_regex_scalar || regex_array.len() == 1 {
(Some(regex_array.value(0)), true)
(
if regex_array.is_null(0) {
None
} else {
Some(regex_array.value(0))
},
true,
)
} else {
(None, false)
};
Expand Down Expand Up @@ -300,7 +307,7 @@ where
match (is_regex_scalar, is_start_scalar, is_flags_scalar) {
(true, true, true) => {
let regex = match regex_scalar {
None | Some("") => {
None => {
return Ok(Arc::new(Int64Array::from(vec![0; values.len()])));
}
Some(regex) => regex,
Expand All @@ -317,7 +324,7 @@ where
}
(true, true, false) => {
let regex = match regex_scalar {
None | Some("") => {
None => {
return Ok(Arc::new(Int64Array::from(vec![0; values.len()])));
}
Some(regex) => regex,
Expand Down Expand Up @@ -346,7 +353,7 @@ where
}
(true, false, true) => {
let regex = match regex_scalar {
None | Some("") => {
None => {
return Ok(Arc::new(Int64Array::from(vec![0; values.len()])));
}
Some(regex) => regex,
Expand All @@ -366,7 +373,7 @@ where
}
(true, false, false) => {
let regex = match regex_scalar {
None | Some("") => {
None => {
return Ok(Arc::new(Int64Array::from(vec![0; values.len()])));
}
Some(regex) => regex,
Expand Down Expand Up @@ -411,7 +418,7 @@ where
.zip(regex_array.iter())
.map(|(value, regex)| {
let regex = match regex {
None | Some("") => return Ok(0),
None => return Ok(0),
Some(regex) => regex,
};

Expand Down Expand Up @@ -447,7 +454,7 @@ where
izip!(values.iter(), regex_array.iter(), flags_array.iter())
.map(|(value, regex, flags)| {
let regex = match regex {
None | Some("") => return Ok(0),
None => return Ok(0),
Some(regex) => regex,
};

Expand Down Expand Up @@ -481,7 +488,7 @@ where
izip!(values.iter(), regex_array.iter(), start_array.iter())
.map(|(value, regex, start)| {
let regex = match regex {
None | Some("") => return Ok(0),
None => return Ok(0),
Some(regex) => regex,
};

Expand Down Expand Up @@ -531,7 +538,7 @@ where
)
.map(|(value, regex, start, flags)| {
let regex = match regex {
None | Some("") => return Ok(0),
None => return Ok(0),
Some(regex) => regex,
};

Expand Down Expand Up @@ -590,6 +597,7 @@ mod tests {
fn test_regexp_count() {
test_case_sensitive_regexp_count_scalar();
test_case_sensitive_regexp_count_scalar_start();
test_case_sensitive_regexp_count_scalar_empty_pattern();
test_case_insensitive_regexp_count_scalar_flags();
test_case_sensitive_regexp_count_start_scalar_complex();

Expand Down Expand Up @@ -719,6 +727,61 @@ mod tests {
});
}

fn test_case_sensitive_regexp_count_scalar_empty_pattern() {
let values = ["abc", "abc", ""];
let regex = "";
let start = [1, 4, 1];
let expected: Vec<i64> = vec![4, 1, 0];

izip!(values.iter(), start.iter())
.enumerate()
.for_each(|(pos, (&v, &s))| {
let expected = expected.get(pos).cloned();

let v_sv = ScalarValue::Utf8(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8(Some(regex.to_string()));
let start_sv = ScalarValue::Int64(Some(s));
let re =
regexp_count_with_scalar_values(&[v_sv, regex_sv, start_sv.clone()]);
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(
v, expected,
"regexp_count scalar empty-pattern test failed"
);
}
_ => panic!("Unexpected result"),
}

let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
let re =
regexp_count_with_scalar_values(&[v_sv, regex_sv, start_sv.clone()]);
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(
v, expected,
"regexp_count scalar empty-pattern test failed"
);
}
_ => panic!("Unexpected result"),
}

let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
let re = regexp_count_with_scalar_values(&[v_sv, regex_sv, start_sv]);
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(
v, expected,
"regexp_count scalar empty-pattern test failed"
);
}
_ => panic!("Unexpected result"),
}
});
}

fn test_case_insensitive_regexp_count_scalar_flags() {
let values = ["", "aabca", "abcabc", "abcAbcab", "abcabcabc"];
let regex = "abc";
Expand Down
5 changes: 5 additions & 0 deletions datafusion/sqllogictest/test_files/regexp/regexp_count.slt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ SELECT regexp_count('123123123123123', '(12)3');
----
5

query I
SELECT regexp_count('abc', '');
----
4

query I
SELECT regexp_count('123123123123', '123', 1);
----
Expand Down