From 9134614e79d760cf17a56c92b647ad80b3d0c3d1 Mon Sep 17 00:00:00 2001 From: Amogh Ramesh Date: Sun, 24 May 2026 19:51:01 +0530 Subject: [PATCH] fix: replace with empty search string should be a no-op PostgreSQL returns the input unchanged when `replace` is called with an empty `from`. DataFusion was instead inserting `to` before every character and at both ends - so `replace('abc', '', 'x')` returned `xaxbxcx`. Fix the empty-`from` branch in `apply_replace` to write the input verbatim. Update the regression tests in `string_query.slt.part` that were asserting the buggy output. Closes #22253 Signed-off-by: Amogh Ramesh --- datafusion/functions/src/string/replace.rs | 23 ++++++++++++------- .../test_files/string/string_literal.slt | 20 ++++++++++++++++ .../test_files/string/string_query.slt.part | 8 +++---- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 769727999ea05..77b617bb90899 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -268,14 +268,8 @@ fn apply_replace( } if from.is_empty() { - // Empty `from`: insert `to` before each character and at both ends. - builder.append_with(|w| { - w.write_str(to); - for ch in string.chars() { - w.write_char(ch); - w.write_str(to); - } - }); + // PostgreSQL returns the input unchanged when `from` is empty (#22253). + builder.append_with(|w| w.write_str(string)); return; } @@ -346,6 +340,19 @@ mod tests { StringArray ); + test_function!( + ReplaceFunc::new(), + vec![ + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("abc")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("x")))), + ], + Ok(Some("abc")), + &str, + LargeUtf8, + LargeStringArray + ); + Ok(()) } } diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 97f2a40c13fea..76a03b6739bca 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -426,6 +426,26 @@ SELECT replace(arrow_cast('foobar', 'LargeUtf8'), arrow_cast('bar', 'LargeUtf8') ---- foohello +# PostgreSQL compatibility: empty search string is a no-op (issue #22253) +query T +SELECT replace('abc', '', 'x') +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'Dictionary(Int32, Utf8)'), '', 'x') +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'Utf8View'), arrow_cast('', 'Utf8View'), arrow_cast('x', 'Utf8View')) +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'LargeUtf8'), arrow_cast('', 'LargeUtf8'), arrow_cast('x', 'LargeUtf8')) +---- +abc query T SELECT reverse('abcde') diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index 9e5b8f91e7d8e..dac4dd06db21f 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -884,10 +884,10 @@ Xiangpeng bar NULL bar NULL datafusion数据融合 Raphael baraphael NULL datafusionДатbarион NULL datafusionДатаФусион under_score under_score NULL un iść core NULL un iść core percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt -(empty) (empty) NULL bar NULL (empty) -(empty) (empty) NULL bar NULL (empty) -% % NULL bar NULL (empty) -_ _ NULL bar NULL (empty) +(empty) (empty) NULL (empty) NULL (empty) +(empty) (empty) NULL (empty) NULL (empty) +% % NULL (empty) NULL (empty) +_ _ NULL (empty) NULL (empty) NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL