diff --git a/src/main/java/org/apache/commons/lang3/Strings.java b/src/main/java/org/apache/commons/lang3/Strings.java index 213f4d1afa7..32ca691c5ee 100644 --- a/src/main/java/org/apache/commons/lang3/Strings.java +++ b/src/main/java/org/apache/commons/lang3/Strings.java @@ -152,7 +152,7 @@ public int indexOf(final CharSequence str, final CharSequence searchStr, int sta startPos = 0; } final int endLimit = str.length() - searchStr.length() + 1; - if (startPos > endLimit) { + if (startPos >= endLimit) { return INDEX_NOT_FOUND; } if (searchStr.length() == 0) { diff --git a/src/test/java/org/apache/commons/lang3/StringsTest.java b/src/test/java/org/apache/commons/lang3/StringsTest.java index db39ef8ee39..45227bfd104 100644 --- a/src/test/java/org/apache/commons/lang3/StringsTest.java +++ b/src/test/java/org/apache/commons/lang3/StringsTest.java @@ -59,6 +59,26 @@ void testCaseInsensitiveConstant() { assertFalse(Strings.CI.isCaseSensitive()); } + /** + * For an empty search the case-insensitive {@code indexOf} returned {@code startPos} unchanged when it was exactly + * {@code str.length() + 1}, so a start position one past the end yielded an index beyond the string instead of + * {@code -1}. 
+ */ + @Test + void testCaseInsensitiveIndexOfEmptyOutOfRange() { + // repro: returned 4 (past the end of a length-3 string) before the fix + final String emptySearch = StringUtils.EMPTY; + assertEquals(-1, Strings.CI.indexOf("abc", emptySearch, 4)); + // documented out-of-range example, also -1 (already -1 before the fix) + assertEquals(-1, Strings.CI.indexOf("abc", emptySearch, 9)); + // the end position is still a valid empty match + assertEquals(3, Strings.CI.indexOf("abc", emptySearch, 3)); + assertEquals(2, Strings.CI.indexOf("aabaabaa", emptySearch, 2)); + assertEquals(0, Strings.CI.indexOf(emptySearch, emptySearch, 0)); + assertEquals(0, Strings.CI.indexOf(emptySearch, emptySearch, -1)); + assertEquals(0, Strings.CI.indexOf("a", emptySearch, -1)); + } + /** * {@code U+0130} lower-cases to the two-char sequence {@code "i̇"} outside Turkish locales, so pre-lower-casing the * search argument made the case-insensitive replace look for a two-char needle that no longer matches the single source