Skip to content

Commit 2f9917b

Browse files
pjfanning and Copilot authored
Add ByteString.endsWith and expand startsWith test coverage (#2862)
* Add endsWith to ByteString and improve startsWith/endsWith tests
  Agent-Logs-Url: https://github.com/pjfanning/incubator-pekko/sessions/ec49ac93-3978-458d-882a-e3a243c1b19f
  Co-authored-by: pjfanning <11783444+pjfanning@users.noreply.github.com>
* overrides
* Update ByteStringSpec.scala
* Fix startsWith[B >: Byte](IterableOnce, Int) consuming iterator before loop
  Agent-Logs-Url: https://github.com/pjfanning/incubator-pekko/sessions/fe94910a-758a-4038-be9e-32cad79ed452
  Co-authored-by: pjfanning <11783444+pjfanning@users.noreply.github.com>
* add string tests
* Create ByteString_startEnd_Benchmark.scala
* Update ByteStringSpec.scala
* Fix variable name from 'bss' to 'bs' in benchmarks
* Add SWAR-optimised startsWith/endsWith overrides to ByteString1C and ByteString1
  Agent-Logs-Url: https://github.com/pjfanning/incubator-pekko/sessions/4e1ea18b-b9d9-4c17-8b0a-bc51a3aac377
  Co-authored-by: pjfanning <11783444+pjfanning@users.noreply.github.com>
* Use Arrays.equals for non-SWAR tail bytes in ByteString1C and ByteString1 startsWith/endsWith
  Agent-Logs-Url: https://github.com/pjfanning/incubator-pekko/sessions/d8dfbe8f-158a-4cc1-92a3-0e84575dfa4c
  Co-authored-by: pjfanning <11783444+pjfanning@users.noreply.github.com>
* Update ByteString_startEnd_Benchmark.scala
* Update ByteString_startEnd_Benchmark.scala

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pjfanning <11783444+pjfanning@users.noreply.github.com>
1 parent 8e6501b commit 2f9917b

3 files changed

Lines changed: 412 additions & 6 deletions

File tree

actor-tests/src/test/scala/org/apache/pekko/util/ByteStringSpec.scala

Lines changed: 261 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -699,12 +699,6 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
699699
byteStringLong.lastIndexOf('m') should ===(12)
700700
byteStringLong.lastIndexOf('z') should ===(25)
701701
byteStringLong.lastIndexOf('a') should ===(0)
702-
703-
val long1 = ByteString1.fromString("abcdefghijklmnop") // 16 bytes
704-
long1.lastIndexOf('a'.toByte) should ===(0)
705-
long1.lastIndexOf('p'.toByte) should ===(15)
706-
long1.lastIndexOf('h'.toByte, 7) should ===(7)
707-
long1.lastIndexOf('h'.toByte, 6) should ===(-1)
708702
}
709703
"indexOf from offset" in {
710704
ByteString.empty.indexOf(5, -1) should ===(-1)
@@ -944,6 +938,15 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
944938
val slicedLong = ByteString1.fromString("xxabcdefghijk").drop(2) // "abcdefghijk", 11 bytes
945939
slicedLong.lastIndexOf('a'.toByte) should ===(0) // first byte, found via chunk scan
946940
slicedLong.lastIndexOf('h'.toByte) should ===(7) // last byte of chunk
941+
942+
val long1 = ByteString1.fromString("abcdefghijklmnop") // 16 bytes
943+
long1.lastIndexOf('a'.toByte) should ===(0)
944+
long1.lastIndexOf('p'.toByte) should ===(15)
945+
long1.lastIndexOf('h'.toByte, 7) should ===(7)
946+
long1.lastIndexOf('h'.toByte, 6) should ===(-1)
947+
948+
val concat1 = makeMultiByteStringsWithEmptyComponents()
949+
concat1.lastIndexOf(16.toByte) should ===(17)
947950
}
948951
"indexOf (specialized)" in {
949952
ByteString.empty.indexOf(5.toByte) should ===(-1)
@@ -981,6 +984,11 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
981984
concat0.indexOf(0xFF.toByte) should ===(0)
982985
concat0.indexOf(16.toByte) should ===(17)
983986
concat0.indexOf(0xFE.toByte) should ===(-1)
987+
988+
val concat1 = makeMultiByteStringsWithEmptyComponents()
989+
concat1.indexOf(0xFF.toByte) should ===(0)
990+
concat1.indexOf(16.toByte) should ===(17)
991+
concat1.indexOf(0xFE.toByte) should ===(-1)
984992
}
985993
"indexOf (specialized) from offset" in {
986994
ByteString.empty.indexOf(5.toByte, -1) should ===(-1)
@@ -1270,6 +1278,10 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
12701278
val byteStringWithOffset = ByteString1(
12711279
"abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 2, 24)
12721280
byteStringWithOffset.indexOfSlice(slice0) should ===(21)
1281+
1282+
val concat0 = makeMultiByteStringsWithEmptyComponents()
1283+
concat0.indexOfSlice(Array(15.toByte, 16.toByte)) should ===(16)
1284+
concat0.indexOfSlice(Array(16.toByte, 15.toByte)) should ===(-1)
12731285
}
12741286
"lastIndexOfSlice" in {
12751287
val slice0 = ByteString1.fromString("xyz")
@@ -1360,6 +1372,10 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
13601372
concat0.lastIndexOfSlice(Array(16.toByte, 0xFF.toByte)) should ===(17)
13611373
concat0.lastIndexOfSlice(Array(16.toByte, 0xFE.toByte)) should ===(-1)
13621374

1375+
val concat1 = makeMultiByteStringsWithEmptyComponents()
1376+
concat1.lastIndexOfSlice(Array(15.toByte, 16.toByte)) should ===(16)
1377+
concat1.lastIndexOfSlice(Array(16.toByte, 15.toByte)) should ===(-1)
1378+
13631379
// Empty source with empty slice -> 0; with non-empty slice -> -1
13641380
ByteString.empty.lastIndexOfSlice(Array.empty[Byte]) should ===(0)
13651381
ByteString.empty.lastIndexOfSlice(Array[Byte]('a')) should ===(-1)
@@ -1383,6 +1399,52 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
13831399
ByteStrings(ByteString1.fromString("ab"), ByteString1.fromString("cd"))
13841400
.lastIndexOfSlice(Array[Byte]('b', 'c')) should ===(1)
13851401
}
1402+
"startsWith" in {
1403+
val slice0 = ByteString1.fromString("abcdefghijk")
1404+
val slice1 = ByteString1.fromString("xyz")
1405+
val slice2 = ByteString1.fromString("zabcdefghijk")
1406+
val notSlice = ByteString1.fromString("12345")
1407+
val byteStringLong = ByteString1.fromString("abcdefghijklmnopqrstuvwxyz")
1408+
val byteStrings = ByteStrings(byteStringLong, byteStringLong)
1409+
byteStringLong.startsWith(slice0) should ===(true)
1410+
byteStringLong.startsWith(slice1, 23) should ===(true)
1411+
byteStringLong.startsWith(notSlice) should ===(false)
1412+
1413+
byteStrings.startsWith(slice0) should ===(true)
1414+
byteStrings.startsWith(slice1, 23) should ===(true)
1415+
byteStrings.startsWith(slice2, 25) should ===(true)
1416+
byteStrings.startsWith(notSlice) should ===(false)
1417+
1418+
val byteStringWithOffset = ByteString1(
1419+
"abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 2, 20)
1420+
val slice3 = ByteString1.fromString("cdefghijklmn")
1421+
byteStringWithOffset.startsWith(slice3) should ===(true)
1422+
1423+
// empty bytes array always returns true
1424+
byteStringLong.startsWith(Array.emptyByteArray) should ===(true)
1425+
byteStrings.startsWith(Array.emptyByteArray) should ===(true)
1426+
1427+
// exact match
1428+
val fullSliceText = "abcdefghijklmnopqrstuvwxyz"
1429+
val fullSlice = ByteString1.fromString(fullSliceText)
1430+
byteStringLong.startsWith(fullSlice) should ===(true)
1431+
byteStringLong.startsWith(fullSliceText) should ===(true)
1432+
1433+
// bytes longer than ByteString returns false
1434+
val tooLong = ByteString1.fromString("abcdefghijklmnopqrstuvwxyz1")
1435+
byteStringLong.startsWith(tooLong) should ===(false)
1436+
1437+
// ByteString1C
1438+
val byteString1C = ByteString1C("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8))
1439+
byteString1C.startsWith(slice0) should ===(true)
1440+
byteString1C.startsWith(notSlice) should ===(false)
1441+
byteString1C.startsWith(Array.emptyByteArray) should ===(true)
1442+
1443+
// empty ByteString
1444+
ByteString.empty.startsWith(Array.emptyByteArray) should ===(true)
1445+
ByteString.empty.startsWith(ByteString1.fromString("a")) should ===(false)
1446+
ByteString.empty.startsWith("a") should ===(false)
1447+
}
13861448
"startsWith (specialized)" in {
13871449
val slice0 = "abcdefghijk".getBytes(StandardCharsets.UTF_8)
13881450
val slice1 = "xyz".getBytes(StandardCharsets.UTF_8)
@@ -1403,6 +1465,193 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
14031465
"abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 2, 20)
14041466
val slice3 = "cdefghijklmn".getBytes(StandardCharsets.UTF_8)
14051467
byteStringWithOffset.startsWith(slice3) should ===(true)
1468+
1469+
// empty bytes array always returns true
1470+
byteStringLong.startsWith(Array.emptyByteArray) should ===(true)
1471+
byteStrings.startsWith(Array.emptyByteArray) should ===(true)
1472+
1473+
// exact match
1474+
val fullSlice = "abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8)
1475+
byteStringLong.startsWith(fullSlice) should ===(true)
1476+
1477+
// bytes longer than ByteString returns false
1478+
val tooLong = "abcdefghijklmnopqrstuvwxyz1".getBytes(StandardCharsets.UTF_8)
1479+
byteStringLong.startsWith(tooLong) should ===(false)
1480+
1481+
// ByteString1C
1482+
val byteString1C = ByteString1C("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8))
1483+
byteString1C.startsWith(slice0) should ===(true)
1484+
byteString1C.startsWith(notSlice) should ===(false)
1485+
byteString1C.startsWith(Array.emptyByteArray) should ===(true)
1486+
1487+
// empty ByteString
1488+
ByteString.empty.startsWith(Array.emptyByteArray) should ===(true)
1489+
ByteString.empty.startsWith(Array[Byte]('a')) should ===(false)
1490+
1491+
val concat0 = makeMultiByteStringsWithEmptyComponents()
1492+
concat0.startsWith(Array(0xFF.toByte, 0.toByte, 1.toByte)) should ===(true)
1493+
concat0.startsWith(Array(0xFF.toByte, 1.toByte)) should ===(false)
1494+
1495+
// SWAR-optimised path: needles spanning full 8-byte chunks (ByteString1)
1496+
// exactly 8 bytes: one SWAR iteration, no tail
1497+
val exactly8 = "abcdefgh".getBytes(StandardCharsets.UTF_8)
1498+
byteStringLong.startsWith(exactly8) should ===(true)
1499+
byteStringLong.startsWith("12345678".getBytes(StandardCharsets.UTF_8)) should ===(false)
1500+
// 16 bytes: two SWAR iterations, no tail
1501+
val exactly16 = "abcdefghijklmnop".getBytes(StandardCharsets.UTF_8)
1502+
byteStringLong.startsWith(exactly16) should ===(true)
1503+
byteStringLong.startsWith("abcdefghijklmnop".reverse.getBytes(StandardCharsets.UTF_8)) should ===(false)
1504+
// 9 bytes: one SWAR iteration + 1-byte tail
1505+
val nine = "abcdefghi".getBytes(StandardCharsets.UTF_8)
1506+
byteStringLong.startsWith(nine) should ===(true)
1507+
// mismatch buried inside the 2nd 8-byte chunk
1508+
val mismatchInSecondChunk = "abcdefghijklmno_".getBytes(StandardCharsets.UTF_8)
1509+
byteStringLong.startsWith(mismatchInSecondChunk) should ===(false)
1510+
// ByteString1 with startsWith(Array[Byte], offset) exercising offset != 0
1511+
val bs1WithOffset = ByteString1("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 0, 26)
1512+
bs1WithOffset.startsWith("ijklmnop".getBytes(StandardCharsets.UTF_8), 8) should ===(true)
1513+
bs1WithOffset.startsWith("12345678".getBytes(StandardCharsets.UTF_8), 8) should ===(false)
1514+
// ByteString1C SWAR path
1515+
val bs1c = ByteString1C("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8))
1516+
bs1c.startsWith(exactly8) should ===(true)
1517+
bs1c.startsWith(exactly16) should ===(true)
1518+
bs1c.startsWith(nine) should ===(true)
1519+
bs1c.startsWith("12345678".getBytes(StandardCharsets.UTF_8)) should ===(false)
1520+
bs1c.startsWith("abcdefghi".getBytes(StandardCharsets.UTF_8), 0) should ===(true)
1521+
bs1c.startsWith("bcdefghi".getBytes(StandardCharsets.UTF_8), 1) should ===(true)
1522+
bs1c.startsWith("12345678".getBytes(StandardCharsets.UTF_8), 1) should ===(false)
1523+
}
1524+
"endsWith" in {
1525+
val suffix0 = ByteString1.fromString("uvwxyz")
1526+
val suffix1 = ByteString1.fromString("abcdefghijklmnopqrstuvwxyz")
1527+
val notSuffix = ByteString1.fromString("12345")
1528+
val byteStringLong = ByteString1.fromString("abcdefghijklmnopqrstuvwxyz")
1529+
val byteStrings = ByteStrings(byteStringLong, byteStringLong)
1530+
1531+
// ByteString1 basic cases
1532+
byteStringLong.endsWith(suffix0) should ===(true)
1533+
byteStringLong.endsWith(notSuffix) should ===(false)
1534+
1535+
// exact match
1536+
byteStringLong.endsWith(suffix1) should ===(true)
1537+
1538+
// bytes longer than ByteString returns false
1539+
val tooLong = ByteString1.fromString("0abcdefghijklmnopqrstuvwxyz")
1540+
byteStringLong.endsWith(tooLong) should ===(false)
1541+
1542+
// empty bytes array always returns true
1543+
byteStringLong.endsWith(Array.emptyByteArray) should ===(true)
1544+
1545+
// ByteStrings (multi-segment)
1546+
byteStrings.endsWith(suffix0) should ===(true)
1547+
byteStrings.endsWith(notSuffix) should ===(false)
1548+
byteStrings.endsWith(Array.emptyByteArray) should ===(true)
1549+
1550+
// suffix spanning the segment boundary
1551+
val crossBoundary = ByteString1.fromString("xyzabcdefghijklmnopqrstuvwxyz")
1552+
byteStrings.endsWith(crossBoundary) should ===(true)
1553+
1554+
// ByteString1C
1555+
val byteString1C = ByteString1C("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8))
1556+
byteString1C.endsWith(suffix0) should ===(true)
1557+
byteString1C.endsWith(notSuffix) should ===(false)
1558+
byteString1C.endsWith(Array.emptyByteArray) should ===(true)
1559+
1560+
// ByteString1 with internal offset
1561+
val byteStringWithOffset = ByteString1(
1562+
"abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 2, 20)
1563+
// ByteString1(bytes, 2, 20) represents "cdefghijklmnopqrstuv"
1564+
val offsetSuffixText = "rstuv"
1565+
val offsetSuffix = ByteString1.fromString(offsetSuffixText)
1566+
byteStringWithOffset.endsWith(offsetSuffix) should ===(true)
1567+
byteStringWithOffset.endsWith(offsetSuffixText) should ===(true)
1568+
byteStringWithOffset.endsWith(notSuffix) should ===(false)
1569+
1570+
// empty ByteString
1571+
ByteString.empty.endsWith(Array.emptyByteArray) should ===(true)
1572+
ByteString.empty.endsWith(ByteString1.fromString("a")) should ===(false)
1573+
ByteString.empty.endsWith("a") should ===(false)
1574+
}
1575+
"endsWith (specialized)" in {
1576+
val suffix0 = "uvwxyz".getBytes(StandardCharsets.UTF_8)
1577+
val suffix1 = "abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8)
1578+
val notSuffix = "12345".getBytes(StandardCharsets.UTF_8)
1579+
val byteStringLong = ByteString1.fromString("abcdefghijklmnopqrstuvwxyz")
1580+
val byteStrings = ByteStrings(byteStringLong, byteStringLong)
1581+
1582+
// ByteString1 basic cases
1583+
byteStringLong.endsWith(suffix0) should ===(true)
1584+
byteStringLong.endsWith(notSuffix) should ===(false)
1585+
1586+
// exact match
1587+
byteStringLong.endsWith(suffix1) should ===(true)
1588+
1589+
// bytes longer than ByteString returns false
1590+
val tooLong = "0abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8)
1591+
byteStringLong.endsWith(tooLong) should ===(false)
1592+
1593+
// empty bytes array always returns true
1594+
byteStringLong.endsWith(Array.emptyByteArray) should ===(true)
1595+
1596+
// ByteStrings (multi-segment)
1597+
byteStrings.endsWith(suffix0) should ===(true)
1598+
byteStrings.endsWith(notSuffix) should ===(false)
1599+
byteStrings.endsWith(Array.emptyByteArray) should ===(true)
1600+
1601+
// suffix spanning the segment boundary
1602+
val crossBoundary = "xyzabcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8)
1603+
byteStrings.endsWith(crossBoundary) should ===(true)
1604+
1605+
// ByteString1C
1606+
val byteString1C = ByteString1C("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8))
1607+
byteString1C.endsWith(suffix0) should ===(true)
1608+
byteString1C.endsWith(notSuffix) should ===(false)
1609+
byteString1C.endsWith(Array.emptyByteArray) should ===(true)
1610+
1611+
// ByteString1 with internal offset
1612+
val byteStringWithOffset = ByteString1(
1613+
"abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 2, 20)
1614+
// ByteString1(bytes, 2, 20) represents "cdefghijklmnopqrstuv"
1615+
val offsetSuffix = "rstuv".getBytes(StandardCharsets.UTF_8)
1616+
byteStringWithOffset.endsWith(offsetSuffix) should ===(true)
1617+
byteStringWithOffset.endsWith(notSuffix) should ===(false)
1618+
1619+
// empty ByteString
1620+
ByteString.empty.endsWith(Array.emptyByteArray) should ===(true)
1621+
ByteString.empty.endsWith(Array[Byte]('a')) should ===(false)
1622+
1623+
val concat1 = makeMultiByteStringsWithEmptyComponents()
1624+
concat1.endsWith(Array[Byte](16.toByte, 0xFF.toByte)) should ===(true)
1625+
concat1.endsWith(Array[Byte](15.toByte, 0xFF.toByte)) should ===(false)
1626+
1627+
// SWAR-optimised path: needles spanning full 8-byte chunks (ByteString1)
1628+
val byteStringLong2 = ByteString1.fromString("abcdefghijklmnopqrstuvwxyz")
1629+
// exactly 8 bytes: one SWAR iteration, no tail
1630+
val last8 = "stuvwxyz".getBytes(StandardCharsets.UTF_8)
1631+
byteStringLong2.endsWith(last8) should ===(true)
1632+
byteStringLong2.endsWith("12345678".getBytes(StandardCharsets.UTF_8)) should ===(false)
1633+
// 16 bytes: two SWAR iterations, no tail
1634+
val last16 = "klmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8)
1635+
byteStringLong2.endsWith(last16) should ===(true)
1636+
byteStringLong2.endsWith("klmnopqrstuvwxy_".getBytes(StandardCharsets.UTF_8)) should ===(false)
1637+
// 9 bytes: one SWAR iteration + 1-byte tail
1638+
val last9 = "rstuvwxyz".getBytes(StandardCharsets.UTF_8)
1639+
byteStringLong2.endsWith(last9) should ===(true)
1640+
// mismatch buried inside the first 8-byte chunk
1641+
val mismatchInFirstChunk = "_lmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8)
1642+
byteStringLong2.endsWith(mismatchInFirstChunk) should ===(false)
1643+
// ByteString1 with internal offset
1644+
val bs1WithOffset2 = ByteString1("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8), 2, 20)
1645+
// represents "cdefghijklmnopqrstuv"
1646+
bs1WithOffset2.endsWith("mnopqrstuv".getBytes(StandardCharsets.UTF_8)) should ===(true)
1647+
bs1WithOffset2.endsWith("12345678".getBytes(StandardCharsets.UTF_8)) should ===(false)
1648+
// ByteString1C SWAR path
1649+
val bs1c2 = ByteString1C("abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8))
1650+
bs1c2.endsWith(last8) should ===(true)
1651+
bs1c2.endsWith(last16) should ===(true)
1652+
bs1c2.endsWith(last9) should ===(true)
1653+
bs1c2.endsWith("12345678".getBytes(StandardCharsets.UTF_8)) should ===(false)
1654+
bs1c2.endsWith(mismatchInFirstChunk) should ===(false)
14061655
}
14071656
"return same hashCode" in {
14081657
val slice0 = ByteString1.fromString("xyz")
@@ -2244,4 +2493,10 @@ class ByteStringSpec extends AnyWordSpec with Matchers with Checkers {
22442493
)
22452494
ByteStrings(byteStrings)
22462495
}
2496+
2497+
private def makeMultiByteStringsWithEmptyComponents(): ByteString = {
2498+
ByteString1(Array.emptyByteArray) ++
2499+
makeMultiByteStringsSample() ++
2500+
ByteString1(Array.emptyByteArray)
2501+
}
22472502
}

0 commit comments

Comments
 (0)