From f3a572ff42becc1721c349bcab35461b320656fe Mon Sep 17 00:00:00 2001
From: Dan Doel
Date: Mon, 15 Dec 2025 16:00:23 -0500
Subject: [PATCH 1/2] Avoid calling `length` on chunks in lazy `splitAt`

This instead uses the `measureOff` function used in the strict `splitAt`
to count only as many characters as are needed.
---
 src/Data/Text/Lazy.hs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs
index 73da0bce..e7ab1fd9 100644
--- a/src/Data/Text/Lazy.hs
+++ b/src/Data/Text/Lazy.hs
@@ -1306,12 +1306,18 @@ splitAt = loop
     loop :: Int64 -> Text -> (Text, Text)
     loop !_ Empty     = (empty, empty)
     loop n t | n <= 0 = (empty, t)
-    loop n (Chunk t ts)
-         | n < len   = let (t',t'') = T.splitAt (int64ToInt n) t
-                       in (Chunk t' Empty, Chunk t'' ts)
-         | otherwise = let (ts',ts'') = loop (n - len) ts
+    loop n (Chunk t@(T.Text arr off len) ts)
+         | m > 0, m >= len = (Chunk t Empty, ts)
+         | m > 0 = let t' = T.Text arr off m
+                       t'' = T.Text arr (off+m) (len-m)
+                   in (Chunk t' Empty, Chunk t'' ts)
+         | otherwise = let (ts', ts'') = loop (n + intToInt64 m) ts
                        in (Chunk t ts', ts'')
-         where len = intToInt64 (T.length t)
+         where
+           k | n > intToInt64 len = len+1
+             | otherwise = int64ToInt n
+           m = T.measureOff k t
+
 
 -- | /O(n)/ 'splitAtWord' @n t@ returns a strict pair whose first
 -- element is a prefix of @t@ whose chunks contain @n@ 'Word8'

From 2091e7ed3340f315c30dab0fb5a78bedf2fb3e4f Mon Sep 17 00:00:00 2001
From: Dan Doel
Date: Mon, 15 Dec 2025 17:59:06 -0500
Subject: [PATCH 2/2] Check for maxBound in lazy splitAt in its own case

---
 src/Data/Text/Lazy.hs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs
index e7ab1fd9..2a359157 100644
--- a/src/Data/Text/Lazy.hs
+++ b/src/Data/Text/Lazy.hs
@@ -1307,6 +1307,8 @@ splitAt = loop
     loop !_ Empty     = (empty, empty)
     loop n t | n <= 0 = (empty, t)
     loop n (Chunk t@(T.Text arr off len) ts)
+         | n > mx = let (ts', ts'') = loop (n - intToInt64 (T.length t)) ts
+                    in (Chunk t ts', ts'')
          | m > 0, m >= len = (Chunk t Empty, ts)
          | m > 0 = let t' = T.Text arr off m
                        t'' = T.Text arr (off+m) (len-m)
@@ -1314,9 +1316,8 @@ splitAt = loop
          | otherwise = let (ts', ts'') = loop (n + intToInt64 m) ts
                        in (Chunk t ts', ts'')
          where
-           k | n > intToInt64 len = len+1
-             | otherwise = int64ToInt n
-           m = T.measureOff k t
+           mx = intToInt64 P.maxBound
+           m = T.measureOff (int64ToInt n) t
 
 
 -- | /O(n)/ 'splitAtWord' @n t@ returns a strict pair whose first