From a843a51211c49b53c68600c3dcfbbd1a1ea06182 Mon Sep 17 00:00:00 2001 From: Li-yao Xia Date: Thu, 31 Jul 2025 15:49:59 +0200 Subject: [PATCH 1/4] Add regression test for #648 --- tests/Tests/Regressions.hs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/Tests/Regressions.hs b/tests/Tests/Regressions.hs index e151184b..892d635d 100644 --- a/tests/Tests/Regressions.hs +++ b/tests/Tests/Regressions.hs @@ -202,6 +202,17 @@ t633 = _ <- evaluate (stimes (maxBound :: Word) "a" :: T.Text) assertFailure "should fail" +t648 :: IO () +t648 = withTempFile $ \_ h -> do + hSetEncoding h utf8 + hSetNewlineMode h (NewlineMode LF CRLF) + hSetBuffering h (BlockBuffering $ Just 4) + let line = T.replicate 2047 "_" + T.hPutStrLn h line + hSeek h AbsoluteSeek 0 + line' <- T.hGetLine h + T.append line "\r" @?= line' + tests :: F.TestTree tests = F.testGroup "Regressions" [ F.testCase "hGetContents_crash" hGetContents_crash @@ -221,4 +232,5 @@ tests = F.testGroup "Regressions" , F.testCase "t529" t529 , F.testCase "t559" t559 , F.testCase "t633" t633 + , F.testCase "t648" t648 ] From 75febc60b4a1ff5ce7240ad1988c70493d3fdc51 Mon Sep 17 00:00:00 2001 From: Li-yao Xia Date: Wed, 30 Jul 2025 22:58:00 +0200 Subject: [PATCH 2/4] Improve read_write test to check CRLF encoding --- tests/Tests/Properties/LowLevel.hs | 44 ++++++++------ tests/Tests/QuickCheckUtils.hs | 92 +++++++++++++++++------------- tests/Tests/Utils.hs | 6 +- 3 files changed, 85 insertions(+), 57 deletions(-) diff --git a/tests/Tests/Properties/LowLevel.hs b/tests/Tests/Properties/LowLevel.hs index 0ffb5f9b..f5bbe9fb 100644 --- a/tests/Tests/Properties/LowLevel.hs +++ b/tests/Tests/Properties/LowLevel.hs @@ -2,6 +2,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-missing-signatures -fno-warn-unused-imports #-} @@ -16,6 +17,7 @@ module Tests.Properties.LowLevel (testLowLevel) where import Prelude hiding (head, tail) import Control.Applicative ((<$>), pure) import Control.Exception as E (SomeException, catch, evaluate) +import Data.Functor.Identity (Identity(..)) import Data.Int (Int32, Int64) import Data.Text.Foreign import Data.Text.Internal (Text(..), mul, mul32, mul64, safe) @@ -109,14 +111,32 @@ t_literal_foo = T.pack "foo" -- tl_put_get = write_read TL.unlines TL.filter put get -- where put h = withRedirect h IO.stdout . TL.putStr -- get h = withRedirect h IO.stdin TL.getContents -t_write_read = write_read T.unlines T.filter T.hPutStr T.hGetContents unSqrt -tl_write_read = write_read TL.unlines TL.filter TL.hPutStr TL.hGetContents unSqrt -t_write_read_line = write_read (T.concat . take 1) T.filter T.hPutStrLn T.hGetLine (: []) -tl_write_read_line = write_read (TL.concat . take 1) TL.filter TL.hPutStrLn TL.hGetLine (: []) +inputOutput :: TestTree +inputOutput = testGroup "input-output" [ + testProperty "t_write_read" $ write_read arbitrary shrink (T.replace "\n" "\r\n") T.hPutStr T.hGetContents, + testProperty "tl_write_read" $ write_read arbitrary shrink (TL.replace "\n" "\r\n") TL.hPutStr TL.hGetContents, + testProperty "t_write_read_line" $ write_read genTLine shrinkTLine (`T.append` "\r") T.hPutStrLn T.hGetLine, + testProperty "tl_write_read_line" $ write_read genTLLine shrinkTLLine (`TL.append` "\r") TL.hPutStrLn TL.hGetLine, + -- Note: Data.Text.IO.Utf8 does NO newline translation + testProperty "utf8_write_read" $ write_read arbitrary shrink id TU.hPutStr TU.hGetContents, + testProperty "utf8_write_read_line" $ write_read genTLine shrinkTLine id TU.hPutStrLn TU.hGetLine + -- These tests are subject to I/O race conditions + -- testProperty "t_put_get" t_put_get, + -- testProperty "tl_put_get" tl_put_get + ] + +genTLine :: Gen T.Text +genTLine = T.filter (`notElem` ("\r\n" :: String)) <$> arbitrary + +genTLLine :: Gen TL.Text +genTLLine = TL.filter (`notElem` ("\r\n" :: String)) <$> arbitrary + +shrinkTLine :: T.Text -> [T.Text] +shrinkTLine = filter (T.all (/= '\n')) . shrink -utf8_write_read = write_read T.unlines T.filter TU.hPutStr TU.hGetContents unSqrt -utf8_write_read_line = write_read (T.concat . take 1) T.filter TU.hPutStrLn TU.hGetLine (: []) +shrinkTLLine :: TL.Text -> [TL.Text] +shrinkTLLine = filter (TL.all (/= '\n')) . shrink testLowLevel :: TestTree testLowLevel = @@ -150,15 +170,5 @@ testLowLevel = #endif ], - testGroup "input-output" [ - testGroup "t_write_read" t_write_read, - testGroup "tl_write_read" tl_write_read, - testGroup "t_write_read_line" t_write_read_line, - testGroup "tl_write_read_line" tl_write_read_line, - testGroup "utf8_write_read" utf8_write_read, - testGroup "utf8_write_read_line" utf8_write_read_line - -- These tests are subject to I/O race conditions - -- testProperty "t_put_get" t_put_get, - -- testProperty "tl_put_get" tl_put_get - ] + inputOutput ] diff --git a/tests/Tests/QuickCheckUtils.hs b/tests/Tests/QuickCheckUtils.hs index d38eddee..77eacb99 100644 --- a/tests/Tests/QuickCheckUtils.hs +++ b/tests/Tests/QuickCheckUtils.hs @@ -34,16 +34,18 @@ module Tests.QuickCheckUtils ) where import Control.Arrow ((***)) -import Data.Bool (bool) +import Control.Monad (when) import Data.Char (isSpace) +import Data.IORef (writeIORef) import Data.Text.Foreign (I8) import Data.Text.Lazy.Builder.RealFloat (FPFormat(..)) import Data.Word (Word8, Word16) -import GHC.IO.Encoding.Types (TextEncoding(TextEncoding,textEncodingName)) -import Test.QuickCheck (Arbitrary(..), arbitraryUnicodeChar, arbitraryBoundedEnum, getUnicodeString, arbitrarySizedIntegral, shrinkIntegral, Property, ioProperty, discard, counterexample, scale, (.&&.), NonEmptyList(..), forAll, getPositive) +import qualified GHC.IO.Buffer as GIO +import qualified GHC.IO.Handle.Internals as GIO +import qualified GHC.IO.Handle.Types as GIO +import GHC.IO.Encoding.Types (TextEncoding(textEncodingName)) +import Test.QuickCheck (Arbitrary(..), arbitraryUnicodeChar, arbitraryBoundedEnum, getUnicodeString, arbitrarySizedIntegral, shrinkIntegral, Property, ioProperty, counterexample, scale, (.&&.), NonEmptyList(..), forAllShrink) import Test.QuickCheck.Gen (Gen, choose, chooseAny, elements, frequency, listOf, oneof, resize, sized) -import Test.Tasty (TestTree, testGroup) -import Test.Tasty.QuickCheck (testProperty) import Tests.Utils import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL @@ -248,39 +250,55 @@ instance Arbitrary IO.BufferMode where -- sometimes contain line endings.) -- * Newline translation mode. -- * Buffering. -write_read :: forall a b c. - (Eq a, Show a, Show c, Arbitrary c) - => ([b] -> a) - -> ((Char -> Bool) -> b -> b) +write_read :: forall a. + (Eq a, Show a) + => Gen a + -> (a -> [a]) + -> (a -> a) -- ^ replace '\n' with '\r\n' (for multiline tests) or append '\r' (for single-line tests) -> (IO.Handle -> a -> IO ()) -> (IO.Handle -> IO a) - -> (c -> [b]) - -> [TestTree] -write_read unline filt writer reader modData - = encodings <&> \enc@TextEncoding {textEncodingName} -> testGroup textEncodingName - [ testProperty "NoBuffering" $ propTest enc (pure IO.NoBuffering) - , testProperty "LineBuffering" $ propTest enc (pure IO.LineBuffering) - , testProperty "BlockBuffering" $ propTest enc blockBuffering - ] + -> Property +write_read genTxt shrinkTxt expandNl writer reader + = forAllShrink genEncoding shrinkEncoding propTest where - propTest :: TextEncoding -> Gen IO.BufferMode -> IO.NewlineMode -> c -> Property - propTest _ _ (IO.NewlineMode IO.LF IO.CRLF) _ = discard - propTest enc genBufferMode nl d = forAll genBufferMode $ \mode -> ioProperty $ withTempFile $ \_ h -> do - let ts = modData d - t = unline . map (filt (not . (`elem` "\r\n"))) $ ts - IO.hSetEncoding h enc - IO.hSetNewlineMode h nl - IO.hSetBuffering h mode - () <- writer h t - IO.hSeek h IO.AbsoluteSeek 0 - r <- reader h - let isEq = r == t - seq isEq $ pure $ counterexample (show r ++ bool " /= " " == " isEq ++ show t) isEq - - encodings = [IO.utf8, IO.utf8_bom, IO.utf16, IO.utf16le, IO.utf16be, IO.utf32, IO.utf32le, IO.utf32be] - - blockBuffering :: Gen IO.BufferMode - blockBuffering = IO.BlockBuffering <$> fmap (fmap $ min 4 . getPositive) arbitrary + propTest :: TextEncoding -> IO.BufferMode -> Property + propTest enc mode = forAllShrink genTxt shrinkTxt $ \txt -> ioProperty $ do + file <- emptyTempFile + let with nl k = IO.withFile file IO.ReadWriteMode $ \h -> do + IO.hSetEncoding h enc + IO.hSetBuffering h mode + IO.hSetNewlineMode h nl + setSmallBuffer h + k h + -- Put a very small buffer in Handle to easily test boundary conditions in `writeBlocks` + setSmallBuffer h = GIO.withHandle_ "setSmallBuffer" h $ \h_ -> do + buf <- GIO.newCharBuffer 9 GIO.WriteBuffer + writeIORef (GIO.haCharBuffer h_) buf + readExpecting h txt' msg = do + out <- reader h + when (txt' /= out) $ error (show txt' ++ " /= " ++ show out ++ msg) + -- 'reader' may be 'hGetContents', which closes the handle + -- So we reopen a new file every time. + + -- Test with CRLF encoding + with (IO.NewlineMode IO.CRLF IO.CRLF) $ \h -> do + writer h txt + IO.hSeek h IO.AbsoluteSeek 0 + readExpecting h txt " (at location 1)" + + -- Re-read without CRLF decoding to check that we did encode CRLF correctly + with (IO.NewlineMode IO.LF IO.LF) $ \h -> do + readExpecting h (expandNl txt) " (at location 2)" + + -- Test without CRLF encoding + with (IO.NewlineMode IO.LF IO.LF) $ \h -> do + IO.hSetFileSize h 0 + writer h txt + IO.hSeek h IO.AbsoluteSeek 0 + readExpecting h txt " (at location 3)" + + genEncoding = elements [IO.utf8, IO.utf8_bom, IO.utf16, IO.utf16le, IO.utf16be, IO.utf32, IO.utf32le, IO.utf32be] + shrinkEncoding enc = if textEncodingName enc == textEncodingName IO.utf8 then [] else [IO.utf8] -- Generate various Unicode space characters with high probability arbitrarySpacyChar :: Gen Char @@ -301,7 +319,3 @@ newtype SkewedBool = Skewed { getSkewed :: Bool } instance Arbitrary SkewedBool where arbitrary = Skewed <$> frequency [(1, pure False), (5, pure True)] - -(<&>) :: [a] -> (a -> b) -> [b] -(<&>) = flip fmap - diff --git a/tests/Tests/Utils.hs b/tests/Tests/Utils.hs index 1b97815f..8c616aea 100644 --- a/tests/Tests/Utils.hs +++ b/tests/Tests/Utils.hs @@ -6,11 +6,12 @@ module Tests.Utils (=^=) , withRedirect , withTempFile + , emptyTempFile ) where import Control.Exception (SomeException, bracket_, evaluate, try) import Control.Monad (when) -import System.IO.Temp (withSystemTempFile) +import System.IO.Temp (withSystemTempFile, emptySystemTempFile) import GHC.IO.Handle.Internals (withHandle) import System.IO (Handle, hFlush, hIsOpen, hIsWritable) import Test.QuickCheck (Property, ioProperty, property, (===), counterexample) @@ -33,6 +34,9 @@ infix 4 =^= withTempFile :: (FilePath -> Handle -> IO a) -> IO a withTempFile = withSystemTempFile "crashy.txt" +emptyTempFile :: IO FilePath +emptyTempFile = emptySystemTempFile "crashy.txt" + withRedirect :: Handle -> Handle -> IO a -> IO a withRedirect tmp h = bracket_ swap swap where From c74ef25d603f6c46409ee2e16d861e2bd8c8afc1 Mon Sep 17 00:00:00 2001 From: Li-yao Xia Date: Thu, 31 Jul 2025 19:29:55 +0200 Subject: [PATCH 3/4] Fix writeBlocks --- src/Data/Text/Internal/IO.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Data/Text/Internal/IO.hs b/src/Data/Text/Internal/IO.hs index 546d3ab6..1ba96215 100644 --- a/src/Data/Text/Internal/IO.hs +++ b/src/Data/Text/Internal/IO.hs @@ -262,11 +262,12 @@ writeBlocks isCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0 Done -> commit n False{-no flush-} True{-release-} >> return () Skip s' -> inner s' n Yield x s' - | isCRLF && x == '\n' && n + 1 < len -> do + -- Leave room for two characters for CRLF decoding + | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s + | x == '\n' && isCRLF -> do n1 <- writeCharBuf' raw len n '\r' writeCharBuf' raw len n1 '\n' >>= inner s' - | n < len -> writeCharBuf' raw len n x >>= inner s' - | otherwise -> commit n True{-needs flush-} False >>= outer s + | otherwise -> writeCharBuf' raw len n x >>= inner s' commit = commitBuffer h raw len -- | Only modifies the raw buffer and not the buffer attributes From f662261a382a9a1554c4dfca8c16fe268a593de3 Mon Sep 17 00:00:00 2001 From: Li-yao Xia Date: Thu, 31 Jul 2025 19:12:21 +0200 Subject: [PATCH 4/4] Fix unpack_nl --- src/Data/Text/Internal/IO.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Data/Text/Internal/IO.hs b/src/Data/Text/Internal/IO.hs index 1ba96215..5ff0a809 100644 --- a/src/Data/Text/Internal/IO.hs +++ b/src/Data/Text/Internal/IO.hs @@ -112,6 +112,7 @@ unpack !buf !r !w | otherwise = Yield (ix i) (i+1) ix i = inlinePerformIO $ peekElemOff pbuf i +-- Variant of 'unpack' with CRLF decoding. If there is a trailing '\r', leave it in the buffer. unpack_nl :: RawCharBuffer -> Int -> Int -> IO (Text, Int) unpack_nl !buf !r !w | charSize /= 4 = sizeError "unpack_nl" @@ -130,7 +131,7 @@ unpack_nl !buf !r !w in if i' < w then if ix i' == '\n' then Yield '\n' (i+2) - else Yield '\n' i' + else Yield '\r' i' else Done | otherwise = Yield c (i+1) where c = ix i