Skip to content

Commit b17c6aa

Browse files
committed
Use builders for creation by default
1 parent 4b688e9 commit b17c6aa

2 files changed

Lines changed: 117 additions & 129 deletions

File tree

  • benchmark/Streamly/Benchmark/Unicode
  • src/Streamly/Internal/Unicode

benchmark/Streamly/Benchmark/Unicode/Utf8.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
--------------------------------------------------------------------------------
1212

1313
import Data.Char (chr)
14-
import Streamly.Internal.Unicode.Utf8 (Text)
14+
import Streamly.Internal.Unicode.Utf8 (TextB)
1515

1616
import Test.Tasty.Bench hiding (env)
1717
import Streamly.Benchmark.Common
@@ -30,12 +30,12 @@ moduleName = "Unicode.Stream"
3030
--------------------------------------------------------------------------------
3131

3232
{-# INLINE pack #-}
33-
pack :: Int -> Text
33+
pack :: Int -> TextB
3434
pack i = Text.pack $ chr <$> [1 .. i]
3535

3636
{-# INLINE packUnpack #-}
3737
packUnpack :: Int -> String
38-
packUnpack = Text.unpack . pack
38+
packUnpack = Text.unpack . Text.force . pack
3939

4040
--------------------------------------------------------------------------------
4141
-- Main function

src/Streamly/Internal/Unicode/Utf8.hs

Lines changed: 114 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -17,55 +17,53 @@ module Streamly.Internal.Unicode.Utf8
1717
(
1818
-- * Type
1919
Text
20-
, Display(..)
20+
, TextB
21+
, Render(..)
2122
, text
23+
, build
24+
, force
25+
, force'
26+
, byteLength
27+
, toArray
2228

23-
-- * Combinators
24-
, length
29+
-- * Construction
2530
, snoc
31+
, singleton
32+
, cons
33+
, pack
34+
, concat
35+
, empty
36+
37+
-- * Elimination
38+
, length
2639
, unsnoc
2740
, head
2841
, last
2942
, tail
3043
, init
3144
, null
32-
, byteLength
33-
, singleton
34-
, empty
35-
, cons
3645
, uncons
37-
, toArray
3846
, read
39-
, create
40-
, fromStream
41-
, pack
4247
, unpack
43-
, concatM
44-
, concat
45-
, rightSizeM
46-
, rightSize
4748

4849
-- * Internals
4950
, readEncoding
5051
, readEncodingRev
5152

5253
-- * IO
5354
, print
54-
, printLn
5555
) where
5656

5757
--------------------------------------------------------------------------------
5858
-- Imports
5959
--------------------------------------------------------------------------------
6060

61-
import Data.Function ((&))
61+
import Control.Monad ((>=>))
6262
import Data.Word (Word8)
6363
import Streamly.Data.MutArray (MutArray)
6464
import Streamly.Data.Array (Array)
6565
import System.IO.Unsafe (unsafePerformIO)
66-
import Control.Monad.IO.Class (MonadIO(..))
6766
import Streamly.Data.Stream (Stream)
68-
import Streamly.Data.Fold (Fold)
6967
import Language.Haskell.TH (appE)
7068
import Language.Haskell.TH.Quote (QuasiQuoter)
7169
import Streamly.Internal.Unicode.String (strWith)
@@ -86,32 +84,41 @@ import Prelude hiding
8684
--------------------------------------------------------------------------------
8785

8886
-- | A space efficient, packed, unboxed Unicode container.
89-
newtype Text = Text (MutArray Word8)
90-
91-
instance Semigroup Text where
92-
(<>) = append
93-
94-
instance Monoid Text where
95-
mempty = Text (Array.unsafeThaw Array.empty)
96-
mconcat = concat
87+
newtype Text = Text (Array Word8)
9788

9889
{-# INLINE toArray #-}
9990
toArray :: Text -> Array Word8
100-
toArray t =
101-
unsafePerformIO $ do
102-
(Text arr) <- rightSizeM t
103-
pure $ Array.unsafeFreeze arr
91+
toArray (Text arr) = arr
10492

105-
{-# INLINE toMutArray #-}
106-
toMutArray :: Text -> MutArray Word8
107-
toMutArray (Text arr) = arr
93+
--------------------------------------------------------------------------------
94+
-- Builder
95+
--------------------------------------------------------------------------------
96+
97+
data TextB = TextB Int (MutArray Word8 -> IO (MutArray Word8))
98+
99+
byteLength :: TextB -> Int
100+
byteLength (TextB i _) = i
101+
102+
build :: Array Word8 -> TextB
103+
build t =
104+
TextB (Array.byteLength t) (`MutArray.unsafeSplice` (Array.unsafeThaw t))
105+
106+
force :: TextB -> Text
107+
force (TextB i f) =
108+
unsafePerformIO
109+
$ MutArray.emptyOf i >>= fmap (Text . Array.unsafeFreeze) . f
110+
111+
force' :: TextB -> Text
112+
force' (TextB i f) =
113+
unsafePerformIO
114+
$ MutArray.emptyOf' i >>= fmap (Text . Array.unsafeFreeze) . f
108115

109116
--------------------------------------------------------------------------------
110117
-- Class
111118
--------------------------------------------------------------------------------
112119

113-
class Display a where
114-
display :: a -> Text
120+
class Render a where
121+
render :: a -> TextB
115122

116123
--------------------------------------------------------------------------------
117124
-- Decoding Utils
@@ -132,116 +139,85 @@ readEncodingRev :: {- Monad m => -} Text -> Stream m Encoding
132139
readEncodingRev = undefined
133140

134141
--------------------------------------------------------------------------------
135-
-- Streaming
142+
-- Construction
136143
--------------------------------------------------------------------------------
137144

138-
{-# INLINE read #-}
139-
read :: Monad m => Text -> Stream m Char
140-
read = Unicode.decodeUtf8 . Array.read . toArray
141-
142-
{-# INLINE ordM #-}
143-
ordM :: MonadIO m => Char -> m (MutArray Word8)
144-
ordM = Stream.fold (MutArray.createOf 4) . Stream.unfold Unicode.readCharUtf8'
145-
146-
{-# INLINEABLE appendM #-}
147-
appendM :: MonadIO m => Text -> Text -> m Text
148-
appendM (Text a) (Text b) = Text <$> MutArray.spliceExp a b
145+
{-# INLINEABLE pack #-}
146+
pack :: String -> TextB
147+
pack =
148+
build
149+
. unsafePerformIO
150+
. Stream.fold Array.create
151+
. Unicode.encodeUtf8'
152+
. Stream.fromList
149153

150154
{-# INLINEABLE append #-}
151-
append :: Text -> Text -> Text
152-
append a b = unsafePerformIO $ appendM a b
153-
154-
{-# INLINE create #-}
155-
create :: MonadIO m => Fold m Char Text
156-
create =
157-
Fold.foldlM' MutArray.spliceExp (MutArray.emptyOf MutArray.blockSize)
158-
& Fold.lmapM ordM
159-
& fmap Text
160-
161-
{-# INLINE fromStream #-}
162-
fromStream :: MonadIO m => Stream m Char -> m Text
163-
fromStream = fmap Text . Stream.fold MutArray.create . Unicode.encodeUtf8'
164-
165-
--------------------------------------------------------------------------------
166-
-- Creation and elimination
167-
--------------------------------------------------------------------------------
155+
append :: TextB -> TextB -> TextB
156+
append (TextB i f) (TextB j g) = TextB (i + j) (f >=> g)
168157

169-
{-# INLINEABLE pack #-}
170-
pack :: String -> Text
171-
pack = unsafePerformIO . fromStream . Stream.fromList
172-
173-
{-# INLINEABLE unpack #-}
174-
unpack :: Text -> String
175-
unpack = unsafePerformIO . Stream.fold Fold.toList . read
158+
{-# INLINE ord #-}
159+
ord :: Char -> Array Word8
160+
ord =
161+
unsafePerformIO
162+
. Stream.fold (Array.createOf 4)
163+
. Stream.unfold Unicode.readCharUtf8'
176164

177165
{-# INLINEABLE singleton #-}
178-
singleton :: Char -> Text
179-
singleton = unsafePerformIO . fmap Text . ordM
166+
singleton :: Char -> TextB
167+
singleton = build . ord
180168

181169
{-# INLINEABLE empty #-}
182-
empty :: Text
183-
empty = Text (Array.unsafeThaw Array.empty)
184-
185-
{-# INLINEABLE rightSizeM #-}
186-
rightSizeM :: MonadIO m => Text -> m Text
187-
rightSizeM (Text arr) = Text <$> MutArray.rightSize arr
170+
empty :: TextB
171+
empty = TextB 0 pure
188172

189-
{-# INLINEABLE rightSize #-}
190-
rightSize :: Text -> Text
191-
rightSize = unsafePerformIO . rightSizeM
192-
193-
--------------------------------------------------------------------------------
194-
-- Instances
195-
--------------------------------------------------------------------------------
196-
197-
instance Display Text where
198-
display = id
173+
{-# INLINEABLE cons #-}
174+
cons :: Char -> TextB -> TextB
175+
cons c s = append (singleton c) s
199176

200-
instance Display String where
201-
display = pack
177+
{-# INLINEABLE snoc #-}
178+
snoc :: TextB -> Char -> TextB
179+
snoc s c = append s (singleton c)
202180

203-
instance Display Int where
204-
display = pack . show
181+
{-# INLINEABLE concat #-}
182+
concat :: [TextB] -> TextB
183+
concat = foldr mappend mempty
205184

206185
--------------------------------------------------------------------------------
207-
-- QuasiQuoter
186+
-- Elimination
208187
--------------------------------------------------------------------------------
209188

210-
text :: QuasiQuoter
211-
text = strWith [|mconcat|] (appE [|pack|]) (appE [|display|])
189+
{-# INLINE read #-}
190+
read :: Monad m => Text -> Stream m Char
191+
read = Unicode.decodeUtf8 . Array.read . toArray
192+
193+
{-# INLINEABLE unpack #-}
194+
unpack :: Text -> String
195+
unpack = unsafePerformIO . Stream.fold Fold.toList . read
212196

213197
--------------------------------------------------------------------------------
214-
-- Basic interface
198+
-- Elimination
215199
--------------------------------------------------------------------------------
216200

217-
{-# INLINEABLE cons #-}
218-
cons :: Char -> Text -> Text
219-
cons c s = append (singleton c) s
220-
221-
{-# INLINEABLE snoc #-}
222-
snoc :: Text -> Char -> Text
223-
snoc s c = append s (singleton c)
224-
225201
{-# INLINE uncons #-}
226202
uncons :: Text -> Maybe (Char, Text)
227203
uncons txt@(Text arr) = unsafePerformIO $ do
228-
let blen = MutArray.byteLength arr
204+
let blen = Array.byteLength arr
229205
val <- Stream.fold Fold.one $ readEncoding txt
230206
pure $ case val of
231207
Just (Encoding {..}) ->
232208
Just ( eChar
233-
, Text (MutArray.unsafeSliceOffLen eSize (blen - eSize) arr)
209+
, Text (Array.unsafeSliceOffLen eSize (blen - eSize) arr)
234210
)
235211
Nothing -> Nothing
236212

237213
{-# INLINE unsnoc #-}
238214
unsnoc :: Text -> Maybe (Text, Char)
239215
unsnoc txt@(Text arr) = unsafePerformIO $ do
240-
let blen = MutArray.byteLength arr
216+
let blen = Array.byteLength arr
241217
val <- Stream.fold Fold.one $ readEncodingRev txt
242218
pure $ case val of
243219
Just (Encoding {..}) ->
244-
Just ( Text (MutArray.unsafeSliceOffLen 0 (blen - eSize) arr)
220+
Just ( Text (Array.unsafeSliceOffLen 0 (blen - eSize) arr)
245221
, eChar
246222
)
247223
Nothing -> Nothing
@@ -264,34 +240,46 @@ init = fmap fst . unsnoc
264240

265241
{-# INLINEABLE null #-}
266242
null :: Text -> Bool
267-
null (Text arr) = MutArray.length arr == 0
243+
null (Text arr) = Array.length arr == 0
268244

269245
{-# INLINEABLE length #-}
270246
length :: Text -> Int
271247
length = unsafePerformIO . Stream.fold Fold.length . read
272248

273-
{-# INLINEABLE byteLength #-}
274-
byteLength :: Text -> Int
275-
byteLength (Text arr) = MutArray.length arr
249+
--------------------------------------------------------------------------------
250+
-- Text Instances
251+
--------------------------------------------------------------------------------
252+
253+
instance Semigroup TextB where
254+
(<>) = append
255+
256+
instance Monoid TextB where
257+
mempty = empty
258+
mconcat = foldr mappend mempty
276259

277260
--------------------------------------------------------------------------------
278-
-- Appending
261+
-- Render Instances
279262
--------------------------------------------------------------------------------
280263

281-
{-# INLINEABLE concatM #-}
282-
concatM :: MonadIO m => Stream m Text -> m Text
283-
concatM = fmap Text . MutArray.fromChunksRealloced . fmap toMutArray
264+
instance Render Text where
265+
render (Text arr) = build arr
284266

285-
{-# INLINEABLE concat #-}
286-
concat :: [Text] -> Text
287-
concat = unsafePerformIO . concatM . Stream.fromList
267+
instance Render String where
268+
render = pack
269+
270+
instance Render Int where
271+
render = pack . show
288272

289273
--------------------------------------------------------------------------------
290-
-- IO
274+
-- QuasiQuoter
291275
--------------------------------------------------------------------------------
292276

293-
print :: Display a => a -> IO ()
294-
print = Handle.putChunk stdout . toArray . display
277+
text :: QuasiQuoter
278+
text = strWith [|mconcat|] (appE [|render|]) (appE [|render|])
279+
280+
--------------------------------------------------------------------------------
281+
-- IO
282+
--------------------------------------------------------------------------------
295283

296-
printLn :: Display a => a -> IO ()
297-
printLn = Handle.putChunk stdout . toArray . (<> pack "\n") . display
284+
print :: Render a => a -> IO ()
285+
print = Handle.putChunk stdout . toArray . force' . render

0 commit comments

Comments
 (0)