-
Notifications
You must be signed in to change notification settings - Fork 71
Expand file tree
/
Copy pathParser.hs
More file actions
399 lines (336 loc) · 10.8 KB
/
Parser.hs
File metadata and controls
399 lines (336 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
-- |
-- Module : Streamly.Internal.Data.Binary.Parser
-- Copyright : (c) 2020 Composewell Technologies
-- License : BSD-3-Clause
-- Maintainer : streamly@composewell.com
-- Portability : GHC
--
-- Decode Haskell data types from byte streams.
--
-- It would be inefficient to use this to compose parsers for general algebraic
-- data types. For general deserialization of ADTs please use the Serialize
-- type class instances. The fastest way to deserialize byte streams
-- representing Haskell data types is to write them to arrays and deserialize
-- the array using the Serialize type class.
module Streamly.Internal.Data.Binary.Parser
(
-- * Type class
FromBytes (..)
-- * Decoders
, unit
, bool
, ordering
, eqWord8 -- XXX rename to word8Eq
, word8
, word16be
, word16le
, word32be
, word32le
, word64be
, word64le
, word64host
, int8
, int16be
, int16le
, int32be
, int32le
, int64be
, int64le
, float32be
, float32le
, double64be
, double64le
, charLatin1
)
where
import Control.Monad.IO.Class (MonadIO)
import Data.Bits ((.|.), unsafeShiftL)
import Data.Char (chr)
import Data.Int (Int8, Int16, Int32, Int64)
import GHC.Float (castWord32ToFloat, castWord64ToDouble)
import Data.Word (Word8, Word16, Word32, Word64)
import Streamly.Internal.Data.Parser (Parser)
import Streamly.Internal.Data.Maybe.Strict (Maybe'(..))
import Streamly.Internal.Data.Tuple.Strict (Tuple' (..))
import qualified Streamly.Data.Array as A
import qualified Streamly.Internal.Data.Array as A
(unsafeGetIndex, unsafeCast)
import qualified Streamly.Internal.Data.Parser as PR
(fromPure, either, satisfy, takeEQ)
import qualified Streamly.Internal.Data.Parser as PRD
(Parser(..), Initial(..), Step(..), Final(..))
-- Note: In theory the () type does not need an on-disk representation. But
-- we use a concrete representation for it so that we can count how many ()
-- values we have, and so that an array of units has a concrete length.
-- | Decode the unit value. '()' is represented by a single zero byte in the
-- binary encoding; the byte is matched with 'eqWord8'.
--
-- @
-- 0 ==> ()
-- @
--
-- /Pre-release/
--
{-# INLINE unit #-}
unit :: Monad m => Parser Word8 m ()
unit = () <$ eqWord8 0
-- Translate the encoded byte of a 'Bool' into its value: 0 is 'False', 1 is
-- 'True', and every other byte produces a 'Left' with an error message that
-- names the offending byte.
{-# INLINE word8ToBool #-}
word8ToBool :: Word8 -> Either String Bool
word8ToBool byte =
    case byte of
        0 -> Right False
        1 -> Right True
        _ -> Left ("Invalid Bool encoding " ++ Prelude.show byte)
-- | A value of type 'Bool' is encoded as follows in binary encoding.
--
-- @
-- 0 ==> False
-- 1 ==> True
-- @
--
-- Any other byte is mapped by the decoding helper to a 'Left' carrying the
-- message @Invalid Bool encoding@ followed by the byte value. That 'Left' is
-- handed to @PR.either@ (presumably surfacing as a parse failure -- confirm
-- against the parser library).
--
-- /Pre-release/
--
{-# INLINE bool #-}
bool :: Monad m => Parser Word8 m Bool
bool = PR.either word8ToBool
-- Translate the encoded byte of an 'Ordering' into its value: 0 is 'LT', 1 is
-- 'EQ', 2 is 'GT', and every other byte produces a 'Left' with an error
-- message that names the offending byte.
{-# INLINE word8ToOrdering #-}
word8ToOrdering :: Word8 -> Either String Ordering
word8ToOrdering byte =
    case byte of
        0 -> Right LT
        1 -> Right EQ
        2 -> Right GT
        _ -> Left ("Invalid Ordering encoding " ++ Prelude.show byte)
-- | A value of type 'Ordering' is encoded as follows in binary encoding.
--
-- @
-- 0 ==> LT
-- 1 ==> EQ
-- 2 ==> GT
-- @
--
-- Any other byte is mapped by the decoding helper to a 'Left' carrying the
-- message @Invalid Ordering encoding@ followed by the byte value. That 'Left'
-- is handed to @PR.either@ (presumably surfacing as a parse failure --
-- confirm against the parser library).
--
-- /Pre-release/
--
{-# INLINE ordering #-}
ordering :: Monad m => Parser Word8 m Ordering
ordering = PR.either word8ToOrdering
-- XXX should go in a Word8 parser module?
-- | Consume one byte, accepting it only when it equals the given value. The
-- accepted byte is the parse result.
--
-- /Pre-release/
--
{-# INLINE eqWord8 #-}
eqWord8 :: Monad m => Word8 -> Parser Word8 m Word8
eqWord8 b = PR.satisfy isExpected

    where

    isExpected byte = byte == b
-- | Consume one byte, whatever its value, and return it.
--
-- /Pre-release/
--
{-# INLINE word8 #-}
word8 :: Monad m => Parser Word8 m Word8
word8 = PR.satisfy (\_ -> True)
-- | Big endian (MSB first) 'Word16' decoder. The parser state is empty until
-- the first byte arrives and afterwards holds that byte already shifted into
-- the upper eight bits.
{-# INLINE word16beD #-}
word16beD :: Monad m => PRD.Parser Word8 m Word16
word16beD = PRD.Parser consume begin finish

    where

    -- Nothing has been consumed yet.
    begin = pure (PRD.IPartial Nothing')

    -- XXX We can use a non-failing parser or a fold so that we do not
    -- have to buffer for backtracking which is inefficient.
    consume seen byte =
        case seen of
            -- First byte: remember it as the high half of the result.
            Nothing' ->
                let msb = fromIntegral byte `unsafeShiftL` 8
                 in pure $ PRD.SContinue 1 (Just' msb)
            -- Second byte: fill in the low half and finish.
            Just' msb ->
                pure $ PRD.SDone 1 (msb .|. fromIntegral byte)

    -- Final step: report that the input ended before the value was complete.
    finish _ = pure (PRD.FError "word16be: end of input")
-- | Parse two bytes as a 'Word16', the first byte is the MSB of the Word16 and
-- second byte is the LSB (big endian representation).
--
-- @
-- 0x01 0x02 ==> 0x0102
-- @
--
-- /Pre-release/
--
{-# INLINE word16be #-}
word16be :: Monad m => Parser Word8 m Word16
word16be = word16beD
-- | Little endian (LSB first) 'Word16' decoder. The parser state is empty
-- until the first byte arrives and afterwards holds that byte as the lower
-- eight bits of the result.
{-# INLINE word16leD #-}
word16leD :: Monad m => PRD.Parser Word8 m Word16
word16leD = PRD.Parser consume begin finish

    where

    -- Nothing has been consumed yet.
    begin = pure (PRD.IPartial Nothing')

    consume seen byte =
        case seen of
            -- First byte: remember it as the low half of the result.
            Nothing' ->
                pure $ PRD.SContinue 1 (Just' (fromIntegral byte))
            -- Second byte: shift it into the high half and finish.
            Just' lsb ->
                pure $ PRD.SDone 1 (lsb .|. (fromIntegral byte `unsafeShiftL` 8))

    -- Final step: report that the input ended before the value was complete.
    finish _ = pure (PRD.FError "word16le: end of input")
-- | Parse two bytes as a 'Word16', the first byte is the LSB of the Word16 and
-- second byte is the MSB (little endian representation).
--
-- @
-- 0x01 0x02 ==> 0x0201
-- @
--
-- /Pre-release/
--
{-# INLINE word16le #-}
word16le :: Monad m => Parser Word8 m Word16
word16le = word16leD
-- | Big endian (MSB first) 'Word32' decoder. The state pairs the value
-- accumulated so far with the bit position at which the next byte is placed;
-- the position starts at 24 and drops by 8 per byte.
{-# INLINE word32beD #-}
word32beD :: Monad m => PRD.Parser Word8 m Word32
word32beD = PRD.Parser consume begin finish

    where

    begin = pure $ PRD.IPartial $ Tuple' 0 24

    consume (Tuple' acc bitPos) byte =
        pure $
            if bitPos == 0
            -- Last byte: it lands in the lowest eight bits unshifted.
            then PRD.SDone 1 (acc .|. fromIntegral byte)
            else PRD.SContinue 1 (Tuple' merged (bitPos - 8))

        where

        merged = acc .|. (fromIntegral byte `unsafeShiftL` bitPos)

    -- Final step: report that the input ended before the value was complete.
    finish _ = pure $ PRD.FError "word32beD: end of input"
-- | Parse four bytes as a 'Word32', the first byte is the MSB of the Word32
-- and last byte is the LSB (big endian representation).
--
-- @
-- 0x01 0x02 0x03 0x04 ==> 0x01020304
-- @
--
-- /Pre-release/
--
{-# INLINE word32be #-}
word32be :: Monad m => Parser Word8 m Word32
word32be = word32beD
-- | Little endian (LSB first) 'Word32' decoder. The state pairs the value
-- accumulated so far with the bit position at which the next byte is placed;
-- the position starts at 0 and grows by 8 per byte.
{-# INLINE word32leD #-}
word32leD :: Monad m => PRD.Parser Word8 m Word32
word32leD = PRD.Parser consume begin finish

    where

    begin = pure $ PRD.IPartial $ Tuple' 0 0

    consume (Tuple' acc bitPos) byte =
        pure $
            if bitPos == 24
            -- Fourth byte: the value is complete.
            then PRD.SDone 1 merged
            else PRD.SContinue 1 (Tuple' merged (bitPos + 8))

        where

        merged = acc .|. (fromIntegral byte `unsafeShiftL` bitPos)

    -- Final step: report that the input ended before the value was complete.
    finish _ = pure $ PRD.FError "word32leD: end of input"
-- | Parse four bytes as a 'Word32', the first byte is the LSB of the Word32
-- and last byte is the MSB (little endian representation).
--
-- @
-- 0x01 0x02 0x03 0x04 ==> 0x04030201
-- @
--
-- /Pre-release/
--
{-# INLINE word32le #-}
word32le :: Monad m => Parser Word8 m Word32
word32le = word32leD
-- | Big endian (MSB first) 'Word64' decoder. The state pairs the value
-- accumulated so far with the bit position at which the next byte is placed;
-- the position starts at 56 and drops by 8 per byte.
{-# INLINE word64beD #-}
word64beD :: Monad m => PRD.Parser Word8 m Word64
word64beD = PRD.Parser consume begin finish

    where

    begin = pure $ PRD.IPartial $ Tuple' 0 56

    consume (Tuple' acc bitPos) byte =
        pure $
            if bitPos == 0
            -- Last byte: it lands in the lowest eight bits unshifted.
            then PRD.SDone 1 (acc .|. fromIntegral byte)
            else PRD.SContinue 1 (Tuple' merged (bitPos - 8))

        where

        merged = acc .|. (fromIntegral byte `unsafeShiftL` bitPos)

    -- Final step: report that the input ended before the value was complete.
    finish _ = pure $ PRD.FError "word64beD: end of input"
-- | Parse eight bytes as a 'Word64', the first byte is the MSB of the Word64
-- and last byte is the LSB (big endian representation).
--
-- @
-- 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 ==> 0x0102030405060708
-- @
--
-- /Pre-release/
--
{-# INLINE word64be #-}
word64be :: Monad m => Parser Word8 m Word64
word64be = word64beD
-- | Little endian (LSB first) 'Word64' decoder. The state pairs the value
-- accumulated so far with the bit position at which the next byte is placed;
-- the position starts at 0 and grows by 8 per byte.
{-# INLINE word64leD #-}
word64leD :: Monad m => PRD.Parser Word8 m Word64
word64leD = PRD.Parser consume begin finish

    where

    begin = pure $ PRD.IPartial $ Tuple' 0 0

    consume (Tuple' acc bitPos) byte =
        pure $
            if bitPos == 56
            -- Eighth byte: the value is complete.
            then PRD.SDone 1 merged
            else PRD.SContinue 1 (Tuple' merged (bitPos + 8))

        where

        merged = acc .|. (fromIntegral byte `unsafeShiftL` bitPos)

    -- Final step: report that the input ended before the value was complete.
    finish _ = pure $ PRD.FError "word64leD: end of input"
-- | Parse eight bytes as a 'Word64', the first byte is the LSB of the Word64
-- and last byte is the MSB (little endian representation).
--
-- @
-- 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 ==> 0x0807060504030201
-- @
--
-- /Pre-release/
--
{-# INLINE word64le #-}
word64le :: Monad m => Parser Word8 m Word64
word64le = word64leD
-- | Parse a single byte as an 'Int8'. The byte is read with 'word8' and
-- converted to the signed type with 'fromIntegral'.
{-# INLINE int8 #-}
int8 :: Monad m => Parser Word8 m Int8
int8 = fmap fromIntegral word8
-- | Parse two bytes, most significant byte first (big endian), as an 'Int16'.
-- The bytes are read with 'word16be' and the result is converted to the
-- signed type with 'fromIntegral'.
--
-- /Pre-release/
--
{-# INLINE int16be #-}
int16be :: Monad m => Parser Word8 m Int16
int16be = fmap fromIntegral word16be
-- | Parse two bytes, least significant byte first (little endian), as an
-- 'Int16'. The bytes are read with 'word16le' and the result is converted to
-- the signed type with 'fromIntegral'.
--
-- /Pre-release/
--
{-# INLINE int16le #-}
int16le :: Monad m => Parser Word8 m Int16
int16le = fmap fromIntegral word16le
-- | Parse four bytes, most significant byte first (big endian), as an
-- 'Int32'. The bytes are read with 'word32be' and the result is converted to
-- the signed type with 'fromIntegral'.
--
-- /Pre-release/
--
{-# INLINE int32be #-}
int32be :: Monad m => Parser Word8 m Int32
int32be = fmap fromIntegral word32be
-- | Parse four bytes as an 'Int32', the first byte is the LSB of the Int32
-- and last byte is the MSB (little endian representation). The bytes are read
-- with 'word32le' and the result is converted to the signed type with
-- 'fromIntegral'.
--
-- /Pre-release/
--
{-# INLINE int32le #-}
int32le :: Monad m => Parser Word8 m Int32
int32le = fromIntegral <$> word32le
-- | Parse eight bytes, most significant byte first (big endian), as an
-- 'Int64'. The bytes are read with 'word64be' and the result is converted to
-- the signed type with 'fromIntegral'.
--
-- /Pre-release/
--
{-# INLINE int64be #-}
int64be :: Monad m => Parser Word8 m Int64
int64be = fmap fromIntegral word64be
-- | Parse eight bytes as an 'Int64', the first byte is the LSB of the Int64
-- and last byte is the MSB (little endian representation). The bytes are read
-- with 'word64le' and the result is converted to the signed type with
-- 'fromIntegral'.
--
-- /Pre-release/
--
{-# INLINE int64le #-}
int64le :: Monad m => Parser Word8 m Int64
int64le = fromIntegral <$> word64le
-- | Parse four bytes, most significant byte first (big endian), as a 'Float'.
-- The bytes are assembled into a 'Word32' by 'word32be' and that bit pattern
-- is converted with 'castWord32ToFloat'.
--
-- Only 'Monad' is required of @m@: the conversion is a pure function mapped
-- over the underlying parser.
{-# INLINE float32be #-}
float32be :: Monad m => Parser Word8 m Float
float32be = castWord32ToFloat <$> word32be
-- | Parse four bytes, least significant byte first (little endian), as a
-- 'Float'. The bytes are assembled into a 'Word32' by 'word32le' and that bit
-- pattern is converted with 'castWord32ToFloat'.
--
-- Only 'Monad' is required of @m@: the conversion is a pure function mapped
-- over the underlying parser.
{-# INLINE float32le #-}
float32le :: Monad m => Parser Word8 m Float
float32le = castWord32ToFloat <$> word32le
-- | Parse eight bytes, most significant byte first (big endian), as a
-- 'Double'. The bytes are assembled into a 'Word64' by 'word64be' and that
-- bit pattern is converted with 'castWord64ToDouble'.
--
-- Only 'Monad' is required of @m@: the conversion is a pure function mapped
-- over the underlying parser.
{-# INLINE double64be #-}
double64be :: Monad m => Parser Word8 m Double
double64be = castWord64ToDouble <$> word64be
-- | Parse eight bytes, least significant byte first (little endian), as a
-- 'Double'. The bytes are assembled into a 'Word64' by 'word64le' and that
-- bit pattern is converted with 'castWord64ToDouble'.
--
-- Only 'Monad' is required of @m@: the conversion is a pure function mapped
-- over the underlying parser.
{-# INLINE double64le #-}
double64le :: Monad m => Parser Word8 m Double
double64le = castWord64ToDouble <$> word64le
-- | Parse a single byte as a 'Char' whose code point equals the byte value,
-- i.e. the byte is interpreted as a Latin-1 (ISO-8859-1) character. Every
-- byte value (0-255) maps to a valid 'Char', so the conversion itself never
-- fails.
--
-- /Pre-release/
--
{-# INLINE charLatin1 #-}
charLatin1 :: Monad m => Parser Word8 m Char
charLatin1 = fmap (chr . fromIntegral) word8
-------------------------------------------------------------------------------
-- Host byte order
-------------------------------------------------------------------------------
-- | Parse eight bytes as a 'Word64' in the host byte order. Exactly eight
-- bytes are collected into an array, which is then read back as a single
-- 'Word64'.
--
-- /Pre-release/
--
{-# INLINE word64host #-}
word64host :: MonadIO m => Parser Word8 m Word64
word64host = firstWord64 <$> PR.takeEQ 8 (A.createOf 8)

    where

    -- Cast the collected byte array and fetch element 0 of the result; both
    -- operations are the unchecked ("unsafe") variants.
    firstWord64 bytes = A.unsafeGetIndex 0 (A.unsafeCast bytes)
-------------------------------------------------------------------------------
-- Type class
-------------------------------------------------------------------------------
-- | Types that can be decoded from a stream of bytes ('Word8').
class FromBytes a where
    -- | Decode a byte stream to a Haskell type.
    --
    -- NOTE(review): the signature places no constraint (such as 'Monad') on
    -- @m@, unlike the decoders in this module -- confirm this is intended.
    fromBytes :: Parser Word8 m a