Skip to content

Commit 72702b7

Browse files
author
pranaysashank
committed
WIP.
- Parse strings with escaped quotes properly - Parse double
1 parent 007b507 commit 72702b7

3 files changed

Lines changed: 167 additions & 75 deletions

File tree

src/Streamly/Internal/Data/Json/Stream.hs

Lines changed: 109 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,16 @@
77
-- Stability : experimental
88
-- Portability : GHC
99

10-
{-# LANGUAGE CPP #-}
10+
{-# OPTIONS_GHC -Wno-orphans #-}
1111

12-
{-# LANGUAGE OverloadedLists #-}
13-
14-
module Streamly.Internal.Data.Json.Stream
15-
( parseJson )
12+
module Streamly.Internal.Data.Json.Stream
13+
( parseJson
14+
, parseJsonEOF
15+
)
1616
where
1717

1818
import Control.Monad (when)
1919
import Control.Monad.Catch (MonadCatch)
20-
import Data.Bits (testBit)
2120
import Data.Char (chr, ord)
2221
import Data.Functor.Identity (runIdentity)
2322
import Data.Word (Word8)
@@ -30,6 +29,7 @@ import qualified Data.Scientific as Sci
3029
import Streamly.Internal.Data.Parser.ParserD (Parser)
3130
import Streamly.Internal.Data.Array (Array)
3231
import Streamly.Internal.Data.Fold.Types (Fold(..))
32+
import Streamly.Internal.Data.Strict (Tuple' (..))
3333
import qualified Streamly.Internal.Data.Parser as PR
3434
import qualified Streamly.Internal.Data.Parser.ParserD as P
3535
import qualified Streamly.Internal.Data.Array as A
@@ -56,27 +56,12 @@ import qualified Streamly.Data.Fold as FL
5656
#define C_n 110
5757
#define C_t 116
5858

59+
backslash, zero, minus, plus :: Word8
5960
backslash = 92 :: Word8
60-
close_curly = 125 :: Word8
61-
close_square = 93 :: Word8
62-
comma = 44 :: Word8
63-
double_quote = 34 :: Word8
64-
open_curly = 123 :: Word8
65-
open_square = 91 :: Word8
66-
colon = 58 :: Word8
67-
c_0 = 48 :: Word8
68-
c_9 = 57 :: Word8
69-
c_A = 65 :: Word8
70-
c_F = 70 :: Word8
71-
c_a = 97 :: Word8
72-
c_f = 102 :: Word8
73-
c_n = 110 :: Word8
74-
c_t = 116 :: Word8
7561
zero = 48 :: Word8
7662
minus = 45 :: Word8
7763
plus = 43 :: Word8
7864

79-
8065
instance Enum a => Hashable (A.Array a) where
8166
hash arr = fromIntegral $ runIdentity $ IUF.fold A.read IFL.rollingHash arr
8267
hashWithSalt salt arr = fromIntegral $ runIdentity $
@@ -99,46 +84,33 @@ data Value
9984
| Null
10085
deriving (Eq, Show)
10186

102-
{-# INLINE pushToFold #-}
103-
pushToFold :: Monad m => Fold m a b -> a -> Fold m a b
104-
pushToFold (Fold step initial extract) a = Fold step initial' extract
105-
where
106-
initial' = do
107-
s <- initial
108-
step s a
109-
11087
{-# INLINE skipSpace #-}
11188
skipSpace :: MonadCatch m => Parser m Word8 ()
11289
skipSpace =
11390
P.takeWhile
11491
(\w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09)
11592
FL.drain
11693

117-
{-# INLINE spaceAround #-}
118-
spaceAround :: MonadCatch m => Parser m Word8 b -> Parser m Word8 b
119-
spaceAround parseInBetween = do
120-
skipSpace
121-
between <- parseInBetween
122-
skipSpace
123-
return between
124-
12594
{-# INLINE skip #-}
12695
skip :: MonadCatch m => Int -> Parser m a ()
12796
skip n = P.take n FL.drain
12897

12998
{-# INLINE string #-}
99+
130100
string :: MonadCatch m => String -> Parser m Word8 ()
131101
string = P.eqBy (==) . map (fromIntegral . ord)
132102

133103
{-# INLINE match #-}
134104
match :: MonadCatch m => Word8 -> Parser m Word8 ()
135105
match w = P.eqBy (==) [w]
136106

107+
{-# SPECIALISE foldToInteger :: Monad m => Int -> Fold m Word8 Int #-}
108+
{-# SPECIALISE foldToInteger :: Monad m => Integer -> Fold m Word8 Integer #-}
137109
{-# INLINE foldToInteger #-}
138-
foldToInteger :: Monad m => Fold m Word8 Integer
139-
foldToInteger = Fold step initial extract
110+
foldToInteger :: (Num a, Monad m) => a -> Fold m Word8 a
111+
foldToInteger begin = Fold step initial extract
140112
where
141-
initial = return 0
113+
initial = return begin
142114

143115
step s a = return $ s * 10 + fromIntegral (a - 48)
144116

@@ -147,10 +119,20 @@ foldToInteger = Fold step initial extract
147119
{-# INLINE parseDecimal0 #-}
148120
parseDecimal0 :: MonadCatch m => Parser m Word8 Integer
149121
parseDecimal0 = do
150-
h <- P.peek
151-
n <- P.peek
152-
when (h == zero && n - 48 > 9) $ P.die "Leading zero in a number is not accepted in JSON."
153-
P.takeWhile1 (\w -> w - 48 <= 9) foldToInteger
122+
z <- P.peek
123+
n <- P.takeWhile1 (\w -> w - 48 <= 9) (foldToInteger 0)
124+
when (z == zero && n /= 0) $ do
125+
P.die $ " Leading zero in a number is not accepted in JSON."
126+
return n
127+
128+
{-# INLINE parseDecimal #-}
129+
parseDecimal :: MonadCatch m => Parser m Word8 Int
130+
parseDecimal = do
131+
sign <- P.peek
132+
let positive = sign == plus || sign /= minus
133+
pr = P.takeWhile1 (\w -> w - 48 <= 9) (foldToInteger 0)
134+
when (sign == plus || sign == minus) (skip 1)
135+
if positive then pr else negate <$> pr
154136

155137
{-# INLINE parseJsonNumber #-}
156138
parseJsonNumber :: MonadCatch m => Parser m Word8 Scientific
@@ -159,8 +141,24 @@ parseJsonNumber = do
159141
let positive = sign == plus || sign /= minus
160142
when (sign == plus || sign == minus) (skip 1)
161143
n <- parseDecimal0
162-
let signedCoeff = if positive then n else (-n)
163-
return (Sci.scientific signedCoeff 0)
144+
dot <- P.peekMaybe
145+
Tuple' c e <-
146+
case dot of
147+
Just 46 -> do
148+
skip 1
149+
P.takeWhile1
150+
(\w -> w - 48 <= 9)
151+
(Tuple' <$> foldToInteger n <*> FL.length)
152+
_ -> return $ Tuple' n 0
153+
154+
let signedCoeff = if positive then c else (-c)
155+
mex <- P.peekMaybe
156+
case mex of
157+
Just ex
158+
| ex == 101 || ex == 69 -> do
159+
skip 1
160+
Sci.scientific signedCoeff . (e +) <$> parseDecimal
161+
_ -> return (Sci.scientific signedCoeff e)
164162

165163
{-# INLINE parseJsonString #-}
166164
parseJsonString :: MonadCatch m => Parser m Word8 JsonString
@@ -170,19 +168,24 @@ parseJsonString = do
170168
w <- P.peek
171169
case w of
172170
DOUBLE_QUOTE -> skip 1 >> return s
171+
BACKSLASH -> (fmap (s <>) escapeParseJsonString) <* skip 1
173172
_ -> do
174-
P.die $ [(chr . fromIntegral) w] ++ " Not yet implemented to handle escape sequences in String."
173+
P.die $ [(chr . fromIntegral) w] ++ " : String without end."
174+
175+
{-# INLINE escapeParseJsonString #-}
176+
escapeParseJsonString :: MonadCatch m => Parser m Word8 JsonString
177+
escapeParseJsonString = P.scan startState go (Uni.foldUtf8With A.unsafeWrite)
178+
where
179+
startState = False
180+
go s a
181+
| s = Just False
182+
| a == DOUBLE_QUOTE = Nothing
183+
| otherwise =
184+
let a' = a == backslash
185+
in Just a'
175186

176187
{-# INLINE parseJsonValue #-}
177188
parseJsonValue :: MonadCatch m => Parser m Word8 Value
178-
{- parseJsonValue = skipSpace >>
179-
(Object <$> parseJsonObject)
180-
`P.alt` (Array <$> parseJsonArray)
181-
`P.alt` (String <$> parseJsonString)
182-
`P.alt` (Number <$> parseJsonNumber)
183-
`P.alt` (Bool True <$ string "true")
184-
`P.alt` (Bool False <$ string "false")
185-
`P.alt` (Null <$ string "null") -}
186189
parseJsonValue = do
187190
skipSpace
188191
w <- P.peek
@@ -230,13 +233,55 @@ parseJsonArray = do
230233
match CLOSE_SQUARE
231234
return jsonValues
232235

236+
{-# INLINE parseJsonEOF #-}
237+
parseJsonEOF :: MonadCatch m => PR.Parser m Word8 Value
238+
parseJsonEOF =
239+
P.toParserK $ do
240+
v <- parseJsonValue
241+
skipSpace
242+
P.eof
243+
return v
244+
233245
{-# INLINE parseJson #-}
234246
parseJson :: MonadCatch m => PR.Parser m Word8 Value
235-
-- parseJson = P.toParserK (spaceAround $ (Object <$> parseJsonObject) `P.alt` (Array <$> parseJsonArray))
236-
parseJson = P.toParserK $ do
237-
skipSpace
238-
w <- P.peek
239-
case w of
240-
OPEN_CURLY -> Object <$> parseJsonObject
241-
OPEN_SQUARE -> Array <$> parseJsonArray
242-
_ -> P.die $ "Encountered " ++ [chr . fromIntegral $ w] ++ " when expection [ or {."
247+
parseJson = P.toParserK $ parseJsonValue
248+
249+
{-
250+
251+
{-# INLINE pushToFold #-}
252+
pushToFold :: Monad m => Fold m a b -> a -> Fold m a b
253+
pushToFold (Fold step initial extract) a = Fold step initial' extract
254+
where
255+
initial' = do
256+
s <- initial
257+
step s a
258+
259+
sepBy1 ::
260+
MonadCatch m
261+
=> Fold m a c
262+
-> Parser m Word8 a
263+
-> Parser m Word8 sep
264+
-> Parser m Word8 c
265+
sepBy1 fl p sep = do
266+
a <- p
267+
P.many (pushToFold fl a) (sep >> p)
268+
269+
sepBy ::
270+
MonadCatch m
271+
=> Fold m a c
272+
-> Parser m Word8 a
273+
-> Parser m Word8 sep
274+
-> Parser m Word8 c
275+
sepBy fl@(Fold _ initial extract) p sep =
276+
sepBy1 fl p sep `P.alt` P.yieldM (initial >>= extract)
277+
278+
parseJsonValue = skipSpace >>
279+
(Object <$> parseJsonObject)
280+
`P.alt` (Array <$> parseJsonArray)
281+
`P.alt` (String <$> parseJsonString)
282+
`P.alt` (Number <$> parseJsonNumber)
283+
`P.alt` (Bool True <$ string "true")
284+
`P.alt` (Bool False <$ string "false")
285+
`P.alt` (Null <$ string "null")
286+
287+
-}

src/Streamly/Internal/Data/Parser.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,10 @@ module Streamly.Internal.Data.Parser
6262
, yieldM
6363
, die
6464
, dieM
65+
, scan
6566

6667
-- * Element parsers
68+
, peekMaybe
6769
, peek
6870
, eof
6971
, satisfy
@@ -321,6 +323,10 @@ die = D.toParserK . D.die
321323
dieM :: MonadCatch m => m String -> Parser m a b
322324
dieM = D.toParserK . D.dieM
323325

326+
{-# INLINE peekMaybe #-}
327+
peekMaybe :: MonadCatch m => Parser m a (Maybe a)
328+
peekMaybe = D.toParserK D.peekMaybe
329+
324330
-------------------------------------------------------------------------------
325331
-- Failing Parsers
326332
-------------------------------------------------------------------------------
@@ -458,6 +464,12 @@ takeWhile cond = D.toParserK . D.takeWhile cond
458464
takeWhile1 :: MonadCatch m => (a -> Bool) -> Fold m a b -> Parser m a b
459465
takeWhile1 cond = D.toParserK . D.takeWhile1 cond
460466

467+
-- /Internal/
468+
--
469+
{-# INLINE scan #-}
470+
scan :: MonadCatch m => s -> (s -> a -> Maybe s) -> Fold m a b -> Parser m a b
471+
scan s f fl = D.toParserK $ D.scan s f fl
472+
461473
-- | @sepBy fl p sep@ collects zero or more stream elements separated by @sep@.
462474
--
463475
-- * Stops - when either of @p@ or @sep@ fails

src/Streamly/Internal/Data/Parser/ParserD.hs

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ module Streamly.Internal.Data.Parser.ParserD
2727
, yieldM
2828
, die
2929
, dieM
30+
, scan
3031

3132
-- * Element parsers
33+
, peekMaybe
3234
, peek
3335
, eof
3436
, satisfy
@@ -249,6 +251,22 @@ all predicate = Parser step initial return
249251

250252
step s a = return (if s && predicate a then Partial 0 True else Done 0 False)
251253

254+
-- | See 'Streamly.Internal.Data.Parser.peekMaybe'.
255+
--
256+
-- /Internal/
257+
--
258+
{-# INLINABLE peekMaybe #-}
259+
peekMaybe :: MonadThrow m => Parser m a (Maybe a)
260+
peekMaybe = Parser step initial extract
261+
262+
where
263+
264+
initial = return ()
265+
266+
step () a = return $ Done 1 (Just a)
267+
268+
extract () = return Nothing
269+
252270
-------------------------------------------------------------------------------
253271
-- Failing Parsers
254272
-------------------------------------------------------------------------------
@@ -450,6 +468,23 @@ takeWhile1 predicate (Fold fstep finitial fextract) =
450468
extract Nothing = throwM $ ParseError "takeWhile1: end of input"
451469
extract (Just s) = fextract s
452470

471+
{-# INLINE scan #-}
472+
scan :: Monad m => s -> (s -> a -> Maybe s) -> Fold m a b -> Parser m a b
473+
scan begin sstep (Fold fstep finitial fextract) = Parser step initial extract
474+
475+
where
476+
477+
initial = Tuple' begin <$> finitial
478+
479+
step (Tuple' s fs) a =
480+
case sstep s a of
481+
Just s' -> do
482+
fs' <- fstep fs a
483+
return $ Partial 0 (Tuple' s' fs')
484+
Nothing -> Done 1 <$> fextract fs
485+
486+
extract (Tuple' _ fs) = fextract fs
487+
453488
{-# ANN type SepParseState Fuse #-}
454489
data SepParseState seps sa fs = SepParseA !Int fs sa | SepParseSep !Int fs seps
455490

@@ -473,29 +508,29 @@ sepBy (Fold fstep finitial fextract) (Parser pstep pinitial pextract)
473508
ps <- pinitial
474509
return $ SepParseA 0 fs ps
475510

476-
step (SepParseA cnt fst pst) a = do
511+
step (SepParseA cnt fs pst) a = do
477512
ps <- pstep pst a
478513
case ps of
479-
Partial n s -> return $ Partial n (SepParseA 0 fst s)
480-
Continue n s -> return $ Continue n (SepParseA (cnt + 1 - n) fst s)
514+
Partial n s -> return $ Partial n (SepParseA 0 fs s)
515+
Continue n s -> return $ Continue n (SepParseA (cnt + 1 - n) fs s)
481516
Done n b -> do
482517
pseps <- psepinitial
483-
fs <- fstep fst b
484-
return $ Continue n (SepParseSep 0 fs pseps)
518+
fs' <- fstep fs b
519+
return $ Partial n (SepParseSep 0 fs' pseps)
485520
Error _ -> do
486-
c <- fextract fst
521+
c <- fextract fs
487522
return $ Done (cnt + 1) c
488523

489-
step (SepParseSep cnt fst psepst) a = do
524+
step (SepParseSep cnt fs psepst) a = do
490525
pseps <- psepstep psepst a
491526
case pseps of
492-
Partial n s -> return $ Continue n (SepParseSep (cnt + 1 - n) fst s)
493-
Continue n s -> return $ Continue n (SepParseSep (cnt + 1 - n) fst s)
527+
Partial n s -> return $ Continue n (SepParseSep (cnt + 1 - n) fs s)
528+
Continue n s -> return $ Continue n (SepParseSep (cnt + 1 - n) fs s)
494529
Done n _ -> do
495530
ps <- pinitial
496-
return $ Continue n (SepParseA (cnt + 1) fst ps)
531+
return $ Continue n (SepParseA (cnt + 1 - n) fs ps)
497532
Error _ -> do
498-
c <- fextract fst
533+
c <- fextract fs
499534
return $ Done (cnt + 1) c
500535

501536
extract (SepParseA _ fs sa) = do

0 commit comments

Comments
 (0)