streamly/core/src/Streamly/Data/ParserK.hs at 333deb4df51fe30402b979161b80cccaef0fe56c · composewell/streamly · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
{-# LANGUAGE CPP #-}
-- |
-- Module      : Streamly.Data.ParserK
-- Copyright   : (c) 2023 Composewell Technologies
-- License     : BSD-3-Clause
-- Maintainer  : streamly@composewell.com
-- Stability   : pre-release
-- Portability : GHC
--
-- See the general notes about parsing in the "Streamly.Data.Parser" module.
-- This (ParserK) module implements a Continuation Passing Style (CPS) wrapper
-- over the fused "Streamly.Data.Parser" module. It is a faster CPS parser than
-- attoparsec.
--
-- The 'ParserK' type represents a stream-consumer as a composition of function
-- calls, therefore, a function call overhead is incurred at each composition.
-- It is reasonably fast in general but may be a few times slower than the
-- fused 'Streamly.Data.Parser.Parser' type. However, unlike fused parsers, it
-- allows for scalable dynamic composition, especially, 'ParserK' can be used
-- in recursive calls. Operations like 'splitWith' on 'ParserK' type have
-- linear (O(n)) performance with respect to the number of compositions.
--
-- 'ParserK' is preferred over the fused 'Streamly.Data.Parser.Parser' when
-- extensive applicative, alternative and monadic composition is required, or
-- when recursive or dynamic composition of parsers is required. 'ParserK' also
-- allows efficient parsing of a stream of byte arrays, it can also break the
-- input stream into a parse result and the remaining stream so that the stream
-- can be parsed independently in segments.
--
-- == How to parse a stream?
--
-- All the fused parsers from the "Streamly.Data.Parser" module can be
-- converted to the CPS ParserK, for use with different types of parser
-- drivers, using
-- the @toParserK@ combinators -
-- Streamly.Data.Array.'Streamly.Data.Array.toParserK',
-- Streamly.Data.StreamK.'Streamly.Data.StreamK.toParserK', and
-- Streamly.Data.Array.Generic.'Streamly.Data.Array.Generic.toParserK'
--
-- To parse a stream of unboxed arrays, use
-- Streamly.Data.Array.'Streamly.Data.Array.parse' for running the parser, this
-- is the preferred and most efficient way to parse chunked input. The
-- Streamly.Data.Array.'Streamly.Data.Array.parseBreak' function returns the
-- remaining stream as well along with the parse result.
--
-- To parse a stream of boxed arrays, use
-- Streamly.Data.Array.Generic.'Streamly.Data.Array.Generic.parse' or
-- Streamly.Data.Array.Generic.'Streamly.Data.Array.Generic.parseBreak' to run
-- the parser.
--
-- To parse a stream of individual elements, use
-- Streamly.Data.StreamK.'Streamly.Data.StreamK.parse' and
-- Streamly.Data.StreamK.'Streamly.Data.StreamK.parseBreak' to run the parser.
--
-- == Applicative  Composition
--
-- Applicative parsers are simpler but we cannot use lookbehind as we can in
-- the monadic parsers.
--
-- If we have to parse "9a" or "a9" but not "99" or "aa" we can use the
-- following Applicative, backtracking parser:
--
-- >>> -- parse p1 : p2 : []
-- >>> token p1 p2 = ((:) <$> p1 <*> ((:) <$> p2 <*> pure []))
-- >>> :{
-- backtracking :: Monad m => ParserK Char m String
-- backtracking = StreamK.toParserK $
--     token (Parser.satisfy isDigit) (Parser.satisfy isAlpha) -- e.g. "9a"
--     <|>
--     token (Parser.satisfy isAlpha) (Parser.satisfy isDigit) -- e.g. "a9"
-- :}
--
-- == Monadic Composition
--
-- Monad composition can be used to implement lookbehind parsers, we can dynamically
-- compose new parsers based on the results of the previously parsed values.
--
-- In the previous example, we know that if the first parse resulted in a digit
-- at the first place then the second parse is going to fail.  However, we
-- waste that information and parse the first character again in the second
-- parse only to know that it is not an alphabetic char.  By using lookbehind
-- in a 'Monad' composition we can make dynamic decisions based on previously
-- parsed information and avoid redundant work:
--
-- >>> data DigitOrAlpha = Digit Char | Alpha Char
--
-- >>> :{
-- lookbehind :: Monad m => ParserK Char m String
-- lookbehind = do
--     x1 <- StreamK.toParserK $
--              Digit <$> Parser.satisfy isDigit
--          <|> Alpha <$> Parser.satisfy isAlpha
--     -- Note: the parse depends on what we parsed already
--     x2 <- StreamK.toParserK $
--           case x1 of
--              Digit _ -> Parser.satisfy isAlpha
--              Alpha _ -> Parser.satisfy isDigit
--     return $ case x1 of
--         Digit x -> [x,x2]
--         Alpha x -> [x,x2]
-- :}
--
-- == Experimental APIs
--
-- Please refer to "Streamly.Internal.Data.ParserK" for functions that have
-- not yet been released.
--
module Streamly.Data.ParserK
    (
    -- * Setup
    -- | To execute the code examples provided in this module in ghci, please
    -- run the following commands first.
    --
    -- $setup

    -- * Parser Type
      ParserK

    -- * Parsers

    -- -- ** Without Input
    , fromPure
    , fromEffect
    , die

    -- * Deprecated
    , fromFold
    , fromParser
    , adapt
    , adaptC
    , adaptCG
    )

where

import Control.Monad.IO.Class (MonadIO)
import Streamly.Internal.Data.Fold (Fold)
import Streamly.Internal.Data.Unbox (Unbox)
import Streamly.Internal.Data.Array (Array)
import qualified Streamly.Internal.Data.Parser as ParserD
import qualified Streamly.Internal.Data.Array as Array

import Streamly.Internal.Data.ParserK

#include "DocTestDataParserK.hs"

{-# DEPRECATED fromFold "Please use \"Array.toParserK . Parser.fromFold\" instead." #-}
{-# INLINE fromFold #-}
fromFold :: (MonadIO m, Unbox a) => Fold m a b -> ParserK (Array a) m b
fromFold = Array.toParserK . ParserD.fromFold

{-# DEPRECATED fromParser "Please use \"Array.toParserK\" instead." #-}
{-# INLINE fromParser #-}
fromParser ::
       (MonadIO m, Unbox a) => ParserD.Parser a m b -> ParserK (Array a) m b
fromParser = Array.toParserK