Skip to content

Commit 0f472c0

Browse files
committed
feat: implement comment parser
1 parent e7aaad8 commit 0f472c0

28 files changed

Lines changed: 1460 additions & 108 deletions

Cabal-syntax/Cabal-syntax.cabal

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ build-type: Simple
1818
extra-doc-files:
1919
README.md ChangeLog.md
2020

21+
flag CABAL_PARSEC_DEBUG
22+
description: Enable debug build for the cabal field lexer/parser.
23+
default: False
24+
manual: True
25+
2126
source-repository head
2227
type: git
2328
location: https://github.com/haskell/cabal/
@@ -58,6 +63,11 @@ library
5863
if impl(ghc >= 8.0) && impl(ghc < 8.8)
5964
ghc-options: -Wnoncanonical-monadfail-instances
6065

66+
if flag(CABAL_PARSEC_DEBUG)
67+
CPP-Options: -DCABAL_PARSEC_DEBUG
68+
build-depends:
69+
vector
70+
6171
build-tool-depends:
6272
-- https://github.com/haskell/alex/issues/288
6373
alex:alex < 3.5.4.1 || > 3.5.4.1

Cabal-syntax/src/Distribution/FieldGrammar.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ module Distribution.FieldGrammar
2626
, Section (..)
2727
, Fields
2828
, partitionFields
29+
, extractComments
2930
, takeFields
3031
, runFieldParser
3132
, runFieldParser'
@@ -38,6 +39,7 @@ module Distribution.FieldGrammar
3839
import Distribution.Compat.Prelude
3940
import Prelude ()
4041

42+
import qualified Data.Bifunctor as Bi
4143
import qualified Data.Map.Strict as Map
4244

4345
import Distribution.FieldGrammar.Class
@@ -99,10 +101,17 @@ partitionFields = finalize . foldl' f (PS mempty mempty mempty)
99101
PS fs (MkSection name sargs sfields : s) ss
100102

101103
-- | Take all fields from the front.
104+
-- Returns a tuple containing the comments, nameless fields, and sections
102105
takeFields :: [Field ann] -> (Fields ann, [Field ann])
103106
takeFields = finalize . spanMaybe match
104107
where
105108
finalize (fs, rest) = (Map.fromListWith (flip (++)) fs, rest)
106109

107110
match (Field (Name ann name) fs) = Just (name, [MkNamelessField ann fs])
108111
match _ = Nothing
112+
113+
extractComments :: (Foldable f, Functor f) => [f (WithComments ann)] -> ([Comment ann], [f ann])
114+
extractComments = Bi.first mconcat . unzip . map extractCommentsStep
115+
116+
extractCommentsStep :: (Foldable f, Functor f) => f (WithComments ann) -> ([Comment ann], f ann)
117+
extractCommentsStep f = (foldMap justComments f, fmap unComments f)

Cabal-syntax/src/Distribution/Fields/Field.hs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE DeriveGeneric #-}
23
{-# LANGUAGE DeriveTraversable #-}
34
{-# LANGUAGE StandaloneDeriving #-}
45

@@ -17,6 +18,12 @@ module Distribution.Fields.Field
1718
, SectionArg (..)
1819
, sectionArgAnn
1920

21+
-- * Comment
22+
, Comment (..)
23+
, WithComments (..)
24+
, mapComments
25+
, mapCommentedData
26+
2027
-- * Name
2128
, FieldName
2229
, Name (..)
@@ -44,11 +51,26 @@ import qualified Data.Foldable1 as F1
4451
-- Cabal file
4552
-------------------------------------------------------------------------------
4653

54+
data Comment ann = Comment !ByteString !ann
55+
deriving (Show, Generic, Eq, Ord, Functor)
56+
57+
data WithComments ann = WithComments
58+
{ justComments :: ![Comment ann]
59+
, unComments :: !ann
60+
}
61+
deriving (Show, Generic, Eq, Ord, Functor)
62+
63+
mapComments :: ([Comment ann] -> [Comment ann]) -> WithComments ann -> WithComments ann
64+
mapComments f (WithComments cs x) = WithComments (f cs) x
65+
66+
mapCommentedData :: (ann -> ann) -> WithComments ann -> WithComments ann
67+
mapCommentedData f (WithComments cs x) = WithComments cs (f x)
68+
4769
-- | A Cabal-like file consists of a series of fields (@foo: bar@) and sections (@library ...@).
4870
data Field ann
4971
= Field !(Name ann) [FieldLine ann]
5072
| Section !(Name ann) [SectionArg ann] [Field ann]
51-
deriving (Eq, Show, Functor, Foldable, Traversable)
73+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
5274

5375
-- | @since 3.12.0.0
5476
deriving instance Ord ann => Ord (Field ann)
@@ -73,7 +95,7 @@ fieldUniverse f@(Field _ _) = [f]
7395
--
7496
-- /Invariant:/ 'ByteString' has no newlines.
7597
data FieldLine ann = FieldLine !ann !ByteString
76-
deriving (Eq, Show, Functor, Foldable, Traversable)
98+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
7799

78100
-- | @since 3.12.0.0
79101
deriving instance Ord ann => Ord (FieldLine ann)
@@ -94,7 +116,7 @@ data SectionArg ann
94116
SecArgStr !ann !ByteString
95117
| -- | everything else, mm. operators (e.g. in if-section conditionals)
96118
SecArgOther !ann !ByteString
97-
deriving (Eq, Show, Functor, Foldable, Traversable)
119+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
98120

99121
-- | @since 3.12.0.0
100122
deriving instance Ord ann => Ord (SectionArg ann)
@@ -115,7 +137,7 @@ type FieldName = ByteString
115137
--
116138
-- /Invariant/: 'ByteString' is lower-case ASCII.
117139
data Name ann = Name !ann !FieldName
118-
deriving (Eq, Show, Functor, Foldable, Traversable)
140+
deriving (Eq, Show, Functor, Foldable, Traversable, Generic)
119141

120142
-- | @since 3.12.0.0
121143
deriving instance Ord ann => Ord (Name ann)

Cabal-syntax/src/Distribution/Fields/Lexer.x

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import qualified Data.ByteString.Char8 as B.Char8
3131
import qualified Data.Word as Word
3232

3333
#ifdef CABAL_PARSEC_DEBUG
34-
import Debug.Trace
3534
import qualified Data.Vector as V
3635
import qualified Data.Text as T
3736
import qualified Data.Text.Encoding as T
@@ -84,8 +83,9 @@ tokens :-
8483
<bol_section, bol_field_layout, bol_field_braces> {
8584
@nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken }
8685
-- no @nl here to allow for comments on last line of the file with no trailing \n
87-
$spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here
88-
-- including counting line numbers
86+
$spacetab* "--" $comment* { toki TokComment }
87+
-- TODO: check the lack of @nl works here
88+
-- including counting line numbers
8989
}
9090

9191
<bol_section> {
@@ -105,9 +105,8 @@ tokens :-
105105
}
106106

107107
<in_section> {
108-
$spacetab+ ; --TODO: don't allow tab as leading space
109-
110-
"--" $comment* ;
108+
$spacetab+ ; --TODO: don't allow tab as leading space
109+
"--" $comment* { toki TokComment }
111110

112111
@name { toki TokSym }
113112
@string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) }
@@ -161,6 +160,7 @@ data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or
161160
| Colon
162161
| OpenBrace
163162
| CloseBrace
163+
| TokComment !ByteString
164164
| EOF
165165
| LexicalError InputStream --TODO: add separate string lexical error
166166
deriving Show
@@ -230,7 +230,9 @@ lexToken = do
230230
setInput inp'
231231
let !len_bytes = B.length inp - B.length inp'
232232
t <- action pos len_bytes inp
233-
--traceShow t $ return tok
233+
#ifdef CABAL_PARSEC_DEBUG
234+
traceShow t $ return tok
235+
#endif
234236
return t
235237

236238

@@ -241,10 +243,12 @@ checkPosition pos@(Position lineno colno) inp inp' len_chars = do
241243
let len_bytes = B.length inp - B.length inp'
242244
pos_txt | lineno-1 < V.length text_lines = T.take len_chars (T.drop (colno-1) (text_lines V.! (lineno-1)))
243245
| otherwise = T.empty
244-
real_txt = B.take len_bytes inp
246+
real_txt :: B.ByteString
247+
real_txt = B.take len_bytes inp
245248
when (pos_txt /= T.decodeUtf8 real_txt) $
246249
traceShow (pos, pos_txt, T.decodeUtf8 real_txt) $
247-
traceShow (take 3 (V.toList text_lines)) $ return ()
250+
traceShow (take 3 (V.toList text_lines)) $
251+
return ()
248252
where
249253
getDbgText = Lex $ \s@LexState{ dbgText = txt } -> LexResult s txt
250254
#else

0 commit comments

Comments
 (0)