Skip to content

Commit 5bd19b0

Browse files
author
Kristian Larsson
committed
Scan top-level chunks in bulk
The top-level chunk scanner still advanced through ordinary source, comments, and string text one character at a time. Batch those runs with Text operations while keeping newline, delimiter, interpolation, and escape handling on the existing state-machine paths. The scanner still preserves previous-character state, line-start tracking, continuations, and chunk boundaries, while avoiding per-character work for large ordinary regions of generated modules.
1 parent 4d1310c commit 5bd19b0

1 file changed

Lines changed: 108 additions & 15 deletions

File tree

compiler/lib/src/Acton/Parser.hs

Lines changed: 108 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,9 @@ scanTopLevelChunks src start emit
15551555
let depth' = max 0 (scanDepth st - 1)
15561556
(_, rest, st') = consumeMany True 1 i xs st { scanDepth = depth' }
15571557
in go (i + 1) rest st'
1558+
| Just n <- codeRunLength xs ->
1559+
let (rest, st') = consumeCodeRun n xs st
1560+
in go (i + n) rest st'
15581561
| otherwise ->
15591562
let (_, rest, st') = consumeMany True 1 i xs st
15601563
in go (i + 1) rest st'
@@ -1576,10 +1579,10 @@ scanTopLevelChunks src start emit
15761579
_ -> 1
15771580
(_, xs', st') = consumeMany False n i xs st
15781581
in go (i + n) xs' st'
1579-
| scanInterpolate mode && startsWith "{{" xs ->
1582+
| scanInterpolate mode && startsWith2 '{' '{' xs ->
15801583
let (_, xs', st') = consumeMany False 2 i xs st
15811584
in go (i + 2) xs' st'
1582-
| scanInterpolate mode && startsWith "}}" xs ->
1585+
| scanInterpolate mode && startsWith2 '}' '}' xs ->
15831586
let (_, xs', st') = consumeMany False 2 i xs st
15841587
in go (i + 2) xs' st'
15851588
| scanInterpolate mode && c == '{' ->
@@ -1593,6 +1596,9 @@ scanTopLevelChunks src start emit
15931596
let n = if scanTriple mode then 3 else 1
15941597
(_, xs', st') = consumeMany False n i xs st { scanModes = rest }
15951598
in go (i + n) xs' st'
1599+
| Just n <- stringTextRunLength mode xs ->
1600+
let (xs', st') = consumeStringTextRun n xs st
1601+
in go (i + n) xs' st'
15961602
| otherwise ->
15971603
let (_, xs', st') = consumeMany False 1 i xs st
15981604
in go (i + 1) xs' st'
@@ -1624,7 +1630,7 @@ scanTopLevelChunks src start emit
16241630
case T.uncons xs of
16251631
Just (q, _)
16261632
| q == '"' || q == '\'' ->
1627-
let triple = startsWith [q, q, q] xs
1633+
let triple = startsWith3 q q q xs
16281634
raw = scanPrev1 st == Just 'r' ||
16291635
(scanPrev2 st == Just 'r' && scanPrev1 st == Just 'b')
16301636
bytes = scanPrev1 st == Just 'b' ||
@@ -1635,10 +1641,12 @@ scanTopLevelChunks src start emit
16351641
_ -> Nothing
16361642

16371643
closesString xs mode
1638-
| scanTriple mode = startsWith (replicate 3 (scanQuote mode)) xs
1644+
| scanTriple mode = startsWith3 q q q xs
16391645
| otherwise = case T.uncons xs of
1640-
Just (c, _) -> c == scanQuote mode
1646+
Just (c, _) -> c == q
16411647
Nothing -> False
1648+
where
1649+
q = scanQuote mode
16421650

16431651
tripleInterpolatedQuoteText xs mode
16441652
| scanTriple mode && scanInterpolate mode =
@@ -1650,18 +1658,103 @@ scanTopLevelChunks src start emit
16501658

16511659
quoteRunLength q = T.length . T.takeWhile (== q)
16521660

1653-
startsWith prefix xs = T.pack prefix `T.isPrefixOf` xs
1661+
startsWith2 a b xs =
1662+
case T.uncons xs of
1663+
Just (c1, rest1) | c1 == a ->
1664+
case T.uncons rest1 of
1665+
Just (c2, _) -> c2 == b
1666+
Nothing -> False
1667+
_ -> False
16541668

1655-
skipComment i xs st =
1669+
startsWith3 a b c xs =
16561670
case T.uncons xs of
1657-
Nothing -> go i T.empty st
1658-
Just (c, _)
1659-
| c == '\n' ->
1660-
let (_, xs', st') = consumeMany False 1 i xs st
1661-
in go (i + 1) xs' st'
1662-
| otherwise ->
1663-
let (_, xs', st') = consumeMany False 1 i xs st
1664-
in skipComment (i + 1) xs' st'
1671+
Just (c1, rest1) | c1 == a ->
1672+
case T.uncons rest1 of
1673+
Just (c2, rest2) | c2 == b ->
1674+
case T.uncons rest2 of
1675+
Just (c3, _) -> c3 == c
1676+
Nothing -> False
1677+
_ -> False
1678+
_ -> False
1679+
1680+
codeRunLength xs =
1681+
let n = T.length (T.takeWhile isCodeRunChar xs)
1682+
in if n == 0 then Nothing else Just n
1683+
1684+
stringTextRunLength mode xs =
1685+
let n = T.length (T.takeWhile (isStringTextRunChar mode) xs)
1686+
in if n == 0 then Nothing else Just n
1687+
1688+
isCodeRunChar c =
1689+
c /= '#' && c /= '"' && c /= '\'' && c /= '\n' && c /= '\\' &&
1690+
not (c `elem` ("()[]{}" :: String))
1691+
1692+
isStringTextRunChar mode c =
1693+
c /= '\\' && c /= '\n' && c /= scanQuote mode &&
1694+
(not (scanInterpolate mode) || (c /= '{' && c /= '}'))
1695+
1696+
consumeCodeRun n xs st =
1697+
let (run, rest) = T.splitAt n xs
1698+
in (rest, advanceCodeRun run st)
1699+
1700+
consumeStringTextRun n xs st =
1701+
let (run, rest) = T.splitAt n xs
1702+
in (rest, advanceStringTextRun run st)
1703+
1704+
advanceCodeRun run st =
1705+
case T.unsnoc run of
1706+
Nothing -> st
1707+
Just (front, lastC) ->
1708+
let prev2' = case T.unsnoc front of
1709+
Just (_, c) -> Just c
1710+
Nothing -> scanPrev1 st
1711+
significant = T.any (\c -> c /= ' ' && c /= '\t' && c /= '\r') run
1712+
in st
1713+
{ scanPrev2 = prev2'
1714+
, scanPrev1 = Just lastC
1715+
, scanAtLineStart = False
1716+
, scanContinued = False
1717+
, scanBackslash = if significant then False else scanBackslash st
1718+
}
1719+
1720+
advanceStringTextRun run st =
1721+
case T.unsnoc run of
1722+
Nothing -> st
1723+
Just (front, lastC) ->
1724+
let prev2' = case T.unsnoc front of
1725+
Just (_, c) -> Just c
1726+
Nothing -> scanPrev1 st
1727+
in st
1728+
{ scanPrev2 = prev2'
1729+
, scanPrev1 = Just lastC
1730+
, scanAtLineStart = False
1731+
, scanContinued = False
1732+
}
1733+
1734+
skipComment i xs st =
1735+
let (comment, rest) = T.break (== '\n') xs
1736+
n = T.length comment
1737+
st' = advanceCommentRun comment st
1738+
i' = i + n
1739+
in case T.uncons rest of
1740+
Just ('\n', _) ->
1741+
let (_, xs', st'') = consumeMany False 1 i' rest st'
1742+
in go (i' + 1) xs' st''
1743+
_ -> go i' rest st'
1744+
1745+
advanceCommentRun run st =
1746+
case T.unsnoc run of
1747+
Nothing -> st
1748+
Just (front, lastC) ->
1749+
let prev2' = case T.unsnoc front of
1750+
Just (_, c) -> Just c
1751+
Nothing -> scanPrev1 st
1752+
in st
1753+
{ scanPrev2 = prev2'
1754+
, scanPrev1 = Just lastC
1755+
, scanAtLineStart = False
1756+
, scanContinued = False
1757+
}
16651758

16661759
consumeMany _ 0 i xs st = (i, xs, st)
16671760
consumeMany track n i xs st =

0 commit comments

Comments
 (0)