Skip to content

Commit 2637158

Browse files
authored
Merge pull request #160 from robotpy/lexer-digraphs
Add support for parsing digraphs
2 parents 0d527b5 + 7b92338 commit 2637158

2 files changed

Lines changed: 676 additions & 5 deletions

File tree

cxxheaderparser/lexer.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class LexError(CxxParseError):
1818
Protocol = object
1919

2020
_line_re = re.compile(r'^\#[\t ]*(line)? (\d+) "(.*)"')
21+
_pp_directive_prefix = r"(?:\#|%:)"
2122
_multicomment_re = re.compile("\n[\\s]+\\*")
2223

2324

@@ -194,6 +195,12 @@ class PlyLexer:
194195
"ELLIPSIS",
195196
"DBL_LBRACKET",
196197
"DBL_RBRACKET",
198+
"DIGRAPH_DBL_LBRACKET",
199+
"DIGRAPH_DBL_RBRACKET",
200+
"DIGRAPH_LBRACKET",
201+
"DIGRAPH_RBRACKET",
202+
"DIGRAPH_LBRACE",
203+
"DIGRAPH_RBRACE",
197204
"DBL_COLON",
198205
"DBL_AMP",
199206
"DBL_PIPE",
@@ -446,16 +453,17 @@ def t_NAME(self, t: LexToken) -> LexToken:
446453
t.type = t.value
447454
return t
448455

449-
@TOKEN(r"\#[\t ]*pragma")
456+
@TOKEN(_pp_directive_prefix + r"[\t ]*pragma")
450457
def t_PRAGMA_DIRECTIVE(self, t: LexToken) -> LexToken:
451-
return t
458+
return self._normalize_pp_directive(t)
452459

453-
@TOKEN(r"\#[\t ]*include (.*)")
460+
@TOKEN(_pp_directive_prefix + r"[\t ]*include (.*)")
454461
def t_INCLUDE_DIRECTIVE(self, t: LexToken) -> LexToken:
455-
return t
462+
return self._normalize_pp_directive(t)
456463

457-
@TOKEN(r"\#(.*)")
464+
@TOKEN(_pp_directive_prefix + r"(.*)")
458465
def t_PP_DIRECTIVE(self, t: LexToken):
466+
t = self._normalize_pp_directive(t)
459467
# handle line macros
460468
m = _line_re.match(t.value)
461469
if m:
@@ -476,6 +484,47 @@ def t_PP_DIRECTIVE(self, t: LexToken):
476484
t,
477485
)
478486

487+
def _normalize_pp_directive(self, t: LexToken) -> LexToken:
    """Rewrite a leading ``%:`` in the token's value to ``#``.

    The token is modified in place and also returned. A value that does
    not start with ``%:`` is left untouched.

    :param t: token whose ``value`` is the matched directive text
    :returns: the same token object
    """
    digraph = "%:"
    text = t.value
    if text[: len(digraph)] == digraph:
        # Swap only the two-character prefix; the rest of the text is kept.
        t.value = "#" + text[len(digraph) :]
    return t
491+
492+
@TOKEN(r"<:<:")
def t_DIGRAPH_DBL_LBRACKET(self, t: LexToken) -> LexToken:
    """Report the four-character spelling ``<:<:`` as a ``[[`` token.

    The token is retagged as ``DBL_LBRACKET`` and its value is replaced
    with ``[[``.
    """
    t.type, t.value = "DBL_LBRACKET", "[["
    return t
497+
498+
@TOKEN(r":>:>")
def t_DIGRAPH_DBL_RBRACKET(self, t: LexToken) -> LexToken:
    """Report the four-character spelling ``:>:>`` as a ``]]`` token.

    The token is retagged as ``DBL_RBRACKET`` and its value is replaced
    with ``]]``.
    """
    t.type, t.value = "DBL_RBRACKET", "]]"
    return t
503+
504+
@TOKEN(r"<:(?!:[^:>])")
def t_DIGRAPH_LBRACKET(self, t: LexToken) -> LexToken:
    """Report the digraph ``<:`` as a ``[`` token.

    The negative lookahead rejects ``<:`` when it is followed by ``:`` and
    then a character that is neither ``:`` nor ``>``, so this rule does not
    fire on text such as ``<::name``.
    NOTE(review): presumably this mirrors the C++ lexing exception for
    ``<::`` -- confirm against the language standard.
    """
    bracket = "["
    # Token type and value are both the literal bracket character.
    t.type = bracket
    t.value = bracket
    return t
509+
510+
@TOKEN(r":>")
def t_DIGRAPH_RBRACKET(self, t: LexToken) -> LexToken:
    """Report the digraph ``:>`` as a ``]`` token."""
    bracket = "]"
    # Token type and value are both the literal bracket character.
    t.type = bracket
    t.value = bracket
    return t
515+
516+
@TOKEN(r"<%")
def t_DIGRAPH_LBRACE(self, t: LexToken) -> LexToken:
    """Report the digraph ``<%`` as a ``{`` token."""
    brace = "{"
    # Token type and value are both the literal brace character.
    t.type = brace
    t.value = brace
    return t
521+
522+
@TOKEN(r"%>")
def t_DIGRAPH_RBRACE(self, t: LexToken) -> LexToken:
    """Report the digraph ``%>`` as a ``}`` token."""
    brace = "}"
    # Token type and value are both the literal brace character.
    t.type = brace
    t.value = brace
    return t
527+
479528
t_DIVIDE = r"/(?!/)"
480529
t_ELLIPSIS = r"\.\.\."
481530
t_DBL_LBRACKET = r"\[\["

0 commit comments

Comments
 (0)