Skip to content

Commit fa4f478

Browse files
committed
Handle comments before rule colons
1 parent c478fe9 commit fa4f478

4 files changed

Lines changed: 74 additions & 2 deletions

File tree

lib/lrama/lexer.rb

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class Lexer
6262
%categories
6363
%start
6464
).freeze #: Array[String]
65+
IDENTIFIER_PATTERN = /[a-zA-Z_.][-a-zA-Z0-9_.]*/.freeze #: Regexp
6566

6667
# @rbs (GrammarFile grammar_file) -> void
6768
def initialize(grammar_file)
@@ -135,10 +136,10 @@ def lex_token
135136
return [:STRING, Lrama::Lexer::Token::Str.new(s_value: %Q(#{@scanner.matched}), location: location)]
136137
when @scanner.scan(/\d+/)
137138
return [:INTEGER, Lrama::Lexer::Token::Int.new(s_value: Integer(@scanner.matched), location: location)]
138-
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
139+
when @scanner.scan(IDENTIFIER_PATTERN)
139140
token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
140141
type =
141-
if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
142+
if identifier_colon?
142143
:IDENT_COLON
143144
else
144145
:IDENTIFIER
@@ -196,6 +197,36 @@ def lex_c_code
196197

197198
private
198199

200+
# Reports whether the identifier that was just scanned is followed by a colon,
# i.e. whether it is the left-hand side of a rule. Between the identifier and
# the colon it accepts whitespace, comments and one optional bracketed
# identifier (`[name]`), with whitespace/comments allowed around each part.
#
# The lookahead runs on a separate scanner, so @scanner's position and match
# data are left untouched.
#
# @rbs () -> bool
def identifier_colon?
  # Share the input string and seek to the current byte offset rather than
  # scanning a copy of `@scanner.rest`: copying the remainder for every
  # identifier makes lexing quadratic in the size of the grammar file.
  scanner = StringScanner.new(@scanner.string)
  scanner.pos = @scanner.pos
  skip_trivia(scanner)

  if scanner.skip(/\[/)
    skip_trivia(scanner)
    return false unless scanner.skip(IDENTIFIER_PATTERN)

    skip_trivia(scanner)
    return false unless scanner.skip(/\]/)
  end

  skip_trivia(scanner)
  # match? yields the match length or nil; only the yes/no answer is needed.
  !scanner.match?(/:/).nil?
end
216+
217+
# Advances +scanner+ past any run of whitespace, `/* ... */` block comments
# and `// ...` line comments, stopping at the first position where none of
# them starts. An unterminated block comment is not consumed.
#
# @rbs (StringScanner scanner) -> void
def skip_trivia(scanner)
  loop do
    # skip moves the scan pointer like scan does, but returns the match
    # length instead of allocating the matched text, which is never used here.
    next if scanner.skip(/\s+/)
    next if scanner.skip(%r{/\*[\s\S]*?\*/}) # lazy, so it ends at the first "*/"
    next if scanner.skip(%r{//.*(?:\n|$)})   # through the newline, or to end of input

    break
  end
end
229+
199230
# @rbs () -> void
200231
def lex_comment
201232
until @scanner.eos? do

sig/generated/lrama/lexer.rbs

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

spec/lrama/lexer_spec.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,4 +452,18 @@
452452
}
453453
CODE
454454
end
455+
456+
it 'lexes a rule name with a block comment before the colon as IDENT_COLON' do
  # The rule name and its colon are separated only by newlines and a block comment.
  source = <<~GRAMMAR
    %%
    stmt
    /* some block comment */
    :
  GRAMMAR
  lexer = Lrama::Lexer.new(Lrama::Lexer::GrammarFile.new("commented_rule_lhs.y", source))

  expected_tokens = [
    ['%%', token_class::Token.new(s_value: '%%')],
    [:IDENT_COLON, token_class::Ident.new(s_value: 'stmt')],
    [':', token_class::Token.new(s_value: ':')]
  ]

  # Tokens must come out in exactly this order; the comment itself yields no token.
  expected_tokens.each do |expected|
    expect(lexer.next_token).to eq(expected)
  end
end
455469
end

spec/lrama/parser_spec.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4323,6 +4323,25 @@ class : keyword_class tSTRING keyword_end { code 1 }
43234323
end
43244324
end
43254325

4326+
it "can parse a rule when a block comment appears before the colon" do
  # `stmt` has a block comment between its name and the colon.
  grammar = <<~GRAMMAR
    %%

    program:
    stmt
    ;

    stmt
    /* some block comment */
    : %empty
    ;
  GRAMMAR

  parser = Lrama::Parser.new(grammar, "parse.y")

  expect { parser.parse }.not_to raise_error
end
4344+
43264345
context "includes invalid named references" do
43274346
it "raise an error" do
43284347
y = <<~INPUT

0 commit comments

Comments
 (0)