Skip to content

Commit 11ef2d0

Browse files
committed
[PoC] Introduce parameterizing rules with conditionals
I would like to propose a new grammar feature in this PR. I believe that parameterizing rules can express more abstract rules if, in order to share a common rule definition, we can switch the expanded RHS and actions on and off by conditions. The syntax is as follows:

```
%rule defined_rule(X, condition): /* empty */
                                | X { $$ = $1; } %if(condition) /* 1 */
                                | %if(condition) X %endif X { $$ = $1; } /* 2 */
                                ;

%%

r_true  : defined_rule(number, %true)
        ;

r_false : defined_rule(number, %false)
        ;
```

1. Postfix if: it is like a postfix `if` in Ruby. If the condition is false, it is equivalent to this line being absent.
2. If statement: if the condition is false, it is equivalent to the RHS between `%if` and `%endif` being absent.

The article below describes a problem caused by tight coupling with the Lexer: the need "to disable certain generation rules under certain conditions". I believe this feature solves that problem, which is why I am proposing it. https://yui-knk.hatenablog.com/entry/2023/04/04/190413

We can trace the RHS to [f_args](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5523-L5575) > [args_tail](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5487-L5503) > [args_forward](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5586-L5597), where f_args is the RHS of both the lambda argument (f_larglist) and the method definition argument (f_arglist). So if we can switch between RHS and actions by passing parameters, we can break up the Lexer/Parser coupling here.
1 parent 5bed7c9 commit 11ef2d0

12 files changed

Lines changed: 798 additions & 427 deletions

File tree

lib/lrama/grammar.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class Grammar
3232
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
3333
:symbols_resolver, :types,
3434
:rules, :rule_builders,
35-
:sym_to_rules, :no_stdlib
35+
:sym_to_rules, :no_stdlib, :if_count
3636

3737
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
3838
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
@@ -60,6 +60,7 @@ def initialize(rule_counter)
6060
@accept_symbol = nil
6161
@aux = Auxiliary.new
6262
@no_stdlib = false
63+
@if_count = 0
6364

6465
append_special_symbols
6566
end
@@ -173,6 +174,10 @@ def find_rules_by_symbol(sym)
173174
@sym_to_rules[sym.number]
174175
end
175176

177+
# Resets the %if nesting counter (@if_count) back to zero.
#
# The grammar actions in parser.y increment the counter on each "%if",
# decrement it on each "%endif", and report "no %if before %endif" when an
# "%endif" is seen while the counter is zero.
def initialize_if_count
178+
@if_count = 0
179+
end
180+
176181
private
177182

178183
def compute_nullable

lib/lrama/grammar/binding.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ def resolve_symbol(symbol)
1717
if symbol.is_a?(Lexer::Token::InstantiateRule)
1818
resolved_args = symbol.args.map { |arg| resolve_symbol(arg) }
1919
Lrama::Lexer::Token::InstantiateRule.new(s_value: symbol.s_value, location: symbol.location, args: resolved_args, lhs_tag: symbol.lhs_tag)
20+
elsif symbol.is_a?(Lexer::Token::ControlSyntax)
21+
resolved = symbol.dup
22+
resolved.condition = @parameter_to_arg[symbol.condition_value]
23+
resolved
2024
else
2125
parameter_to_arg(symbol) || symbol
2226
end

lib/lrama/grammar/parameterizing_rule/rhs.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,31 @@ def initialize
1212
@precedence_sym = nil
1313
end
1414

15+
# Tells whether this whole alternative is disabled by a trailing (postfix) `%if`.
#
# Only the last symbol is inspected: after resolving it through +bindings+,
# the alternative is skipped when that symbol is a ControlSyntax `%if` whose
# condition is false.
#
# @param bindings [Grammar::Binding] used to resolve the last symbol
# @return [Boolean] false when the RHS has no symbols at all
def skip?(bindings)
16+
return false unless @symbols.last
17+
last_sym = bindings.resolve_symbol(@symbols.last)
18+
last_sym.is_a?(Lexer::Token::ControlSyntax) && last_sym.if? && last_sym.false?
19+
end
20+
21+
# Resolves every symbol of this RHS through +bindings+ and applies the
# `%if` / `%endif` control syntax.
#
# Control-syntax tokens themselves are never part of the result. A regular
# symbol is dropped while ANY enclosing `%if` is false, so a true `%if`
# nested inside a false one cannot re-enable symbols that the outer block
# already suppressed (previously only the innermost condition was checked).
#
# @param bindings [Grammar::Binding] resolver for parameters and conditions
# @return [Array] resolved symbols with control syntax and skipped (or nil)
#   entries removed
# @raise [RuntimeError] when a ControlSyntax token is neither `%if` nor `%endif`
def resolve_symbols(bindings)
  # One entry per currently open `%if`; true means "that condition is false".
  skip_stack = []

  resolved_symbols = @symbols.map do |sym|
    resolved = bindings.resolve_symbol(sym)

    unless resolved.is_a?(Lexer::Token::ControlSyntax)
      # Emit the symbol only when no enclosing `%if` is false.
      next(skip_stack.any? ? nil : resolved)
    end

    if resolved.if?
      skip_stack.push(resolved.false?)
    elsif resolved.endif?
      # pop on an empty stack is a no-op (returns nil), same as before.
      skip_stack.pop
    else
      raise "Unexpected control syntax: #{resolved.condition_value}"
    end

    nil
  end

  resolved_symbols.compact
end
39+
1540
def resolve_user_code(bindings)
1641
return unless user_code
1742

lib/lrama/grammar/rule_builder.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,17 @@ def process_rhs
144144
if (created_lhs = @parameterizing_rule_resolver.created_lhs(lhs_s_value))
145145
@replaced_rhs << created_lhs
146146
else
147+
next if parameterizing_rule.rhs_list.all? { |r| r.skip?(bindings) }
147148
lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
148149
@replaced_rhs << lhs_token
149150
@parameterizing_rule_resolver.created_lhs_list << lhs_token
150151
parameterizing_rule.rhs_list.each do |r|
151152
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
152153
rule_builder.lhs = lhs_token
153-
r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
154+
next if r.skip?(bindings)
155+
r.resolve_symbols(bindings).each do |sym|
156+
rule_builder.add_rhs(sym)
157+
end
154158
rule_builder.line = line
155159
rule_builder.precedence_sym = r.precedence_sym
156160
rule_builder.user_code = r.resolve_user_code(bindings)

lib/lrama/lexer.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class Lexer
4040
%rule
4141
%no-stdlib
4242
%inline
43+
%if
44+
%endif
45+
%true
46+
%false
4347
)
4448

4549
def initialize(grammar_file)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
module Lrama
2+
class Lexer
3+
class Token
4+
# Token for the `%if` / `%endif` control directives.
#
# The directive kind is carried in s_value; +condition+ holds the token
# attached to an `%if` (nil by default, e.g. for `%endif`).
class ControlSyntax < Token
5+
# Condition token; writable so it can be replaced after construction.
attr_accessor :condition
6+
7+
# @param s_value the directive text (compared against '%if' / '%endif' below)
# @param location source location, forwarded to Token
# @param condition optional condition token, nil when omitted
def initialize(s_value:, location:, condition: nil)
8+
@condition = condition
9+
super(s_value: s_value, location: location)
10+
end
11+
12+
# True when this token is an `%if` directive.
def if?
13+
s_value == '%if'
14+
end
15+
16+
# True when this token is an `%endif` directive.
def endif?
17+
s_value == '%endif'
18+
end
19+
20+
# True only when a condition is present and its s_value is truthy;
# a missing (nil) condition therefore counts as false.
def true?
21+
!!@condition&.s_value
22+
end
23+
24+
# Logical negation of #true?.
def false?
25+
!true?
26+
end
27+
28+
# The condition token's s_value, or nil when there is no condition.
def condition_value
29+
@condition&.s_value
30+
end
31+
end
32+
end
33+
end
34+
end

lib/lrama/parser.rb

Lines changed: 525 additions & 422 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

parser.y

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ rule
243243
{
244244
rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, [], val[4], is_inline: true)
245245
@grammar.add_parameterizing_rule(rule)
246+
@grammar.initialize_if_count
246247
}
247248
| "%rule" "%inline" IDENTIFIER "(" rule_args ")" ":" rule_rhs_list
248249
{
@@ -288,7 +289,7 @@ rule
288289
builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]])
289290
result = builder
290291
}
291-
| rule_rhs IDENTIFIER "(" parameterizing_args ")" TAG?
292+
| rule_rhs IDENTIFIER "(" parameterizing_rule_args ")" TAG?
292293
{
293294
builder = val[0]
294295
builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5])
@@ -322,6 +323,21 @@ rule
322323
builder.precedence_sym = sym
323324
result = builder
324325
}
326+
| rule_rhs "%if" "(" IDENTIFIER ")"
327+
{
328+
builder = val[0]
329+
builder.symbols << Lrama::Lexer::Token::ControlSyntax.new(s_value: val[1], location: @lexer.location, condition: val[3])
330+
@grammar.if_count += 1
331+
result = builder
332+
}
333+
| rule_rhs "%endif"
334+
{
335+
on_action_error("no %if before %endif", val[0]) if @grammar.if_count == 0
336+
builder = val[0]
337+
builder.symbols << Lrama::Lexer::Token::ControlSyntax.new(s_value: val[1], location: @lexer.location)
338+
@grammar.if_count -= 1
339+
result = builder
340+
}
325341

326342
alias: # empty
327343
| string_as_id { result = val[0].s_value }
@@ -494,11 +510,22 @@ rule
494510
| "+" { result = "nonempty_list" }
495511
| "*" { result = "list" }
496512

497-
parameterizing_args: symbol { result = [val[0]] }
498-
| parameterizing_args ',' symbol { result = val[0].append(val[2]) }
513+
parameterizing_rule_args: symbol { result = [val[0]] }
514+
| parameterizing_args ',' symbol { result = val[0].append(val[2]) }
515+
| symbol parameterizing_suffix { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] }
516+
| IDENTIFIER "(" parameterizing_args ")" { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] }
517+
518+
parameterizing_args: symbol_or_bool { result = [val[0]] }
519+
| parameterizing_args ',' symbol_or_bool { result = val[0].append(val[2]) }
499520
| symbol parameterizing_suffix { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] }
500521
| IDENTIFIER "(" parameterizing_args ")" { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] }
501522

523+
symbol_or_bool: symbol
524+
| bool
525+
526+
bool: "%true" { result = Lrama::Lexer::Token::Ident.new(s_value: true) }
527+
| "%false" { result = Lrama::Lexer::Token::Ident.new(s_value: false) }
528+
502529
named_ref_opt: # empty
503530
| '[' IDENTIFIER ']' { result = val[1].s_value }
504531

sig/lrama/grammar/parameterizing_rule/rhs.rbs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ module Lrama
77
attr_reader precedence_sym: Lexer::Token?
88

99
def initialize: () -> void
10+
def skip?: (Grammar::Binding bindings) -> bool
11+
def resolve_symbols: (Grammar::Binding bindings) -> Array[untyped]
1012
def resolve_user_code: (Grammar::Binding bindings) -> Lexer::Token::UserCode?
1113
end
1214
end
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
module Lrama
2+
class Lexer
3+
class Token
4+
# Signature for the `%if` / `%endif` control-syntax token.
class ControlSyntax < Token
5+
# Condition token attached to an `%if`; nil when absent.
attr_accessor condition: Lexer::Token::Ident?
6+
7+
def initialize: (s_value: String, location: Location, ?condition: Lexer::Token::Ident?) -> void
8+
def if?: () -> bool
9+
def endif?: () -> bool
10+
def true?: () -> bool
11+
def false?: () -> bool
12+
# NOTE(review): parser.y builds the %true / %false Idents with boolean
# s_value, so this may also return a bool rather than String? - confirm.
def condition_value: () -> String?
13+
end
14+
end
15+
end
16+
end

0 commit comments

Comments
 (0)