Skip to content

Commit 6d10b22

Browse files
committed
1 parent 92dd5dd commit 6d10b22

38 files changed

Lines changed: 3736 additions & 506 deletions

NEWS.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22

33
## Lrama 0.8.0 (2026-xx-xx)
44

5+
### [EXPERIMENTAL] Support the generation of the PSLR(1) parser described in the dissertation below
6+
7+
Support the generation of the PSLR(1) parser described in this dissertation.
8+
https://open.clemson.edu/all_dissertations/519/
9+
10+
If you use the PSLR(1) parser, you can write the following directives in your grammar file.
11+
12+
```yacc
13+
%token-pattern RSHIFT />>/ "right shift"
14+
%token-pattern RANGLE />/ "right angle"
15+
16+
%lex-prec RANGLE -s RSHIFT
17+
```
18+
19+
Note that the PSLR(1) parser is currently an experimental feature. If you find any bugs, please report them to us. Thank you.
20+
521
## Lrama 0.7.1 (2025-12-24)
622

723
### Optimize IELR

lib/lrama.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
require_relative "lrama/output"
1616
require_relative "lrama/parser"
1717
require_relative "lrama/reporter"
18+
require_relative "lrama/scanner_fsa"
1819
require_relative "lrama/state"
1920
require_relative "lrama/states"
21+
require_relative "lrama/length_precedences"
2022
require_relative "lrama/tracer"
2123
require_relative "lrama/version"
2224
require_relative "lrama/warnings"

lib/lrama/grammar.rb

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
require_relative "grammar/symbols"
2121
require_relative "grammar/type"
2222
require_relative "grammar/union"
23+
require_relative "grammar/token_pattern"
24+
require_relative "grammar/lex_prec"
2325
require_relative "lexer"
2426

2527
module Lrama
@@ -68,6 +70,8 @@ class Grammar
6870
# @union: Union
6971
# @precedences: Array[Precedence]
7072
# @start_nterm: Lrama::Lexer::Token::Base?
73+
# @token_patterns: Array[Grammar::TokenPattern]
74+
# @lex_prec: Grammar::LexPrec
7175

7276
extend Forwardable
7377

@@ -100,6 +104,8 @@ class Grammar
100104
attr_accessor :locations #: bool
101105
attr_accessor :define #: Hash[String, String]
102106
attr_accessor :required #: bool
107+
attr_reader :token_patterns #: Array[Grammar::TokenPattern]
108+
attr_reader :lex_prec #: Grammar::LexPrec
103109

104110
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value,
105111
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
@@ -133,6 +139,9 @@ def initialize(rule_counter, locations, define = {})
133139
@required = false
134140
@precedences = []
135141
@start_nterm = nil
142+
@token_patterns = []
143+
@lex_prec = Grammar::LexPrec.new
144+
@token_pattern_counter = 0
136145

137146
append_special_symbols
138147
end
@@ -304,6 +313,48 @@ def ielr_defined?
304313
@define.key?('lr.type') && @define['lr.type'] == 'ielr'
305314
end
306315

316+
# @rbs () -> bool
317+
def pslr_defined?
318+
@define.key?('lr.type') && @define['lr.type'] == 'pslr'
319+
end
320+
321+
# Add a token pattern from %token-pattern directive
322+
# @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer) -> Grammar::TokenPattern
323+
def add_token_pattern(id:, pattern:, alias_name: nil, tag: nil, lineno:)
324+
token_pattern = Grammar::TokenPattern.new(
325+
id: id,
326+
pattern: pattern,
327+
alias_name: alias_name,
328+
tag: tag,
329+
lineno: lineno,
330+
definition_order: @token_pattern_counter
331+
)
332+
@token_pattern_counter += 1
333+
@token_patterns << token_pattern
334+
335+
# Also register as a terminal symbol
336+
add_term(id: id, alias_name: alias_name, tag: tag)
337+
338+
token_pattern
339+
end
340+
341+
# Add a lex-prec rule from %lex-prec directive
342+
# @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Grammar::LexPrec::Rule
343+
def add_lex_prec_rule(left_token:, operator:, right_token:, lineno:)
344+
@lex_prec.add_rule(
345+
left_token: left_token,
346+
operator: operator,
347+
right_token: right_token,
348+
lineno: lineno
349+
)
350+
end
351+
352+
# Find a token pattern by its name
353+
# @rbs (String name) -> Grammar::TokenPattern?
354+
def find_token_pattern(name)
355+
@token_patterns.find { |tp| tp.name == name }
356+
end
357+
307358
private
308359

309360
# @rbs () -> void

lib/lrama/grammar/lex_prec.rb

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# rbs_inline: enabled
2+
# frozen_string_literal: true
3+
4+
module Lrama
5+
class Grammar
6+
# Represents lexical precedence rules defined by %lex-prec directive
7+
# Based on Definition 3.2.3, 3.2.4, 3.2.10 from the PSLR dissertation
8+
#
9+
# Example: %lex-prec RANGLE -s RSHIFT # RANGLE is shorter than RSHIFT
10+
# %lex-prec IF - ID # IF has higher priority than ID (same length)
11+
class LexPrec
12+
# Precedence relation types
13+
# "," : Same priority (lex-tie)
14+
# "-" : Left has higher priority than right
15+
# "-s" : Left is shorter match priority over right
16+
SAME_PRIORITY = :same #: Symbol
17+
HIGHER = :higher #: Symbol
18+
SHORTER = :shorter #: Symbol
19+
20+
# Represents a single precedence rule
21+
class Rule
22+
attr_reader :left_token #: Lexer::Token::Ident
23+
attr_reader :operator #: Symbol
24+
attr_reader :right_token #: Lexer::Token::Ident
25+
attr_reader :lineno #: Integer
26+
27+
# @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> void
28+
def initialize(left_token:, operator:, right_token:, lineno:)
29+
@left_token = left_token
30+
@operator = operator
31+
@right_token = right_token
32+
@lineno = lineno
33+
end
34+
35+
# @rbs () -> String
36+
def left_name
37+
@left_token.s_value
38+
end
39+
40+
# @rbs () -> String
41+
def right_name
42+
@right_token.s_value
43+
end
44+
end
45+
46+
attr_reader :rules #: Array[Rule]
47+
48+
# @rbs () -> void
49+
def initialize
50+
@rules = []
51+
end
52+
53+
# @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Rule
54+
def add_rule(left_token:, operator:, right_token:, lineno:)
55+
rule = Rule.new(
56+
left_token: left_token,
57+
operator: operator,
58+
right_token: right_token,
59+
lineno: lineno
60+
)
61+
@rules << rule
62+
rule
63+
end
64+
65+
# Check if token t1 has higher priority than t2
66+
# Based on Definition 3.2.4
67+
# @rbs (String t1, String t2) -> bool
68+
def higher_priority?(t1, t2)
69+
@rules.any? do |rule|
70+
rule.operator == HIGHER &&
71+
rule.left_name == t1 &&
72+
rule.right_name == t2
73+
end
74+
end
75+
76+
# Check if token t1 has shorter-match priority over t2
77+
# Based on Definition 3.2.15
78+
# @rbs (String t1, String t2) -> bool
79+
def shorter_priority?(t1, t2)
80+
@rules.any? do |rule|
81+
rule.operator == SHORTER &&
82+
rule.left_name == t1 &&
83+
rule.right_name == t2
84+
end
85+
end
86+
87+
# Check if tokens t1 and t2 are in a lex-tie relationship
88+
# @rbs (String t1, String t2) -> bool
89+
def same_priority?(t1, t2)
90+
@rules.any? do |rule|
91+
rule.operator == SAME_PRIORITY &&
92+
((rule.left_name == t1 && rule.right_name == t2) ||
93+
(rule.left_name == t2 && rule.right_name == t1))
94+
end
95+
end
96+
end
97+
end
98+
end

lib/lrama/grammar/token_pattern.rb

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# rbs_inline: enabled
2+
# frozen_string_literal: true
3+
4+
module Lrama
5+
class Grammar
6+
# Represents a token pattern defined by %token-pattern directive
7+
# Example: %token-pattern RSHIFT />>/ "right shift"
8+
class TokenPattern
9+
attr_reader :id #: Lexer::Token::Ident
10+
attr_reader :pattern #: Lexer::Token::Regex
11+
attr_reader :alias_name #: String?
12+
attr_reader :tag #: Lexer::Token::Tag?
13+
attr_reader :lineno #: Integer
14+
attr_reader :definition_order #: Integer
15+
16+
# @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer, definition_order: Integer) -> void
17+
def initialize(id:, pattern:, alias_name: nil, tag: nil, lineno:, definition_order:)
18+
@id = id
19+
@pattern = pattern
20+
@alias_name = alias_name
21+
@tag = tag
22+
@lineno = lineno
23+
@definition_order = definition_order
24+
end
25+
26+
# @rbs () -> String
27+
def name
28+
@id.s_value
29+
end
30+
31+
# Returns the regex pattern string (without slashes)
32+
# @rbs () -> String
33+
def regex_pattern
34+
@pattern.pattern
35+
end
36+
end
37+
end
38+
end

lib/lrama/length_precedences.rb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# rbs_inline: enabled
2+
# frozen_string_literal: true
3+
4+
module Lrama
5+
# Length precedences table for PSLR(1)
6+
# Based on Definition 3.2.15 from the PSLR dissertation
7+
#
8+
# Determines which token should be preferred when there's a length conflict:
9+
# - :left - the shorter token (t1) should be preferred
10+
# - :right - the longer token (t2) should be preferred
11+
# - :undefined - no preference defined, use default (longest match)
12+
class LengthPrecedences
13+
# Result of length precedence lookup
14+
LEFT = :left #: Symbol
15+
RIGHT = :right #: Symbol
16+
UNDEFINED = :undefined #: Symbol
17+
18+
attr_reader :table #: Hash[[String, String], Symbol]
19+
20+
# @rbs (Grammar::LexPrec lex_prec) -> void
21+
def initialize(lex_prec)
22+
@table = build_table(lex_prec)
23+
end
24+
25+
# Get the length precedence between two tokens
26+
# @rbs (String t1, String t2) -> Symbol
27+
def precedence(t1, t2)
28+
@table[[t1, t2]] || UNDEFINED
29+
end
30+
31+
# Check if t1 (shorter) should be preferred over t2 (longer)
32+
# @rbs (String t1, String t2) -> bool
33+
def prefer_shorter?(t1, t2)
34+
precedence(t1, t2) == LEFT
35+
end
36+
37+
private
38+
39+
# Build the length precedence table from lex-prec rules
40+
# @rbs (Grammar::LexPrec lex_prec) -> Hash[[String, String], Symbol]
41+
def build_table(lex_prec)
42+
table = {}
43+
44+
lex_prec.rules.each do |rule|
45+
case rule.operator
46+
when Grammar::LexPrec::SHORTER
47+
# t1 -s t2: t1 (shorter) should be preferred over t2 (longer)
48+
table[[rule.left_name, rule.right_name]] = LEFT
49+
# Inverse: t2 (longer) should not be preferred over t1 (shorter)
50+
table[[rule.right_name, rule.left_name]] = RIGHT
51+
end
52+
end
53+
54+
table
55+
end
56+
end
57+
end

lib/lrama/lexer.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ class Lexer
1818
# [::Symbol, Token::Char] |
1919
# [::Symbol, Token::Str] |
2020
# [::Symbol, Token::Int] |
21-
# [::Symbol, Token::Ident]
21+
# [::Symbol, Token::Ident] |
22+
# [::Symbol, Token::Regex]
2223
#
2324
# type c_token = [:C_DECLARATION, Token::UserCode]
2425

@@ -32,6 +33,7 @@ class Lexer
3233
PERCENT_TOKENS = %w(
3334
%union
3435
%token
36+
%token-pattern
3537
%type
3638
%nterm
3739
%left
@@ -43,6 +45,7 @@ class Lexer
4345
%printer
4446
%destructor
4547
%lex-param
48+
%lex-prec
4649
%parse-param
4750
%initial-action
4851
%precedence
@@ -121,7 +124,7 @@ def lex_token
121124
return
122125
when @scanner.scan(/#{SYMBOLS.join('|')}/)
123126
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
124-
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
127+
when @scanner.scan(/#{PERCENT_TOKENS.sort_by { |s| -s.length }.join('|')}/)
125128
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
126129
when @scanner.scan(/[\?\+\*]/)
127130
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
@@ -133,6 +136,12 @@ def lex_token
133136
return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
134137
when @scanner.scan(/".*?"/)
135138
return [:STRING, Lrama::Lexer::Token::Str.new(s_value: %Q(#{@scanner.matched}), location: location)]
139+
when @scanner.scan(%r{/[^/]+/})
140+
return [:REGEX, Lrama::Lexer::Token::Regex.new(s_value: @scanner.matched, location: location)]
141+
when @scanner.scan(/-s(?=\s)/)
142+
return ['-s', Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
143+
when @scanner.scan(/-(?=\s)/)
144+
return ['-', Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
136145
when @scanner.scan(/\d+/)
137146
return [:INTEGER, Lrama::Lexer::Token::Int.new(s_value: Integer(@scanner.matched), location: location)]
138147
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)

lib/lrama/lexer/token.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
require_relative 'token/ident'
88
require_relative 'token/instantiate_rule'
99
require_relative 'token/int'
10+
require_relative 'token/regex'
1011
require_relative 'token/str'
1112
require_relative 'token/tag'
1213
require_relative 'token/token'

lib/lrama/lexer/token/regex.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# rbs_inline: enabled
2+
# frozen_string_literal: true
3+
4+
module Lrama
5+
class Lexer
6+
module Token
7+
# Token class for regex patterns used in %token-pattern directive
8+
# Example: /[a-zA-Z_][a-zA-Z0-9_]*/
9+
class Regex < Base
10+
# Returns the regex pattern without the surrounding slashes
11+
# @rbs () -> String
12+
def pattern
13+
# Remove leading and trailing slashes
14+
s_value[1..-2].to_s
15+
end
16+
end
17+
end
18+
end
19+
end

0 commit comments

Comments
 (0)