Skip to content

Commit e64b15c

Browse files
authored
Merge pull request #551 from ratmice/header_lexerkind
ctbuilder: Parse lexerkind from grmtools section
2 parents 28f59b5 + c2b68ed commit e64b15c

2 files changed

Lines changed: 187 additions & 9 deletions

File tree

lrlex/src/lib/ctbuilder.rs

Lines changed: 151 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ use std::{
1515
};
1616

1717
use bincode::Encode;
18-
use cfgrammar::{newlinecache::NewlineCache, Spanned};
18+
use cfgrammar::{
19+
header::{GrmtoolsSectionParser, HeaderError, HeaderErrorKind, Namespaced, Setting, Value},
20+
newlinecache::NewlineCache,
21+
Spanned,
22+
};
1923
use lazy_static::lazy_static;
2024
use lrpar::{CTParserBuilder, LexerTypes};
2125
use num_traits::{AsPrimitive, PrimInt, Unsigned};
@@ -37,6 +41,69 @@ pub enum LexerKind {
3741
LRNonStreamingLexer,
3842
}
3943

44+
impl TryFrom<&Value> for LexerKind {
45+
type Error = cfgrammar::header::HeaderError;
46+
fn try_from(it: &Value) -> Result<LexerKind, Self::Error> {
47+
match it {
48+
Value::Flag(_, loc) => Err(HeaderError {
49+
kind: HeaderErrorKind::ConversionError(
50+
"LexerKind",
51+
"Expected `LexerKind` found bool",
52+
),
53+
locations: vec![loc.clone()],
54+
}),
55+
Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
56+
kind: HeaderErrorKind::ConversionError(
57+
"LexerKind",
58+
"Expected `LexerKind` found numeric",
59+
),
60+
locations: vec![loc.clone()],
61+
}),
62+
Value::Setting(Setting::Constructor {
63+
ctor:
64+
Namespaced {
65+
namespace: _,
66+
member: (_, loc),
67+
},
68+
arg: _,
69+
}) => Err(HeaderError {
70+
kind: HeaderErrorKind::ConversionError(
71+
"LexerKind",
72+
"Expected `LexerKind` found constructor",
73+
),
74+
locations: vec![loc.clone()],
75+
}),
76+
Value::Setting(Setting::Unitary(Namespaced {
77+
namespace,
78+
member: (member, member_loc),
79+
})) => {
80+
if let Some((ns, loc)) = namespace {
81+
if ns.to_lowercase() != "lexerkind" {
82+
return Err(HeaderError {
83+
kind: HeaderErrorKind::ConversionError(
84+
"LexerKind",
85+
"Expected namespace `LexerKind`",
86+
),
87+
locations: vec![loc.clone()],
88+
});
89+
}
90+
}
91+
if member.to_lowercase() != "lrnonstreaminglexer" {
92+
return Err(HeaderError {
93+
kind: HeaderErrorKind::ConversionError(
94+
"LexerKind",
95+
"Unknown `LexerKind` Variant",
96+
),
97+
locations: vec![member_loc.clone()],
98+
});
99+
}
100+
101+
Ok(LexerKind::LRNonStreamingLexer)
102+
}
103+
}
104+
}
105+
}
106+
40107
/// Specify the visibility of the module generated by [CTLexerBuilder].
41108
#[derive(Clone, PartialEq, Eq, Debug)]
42109
#[non_exhaustive]
@@ -129,7 +196,7 @@ where
129196
lrpar_config: Option<Box<dyn Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>>>,
130197
lexer_path: Option<PathBuf>,
131198
output_path: Option<PathBuf>,
132-
lexerkind: LexerKind,
199+
lexerkind: Option<LexerKind>,
133200
mod_name: Option<&'a str>,
134201
visibility: Visibility,
135202
rust_edition: RustEdition,
@@ -138,6 +205,8 @@ where
138205
allow_missing_tokens_in_parser: bool,
139206
force_lex_flags: LexFlags,
140207
default_lex_flags: LexFlags,
208+
#[cfg(test)]
209+
inspect_lexerkind_cb: Option<Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>>,
141210
}
142211

143212
impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
@@ -174,7 +243,7 @@ where
174243
lrpar_config: None,
175244
lexer_path: None,
176245
output_path: None,
177-
lexerkind: LexerKind::LRNonStreamingLexer,
246+
lexerkind: None,
178247
mod_name: None,
179248
visibility: Visibility::Private,
180249
rust_edition: RustEdition::Rust2021,
@@ -183,6 +252,8 @@ where
183252
allow_missing_tokens_in_parser: true,
184253
force_lex_flags: UNSPECIFIED_LEX_FLAGS,
185254
default_lex_flags: UNSPECIFIED_LEX_FLAGS,
255+
#[cfg(test)]
256+
inspect_lexerkind_cb: None,
186257
}
187258
}
188259

@@ -287,7 +358,7 @@ where
287358

288359
/// Set the type of lexer to be generated to `lexerkind`.
289360
pub fn lexerkind(mut self, lexerkind: LexerKind) -> Self {
290-
self.lexerkind = lexerkind;
361+
self.lexerkind = Some(lexerkind);
291362
self
292363
}
293364

@@ -367,12 +438,32 @@ where
367438
}
368439
lk.insert(outp.clone());
369440
}
370-
371441
let lex_src = read_to_string(lexerp)
372442
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
443+
let (header, _) = GrmtoolsSectionParser::new(&lex_src, false)
444+
.parse()
445+
.map_err(|es| {
446+
es.iter()
447+
.map(|e| e.to_string())
448+
.collect::<Vec<_>>()
449+
.join("\n")
450+
})?;
451+
let lexerkind = match self.lexerkind {
452+
Some(lexerkind) => lexerkind,
453+
None => {
454+
if let Some((_, lk_val)) = header.get("lexerkind") {
455+
LexerKind::try_from(lk_val)?
456+
} else {
457+
LexerKind::LRNonStreamingLexer
458+
}
459+
}
460+
};
373461
let line_cache = NewlineCache::from_str(&lex_src).unwrap();
374-
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match self
375-
.lexerkind
462+
#[cfg(test)]
463+
if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
464+
inspect_lexerkind_cb(lexerkind)?
465+
}
466+
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match lexerkind
376467
{
377468
LexerKind::LRNonStreamingLexer => {
378469
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
@@ -529,7 +620,7 @@ where
529620
let rules = vec![#(#rules),*];
530621
});
531622
}
532-
let lexerdef_ty = match self.lexerkind {
623+
let lexerdef_ty = match lexerkind {
533624
LexerKind::LRNonStreamingLexer => {
534625
quote!(::lrlex::LRNonStreamingLexerDef)
535626
}
@@ -801,6 +892,15 @@ where
801892
self.default_lex_flags = flags;
802893
self
803894
}
895+
896+
/// Test-only hook: stores `cb` in the builder's `inspect_lexerkind_cb` slot, replacing
/// any callback registered earlier, and hands the builder back for further chaining.
///
/// The callback receives a [`LexerKind`] and may report a failure through its
/// `Result`.
#[cfg(test)]
pub fn inspect_lexerkind(
    mut self,
    cb: Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>,
) -> Self {
    // The previously stored callback (if any) is intentionally discarded.
    let _ = self.inspect_lexerkind_cb.replace(cb);
    self
}
804904
}
805905

806906
/// An interface to the result of [CTLexerBuilder::build()].
@@ -897,3 +997,46 @@ pub fn ct_token_map<StorageT: Display>(
897997
f.write_all(outs.as_bytes())?;
898998
Ok(())
899999
}
1000+
1001+
#[cfg(test)]
mod test {
    use super::{CTLexerBuilder, LexerKind};

    /// Checks that every accepted spelling of the `lexerkind` setting in a `%grmtools`
    /// section (with or without the `LexerKind` namespace, in mixed case) resolves to
    /// `LexerKind::LRNonStreamingLexer` when the lexer is built.
    #[test]
    fn test_grmtools_section_lexerkind() {
        const SPELLINGS: [&str; 4] = [
            "LRNonStreamingLexer",
            "lrnonstreaminglexer",
            "LexerKind::lrnonstreaminglexer",
            "lexerkind::LRNonStreamingLexer",
        ];
        for (idx, spelling) in SPELLINGS.iter().enumerate() {
            // A minimal lexer definition whose header selects the lexer kind under test.
            let lex_src = format!("\n%grmtools{{lexerkind: {}}}\n%%\n. ;\n", spelling);
            // One input file per spelling so iterations do not overwrite each other.
            let lex_path = format!(
                "{}/test_grmtools_section_lexerkind_{}.l",
                env!("OUT_DIR"),
                idx
            );
            std::fs::write(&lex_path, lex_src.as_bytes()).unwrap();
            let out_path = format!("{}.rs", lex_path);
            CTLexerBuilder::new()
                .output_path(out_path)
                .lexer_path(lex_path)
                .inspect_lexerkind(Box::new(|lexerkind| {
                    assert!(matches!(lexerkind, LexerKind::LRNonStreamingLexer));
                    Ok(())
                }))
                .build()
                .unwrap();
        }
    }
}

lrlex/src/lib/parser.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ lazy_static! {
3434
static ref RE_LEADING_WS: Regex = Regex::new(r"^[\p{Pattern_White_Space}]*").unwrap();
3535
static ref RE_WS: Regex = Regex::new(r"\p{Pattern_White_Space}").unwrap();
3636
static ref RE_LEADING_DIGITS: Regex = Regex::new(r"^[0-9]+").unwrap();
37+
static ref RE_NAME: Regex = Regex::new(r"^[a-zA-Z][a-zA-Z]*").unwrap();
3738
}
3839
const INITIAL_START_STATE_NAME: &str = "INITIAL";
3940

@@ -252,7 +253,7 @@ where
252253
span_map: &mut HashMap<&str, Span>,
253254
lex_flags: &mut LexFlags,
254255
) -> LexInternalBuildResult<usize> {
255-
const OPTIONS: [&str; 12] = [
256+
const OPTIONS: [&str; 13] = [
256257
"allow_wholeline_comments",
257258
"dot_matches_new_line",
258259
"multi_line",
@@ -265,6 +266,7 @@ where
265266
"size_limit",
266267
"dfa_size_limit",
267268
"nest_limit",
269+
"lexerkind",
268270
];
269271
let start_pos = i;
270272
// RegexBuilder isn't uniform regarding whether the default value of an options is true
@@ -331,6 +333,26 @@ where
331333
}
332334
i = j;
333335
}
336+
"lexerkind" => {
337+
// We just want to skip this, we already know we're `LRNonStreamingLexer`
338+
i = self.parse_ws(i)?;
339+
if let Some(j) = self.lookahead_is(":", i) {
340+
i = j
341+
} else {
342+
return Err(LexBuildError {
343+
kind: LexErrorKind::InvalidGrmtoolsSectionValue,
344+
spans: vec![Span::new(i, i)],
345+
});
346+
}
347+
i = self.parse_ws(i)?;
348+
let (j, _) = self.parse_name(i)?;
349+
i = self.parse_ws(j)?;
350+
if let Some(j) = self.lookahead_is("::", j) {
351+
i = self.parse_ws(j)?;
352+
let (j, _) = self.parse_name(i)?;
353+
i = j;
354+
}
355+
}
334356
_ => unreachable!(),
335357
}
336358
span_map.insert(opt, Span::new(start_pos, end_pos));
@@ -809,6 +831,19 @@ where
809831
})
810832
}
811833

834+
fn parse_name(&self, i: usize) -> Result<(usize, String), LexBuildError> {
835+
match RE_NAME.find(&self.src[i..]) {
836+
Some(m) => {
837+
assert_eq!(m.start(), 0);
838+
Ok((i + m.end(), self.src[i..i + m.end()].to_string()))
839+
}
840+
None => Err(LexBuildError {
841+
kind: LexErrorKind::InvalidGrmtoolsSectionValue,
842+
spans: vec![Span::new(i, i)],
843+
}),
844+
}
845+
}
846+
812847
fn parse_spaces(&mut self, i: usize) -> LexInternalBuildResult<usize> {
813848
Ok(RE_LEADING_SPACE_SEPS
814849
.find(&self.src[i..])

0 commit comments

Comments
 (0)