Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 151 additions & 8 deletions lrlex/src/lib/ctbuilder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ use std::{
};

use bincode::Encode;
use cfgrammar::{newlinecache::NewlineCache, Spanned};
use cfgrammar::{
header::{GrmtoolsSectionParser, HeaderError, HeaderErrorKind, Namespaced, Setting, Value},
newlinecache::NewlineCache,
Spanned,
};
use lazy_static::lazy_static;
use lrpar::{CTParserBuilder, LexerTypes};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
Expand All @@ -37,6 +41,69 @@ pub enum LexerKind {
LRNonStreamingLexer,
}

impl TryFrom<&Value> for LexerKind {
type Error = cfgrammar::header::HeaderError;
fn try_from(it: &Value) -> Result<LexerKind, Self::Error> {
match it {
Value::Flag(_, loc) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found bool",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found numeric",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::Constructor {
ctor:
Namespaced {
namespace: _,
member: (_, loc),
},
arg: _,
}) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found constructor",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::Unitary(Namespaced {
namespace,
member: (member, member_loc),
})) => {
if let Some((ns, loc)) = namespace {
if ns.to_lowercase() != "lexerkind" {
return Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected namespace `LexerKind`",
),
locations: vec![loc.clone()],
});
}
}
if member.to_lowercase() != "lrnonstreaminglexer" {
return Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Unknown `LexerKind` Variant",
),
locations: vec![member_loc.clone()],
});
}

Ok(LexerKind::LRNonStreamingLexer)
}
}
}
}

/// Specify the visibility of the module generated by [CTLexerBuilder].
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
Expand Down Expand Up @@ -129,7 +196,7 @@ where
lrpar_config: Option<Box<dyn Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>>>,
lexer_path: Option<PathBuf>,
output_path: Option<PathBuf>,
lexerkind: LexerKind,
lexerkind: Option<LexerKind>,
mod_name: Option<&'a str>,
visibility: Visibility,
rust_edition: RustEdition,
Expand All @@ -138,6 +205,8 @@ where
allow_missing_tokens_in_parser: bool,
force_lex_flags: LexFlags,
default_lex_flags: LexFlags,
#[cfg(test)]
inspect_lexerkind_cb: Option<Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>>,
}

impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
Expand Down Expand Up @@ -174,7 +243,7 @@ where
lrpar_config: None,
lexer_path: None,
output_path: None,
lexerkind: LexerKind::LRNonStreamingLexer,
lexerkind: None,
mod_name: None,
visibility: Visibility::Private,
rust_edition: RustEdition::Rust2021,
Expand All @@ -183,6 +252,8 @@ where
allow_missing_tokens_in_parser: true,
force_lex_flags: UNSPECIFIED_LEX_FLAGS,
default_lex_flags: UNSPECIFIED_LEX_FLAGS,
#[cfg(test)]
inspect_lexerkind_cb: None,
}
}

Expand Down Expand Up @@ -287,7 +358,7 @@ where

/// Set the type of lexer to be generated to `lexerkind`.
///
/// A kind set through this method takes precedence over a `lexerkind` entry in the lexer
/// file's `%grmtools` section. If neither is given, `LexerKind::LRNonStreamingLexer` is used.
pub fn lexerkind(mut self, lexerkind: LexerKind) -> Self {
// NOTE(review): this listing is a rendered diff; the next two lines appear to be the removed
// and the added form of the same assignment (the field became `Option<LexerKind>`) -- confirm
// against the real file before editing.
self.lexerkind = lexerkind;
self.lexerkind = Some(lexerkind);
self
}

Expand Down Expand Up @@ -367,12 +438,32 @@ where
}
lk.insert(outp.clone());
}

let lex_src = read_to_string(lexerp)
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
let (header, _) = GrmtoolsSectionParser::new(&lex_src, false)
.parse()
.map_err(|es| {
es.iter()
.map(|e| e.to_string())
.collect::<Vec<_>>()
.join("\n")
})?;
let lexerkind = match self.lexerkind {
Some(lexerkind) => lexerkind,
None => {
if let Some((_, lk_val)) = header.get("lexerkind") {
LexerKind::try_from(lk_val)?
} else {
LexerKind::LRNonStreamingLexer
}
}
};
let line_cache = NewlineCache::from_str(&lex_src).unwrap();
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match self
.lexerkind
#[cfg(test)]
if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
inspect_lexerkind_cb(lexerkind)?
}
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match lexerkind
{
LexerKind::LRNonStreamingLexer => {
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
Expand Down Expand Up @@ -529,7 +620,7 @@ where
let rules = vec![#(#rules),*];
});
}
let lexerdef_ty = match self.lexerkind {
let lexerdef_ty = match lexerkind {
LexerKind::LRNonStreamingLexer => {
quote!(::lrlex::LRNonStreamingLexerDef)
}
Expand Down Expand Up @@ -801,6 +892,15 @@ where
self.default_lex_flags = flags;
self
}

/// Test-only hook: registers `cb` to be called with the [LexerKind] that was selected while
/// the lexer is being built. An `Err` returned by `cb` is propagated to the caller of the
/// build.
#[cfg(test)]
pub fn inspect_lexerkind(
    self,
    cb: Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>,
) -> Self {
    let mut builder = self;
    builder.inspect_lexerkind_cb = Some(cb);
    builder
}
}

/// An interface to the result of [CTLexerBuilder::build()].
Expand Down Expand Up @@ -897,3 +997,46 @@ pub fn ct_token_map<StorageT: Display>(
f.write_all(outs.as_bytes())?;
Ok(())
}

#[cfg(test)]
mod test {
    use std::fs;

    use super::{CTLexerBuilder, LexerKind};

    /// Each accepted spelling of `lexerkind` in a `%grmtools` section (bare or namespaced, in
    /// any letter case) must make the builder select `LexerKind::LRNonStreamingLexer`.
    #[test]
    fn test_grmtools_section_lexerkind() {
        const SPELLINGS: [&str; 4] = [
            "LRNonStreamingLexer",
            "lrnonstreaminglexer",
            "LexerKind::lrnonstreaminglexer",
            "lexerkind::LRNonStreamingLexer",
        ];
        for (idx, spelling) in SPELLINGS.iter().enumerate() {
            // One lexer file per spelling, so iterations never overwrite each other's input.
            let lex_path = format!(
                "{}/test_grmtools_section_lexerkind_{}.l",
                env!("OUT_DIR"),
                idx
            );
            let lex_src = format!(
                "
%grmtools{{lexerkind: {}}}
%%
. ;
",
                spelling
            );
            fs::write(&lex_path, lex_src.as_bytes()).unwrap();

            let built = CTLexerBuilder::new()
                .output_path(format!("{}.rs", lex_path))
                .lexer_path(lex_path)
                .inspect_lexerkind(Box::new(|lexerkind| {
                    assert!(matches!(lexerkind, LexerKind::LRNonStreamingLexer));
                    Ok(())
                }))
                .build();
            built.unwrap();
        }
    }
}
37 changes: 36 additions & 1 deletion lrlex/src/lib/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ lazy_static! {
static ref RE_LEADING_WS: Regex = Regex::new(r"^[\p{Pattern_White_Space}]*").unwrap();
static ref RE_WS: Regex = Regex::new(r"\p{Pattern_White_Space}").unwrap();
static ref RE_LEADING_DIGITS: Regex = Regex::new(r"^[0-9]+").unwrap();
static ref RE_NAME: Regex = Regex::new(r"^[a-zA-Z][a-zA-Z]*").unwrap();
}
const INITIAL_START_STATE_NAME: &str = "INITIAL";

Expand Down Expand Up @@ -252,7 +253,7 @@ where
span_map: &mut HashMap<&str, Span>,
lex_flags: &mut LexFlags,
) -> LexInternalBuildResult<usize> {
const OPTIONS: [&str; 12] = [
const OPTIONS: [&str; 13] = [
"allow_wholeline_comments",
"dot_matches_new_line",
"multi_line",
Expand All @@ -265,6 +266,7 @@ where
"size_limit",
"dfa_size_limit",
"nest_limit",
"lexerkind",
];
let start_pos = i;
// RegexBuilder isn't uniform regarding whether the default value of an options is true
Expand Down Expand Up @@ -331,6 +333,26 @@ where
}
i = j;
}
"lexerkind" => {
// We just want to skip over the value (`Name` or `Namespace::Name`): we already know
// we're `LRNonStreamingLexer`, so nothing parsed here is kept.
i = self.parse_ws(i)?;
if let Some(j) = self.lookahead_is(":", i) {
    i = j
} else {
    // The option name must be followed by `:`.
    return Err(LexBuildError {
        kind: LexErrorKind::InvalidGrmtoolsSectionValue,
        spans: vec![Span::new(i, i)],
    });
}
i = self.parse_ws(i)?;
let (j, _) = self.parse_name(i)?;
i = self.parse_ws(j)?;
// Look for `::` at `i`, i.e. after any whitespace that follows the first name. Looking at
// `j` (the end of the name) missed the separator whenever whitespace preceded it, even
// though whitespace is accepted around `:` and after `::`.
if let Some(j) = self.lookahead_is("::", i) {
    i = self.parse_ws(j)?;
    let (j, _) = self.parse_name(i)?;
    i = j;
}
}
_ => unreachable!(),
}
span_map.insert(opt, Span::new(start_pos, end_pos));
Expand Down Expand Up @@ -809,6 +831,19 @@ where
})
}

/// Parses a name (one or more ASCII letters, as defined by `RE_NAME`) starting at byte
/// offset `i` of the source.
///
/// Returns the offset just past the name together with the name itself, or an
/// `InvalidGrmtoolsSectionValue` error with an empty span at `i` if no name starts there.
fn parse_name(&self, i: usize) -> Result<(usize, String), LexBuildError> {
    RE_NAME
        .find(&self.src[i..])
        .map(|found| {
            // `RE_NAME` is anchored, so a match can only begin at the slice start.
            assert_eq!(found.start(), 0);
            (i + found.end(), found.as_str().to_string())
        })
        .ok_or_else(|| LexBuildError {
            kind: LexErrorKind::InvalidGrmtoolsSectionValue,
            spans: vec![Span::new(i, i)],
        })
}

fn parse_spaces(&mut self, i: usize) -> LexInternalBuildResult<usize> {
Ok(RE_LEADING_SPACE_SEPS
.find(&self.src[i..])
Expand Down