Skip to content

Commit 707bfd7

Browse files
committed
lrlex: unify parsing of the %grmtools section.
This unifies the code in lrlex with the rest of the crates by using `GrmtoolsSectionParser` to parse the `%grmtools` section and `Header` to merge values set via the builder with those from the parsed header.
1 parent 03515db commit 707bfd7

4 files changed

Lines changed: 384 additions & 393 deletions

File tree

lrlex/src/lib/ctbuilder.rs

Lines changed: 158 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ use std::{
1717
use bincode::Encode;
1818
use cfgrammar::{
1919
header::{
20-
GrmtoolsSectionParser, HeaderError, HeaderErrorKind, HeaderValue, Namespaced, Setting,
21-
Value,
20+
GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, HeaderValue, Namespaced,
21+
Setting, Value,
2222
},
23+
markmap::MergeBehavior,
2324
newlinecache::NewlineCache,
25+
span::Location,
2426
Spanned,
2527
};
2628
use lazy_static::lazy_static;
@@ -30,7 +32,7 @@ use proc_macro2::TokenStream;
3032
use quote::{format_ident, quote, ToTokens, TokenStreamExt};
3133
use regex::Regex;
3234

33-
use crate::{DefaultLexerTypes, LRNonStreamingLexerDef, LexFlags, LexerDef, UNSPECIFIED_LEX_FLAGS};
35+
use crate::{DefaultLexerTypes, LRNonStreamingLexerDef, LexFlags, LexerDef};
3436

3537
const RUST_FILE_EXT: &str = "rs";
3638

@@ -206,8 +208,7 @@ where
206208
rule_ids_map: Option<HashMap<String, LexerTypesT::StorageT>>,
207209
allow_missing_terms_in_lexer: bool,
208210
allow_missing_tokens_in_parser: bool,
209-
force_lex_flags: LexFlags,
210-
default_lex_flags: LexFlags,
211+
header: Header<Location>,
211212
#[cfg(test)]
212213
inspect_lexerkind_cb: Option<Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>>,
213214
}
@@ -242,6 +243,8 @@ where
242243
/// .build()?;
243244
/// ```
244245
pub fn new_with_lexemet() -> Self {
246+
let mut header = Header::new();
247+
header.set_default_merge_behavior(MergeBehavior::Ours);
245248
CTLexerBuilder {
246249
lrpar_config: None,
247250
lexer_path: None,
@@ -253,8 +256,7 @@ where
253256
rule_ids_map: None,
254257
allow_missing_terms_in_lexer: false,
255258
allow_missing_tokens_in_parser: true,
256-
force_lex_flags: UNSPECIFIED_LEX_FLAGS,
257-
default_lex_flags: UNSPECIFIED_LEX_FLAGS,
259+
header,
258260
#[cfg(test)]
259261
inspect_lexerkind_cb: None,
260262
}
@@ -443,14 +445,17 @@ where
443445
}
444446
let lex_src = read_to_string(lexerp)
445447
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
446-
let (header, _) = GrmtoolsSectionParser::new(&lex_src, false)
448+
let mut header = self.header;
449+
let (parsed_header, _) = GrmtoolsSectionParser::new(&lex_src, false)
447450
.parse()
448451
.map_err(|es| {
449452
es.iter()
450453
.map(|e| e.to_string())
451454
.collect::<Vec<_>>()
452455
.join("\n")
453456
})?;
457+
header.merge_from(parsed_header)?;
458+
header.mark_used(&"lexerkind".to_string());
454459
let lexerkind = match self.lexerkind {
455460
Some(lexerkind) => lexerkind,
456461
None => {
@@ -469,30 +474,38 @@ where
469474
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match lexerkind
470475
{
471476
LexerKind::LRNonStreamingLexer => {
472-
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
473-
&lex_src,
474-
self.force_lex_flags.clone(),
475-
self.default_lex_flags.clone(),
476-
)
477-
.map_err(|errs| {
478-
errs.iter()
479-
.map(|e| {
480-
if let Some((line, column)) = line_cache.byte_to_line_num_and_col_num(
481-
&lex_src,
482-
e.spans().first().unwrap().start(),
483-
) {
484-
format!("{} at line {line} column {column}", e)
485-
} else {
486-
format!("{}", e)
487-
}
488-
})
489-
.collect::<Vec<_>>()
490-
.join("\n")
491-
})?;
477+
let lex_flags = LexFlags::try_from(&mut header)?;
478+
let lexerdef =
479+
LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(&lex_src, lex_flags)
480+
.map_err(|errs| {
481+
errs.iter()
482+
.map(|e| {
483+
if let Some((line, column)) = line_cache
484+
.byte_to_line_num_and_col_num(
485+
&lex_src,
486+
e.spans().first().unwrap().start(),
487+
)
488+
{
489+
format!("{} at line {line} column {column}", e)
490+
} else {
491+
format!("{}", e)
492+
}
493+
})
494+
.collect::<Vec<_>>()
495+
.join("\n")
496+
})?;
492497
let lex_flags = lexerdef.lex_flags().cloned();
493498
(Box::new(lexerdef), lex_flags.unwrap())
494499
}
495500
};
501+
502+
let unused_header_values = header.unused();
503+
if !unused_header_values.is_empty() {
504+
return Err(
505+
format!("Unused header values: {}", unused_header_values.join(", ")).into(),
506+
);
507+
}
508+
496509
let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
497510
Some(ref rim) => {
498511
// Convert from HashMap<String, _> to HashMap<&str, _>
@@ -583,18 +596,18 @@ where
583596
// Code gen for the lexerdef() `lex_flags` variable.
584597
quote! {
585598
let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
586-
lex_flags.allow_wholeline_comments = #allow_wholeline_comments;
587-
lex_flags.dot_matches_new_line = #dot_matches_new_line;
588-
lex_flags.multi_line = #multi_line;
589-
lex_flags.octal = #octal;
590-
lex_flags.posix_escapes = #posix_escapes;
591-
lex_flags.case_insensitive = #case_insensitive;
592-
lex_flags.unicode = #unicode;
593-
lex_flags.swap_greed = #swap_greed;
594-
lex_flags.ignore_whitespace = #ignore_whitespace;
595-
lex_flags.size_limit = #size_limit;
596-
lex_flags.dfa_size_limit = #dfa_size_limit;
597-
lex_flags.nest_limit = #nest_limit;
599+
lex_flags.allow_wholeline_comments = #allow_wholeline_comments.or(::lrlex::DEFAULT_LEX_FLAGS.allow_wholeline_comments);
600+
lex_flags.dot_matches_new_line = #dot_matches_new_line.or(::lrlex::DEFAULT_LEX_FLAGS.dot_matches_new_line);
601+
lex_flags.multi_line = #multi_line.or(::lrlex::DEFAULT_LEX_FLAGS.multi_line);
602+
lex_flags.octal = #octal.or(::lrlex::DEFAULT_LEX_FLAGS.octal);
603+
lex_flags.posix_escapes = #posix_escapes.or(::lrlex::DEFAULT_LEX_FLAGS.posix_escapes);
604+
lex_flags.case_insensitive = #case_insensitive.or(::lrlex::DEFAULT_LEX_FLAGS.case_insensitive);
605+
lex_flags.unicode = #unicode.or(::lrlex::DEFAULT_LEX_FLAGS.unicode);
606+
lex_flags.swap_greed = #swap_greed.or(::lrlex::DEFAULT_LEX_FLAGS.swap_greed);
607+
lex_flags.ignore_whitespace = #ignore_whitespace.or(::lrlex::DEFAULT_LEX_FLAGS.ignore_whitespace);
608+
lex_flags.size_limit = #size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.size_limit);
609+
lex_flags.dfa_size_limit = #dfa_size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.dfa_size_limit);
610+
lex_flags.nest_limit = #nest_limit.or(::lrlex::DEFAULT_LEX_FLAGS.nest_limit);
598611
let lex_flags = lex_flags;
599612
}
600613
};
@@ -785,7 +798,14 @@ where
785798
///
786799
/// Setting this flag will override the same flag within a `%grmtools` section.
787800
pub fn allow_wholeline_comments(mut self, flag: bool) -> Self {
788-
self.force_lex_flags.allow_wholeline_comments = Some(flag);
801+
let key = "allow_wholeline_comments".to_string();
802+
self.header.insert(
803+
key,
804+
HeaderValue(
805+
Location::Other("CTLexerBuilder".to_string()),
806+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
807+
),
808+
);
789809
self
790810
}
791811

@@ -794,7 +814,14 @@ where
794814
///
795815
/// Setting this flag will override the same flag within a `%grmtools` section.
796816
pub fn dot_matches_new_line(mut self, flag: bool) -> Self {
797-
self.force_lex_flags.dot_matches_new_line = Some(flag);
817+
let key = "dot_matches_new_line".to_string();
818+
self.header.insert(
819+
key,
820+
HeaderValue(
821+
Location::Other("CTLexerBuilder".to_string()),
822+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
823+
),
824+
);
798825
self
799826
}
800827

@@ -803,7 +830,14 @@ where
803830
///
804831
/// Setting this flag will override the same flag within a `%grmtools` section.
805832
pub fn multi_line(mut self, flag: bool) -> Self {
806-
self.force_lex_flags.multi_line = Some(flag);
833+
let key = "multi_line".to_string();
834+
self.header.insert(
835+
key,
836+
HeaderValue(
837+
Location::Other("CTLexerBuilder".to_string()),
838+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
839+
),
840+
);
807841
self
808842
}
809843

@@ -812,7 +846,14 @@ where
812846
///
813847
/// Setting this flag will override the same flag within a `%grmtools` section.
814848
pub fn posix_escapes(mut self, flag: bool) -> Self {
815-
self.force_lex_flags.posix_escapes = Some(flag);
849+
let key = "posix_escapes".to_string();
850+
self.header.insert(
851+
key,
852+
HeaderValue(
853+
Location::Other("CTLexerBuilder".to_string()),
854+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
855+
),
856+
);
816857
self
817858
}
818859

@@ -821,7 +862,14 @@ where
821862
///
822863
/// Setting this flag will override the same flag within a `%grmtools` section.
823864
pub fn octal(mut self, flag: bool) -> Self {
824-
self.force_lex_flags.octal = Some(flag);
865+
let key = "octal".to_string();
866+
self.header.insert(
867+
key,
868+
HeaderValue(
869+
Location::Other("CTLexerBuilder".to_string()),
870+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
871+
),
872+
);
825873
self
826874
}
827875

@@ -830,7 +878,14 @@ where
830878
///
831879
/// Setting this flag will override the same flag within a `%grmtools` section.
832880
pub fn swap_greed(mut self, flag: bool) -> Self {
833-
self.force_lex_flags.swap_greed = Some(flag);
881+
let key = "swap_greed".to_string();
882+
self.header.insert(
883+
key,
884+
HeaderValue(
885+
Location::Other("CTLexerBuilder".to_string()),
886+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
887+
),
888+
);
834889
self
835890
}
836891

@@ -839,7 +894,14 @@ where
839894
///
840895
/// Setting this flag will override the same flag within a `%grmtools` section.
841896
pub fn ignore_whitespace(mut self, flag: bool) -> Self {
842-
self.force_lex_flags.ignore_whitespace = Some(flag);
897+
let key = "ignore_whitespace".to_string();
898+
self.header.insert(
899+
key,
900+
HeaderValue(
901+
Location::Other("CTLexerBuilder".to_string()),
902+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
903+
),
904+
);
843905
self
844906
}
845907

@@ -848,7 +910,14 @@ where
848910
///
849911
/// Setting this flag will override the same flag within a `%grmtools` section.
850912
pub fn unicode(mut self, flag: bool) -> Self {
851-
self.force_lex_flags.unicode = Some(flag);
913+
let key = "unicode".to_string();
914+
self.header.insert(
915+
key,
916+
HeaderValue(
917+
Location::Other("CTLexerBuilder".to_string()),
918+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
919+
),
920+
);
852921
self
853922
}
854923

@@ -857,7 +926,14 @@ where
857926
///
858927
/// Setting this flag will override the same flag within a `%grmtools` section.
859928
pub fn case_insensitive(mut self, flag: bool) -> Self {
860-
self.force_lex_flags.case_insensitive = Some(flag);
929+
let key = "case_insensitive".to_string();
930+
self.header.insert(
931+
key,
932+
HeaderValue(
933+
Location::Other("CTLexerBuilder".to_string()),
934+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
935+
),
936+
);
861937
self
862938
}
863939

@@ -866,7 +942,17 @@ where
866942
///
867943
/// Setting this flag will override the same flag within a `%grmtools` section.
868944
pub fn size_limit(mut self, sz: usize) -> Self {
869-
self.force_lex_flags.size_limit = Some(sz);
945+
let key = "size_limit".to_string();
946+
self.header.insert(
947+
key,
948+
HeaderValue(
949+
Location::Other("CTLexerBuilder".to_string()),
950+
Value::Setting(Setting::Num(
951+
sz as u64,
952+
Location::Other("CTLexerBuilder".to_string()),
953+
)),
954+
),
955+
);
870956
self
871957
}
872958

@@ -875,7 +961,17 @@ where
875961
///
876962
/// Setting this flag will override the same flag within a `%grmtools` section.
877963
pub fn dfa_size_limit(mut self, sz: usize) -> Self {
878-
self.force_lex_flags.dfa_size_limit = Some(sz);
964+
let key = "dfa_size_limit".to_string();
965+
self.header.insert(
966+
key,
967+
HeaderValue(
968+
Location::Other("CTLexerBuilder".to_string()),
969+
Value::Setting(Setting::Num(
970+
sz as u64,
971+
Location::Other("CTLexerBuilder".to_string()),
972+
)),
973+
),
974+
);
879975
self
880976
}
881977

@@ -884,15 +980,17 @@ where
884980
///
885981
/// Setting this flag will override the same flag within a `%grmtools` section.
886982
pub fn nest_limit(mut self, lim: u32) -> Self {
887-
self.force_lex_flags.nest_limit = Some(lim);
888-
self
889-
}
890-
891-
/// `Some` values in the specified `flags` will be used as a default value
892-
/// unless the specified value has already been specified previously via `CTLexerBuilder`
893-
/// or was specified in the `%grmtools` section of a *.l* file.
894-
pub fn default_lex_flags(mut self, flags: LexFlags) -> Self {
895-
self.default_lex_flags = flags;
983+
let key = "nest_limit".to_string();
984+
self.header.insert(
985+
key,
986+
HeaderValue(
987+
Location::Other("CTLexerBuilder".to_string()),
988+
Value::Setting(Setting::Num(
989+
lim as u64,
990+
Location::Other("CTLexerBuilder".to_string()),
991+
)),
992+
),
993+
);
896994
self
897995
}
898996

0 commit comments

Comments
 (0)