Skip to content

Commit 3718507

Browse files
committed
Migrate LexFlags parsing to GrmtoolsSectionParser
1 parent 04ae392 commit 3718507

7 files changed

Lines changed: 510 additions & 438 deletions

File tree

cfgrammar/src/lib/yacc/parser.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,6 @@ pub enum YaccGrammarErrorKind {
9292
UnknownEPP(String),
9393
ExpectedInput(char),
9494
InvalidYaccKind,
95-
InvalidYaccKindNamespace,
96-
InvalidActionKind,
97-
InvalidActionKindNamespace,
98-
InvalidGrmtoolsSectionEntry,
99-
DuplicateGrmtoolsSectionEntry,
100-
MissingGrmtoolsSection,
10195
}
10296

10397
/// Any error from the Yacc parser returns an instance of this struct.
@@ -184,17 +178,7 @@ impl fmt::Display for YaccGrammarErrorKind {
184178
name
185179
)
186180
}
187-
YaccGrammarErrorKind::MissingGrmtoolsSection => "Missing '%grmtools' section",
188-
YaccGrammarErrorKind::DuplicateGrmtoolsSectionEntry => {
189-
"Duplicate entry in %grmtools section"
190-
}
191-
YaccGrammarErrorKind::InvalidGrmtoolsSectionEntry => {
192-
"Invalid entry in %grmtools section"
193-
}
194181
YaccGrammarErrorKind::InvalidYaccKind => "Invalid yacc kind",
195-
YaccGrammarErrorKind::InvalidYaccKindNamespace => "Invalid yacc kind namespace",
196-
YaccGrammarErrorKind::InvalidActionKind => "Invalid action kind",
197-
YaccGrammarErrorKind::InvalidActionKindNamespace => "Invalid action kind namespace",
198182
};
199183
write!(f, "{}", s)
200184
}
@@ -303,12 +287,7 @@ impl Spanned for YaccGrammarError {
303287
| YaccGrammarErrorKind::UnknownRuleRef(_)
304288
| YaccGrammarErrorKind::UnknownToken(_)
305289
| YaccGrammarErrorKind::NoPrecForToken(_)
306-
| YaccGrammarErrorKind::MissingGrmtoolsSection
307-
| YaccGrammarErrorKind::InvalidGrmtoolsSectionEntry
308290
| YaccGrammarErrorKind::InvalidYaccKind
309-
| YaccGrammarErrorKind::InvalidYaccKindNamespace
310-
| YaccGrammarErrorKind::InvalidActionKind
311-
| YaccGrammarErrorKind::InvalidActionKindNamespace
312291
| YaccGrammarErrorKind::ExpectedInput(_)
313292
| YaccGrammarErrorKind::UnknownEPP(_) => SpansKind::Error,
314293
YaccGrammarErrorKind::DuplicatePrecedence
@@ -318,7 +297,6 @@ impl Spanned for YaccGrammarError {
318297
| YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration
319298
| YaccGrammarErrorKind::DuplicateStartDeclaration
320299
| YaccGrammarErrorKind::DuplicateActiontypeDeclaration
321-
| YaccGrammarErrorKind::DuplicateGrmtoolsSectionEntry
322300
| YaccGrammarErrorKind::DuplicateEPP => SpansKind::DuplicationError,
323301
}
324302
}

lrlex/src/lib/ctbuilder.rs

Lines changed: 150 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,12 @@ use std::{
1616

1717
use bincode::Encode;
1818
use cfgrammar::{
19-
header::{GrmtoolsSectionParser, HeaderError, HeaderErrorKind, Namespaced, Setting, Value},
19+
header::{
20+
GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, Namespaced, Setting, Value,
21+
},
22+
markmap::MergeBehavior,
2023
newlinecache::NewlineCache,
24+
span::Location,
2125
Spanned,
2226
};
2327
use lazy_static::lazy_static;
@@ -27,7 +31,7 @@ use proc_macro2::TokenStream;
2731
use quote::{format_ident, quote, ToTokens, TokenStreamExt};
2832
use regex::Regex;
2933

30-
use crate::{DefaultLexerTypes, LRNonStreamingLexerDef, LexFlags, LexerDef, UNSPECIFIED_LEX_FLAGS};
34+
use crate::{DefaultLexerTypes, LRNonStreamingLexerDef, LexBuildHeaderError, LexFlags, LexerDef};
3135

3236
const RUST_FILE_EXT: &str = "rs";
3337

@@ -203,8 +207,7 @@ where
203207
rule_ids_map: Option<HashMap<String, LexerTypesT::StorageT>>,
204208
allow_missing_terms_in_lexer: bool,
205209
allow_missing_tokens_in_parser: bool,
206-
force_lex_flags: LexFlags,
207-
default_lex_flags: LexFlags,
210+
header: Header,
208211
#[cfg(test)]
209212
inspect_lexerkind_cb: Option<Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>>,
210213
}
@@ -250,8 +253,7 @@ where
250253
rule_ids_map: None,
251254
allow_missing_terms_in_lexer: false,
252255
allow_missing_tokens_in_parser: true,
253-
force_lex_flags: UNSPECIFIED_LEX_FLAGS,
254-
default_lex_flags: UNSPECIFIED_LEX_FLAGS,
256+
header: Header::new(),
255257
#[cfg(test)]
256258
inspect_lexerkind_cb: None,
257259
}
@@ -463,29 +465,34 @@ where
463465
if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
464466
inspect_lexerkind_cb(lexerkind)?
465467
}
468+
let header = Header::new();
466469
let (mut lexerdef, lex_flags): (Box<dyn LexerDef<LexerTypesT>>, LexFlags) = match lexerkind
467470
{
468471
LexerKind::LRNonStreamingLexer => {
469-
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
470-
&lex_src,
471-
self.force_lex_flags.clone(),
472-
self.default_lex_flags.clone(),
473-
)
474-
.map_err(|errs| {
475-
errs.iter()
476-
.map(|e| {
477-
if let Some((line, column)) = line_cache.byte_to_line_num_and_col_num(
478-
&lex_src,
479-
e.spans().first().unwrap().start(),
480-
) {
481-
format!("{} at line {line} column {column}", e)
482-
} else {
483-
format!("{}", e)
484-
}
485-
})
486-
.collect::<Vec<_>>()
487-
.join("\n")
488-
})?;
472+
let lexerdef =
473+
LRNonStreamingLexerDef::<LexerTypesT>::new_with_header(&lex_src, header)
474+
.map_err(|errs| {
475+
errs.iter()
476+
.map(|e| match e {
477+
LexBuildHeaderError::Build(e) => {
478+
if let Some((line, column)) = line_cache
479+
.byte_to_line_num_and_col_num(
480+
&lex_src,
481+
e.spans().first().unwrap().start(),
482+
)
483+
{
484+
format!("{} at line {line} column {column}", e)
485+
} else {
486+
format!("{}", e)
487+
}
488+
}
489+
LexBuildHeaderError::Header(e) => {
490+
format!("{}", e)
491+
}
492+
})
493+
.collect::<Vec<_>>()
494+
.join("\n")
495+
})?;
489496
let lex_flags = lexerdef.lex_flags().cloned();
490497
(Box::new(lexerdef), lex_flags.unwrap())
491498
}
@@ -782,7 +789,15 @@ where
782789
///
783790
/// Setting this flag will override the same flag within a `%grmtools` section.
784791
pub fn allow_wholeline_comments(mut self, flag: bool) -> Self {
785-
self.force_lex_flags.allow_wholeline_comments = Some(flag);
792+
let key = "allow_wholeline_comments".to_string();
793+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
794+
self.header.insert(
795+
key,
796+
(
797+
Location::Other("CTLexerBuilder".to_string()),
798+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
799+
),
800+
);
786801
self
787802
}
788803

@@ -791,7 +806,15 @@ where
791806
///
792807
/// Setting this flag will override the same flag within a `%grmtools` section.
793808
pub fn dot_matches_new_line(mut self, flag: bool) -> Self {
794-
self.force_lex_flags.dot_matches_new_line = Some(flag);
809+
let key = "dot_matches_new_line".to_string();
810+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
811+
self.header.insert(
812+
key,
813+
(
814+
Location::Other("CTLexerBuilder".to_string()),
815+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
816+
),
817+
);
795818
self
796819
}
797820

@@ -800,7 +823,15 @@ where
800823
///
801824
/// Setting this flag will override the same flag within a `%grmtools` section.
802825
pub fn multi_line(mut self, flag: bool) -> Self {
803-
self.force_lex_flags.multi_line = Some(flag);
826+
let key = "multi_line".to_string();
827+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
828+
self.header.insert(
829+
key,
830+
(
831+
Location::Other("CTLexerBuilder".to_string()),
832+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
833+
),
834+
);
804835
self
805836
}
806837

@@ -809,7 +840,15 @@ where
809840
///
810841
/// Setting this flag will override the same flag within a `%grmtools` section.
811842
pub fn posix_escapes(mut self, flag: bool) -> Self {
812-
self.force_lex_flags.posix_escapes = Some(flag);
843+
let key = "posix_escapes".to_string();
844+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
845+
self.header.insert(
846+
key,
847+
(
848+
Location::Other("CTLexerBuilder".to_string()),
849+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
850+
),
851+
);
813852
self
814853
}
815854

@@ -818,7 +857,15 @@ where
818857
///
819858
/// Setting this flag will override the same flag within a `%grmtools` section.
820859
pub fn octal(mut self, flag: bool) -> Self {
821-
self.force_lex_flags.octal = Some(flag);
860+
let key = "octal".to_string();
861+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
862+
self.header.insert(
863+
key,
864+
(
865+
Location::Other("CTLexerBuilder".to_string()),
866+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
867+
),
868+
);
822869
self
823870
}
824871

@@ -827,7 +874,15 @@ where
827874
///
828875
/// Setting this flag will override the same flag within a `%grmtools` section.
829876
pub fn swap_greed(mut self, flag: bool) -> Self {
830-
self.force_lex_flags.swap_greed = Some(flag);
877+
let key = "swap_greed".to_string();
878+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
879+
self.header.insert(
880+
key,
881+
(
882+
Location::Other("CTLexerBuilder".to_string()),
883+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
884+
),
885+
);
831886
self
832887
}
833888

@@ -836,7 +891,15 @@ where
836891
///
837892
/// Setting this flag will override the same flag within a `%grmtools` section.
838893
pub fn ignore_whitespace(mut self, flag: bool) -> Self {
839-
self.force_lex_flags.ignore_whitespace = Some(flag);
894+
let key = "ignore_whitespace".to_string();
895+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
896+
self.header.insert(
897+
key,
898+
(
899+
Location::Other("CTLexerBuilder".to_string()),
900+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
901+
),
902+
);
840903
self
841904
}
842905

@@ -845,7 +908,15 @@ where
845908
///
846909
/// Setting this flag will override the same flag within a `%grmtools` section.
847910
pub fn unicode(mut self, flag: bool) -> Self {
848-
self.force_lex_flags.unicode = Some(flag);
911+
let key = "unicode".to_string();
912+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
913+
self.header.insert(
914+
key,
915+
(
916+
Location::Other("CTLexerBuilder".to_string()),
917+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
918+
),
919+
);
849920
self
850921
}
851922

@@ -854,7 +925,15 @@ where
854925
///
855926
/// Setting this flag will override the same flag within a `%grmtools` section.
856927
pub fn case_insensitive(mut self, flag: bool) -> Self {
857-
self.force_lex_flags.case_insensitive = Some(flag);
928+
let key = "case_insensitive".to_string();
929+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
930+
self.header.insert(
931+
key,
932+
(
933+
Location::Other("CTLexerBuilder".to_string()),
934+
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
935+
),
936+
);
858937
self
859938
}
860939

@@ -863,7 +942,18 @@ where
863942
///
864943
/// Setting this flag will override the same flag within a `%grmtools` section.
865944
pub fn size_limit(mut self, sz: usize) -> Self {
866-
self.force_lex_flags.size_limit = Some(sz);
945+
let key = "size_limit".to_string();
946+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
947+
self.header.insert(
948+
key,
949+
(
950+
Location::Other("CTLexerBuilder".to_string()),
951+
Value::Setting(Setting::Num(
952+
sz as u64,
953+
Location::Other("CTLexerBuilder".to_string()),
954+
)),
955+
),
956+
);
867957
self
868958
}
869959

@@ -872,7 +962,18 @@ where
872962
///
873963
/// Setting this flag will override the same flag within a `%grmtools` section.
874964
pub fn dfa_size_limit(mut self, sz: usize) -> Self {
875-
self.force_lex_flags.dfa_size_limit = Some(sz);
965+
let key = "dfa_size_limit".to_string();
966+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
967+
self.header.insert(
968+
key,
969+
(
970+
Location::Other("CTLexerBuilder".to_string()),
971+
Value::Setting(Setting::Num(
972+
sz as u64,
973+
Location::Other("CTLexerBuilder".to_string()),
974+
)),
975+
),
976+
);
876977
self
877978
}
878979

@@ -881,15 +982,18 @@ where
881982
///
882983
/// Setting this flag will override the same flag within a `%grmtools` section.
883984
pub fn nest_limit(mut self, lim: u32) -> Self {
884-
self.force_lex_flags.nest_limit = Some(lim);
885-
self
886-
}
887-
888-
/// `Some` values in the specified `flags` will be used as a default value
889-
/// unless the specified value has already been specified previously via `CTLexerBuilder`
890-
/// or was specified in the `%grmtools` section of a *.l* file.
891-
pub fn default_lex_flags(mut self, flags: LexFlags) -> Self {
892-
self.default_lex_flags = flags;
985+
let key = "nest_limit".to_string();
986+
self.header.set_merge_behavior(&key, MergeBehavior::Ours);
987+
self.header.insert(
988+
key,
989+
(
990+
Location::Other("CTLexerBuilder".to_string()),
991+
Value::Setting(Setting::Num(
992+
lim as u64,
993+
Location::Other("CTLexerBuilder".to_string()),
994+
)),
995+
),
996+
);
893997
self
894998
}
895999

0 commit comments

Comments
 (0)