From fe09db1508a94b41d19874349f247ee638af9c4a Mon Sep 17 00:00:00 2001 From: matt rice Date: Tue, 20 May 2025 17:04:19 -0700 Subject: [PATCH 1/3] Add source snippets to missing_from_lexer/parser errors --- lrlex/src/lib/ctbuilder.rs | 103 +++++++++++++++++++++++++++++++++---- lrpar/src/lib/ctbuilder.rs | 39 +++++++++++--- 2 files changed, 125 insertions(+), 17 deletions(-) diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index 835eebcf3..3bcbc0d2a 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -20,7 +20,8 @@ use cfgrammar::{ Setting, Value, }, markmap::MergeBehavior, - span::Location, + span::{Location, Span}, + yacc::YaccGrammar, }; use glob::glob; use lazy_static::lazy_static; @@ -497,7 +498,7 @@ where if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb { inspect_lexerkind_cb(lexerkind)? } - let (lexerdef, lex_flags): (LRNonStreamingLexerDef, LexFlags) = + let (mut lexerdef, lex_flags): (LRNonStreamingLexerDef, LexFlags) = match lexerkind { LexerKind::LRNonStreamingLexer => { let lex_flags = LexFlags::try_from(&mut header)?; @@ -521,15 +522,16 @@ where } }; + let mut has_unallowed_missing = false; if let Some(ref lrcfg) = self.lrpar_config { - let mut lexerdef = lexerdef.clone(); + let mut closure_lexerdef = lexerdef.clone(); let mut ctp = CTParserBuilder::::new().inspect_rt(Box::new( move |yacc_header, rtpb, rule_ids_map, grm_path| { let owned_map = rule_ids_map .iter() .map(|(x, y)| (&**x, *y)) .collect::>(); - lexerdef.set_rule_ids(&owned_map); + closure_lexerdef.set_rule_ids(&owned_map); yacc_header.mark_used(&"test_files".to_string()); let test_glob = yacc_header.get("test_files"); match test_glob { @@ -540,7 +542,8 @@ where { let path = path?; let input = fs::read_to_string(&path)?; - let l: LRNonStreamingLexer = lexerdef.lexer(&input); + let l: LRNonStreamingLexer = + closure_lexerdef.lexer(&input); for e in rtpb.parse_noaction(&l) { Err(format!("parsing {}: {}", path.display(), e))? 
} @@ -553,8 +556,58 @@ where }, )); ctp = lrcfg(ctp); - let map = ctp.build()?; - self.rule_ids_map = Some(map.token_map().to_owned()); + let ct_parser = ctp.build()?; + self.rule_ids_map = Some(ct_parser.token_map().to_owned()); + let (missing_from_lexer, missing_from_parser) = + set_rule_ids(&mut lexerdef, ct_parser.yacc_grammar()); + let yacc_diag = + SpannedDiagnosticFormatter::new(ct_parser.grammar_src(), ct_parser.grammar_path()); + let err_indent = " ".repeat(ERROR.len()); + if !self.allow_missing_terms_in_lexer { + if let Some(token_spans) = missing_from_lexer { + eprintln!( + "{ERROR} these tokens are not referenced in the lexer but defined as follows" + ); + eprintln!( + "{err_indent} {}", + yacc_diag.file_location_msg("in the grammar", None) + ); + for span in token_spans { + eprintln!( + "{}", + yacc_diag.underline_span_with_text( + span, + "Missing from lexer".to_string(), + '^' + ) + ); + } + eprintln!(); + has_unallowed_missing = true; + } + } + if !self.allow_missing_tokens_in_parser { + if let Some(token_spans) = missing_from_parser { + eprintln!( + "{ERROR} these tokens are not referenced in the grammar but defined as follows" + ); + eprintln!( + "{err_indent} {}", + lex_diag.file_location_msg("in the lexer", None) + ); + for span in token_spans { + eprintln!( + "{}", + lex_diag.underline_span_with_text( + span, + "Missing from parser".to_string(), + '^' + ) + ); + } + has_unallowed_missing = true; + } + } } let mut lexerdef = Box::new(lexerdef); @@ -565,6 +618,11 @@ where ); } + if has_unallowed_missing { + fs::remove_file(outp).ok(); + panic!(); + } + let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map { Some(ref rim) => { // Convert from HashMap to HashMap<&str, _> @@ -580,8 +638,6 @@ where } None => (None, None), }; - - let mut has_unallowed_missing = false; if !self.allow_missing_terms_in_lexer { if let Some(ref mfl) = missing_from_lexer { eprintln!("Error: the following tokens are used in the grammar but are not defined 
in the lexer:"); @@ -1172,6 +1228,35 @@ fn indent(indent: &str, s: &str) -> String { format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent)) } +fn set_rule_ids, LT: LexerDef>( + lexerdef: &mut LT, + grm: &YaccGrammar, +) -> (Option>, Option>) +where + ST: 'static + Copy + PrimInt + Unsigned, + usize: num_traits::AsPrimitive, +{ + let rule_ids = grm + .tokens_map() + .iter() + .map(|(&n, &i)| (n, i.0)) + .collect::>(); + let (missing_from_lexer, missing_from_parser) = lexerdef.set_rule_ids_spanned(&rule_ids); + let missing_from_lexer = missing_from_lexer.map(|tokens| { + tokens + .iter() + .map(|name| { + grm.token_span(*grm.tokens_map().get(name).unwrap()) + .expect("Given token should have a span") + }) + .collect::>() + }); + + let missing_from_parser = + missing_from_parser.map(|tokens| tokens.iter().map(|(_, span)| *span).collect::>()); + (missing_from_lexer, missing_from_parser) +} + #[cfg(test)] mod test { use std::fs::File; diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs index 52e4135fb..5e687f7b3 100644 --- a/lrpar/src/lib/ctbuilder.rs +++ b/lrpar/src/lib/ctbuilder.rs @@ -692,6 +692,9 @@ where return Ok(CTParser { regenerated: false, rule_ids, + yacc_grammar: grm, + grammar_src: inc, + grammar_path: self.grammar_path.unwrap(), conflicts: None, }); } else { @@ -779,13 +782,16 @@ where &format!("/* CACHE INFORMATION {} */\n", cache), )?; let conflicts = if stable.conflicts().is_some() { - Some((grm, sgraph, stable)) + Some((sgraph, stable)) } else { None }; Ok(CTParser { regenerated: true, rule_ids, + yacc_grammar: grm, + grammar_src: inc, + grammar_path: self.grammar_path.unwrap(), conflicts, }) } @@ -1489,11 +1495,10 @@ where { regenerated: bool, rule_ids: HashMap, - conflicts: Option<( - YaccGrammar, - StateGraph, - StateTable, - )>, + yacc_grammar: YaccGrammar, + grammar_src: String, + grammar_path: PathBuf, + conflicts: Option<(StateGraph, StateTable)>, } impl CTParser @@ -1527,11 +1532,29 @@ where 
&StateTable, &Conflicts, )> { - if let Some((grm, sgraph, stable)) = &self.conflicts { - return Some((grm, sgraph, stable, stable.conflicts().unwrap())); + if let Some((sgraph, stable)) = &self.conflicts { + return Some(( + &self.yacc_grammar, + sgraph, + stable, + stable.conflicts().unwrap(), + )); } None } + + #[doc(hidden)] + pub fn yacc_grammar(&self) -> &YaccGrammar { + &self.yacc_grammar + } + #[doc(hidden)] + pub fn grammar_src(&self) -> &str { + &self.grammar_src + } + #[doc(hidden)] + pub fn grammar_path(&self) -> &Path { + self.grammar_path.as_path() + } } /// Indents a multi-line string and trims any trailing newline. From f4a42cbf0a118ebcdee702b37a2efeda15250fc2 Mon Sep 17 00:00:00 2001 From: matt rice Date: Wed, 21 May 2025 08:40:53 -0700 Subject: [PATCH 2/3] experiment with alternate rule id setting api --- cfgrammar/src/lib/yacc/grammar.rs | 10 ++ lrlex/examples/calc_manual_lex/build.rs | 2 +- lrlex/src/lib/ctbuilder.rs | 178 ++++++++++-------------- lrlex/src/lib/lexer.rs | 74 ++++++++++ lrpar/src/lib/ctbuilder.rs | 15 +- 5 files changed, 168 insertions(+), 111 deletions(-) diff --git a/cfgrammar/src/lib/yacc/grammar.rs b/cfgrammar/src/lib/yacc/grammar.rs index fe9def8a2..5f43470f2 100644 --- a/cfgrammar/src/lib/yacc/grammar.rs +++ b/cfgrammar/src/lib/yacc/grammar.rs @@ -644,6 +644,16 @@ where m } + pub fn tokens_map_iter(&self) -> impl Iterator)> { + self.iter_tidxs().filter_map(|tidx| { + if let Some((_, name)) = self.token_names[usize::from(tidx)].as_ref() { + Some((name.as_str(), tidx)) + } else { + None + } + }) + } + /// Return the index of the token named `n` or `None` if it doesn't exist. 
pub fn token_idx(&self, n: &str) -> Option> { self.token_names diff --git a/lrlex/examples/calc_manual_lex/build.rs b/lrlex/examples/calc_manual_lex/build.rs index ba946a95e..680c5d53b 100644 --- a/lrlex/examples/calc_manual_lex/build.rs +++ b/lrlex/examples/calc_manual_lex/build.rs @@ -18,7 +18,7 @@ fn main() { .unwrap(); ct_token_map::( "token_map", - ctp.token_map(), + ctp.token_map().as_ref(), Some(&TOKENS_MAP.iter().cloned().collect()), ) .unwrap(); diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index 3bcbc0d2a..4a387f69c 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -20,8 +20,7 @@ use cfgrammar::{ Setting, Value, }, markmap::MergeBehavior, - span::{Location, Span}, - yacc::YaccGrammar, + span::Location, }; use glob::glob; use lazy_static::lazy_static; @@ -498,7 +497,7 @@ where if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb { inspect_lexerkind_cb(lexerkind)? } - let (mut lexerdef, lex_flags): (LRNonStreamingLexerDef, LexFlags) = + let (lexerdef, lex_flags): (LRNonStreamingLexerDef, LexFlags) = match lexerkind { LexerKind::LRNonStreamingLexer => { let lex_flags = LexFlags::try_from(&mut header)?; @@ -522,8 +521,7 @@ where } }; - let mut has_unallowed_missing = false; - if let Some(ref lrcfg) = self.lrpar_config { + let ct_parser = if let Some(ref lrcfg) = self.lrpar_config { let mut closure_lexerdef = lexerdef.clone(); let mut ctp = CTParserBuilder::::new().inspect_rt(Box::new( move |yacc_header, rtpb, rule_ids_map, grm_path| { @@ -557,58 +555,11 @@ where )); ctp = lrcfg(ctp); let ct_parser = ctp.build()?; - self.rule_ids_map = Some(ct_parser.token_map().to_owned()); - let (missing_from_lexer, missing_from_parser) = - set_rule_ids(&mut lexerdef, ct_parser.yacc_grammar()); - let yacc_diag = - SpannedDiagnosticFormatter::new(ct_parser.grammar_src(), ct_parser.grammar_path()); - let err_indent = " ".repeat(ERROR.len()); - if !self.allow_missing_terms_in_lexer { - if let Some(token_spans) = 
missing_from_lexer { - eprintln!( - "{ERROR} these tokens are not referenced in the lexer but defined as follows" - ); - eprintln!( - "{err_indent} {}", - yacc_diag.file_location_msg("in the grammar", None) - ); - for span in token_spans { - eprintln!( - "{}", - yacc_diag.underline_span_with_text( - span, - "Missing from lexer".to_string(), - '^' - ) - ); - } - eprintln!(); - has_unallowed_missing = true; - } - } - if !self.allow_missing_tokens_in_parser { - if let Some(token_spans) = missing_from_parser { - eprintln!( - "{ERROR} these tokens are not referenced in the grammar but defined as follows" - ); - eprintln!( - "{err_indent} {}", - lex_diag.file_location_msg("in the lexer", None) - ); - for span in token_spans { - eprintln!( - "{}", - lex_diag.underline_span_with_text( - span, - "Missing from parser".to_string(), - '^' - ) - ); - } - has_unallowed_missing = true; - } - } - } + self.rule_ids_map = Some(*ct_parser.token_map()); + Some(ct_parser) + } else { + None + }; let mut lexerdef = Box::new(lexerdef); let unused_header_values = header.unused(); @@ -618,40 +569,86 @@ where ); } - if has_unallowed_missing { - fs::remove_file(outp).ok(); - panic!(); - } - let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map { Some(ref rim) => { - // Convert from HashMap to HashMap<&str, _> - let owned_map = rim - .iter() - .map(|(x, y)| (&**x, *y)) - .collect::>(); - let (x, y) = lexerdef.set_rule_ids(&owned_map); + let (x, y) = lexerdef.set_rule_ids_spanned_iter( + rim.iter().map(|(name, tidx)| (name.as_str(), *tidx)), + ); ( x.map(|a| a.iter().map(|&b| b.to_string()).collect::>()), - y.map(|a| a.iter().map(|&b| b.to_string()).collect::>()), + y.map(|a| { + a.iter() + .map(|(b, span)| (b.to_string(), *span)) + .collect::>() + }), ) } None => (None, None), }; + let mut has_unallowed_missing = false; + let err_indent = " ".repeat(ERROR.len()); if !self.allow_missing_terms_in_lexer { if let Some(ref mfl) = missing_from_lexer { - eprintln!("Error: the 
following tokens are used in the grammar but are not defined in the lexer:"); - for n in mfl { - eprintln!(" {}", n); + if let Some(ct_parser) = ct_parser { + let grm = ct_parser.yacc_grammar(); + let token_spans = mfl + .iter() + .map(|name| { + ct_parser + .yacc_grammar() + .token_span(*grm.tokens_map().get(name.as_str()).unwrap()) + .expect("Given token should have a span") + }) + .collect::>(); + + let yacc_diag = SpannedDiagnosticFormatter::new( + ct_parser.grammar_src(), + ct_parser.grammar_path(), + ); + + eprintln!("{ERROR} these tokens are not referenced in the lexer but defined as follows"); + eprintln!( + "{err_indent} {}", + yacc_diag.file_location_msg("in the grammar", None) + ); + for span in token_spans { + eprintln!( + "{}", + yacc_diag.underline_span_with_text( + span, + "Missing from lexer".to_string(), + '^' + ) + ); + } + eprintln!(); + } else { + eprintln!("{ERROR} the following tokens are used in the grammar but are not defined in the lexer:"); + for n in mfl { + eprintln!(" {}", n); + } } has_unallowed_missing = true; } } if !self.allow_missing_tokens_in_parser { if let Some(ref mfp) = missing_from_parser { - eprintln!("Error: the following tokens are defined in the lexer but not used in the grammar:"); - for n in mfp { - eprintln!(" {}", n); + eprintln!( + "{ERROR} these tokens are not referenced in the grammar but defined as follows" + ); + eprintln!( + "{err_indent} {}", + lex_diag.file_location_msg("in the lexer", None) + ); + for (_, span) in mfp { + eprintln!( + "{}", + lex_diag.underline_span_with_text( + *span, + "Missing from parser".to_string(), + '^' + ) + ); } has_unallowed_missing = true; } @@ -802,6 +799,8 @@ where // If the file we're about to write out already exists with the same contents, then we // don't overwrite it (since that will force a recompile of the file, and relinking of the // binary etc). 
+ let missing_from_parser = + missing_from_parser.map(|mut set| set.drain().map(|(n, _)| n).collect::>()); if let Ok(curs) = read_to_string(outp) { if curs == outs { return Ok(CTLexer { @@ -1228,35 +1227,6 @@ fn indent(indent: &str, s: &str) -> String { format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent)) } -fn set_rule_ids, LT: LexerDef>( - lexerdef: &mut LT, - grm: &YaccGrammar, -) -> (Option>, Option>) -where - ST: 'static + Copy + PrimInt + Unsigned, - usize: num_traits::AsPrimitive, -{ - let rule_ids = grm - .tokens_map() - .iter() - .map(|(&n, &i)| (n, i.0)) - .collect::>(); - let (missing_from_lexer, missing_from_parser) = lexerdef.set_rule_ids_spanned(&rule_ids); - let missing_from_lexer = missing_from_lexer.map(|tokens| { - tokens - .iter() - .map(|name| { - grm.token_span(*grm.tokens_map().get(name).unwrap()) - .expect("Given token should have a span") - }) - .collect::>() - }); - - let missing_from_parser = - missing_from_parser.map(|tokens| tokens.iter().map(|(_, span)| *span).collect::>()); - (missing_from_lexer, missing_from_parser) -} - #[cfg(test)] mod test { use std::fs::File; diff --git a/lrlex/src/lib/lexer.rs b/lrlex/src/lib/lexer.rs index a62e42307..c856caf56 100644 --- a/lrlex/src/lib/lexer.rs +++ b/lrlex/src/lib/lexer.rs @@ -374,11 +374,20 @@ where rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>, ) -> (Option>, Option>); + /// Like `set_rule_ids` but also returns a `Span` for missing lex rules. fn set_rule_ids_spanned<'a>( &'a mut self, rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>, ) -> (Option>, Option>); + /// Like `set_rule_ids_spanned` but takes + fn set_rule_ids_spanned_iter<'a, I>( + &'a mut self, + rule_ids_map: I, + ) -> (Option>, Option>) + where + I: IntoIterator; + /// Returns an iterator over all rules in this AST. fn iter_rules(&self) -> Iter>; @@ -517,6 +526,71 @@ where (missing_from_lexer, missing_from_parser) } + /// Like `set_rule_ids_spanned` but takes an iterator. 
+ fn set_rule_ids_spanned_iter<'a, I>( + &'a mut self, + rule_ids_iter: I, + ) -> (Option>, Option>) + where + I: IntoIterator, + { + let rule_ids_map: HashMap<&str, LexerTypesT::StorageT> = + HashMap::from_iter(rule_ids_iter.into_iter()); + // Because we have to iter_mut over self.rules, we can't easily store a reference to the + // rule's name at the same time. Instead, we store the index of each such rule and + // recover the names later. This has the unfortunate consequence of extended the mutable + // borrow for the rest of the 'a lifetime. To avoid that we could return idx's here. + // But the original `set_rule_ids` invalidates indexes. In the spirit of keeping that + // behavior consistent, this also returns the span. + let mut missing_from_parser_idxs = Vec::new(); + let mut rules_with_names = 0; + for (i, r) in self.rules.iter_mut().enumerate() { + if let Some(n) = r.name() { + match rule_ids_map.get(n) { + Some(tok_id) => r.tok_id = Some(*tok_id), + None => { + r.tok_id = None; + missing_from_parser_idxs.push(i); + } + } + rules_with_names += 1; + } + } + + let missing_from_parser = if missing_from_parser_idxs.is_empty() { + None + } else { + let mut mfp = HashSet::with_capacity(missing_from_parser_idxs.len()); + for i in &missing_from_parser_idxs { + mfp.insert((self.rules[*i].name().unwrap(), self.rules[*i].name_span())); + } + Some(mfp) + }; + + let missing_from_lexer = + if rules_with_names - missing_from_parser_idxs.len() == rule_ids_map.len() { + None + } else { + Some( + rule_ids_map + .keys() + .cloned() + .collect::>() + .difference( + &self + .rules + .iter() + .filter_map(|x| x.name()) + .collect::>(), + ) + .cloned() + .collect::>(), + ) + }; + + (missing_from_lexer, missing_from_parser) + } + fn iter_rules(&self) -> Iter> { self.rules.iter() } diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs index 5e687f7b3..441dff45c 100644 --- a/lrpar/src/lib/ctbuilder.rs +++ b/lrpar/src/lib/ctbuilder.rs @@ -691,7 +691,6 @@ where if 
outc.contains(&cache.to_string()) { return Ok(CTParser { regenerated: false, - rule_ids, yacc_grammar: grm, grammar_src: inc, grammar_path: self.grammar_path.unwrap(), @@ -788,7 +787,6 @@ where }; Ok(CTParser { regenerated: true, - rule_ids, yacc_grammar: grm, grammar_src: inc, grammar_path: self.grammar_path.unwrap(), @@ -896,7 +894,7 @@ where inspect_callback: None, phantom: PhantomData, }; - Ok(cl.build()?.rule_ids) + Ok(*cl.build()?.token_map()) } fn output_file>( @@ -1494,7 +1492,6 @@ where StorageT: Eq + Hash, { regenerated: bool, - rule_ids: HashMap, yacc_grammar: YaccGrammar, grammar_src: String, grammar_path: PathBuf, @@ -1513,8 +1510,14 @@ where /// Returns a [HashMap] from lexeme string types to numeric types (e.g. `INT: 2`), suitable for /// handing to a lexer to coordinate the IDs of lexer and parser. - pub fn token_map(&self) -> &HashMap { - &self.rule_ids + pub fn token_map(&self) -> Box> { + Box::new( + self.yacc_grammar + .tokens_map() + .iter() + .map(|(name, tidx)| (name.to_string(), tidx.as_storaget())) + .collect::>(), + ) } /// If there are any conflicts in the grammar, return a tuple which allows users to inspect and From 8b973f2034ca0ab64e899d2f84ee4a20087d324d Mon Sep 17 00:00:00 2001 From: matt rice Date: Wed, 21 May 2025 12:48:06 -0700 Subject: [PATCH 3/3] Reduce intermediate `HashMap` from token_map; rely on iterators more. We can avoid using `HashMap` entirely by not calling `token_map`. This allows the `lrpar_config` set_rule_ids to typically rely entirely upon iterators. By deprecating the API we can also avoid needing to allocate them for the return value. Changing the `ct_token_map` parameter to `Borrow` can increase compatibility with previous versions. 
--- lrlex/examples/calc_manual_lex/build.rs | 2 +- lrlex/src/lib/ctbuilder.rs | 135 ++++-------------------- 2 files changed, 22 insertions(+), 115 deletions(-) diff --git a/lrlex/examples/calc_manual_lex/build.rs b/lrlex/examples/calc_manual_lex/build.rs index 680c5d53b..ba946a95e 100644 --- a/lrlex/examples/calc_manual_lex/build.rs +++ b/lrlex/examples/calc_manual_lex/build.rs @@ -18,7 +18,7 @@ fn main() { .unwrap(); ct_token_map::( "token_map", - ctp.token_map().as_ref(), + ctp.token_map(), Some(&TOKENS_MAP.iter().cloned().collect()), ) .unwrap(); diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index 4a387f69c..bf6dda331 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -2,6 +2,7 @@ use std::{ any::type_name, + borrow::Borrow, collections::{HashMap, HashSet}, env::{current_dir, var}, error::Error, @@ -446,7 +447,7 @@ where /// * or, if no module name was explicitly specified, then for the file `/a/b/c.l` the /// module name is `c_l` (i.e. the file's leaf name, minus its extension, with a prefix of /// `_l`). 
- pub fn build(mut self) -> Result> { + pub fn build(self) -> Result> { let lexerp = self .lexer_path .as_ref() @@ -555,7 +556,6 @@ where )); ctp = lrcfg(ctp); let ct_parser = ctp.build()?; - self.rule_ids_map = Some(*ct_parser.token_map()); Some(ct_parser) } else { None @@ -570,33 +570,32 @@ where } let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map { - Some(ref rim) => { - let (x, y) = lexerdef.set_rule_ids_spanned_iter( - rim.iter().map(|(name, tidx)| (name.as_str(), *tidx)), - ); - ( - x.map(|a| a.iter().map(|&b| b.to_string()).collect::>()), - y.map(|a| { - a.iter() - .map(|(b, span)| (b.to_string(), *span)) - .collect::>() + Some(ref rim) => lexerdef + .set_rule_ids_spanned_iter(rim.iter().map(|(name, tidx)| (name.as_str(), *tidx))), + None => match &ct_parser { + Some(ct_parser) => lexerdef.set_rule_ids_spanned_iter( + ct_parser.yacc_grammar().iter_tidxs().filter_map(|tidx| { + ct_parser + .yacc_grammar() + .token_name(tidx) + .map(|n| (n, tidx.as_storaget())) }), - ) - } - None => (None, None), + ), + None => (None, None), + }, }; let mut has_unallowed_missing = false; let err_indent = " ".repeat(ERROR.len()); if !self.allow_missing_terms_in_lexer { if let Some(ref mfl) = missing_from_lexer { - if let Some(ct_parser) = ct_parser { + if let Some(ct_parser) = &ct_parser { let grm = ct_parser.yacc_grammar(); let token_spans = mfl .iter() .map(|name| { ct_parser .yacc_grammar() - .token_span(*grm.tokens_map().get(name.as_str()).unwrap()) + .token_span(*grm.tokens_map().get(name).unwrap()) .expect("Given token should have a span") }) .collect::>(); @@ -799,94 +798,14 @@ where // If the file we're about to write out already exists with the same contents, then we // don't overwrite it (since that will force a recompile of the file, and relinking of the // binary etc). 
- let missing_from_parser = - missing_from_parser.map(|mut set| set.drain().map(|(n, _)| n).collect::>()); if let Ok(curs) = read_to_string(outp) { if curs == outs { - return Ok(CTLexer { - missing_from_lexer, - missing_from_parser, - }); + return Ok(CTLexer); } } let mut f = File::create(outp)?; f.write_all(outs.as_bytes())?; - Ok(CTLexer { - missing_from_lexer, - missing_from_parser, - }) - } - - /// Given the filename `a/b.l` as input, statically compile the file `src/a/b.l` into a Rust - /// module which can then be imported using `lrlex_mod!("a/b.l")`. This is a convenience - /// function around [`process_file`](struct.CTLexerBuilder.html#method.process_file) which makes - /// it easier to compile `.l` files stored in a project's `src/` directory: please see - /// [`process_file`](#method.process_file) for additional constraints and information about the - /// generated files. - #[deprecated( - since = "0.11.0", - note = "Please use lexer_in_src_dir() and build() instead" - )] - #[allow(deprecated)] - pub fn process_file_in_src( - self, - srcp: &str, - ) -> Result<(Option>, Option>), Box> { - let mut inp = current_dir()?; - inp.push("src"); - inp.push(srcp); - let mut outp = PathBuf::new(); - outp.push(var("OUT_DIR").unwrap()); - outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap()); - create_dir_all(&outp)?; - let mut leaf = Path::new(srcp) - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_owned(); - write!(leaf, ".{}", RUST_FILE_EXT).ok(); - outp.push(leaf); - self.process_file(inp, outp) - } - - /// Statically compile the `.l` file `inp` into Rust, placing the output into the file `outp`. - /// The latter defines a module as follows: - /// - /// ```text - /// mod modname { - /// pub fn lexerdef() -> LexerDef { ... } - /// - /// ... 
- /// } - /// ``` - /// - /// where: - /// * `modname` is either: - /// * the module name specified [`mod_name`](#method.mod_name) - /// * or, if no module name was explicitly specified, then for the file `/a/b/c.l` the - /// module name is `c_l` (i.e. the file's leaf name, minus its extension, with a prefix of - /// `_l`). - #[deprecated( - since = "0.11.0", - note = "Please use lexer_in_src_dir() and build() instead" - )] - pub fn process_file( - mut self, - inp: P, - outp: Q, - ) -> Result<(Option>, Option>), Box> - where - P: AsRef, - Q: AsRef, - { - self.lexer_path = Some(inp.as_ref().to_owned()); - self.output_path = Some(outp.as_ref().to_owned()); - let cl = self.build()?; - Ok(( - cl.missing_from_lexer().map(|x| x.to_owned()), - cl.missing_from_parser().map(|x| x.to_owned()), - )) + Ok(CTLexer) } /// If passed false, tokens used in the grammar but not defined in the lexer will cause a @@ -1119,20 +1038,7 @@ where } /// An interface to the result of [CTLexerBuilder::build()]. -pub struct CTLexer { - missing_from_lexer: Option>, - missing_from_parser: Option>, -} - -impl CTLexer { - fn missing_from_lexer(&self) -> Option<&HashSet> { - self.missing_from_lexer.as_ref() - } - - fn missing_from_parser(&self) -> Option<&HashSet> { - self.missing_from_parser.as_ref() - } -} +pub struct CTLexer; /// Create a Rust module named `mod_name` that can be imported with /// [`lrlex_mod!(mod_name)`](crate::lrlex_mod). The module contains one `const` `StorageT` per @@ -1160,7 +1066,7 @@ impl CTLexer { /// ``` pub fn ct_token_map( mod_name: &str, - token_map: &HashMap, + token_map: impl Borrow>, rename_map: Option<&HashMap<&str, &str>>, ) -> Result<(), Box> { // Record the time that this version of lrlex was built. If the source code changes and rustc @@ -1177,6 +1083,7 @@ pub fn ct_token_map( .ok(); outs.push_str( &token_map + .borrow() .iter() .map(|(k, v)| { let k = match rename_map {