From 367f71595698c36400ca68a2784fc93956ea7bb3 Mon Sep 17 00:00:00 2001 From: matt rice Date: Thu, 10 Apr 2025 21:20:53 -0700 Subject: [PATCH] Allow the `recoverer` key to appear in the `%grmtools` section. This implements reading the `recoverer` field from the grmtools section for nimbleparse and `CTParserBuilder`. It also adds a `recoverer` field to cttests which calls the `recoverer` method in `CTParserBuilder`. As well as tests checking both the builder methods, and the `%grmtools` section value. --- lrpar/cttests/src/calc_actiontype.test | 1 + lrpar/cttests/src/calc_multitypes.test | 1 + .../cttests/src/calc_recoverer_cpctplus.test | 39 +++++ lrpar/cttests/src/calc_recoverer_none.test | 39 +++++ lrpar/cttests/src/cgen_helper.rs | 10 +- lrpar/src/lib/ctbuilder.rs | 147 ++++++++++++++++-- lrpar/src/lib/parser.rs | 86 +++++++++- nimbleparse/src/main.rs | 44 ++++-- 8 files changed, 339 insertions(+), 28 deletions(-) create mode 100644 lrpar/cttests/src/calc_recoverer_cpctplus.test create mode 100644 lrpar/cttests/src/calc_recoverer_none.test diff --git a/lrpar/cttests/src/calc_actiontype.test b/lrpar/cttests/src/calc_actiontype.test index cf4be1ca4..df4b8ca12 100644 --- a/lrpar/cttests/src/calc_actiontype.test +++ b/lrpar/cttests/src/calc_actiontype.test @@ -1,5 +1,6 @@ name: Test basic user actions using the calculator grammar yacckind: Original(YaccOriginalActionKind::UserAction) +recoverer: RecoveryKind::None grammar: | %start Expr %actiontype Result diff --git a/lrpar/cttests/src/calc_multitypes.test b/lrpar/cttests/src/calc_multitypes.test index 4e061f9a8..56e257c83 100644 --- a/lrpar/cttests/src/calc_multitypes.test +++ b/lrpar/cttests/src/calc_multitypes.test @@ -1,5 +1,6 @@ name: Test basic user actions using the calculator grammar yacckind: Grmtools +recoverer: RecoveryKind::CPCTPlus grammar: | %start Expr %avoid_insert "INT" diff --git a/lrpar/cttests/src/calc_recoverer_cpctplus.test b/lrpar/cttests/src/calc_recoverer_cpctplus.test new file mode 100644 index 000000000..aee8b67d8 --- /dev/null +++ b/lrpar/cttests/src/calc_recoverer_cpctplus.test @@ -0,0 +1,39 @@ +name: Test basic user actions using the calculator grammar +grammar: | + %grmtools {yacckind: Original(UserAction), recoverer: RecoveryKind::CPCTPlus} + %start Expr + %actiontype Result + %avoid_insert 'INT' + %% + Expr: Expr '+' Term { Ok($1? + $3?) } + | Term { $1 } + ; + + Term: Term '*' Factor { Ok($1? * $3?) } + | Factor { $1 } + ; + + Factor: '(' Expr ')' { $2 } + | 'INT' { + let l = $1.map_err(|_| ())?; + match $lexer.span_str(l.span()).parse::() { + Ok(v) => Ok(v), + Err(_) => { + let ((_, col), _) = $lexer.line_col(l.span()); + eprintln!("Error at column {}: '{}' cannot be represented as a u64", + col, + $lexer.span_str(l.span())); + Err(()) + } + } + } + ; + +lexer: | + %% + [0-9]+ "INT" + \+ "+" + \* "*" + \( "(" + \) ")" + [\t ]+ ; diff --git a/lrpar/cttests/src/calc_recoverer_none.test b/lrpar/cttests/src/calc_recoverer_none.test new file mode 100644 index 000000000..8f3759618 --- /dev/null +++ b/lrpar/cttests/src/calc_recoverer_none.test @@ -0,0 +1,39 @@ +vname: Test basic user actions using the calculator grammar +grammar: | + %grmtools {yacckind: Original(UserAction), recoverer: RecoveryKind::None} + %start Expr + %actiontype Result + %avoid_insert 'INT' + %% + Expr: Expr '+' Term { Ok($1? + $3?) } + | Term { $1 } + ; + + Term: Term '*' Factor { Ok($1? * $3?) } + | Factor { $1 } + ; + + Factor: '(' Expr ')' { $2 } + | 'INT' { + let l = $1.map_err(|_| ())?; + match $lexer.span_str(l.span()).parse::() { + Ok(v) => Ok(v), + Err(_) => { + let ((_, col), _) = $lexer.line_col(l.span()); + eprintln!("Error at column {}: '{}' cannot be represented as a u64", + col, + $lexer.span_str(l.span())); + Err(()) + } + } + } + ; + +lexer: | + %% + [0-9]+ "INT" + \+ "+" + \* "*" + \( "(" + \) ")" + [\t ]+ ; diff --git a/lrpar/cttests/src/cgen_helper.rs b/lrpar/cttests/src/cgen_helper.rs index 891ebc648..8a6f8ca7d 100644 --- a/lrpar/cttests/src/cgen_helper.rs +++ b/lrpar/cttests/src/cgen_helper.rs @@ -1,6 +1,6 @@ use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind}; use lrlex::{CTLexerBuilder, DefaultLexerTypes}; -use lrpar::CTParserBuilder; +use lrpar::{CTParserBuilder, RecoveryKind}; use std::{ env, fs, path::{Path, PathBuf}, @@ -32,6 +32,11 @@ pub(crate) fn run_test_path>(path: P) -> Result<(), Box panic!("YaccKind '{}' not supported", s), None => None, }; + let recoverer = match docs[0]["revoverer"].as_str() { + Some("RecoveryKind::CPCTPlus") => Some(RecoveryKind::CPCTPlus), + Some("RecoveryKind::None") => Some(RecoveryKind::None), + _ => None, + }; let (negative_yacc_flags, positive_yacc_flags) = &docs[0]["yacc_flags"] .as_vec() .map(|flags_vec| { @@ -93,6 +98,9 @@ pub(crate) fn run_test_path>(path: P) -> Result<(), Box, output_path: Option, mod_name: Option<&'a str>, - recoverer: RecoveryKind, + recoverer: Option, yacckind: Option, error_on_conflicts: bool, warnings_are_errors: bool, show_warnings: bool, visibility: Visibility, rust_edition: RustEdition, + // test function for inspecting private state + #[cfg(test)] + inspect_callback: Option Result<(), Box>>>, phantom: PhantomData, } @@ -279,13 +282,15 @@ where grammar_path: None, output_path: None, mod_name: None, - recoverer: RecoveryKind::CPCTPlus, + recoverer: None, yacckind: None, error_on_conflicts: true, warnings_are_errors: true, show_warnings: true, visibility: Visibility::Private, rust_edition: RustEdition::Rust2021, + #[cfg(test)] + inspect_callback: None, phantom: PhantomData, } } @@ -378,7 +383,7 @@ where /// Set the recoverer for this parser to `rk`. Defaults to `RecoveryKind::CPCTPlus`. pub fn recoverer(mut self, rk: RecoveryKind) -> Self { - self.recoverer = rk; + self.recoverer = Some(rk); self } @@ -416,6 +421,15 @@ where self } + #[cfg(test)] + pub fn inspect_recoverer( + mut self, + cb: Box Fn(RecoveryKind) -> Result<(), Box>>, + ) -> Self { + self.inspect_callback = Some(cb); + self + } + /// Statically compile the Yacc file specified by [CTParserBuilder::grammar_path()] into Rust, /// placing the output into the file spec [CTParserBuilder::output_path()]. Note that three /// additional files will be created with the same name as specified in [self.output_path] but @@ -494,6 +508,18 @@ where } }, } + if let Some(recoverer) = self.recoverer { + match header.entry("recoverer".to_string()) { + Entry::Occupied(_) => unreachable!(), + Entry::Vacant(v) => { + let rk_value: Value = Value::try_from(recoverer)?; + let mut o = + v.insert_entry((Location::Other("CTParserBuilder".to_string()), rk_value)); + o.set_merge_behavior(MergeBehavior::Ours); + } + } + } + { let mut lk = GENERATED_PATHS.lock().unwrap(); if lk.contains(outp.as_path()) { @@ -505,18 +531,16 @@ where let inc = read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?; let ast_validation = ASTWithValidityInfo::new(&mut header, &inc); - let unused_keys = header.unused(); - if !unused_keys.is_empty() { - return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into()); - } - let missing_keys = header.missing(); - if !missing_keys.is_empty() { - return Err(format!( - "Required values were missing from the header: {}", - unused_keys.join(", ") - ) - .into()); + header.mark_used(&"recoverer".to_string()); + let rk_val = header.get("recoverer").map(|(_, rk_val)| rk_val); + + if let Some(rk_val) = rk_val { + self.recoverer = Some(RecoveryKind::try_from(rk_val)?); + } else { + // Fallback to the default recoverykind. + self.recoverer = Some(RecoveryKind::CPCTPlus); } + self.yacckind = ast_validation.yacc_kind(); let warnings = ast_validation.ast().warnings(); let loc_fmt = |err_str, loc, inc: &str, line_cache: &NewlineCache| match loc { @@ -602,6 +626,24 @@ where } }; + #[cfg(test)] + if let Some(cb) = &self.inspect_callback { + cb(self.recoverer.expect("has a default value"))?; + } + + let unused_keys = header.unused(); + if !unused_keys.is_empty() { + return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into()); + } + let missing_keys = header.missing(); + if !missing_keys.is_empty() { + return Err(format!( + "Required values were missing from the header: {}", + unused_keys.join(", ") + ) + .into()); + } + let rule_ids = grm .tokens_map() .iter() @@ -797,6 +839,8 @@ where show_warnings: self.show_warnings, visibility: self.visibility.clone(), rust_edition: self.rust_edition, + #[cfg(test)] + inspect_callback: None, phantom: PhantomData, }; Ok(cl.build()?.rule_ids) @@ -873,7 +917,7 @@ where // rustc forces a recompile, this will change this value, causing anything which depends on // this build of lrpar to be recompiled too. let Self { - // All variables except for `output_path` and `phantom` should + // All variables except for `output_path`, `inspect_callback` and `phantom` should // be written into the cache. grammar_path, mod_name, @@ -885,6 +929,8 @@ where show_warnings, visibility, rust_edition, + #[cfg(test)] + inspect_callback: _, phantom: _, } = self; let build_time = env!("VERGEN_BUILD_TIMESTAMP"); @@ -1573,4 +1619,75 @@ C : 'a';" } } } + + #[cfg(test)] + #[test] + fn test_recoverer_header() -> Result<(), Box> { + use crate::RecoveryKind as RK; + #[rustfmt::skip] + let recovery_kinds = [ + // Builder, Header setting, Expected result. + // ----------- ------------------ ------------------- + (Some(RK::None), Some(RK::None), Some(RK::None)), + (Some(RK::None), Some(RK::CPCTPlus), Some(RK::None)), + (Some(RK::CPCTPlus), Some(RK::CPCTPlus), Some(RK::CPCTPlus)), + (Some(RK::CPCTPlus), Some(RK::None), Some(RK::CPCTPlus)), + (None, Some(RK::CPCTPlus), Some(RK::CPCTPlus)), + (None, Some(RK::None), Some(RK::None)), + (None, None, Some(RK::CPCTPlus)), + (Some(RK::None), None, Some(RK::None)), + (Some(RK::CPCTPlus), None, Some(RK::CPCTPlus)), + ]; + + for (i, (builder_arg, header_arg, expected_rk)) in + recovery_kinds.iter().cloned().enumerate() + { + let y_src = if let Some(header_arg) = header_arg { + format!( + "\ + %grmtools{{yacckind: Original(NoAction), recoverer: {}}} \ + %% \ + start: ; \ + ", + match header_arg { + RK::None => "RecoveryKind::None", + RK::CPCTPlus => "RecoveryKind::CPCTPlus", + } + ) + } else { + r#" + %grmtools{yacckind: Original(NoAction)} + %% + Start: ; + "# + .to_string() + }; + let out_dir = std::env::var("OUT_DIR").unwrap(); + let y_path = format!("{out_dir}/recoverykind_test_{i}.y"); + let y_out_path = format!("{y_path}.rs"); + std::fs::File::create(y_path.clone()).unwrap(); + std::fs::write(y_path.clone(), y_src).unwrap(); + let mut cp_builder = CTParserBuilder::::new(); + cp_builder = cp_builder + .output_path(y_out_path.clone()) + .grammar_path(y_path.clone()); + cp_builder = if let Some(builder_arg) = builder_arg { + cp_builder.recoverer(builder_arg) + } else { + cp_builder + } + .inspect_recoverer(Box::new(move |rk| { + if matches!( + (rk, expected_rk), + (RK::None, Some(RK::None)) | (RK::CPCTPlus, Some(RK::CPCTPlus)) + ) { + Ok(()) + } else { + panic!("Unexpected recovery kind") + } + })); + cp_builder.build()?; + } + Ok(()) + } } diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index c0cad8ddc..1530d9c6e 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -14,7 +14,7 @@ use std::time::{Duration, Instant}; use web_time::{Duration, Instant}; use cactus::Cactus; -use cfgrammar::{yacc::YaccGrammar, RIdx, Span, TIdx}; +use cfgrammar::{header::Value, yacc::YaccGrammar, RIdx, Span, TIdx}; use lrtable::{Action, StIdx, StateTable}; use num_traits::{AsPrimitive, PrimInt, Unsigned}; use proc_macro2::TokenStream; @@ -629,6 +629,90 @@ pub enum RecoveryKind { None, } +impl TryFrom for Value { + type Error = cfgrammar::header::HeaderError; + fn try_from(rk: RecoveryKind) -> Result { + use cfgrammar::{ + header::{Namespaced, Setting}, + Location, + }; + let from_loc = Location::Other("From".to_string()); + Ok(match rk { + RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced { + namespace: Some(("RecoveryKind".to_string(), from_loc.clone())), + member: ("CPCTPlus".to_string(), from_loc.clone()), + })), + RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced { + namespace: Some(("RecoveryKind".to_string(), from_loc.clone())), + member: ("None".to_string(), from_loc.clone()), + })), + }) + } +} + +impl TryFrom<&Value> for RecoveryKind { + type Error = cfgrammar::header::HeaderError; + fn try_from(rk: &Value) -> Result { + use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting}; + + match rk { + Value::Flag(_, loc) => Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "RecoveryKind", + "Cannot convert boolean to RecoveryKind", + ), + locations: vec![loc.clone()], + }), + Value::Setting(Setting::Num(_, loc)) => Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "RecoveryKind", + "Cannot convert number to RecoveryKind", + ), + locations: vec![loc.clone()], + }), + Value::Setting(Setting::Unitary(Namespaced { + namespace, + member: (kind, kind_loc), + })) => { + match namespace { + Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => { + return Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "RecoveryKind", + "Unknown namespace", + ), + locations: vec![loc.clone()], + }) + } + _ => {} + } + match kind.to_lowercase().as_ref() { + "cpctplus" => Ok(RecoveryKind::CPCTPlus), + "none" => Ok(RecoveryKind::None), + _ => Err(HeaderError { + kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"), + locations: vec![kind_loc.clone()], + }), + } + } + Value::Setting(Setting::Constructor { + ctor: _, + arg: + Namespaced { + namespace: _, + member: (_, arg_loc), + }, + }) => Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "RecoveryKind", + "Cannot convert constructor to RecoveryKind", + ), + locations: vec![arg_loc.clone()], + }), + } + } +} + impl quote::ToTokens for RecoveryKind { fn to_tokens(&self, tokens: &mut TokenStream) { tokens.extend(match *self { diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index 645f10a86..756bc242b 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -114,22 +114,32 @@ fn main() { let dump_state_graph = matches.opt_present("d"); let quiet = matches.opt_present("q"); - - let recoverykind = match matches.opt_str("r") { - None => RecoveryKind::CPCTPlus, - Some(s) => match &*s.to_lowercase() { - "cpctplus" => RecoveryKind::CPCTPlus, - "none" => RecoveryKind::None, - _ => usage(prog, &format!("Unknown recoverer '{}'.", s)), - }, - }; - let mut header = Header::new(); + match matches.opt_str("r") { + None => (), + Some(s) => { + header.set_merge_behavior( + &"recoverer".to_string(), + cfgrammar::markmap::MergeBehavior::Ours, + ); + header.insert( + "recoverer".to_string(), + ( + Location::CommandLine, + Value::try_from(match &*s.to_lowercase() { + "cpctplus" => RecoveryKind::CPCTPlus, + "none" => RecoveryKind::None, + _ => usage(prog, &format!("Unknown recoverer '{}'.", s)), + }) + .expect("All these RecoveryKinds should convert without error"), + ), + ); + } + }; let entry = match header.entry("yacckind".to_string()) { Entry::Occupied(_) => unreachable!("Header should be empty"), Entry::Vacant(v) => v.occupied_entry(), }; - match matches.opt_str("y") { None => {} Some(s) => { @@ -167,6 +177,18 @@ fn main() { let yacc_y_path = PathBuf::from(&matches.free[1]); let yacc_src = read_file(&yacc_y_path); let ast_validation = ASTWithValidityInfo::new(&mut header, &yacc_src); + let recoverykind = if let Some((_, rk_val)) = header.get("recoverer") { + match RecoveryKind::try_from(rk_val) { + Err(e) => { + eprintln!("{e}"); + process::exit(1) + } + Ok(rk) => rk, + } + } else { + // Fallback to the default recoverykind + RecoveryKind::CPCTPlus + }; let warnings = ast_validation.ast().warnings(); let res = YaccGrammar::new_from_ast_with_validity_info(&ast_validation); let mut yacc_diagnostic_formatter: Option = None;