Skip to content

Commit 4b8c791

Browse files
committed
Add an example using RTParserBuilder during CTParserBuilder
1 parent 10b7b4f commit 4b8c791

8 files changed

Lines changed: 204 additions & 34 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ members=[
1212
"lrpar/examples/calc_parsetree",
1313
"lrpar/examples/start_states",
1414
"lrpar/examples/clone_param",
15+
"lrpar/examples/inspect",
1516
"lrtable",
1617
"nimbleparse",
1718
]

lrlex/src/lib/ctbuilder.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -344,17 +344,18 @@ where
344344
/// module name is `c_l` (i.e. the file's leaf name, minus its extension, with a prefix of
345345
/// `_l`).
346346
pub fn build(mut self) -> Result<CTLexer, Box<dyn Error>> {
347+
let lexerp = self
348+
.lexer_path
349+
.as_ref()
350+
.expect("lexer_path must be specified before processing.");
347351
if let Some(ref lrcfg) = self.lrpar_config {
348352
let mut ctp = CTParserBuilder::<LexerTypesT>::new();
353+
ctp = ctp.lexer_path(lexerp.to_owned());
349354
ctp = lrcfg(ctp);
350355
let map = ctp.build()?;
351356
self.rule_ids_map = Some(map.token_map().to_owned());
352357
}
353358

354-
let lexerp = self
355-
.lexer_path
356-
.as_ref()
357-
.expect("lexer_path must be specified before processing.");
358359
let outp = self
359360
.output_path
360361
.as_ref()

lrpar/examples/inspect/Cargo.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[package]
2+
name = "inspect"
3+
description = "During the code generation step inspect the generated parser"
4+
version = "0.1.0"
5+
edition = "2021"
6+
license = "Apache-2.0/MIT"
7+
8+
[[bin]]
9+
doc = false
10+
name = "inspect"
11+
12+
[build-dependencies]
13+
cfgrammar = { path="../../../cfgrammar" }
14+
lrlex = { path="../../../lrlex" }
15+
lrpar = { path="../.." }
16+
num-traits.workspace = true
17+
18+
[dependencies]
19+
cfgrammar = { path="../../../cfgrammar" }
20+
lrlex = { path="../../../lrlex" }
21+
lrpar = { path="../.." }

lrpar/examples/inspect/build.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#![deny(rust_2018_idioms)]
2+
use cfgrammar::{yacc::YaccGrammar, Span};
3+
use lrlex::{CTLexerBuilder, DefaultLexerTypes, LRNonStreamingLexerDef, LexerDef};
4+
use lrpar::{LexerTypes, RTParserBuilder};
5+
use num_traits::ToPrimitive as _;
6+
use std::io::Read;
7+
8+
pub fn set_rule_ids<LexerTypesT: LexerTypes<StorageT = u32>, LT: LexerDef<LexerTypesT>>(
9+
lexerdef: &mut LT,
10+
grm: &YaccGrammar,
11+
) -> (Option<Vec<Span>>, Option<Vec<Span>>)
12+
where
13+
usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
14+
{
15+
let rule_ids = grm
16+
.tokens_map()
17+
.iter()
18+
.map(|(&n, &i)| (n, usize::from(i).to_u32().unwrap()))
19+
.collect::<std::collections::HashMap<&str, u32>>();
20+
let (missing_from_lexer, missing_from_parser) = lexerdef.set_rule_ids_spanned(&rule_ids);
21+
let missing_from_lexer = missing_from_lexer.map(|tokens| {
22+
tokens
23+
.iter()
24+
.map(|name| {
25+
grm.token_span(*grm.tokens_map().get(name).unwrap())
26+
.expect("Given token should have a span")
27+
})
28+
.collect::<Vec<_>>()
29+
});
30+
31+
let missing_from_parser =
32+
missing_from_parser.map(|tokens| tokens.iter().map(|(_, span)| *span).collect::<Vec<_>>());
33+
(missing_from_lexer, missing_from_parser)
34+
}
35+
36+
fn main() {
37+
// Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function
38+
// that makes it easy to a) create a lexer and parser and b) link them together.
39+
CTLexerBuilder::new()
40+
.rust_edition(lrlex::RustEdition::Rust2021)
41+
.lrpar_config(|ctp| {
42+
ctp.rust_edition(lrpar::RustEdition::Rust2021)
43+
.inspect(Box::new(move |_, recov, grm, stable, _, lexer_path| {
44+
let good_srcs = ["", "TODO: [Walk, relax]", "todo: [run, rest]"];
45+
let bad_srcs = [
46+
// Should start with TODO:
47+
"Frodo: [Breakfast, Elevenses]",
48+
];
49+
let mut lexer_src = String::new();
50+
let _ = std::fs::File::open(lexer_path.unwrap())
51+
.unwrap()
52+
.read_to_string(&mut lexer_src);
53+
let mut lexerdef =
54+
LRNonStreamingLexerDef::<DefaultLexerTypes>::from_str(&lexer_src).unwrap();
55+
set_rule_ids(&mut lexerdef, grm);
56+
let pb = RTParserBuilder::new(grm, stable).recoverer(recov);
57+
for src in good_srcs {
58+
let lexer = lexerdef.lexer(src);
59+
let errs = pb.parse_noaction(&lexer);
60+
if !errs.is_empty() {
61+
return Err(format!("{:?} while parsing src: {}", errs, src).into());
62+
}
63+
}
64+
for src in bad_srcs {
65+
let lexer = lexerdef.lexer(src);
66+
let errs = pb.parse_noaction(&lexer);
67+
if errs.is_empty() {
68+
return Err(format!(
69+
"Parse of source '{src}' succeeded while expecting failure"
70+
)
71+
.into());
72+
}
73+
}
74+
Ok(())
75+
}))
76+
.grammar_in_src_dir("todo.y")
77+
.unwrap()
78+
})
79+
.lexer_in_src_dir("todo.l")
80+
.unwrap()
81+
.build()
82+
.unwrap();
83+
}

lrpar/examples/inspect/src/main.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#![allow(clippy::unnecessary_wraps)]
2+
3+
use std::io::{self, Read};
4+
5+
use lrlex::lrlex_mod;
6+
use lrpar::lrpar_mod;
7+
8+
lrlex_mod!("todo.l");
9+
lrpar_mod!("todo.y");
10+
11+
// For the example within this bin see build.rs
12+
/// Reads all of stdin, lexes and parses it with the generated `todo` lexer and parser, then
/// prints the parse tree (when one was produced) to stdout and every error to stderr.
fn main() {
    // Slurp the whole input up front; the lexer borrows from this buffer.
    let mut input = String::new();
    let _ = io::stdin().read_to_string(&mut input).unwrap();

    let lexerdef = todo_l::lexerdef();
    let lexer = lexerdef.lexer(&input);
    let (tree, problems) = todo_y::parse(&lexer);

    if let Some(tree) = &tree {
        println!("{:?}", tree);
    }
    problems
        .iter()
        .for_each(|problem| eprintln!("Error: {}", problem));
}

lrpar/examples/inspect/src/todo.l

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
%grmtools{case_insensitive}
2+
%%
3+
\[ '['
4+
\] ']'
5+
\: ':'
6+
\, ','
7+
todo 'TODO'
8+
[A-Z]+ 'NAME'
9+
[\n\t\ ] ;

lrpar/examples/inspect/src/todo.y

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
%grmtools{yacckind: Original(GenericParseTree)}
2+
%token NAME
3+
%token TODO
4+
%%
5+
Start: %empty | TODO ':' '[' items ']';
6+
items: item | items ',' item;
7+
item: NAME | %empty;

lrpar/src/lib/ctbuilder.rs

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -242,13 +242,17 @@ where
242242
show_warnings: bool,
243243
visibility: Visibility,
244244
rust_edition: RustEdition,
245+
lexer_path: Option<PathBuf>,
245246
// We want lifetimes that imply the callback can't capture the header or the grammar.
246-
header_callback: Option<
247+
inspect_callback: Option<
247248
Box<
248-
dyn for<'h, 'y> Fn(
249-
&'h mut Header,
249+
dyn for<'y> Fn(
250+
&'y mut Header,
250251
RecoveryKind,
251252
&'y YaccGrammar<LexerTypesT::StorageT>,
253+
&'y StateTable<LexerTypesT::StorageT>,
254+
&'y StateGraph<LexerTypesT::StorageT>,
255+
Option<PathBuf>,
252256
) -> Result<(), Box<dyn Error>>,
253257
>,
254258
>,
@@ -296,7 +300,8 @@ where
296300
show_warnings: true,
297301
visibility: Visibility::Private,
298302
rust_edition: RustEdition::Rust2021,
299-
header_callback: None,
303+
inspect_callback: None,
304+
lexer_path: None,
300305
phantom: PhantomData,
301306
}
302307
}
@@ -427,17 +432,27 @@ where
427432
self
428433
}
429434

430-
pub fn process_header(
435+
/// Sets the path to the lexer sources. This is for use from within callbacks only, and
436+
/// is not used during the build process.
437+
pub fn lexer_path(mut self, lexer_path: PathBuf) -> Self {
438+
self.lexer_path = Some(lexer_path);
439+
self
440+
}
441+
442+
/// Registers `cb` as the inspect callback, replacing any callback set previously.
///
/// While the parser is being generated, the callback is invoked with the header, the
/// recovery kind, the grammar, the state table, the state graph and a clone of the
/// configured lexer path (if any). An `Err` returned by the callback is propagated to the
/// caller.
pub fn inspect(
431443
mut self,
432444
cb: Box<
433445
dyn for<'h, 'y> Fn(
434446
&'h mut Header,
435447
RecoveryKind,
436448
&'y YaccGrammar<StorageT>,
449+
&'y StateTable<StorageT>,
450+
&'y StateGraph<StorageT>,
451+
Option<PathBuf>,
437452
) -> Result<(), Box<dyn Error>>,
438453
>,
439454
) -> Self {
440-
self.header_callback = Some(cb);
455+
self.inspect_callback = Some(cb);
441456
self
442457
}
443458

@@ -637,27 +652,6 @@ where
637652
}
638653
};
639654

640-
if let Some(cb) = &self.header_callback {
641-
cb(
642-
&mut header,
643-
self.recoverer.expect("has a default value"),
644-
&grm,
645-
)?;
646-
}
647-
648-
let unused_keys = header.unused();
649-
if !unused_keys.is_empty() {
650-
return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into());
651-
}
652-
let missing_keys = header.missing();
653-
if !missing_keys.is_empty() {
654-
return Err(format!(
655-
"Required values were missing from the header: {}",
656-
unused_keys.join(", ")
657-
)
658-
.into());
659-
}
660-
661655
let rule_ids = grm
662656
.tokens_map()
663657
.iter()
@@ -727,6 +721,31 @@ where
727721
fs::remove_file(outp).ok();
728722

729723
let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
724+
725+
if let Some(cb) = &self.inspect_callback {
726+
cb(
727+
&mut header,
728+
self.recoverer.expect("has a default value"),
729+
&grm,
730+
&stable,
731+
&sgraph,
732+
self.lexer_path.clone(),
733+
)?;
734+
}
735+
736+
let unused_keys = header.unused();
737+
if !unused_keys.is_empty() {
738+
return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into());
739+
}
740+
let missing_keys = header.missing();
741+
if !missing_keys.is_empty() {
742+
return Err(format!(
743+
"Required values were missing from the header: {}",
744+
unused_keys.join(", ")
745+
)
746+
.into());
747+
}
748+
730749
if self.error_on_conflicts {
731750
if let Some(c) = stable.conflicts() {
732751
match (grm.expect(), grm.expectrr()) {
@@ -853,7 +872,8 @@ where
853872
show_warnings: self.show_warnings,
854873
visibility: self.visibility.clone(),
855874
rust_edition: self.rust_edition,
856-
header_callback: None,
875+
inspect_callback: None,
876+
lexer_path: self.lexer_path.clone(),
857877
phantom: PhantomData,
858878
};
859879
Ok(cl.build()?.rule_ids)
@@ -930,7 +950,7 @@ where
930950
// rustc forces a recompile, this will change this value, causing anything which depends on
931951
// this build of lrpar to be recompiled too.
932952
let Self {
933-
// All variables except for `output_path`, `header_callback` and `phantom` should
953+
// All variables except for `output_path`, `inspect_callback` and `phantom` should
934954
// be written into the cache.
935955
grammar_path,
936956
mod_name,
@@ -942,11 +962,14 @@ where
942962
show_warnings,
943963
visibility,
944964
rust_edition,
945-
header_callback: _,
965+
inspect_callback: _,
966+
lexer_path,
946967
phantom: _,
947968
} = self;
948969
let build_time = env!("VERGEN_BUILD_TIMESTAMP");
949970
let grammar_path = grammar_path.as_ref().unwrap().to_string_lossy();
971+
let empty_path = PathBuf::new();
972+
let lexer_path = lexer_path.as_ref().unwrap_or(&empty_path).to_string_lossy();
950973
let mod_name = QuoteOption(mod_name.as_deref());
951974
let visibility = visibility.to_variant_tokens();
952975
let rust_edition = rust_edition.to_variant_tokens();
@@ -973,6 +996,7 @@ where
973996
RUST_EDITION = #rust_edition
974997
RULE_IDS_MAP = [#(#rule_map,)*]
975998
VISIBILITY = #visibility
999+
LEX_PATH = #lexer_path
9761000
};
9771001
let cache_info_str = cache_info.to_string();
9781002
quote!(#cache_info_str)

0 commit comments

Comments
 (0)