Skip to content

Commit a50ddd5

Browse files
authored
Merge pull request #631 from avityuk/issue-630-parse-performance
Avoid repeated parser table decode and cut parse setup overhead (issue 630)
2 parents 10c5083 + bf1870b commit a50ddd5

3 files changed

Lines changed: 47 additions & 25 deletions

File tree

lrpar/src/lib/ctbuilder.rs

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ where
10931093
let run_parser = match self.yacckind.unwrap() {
10941094
YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
10951095
quote! {
1096-
::lrpar::RTParserBuilder::new(&grm, &stable)
1096+
::lrpar::RTParserBuilder::new(grm, stable)
10971097
.recoverer(#recoverer)
10981098
.parse_map(
10991099
lexer,
@@ -1104,7 +1104,7 @@ where
11041104
}
11051105
YaccKind::Original(YaccOriginalActionKind::NoAction) => {
11061106
quote! {
1107-
::lrpar::RTParserBuilder::new(&grm, &stable)
1107+
::lrpar::RTParserBuilder::new(grm, stable)
11081108
.recoverer(#recoverer)
11091109
.parse_map(lexer, &|_| (), &|_, _| ()).1
11101110
}
@@ -1145,7 +1145,7 @@ where
11451145
#action_fn_parse_param_ty
11461146
) -> #actionskind #type_generics
11471147
> = ::std::vec![#(&#wrappers,)*];
1148-
match ::lrpar::RTParserBuilder::new(&grm, &stable)
1148+
match ::lrpar::RTParserBuilder::new(grm, stable)
11491149
.recoverer(#recoverer)
11501150
.parse_actions(lexer, &actions, #action_fn_parse_param) {
11511151
(Some(#actionskind::#action_ident(x)), y) => (Some(x), y),
@@ -1205,14 +1205,24 @@ where
12051205
const __GRM_DATA: &[u8] = &[#(#grm_data,)*];
12061206
const __STABLE_DATA: &[u8] = &[#(#stable_data,)*];
12071207

1208+
// Returns the shared parser data for this generated parser.
//
// The data lives in a function-local `static` `OnceLock`: `get_or_init` runs the
// closure, which calls `_reconstitute` on `__GRM_DATA` and `__STABLE_DATA`, only
// when the cell is still empty, and otherwise hands back a reference to the
// value that is already stored.
fn __lrpar_parser_data() -> &'static ::lrpar::ParserData<#storaget> {
1209+
static DATA: ::std::sync::OnceLock<::lrpar::ParserData<#storaget>>
1210+
= ::std::sync::OnceLock::new();
1211+
DATA.get_or_init(
1212+
|| ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA)
1213+
)
1214+
}
1215+
12081216
// Generated `parse` entry point: binds `grm` and `stable`, then evaluates the
// parser expression spliced in through `#run_parser`.
//
// The removed (`-`) line rebuilt both values with `_reconstitute` on every call;
// the added (`+`) lines instead borrow them from `__lrpar_parser_data()`, so
// `grm` and `stable` are references here rather than owned values.
#[allow(dead_code)]
12091217
pub fn parse #generics (
12101218
lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
12111219
#parse_fn_parse_param
12121220
) -> #parse_fn_return_ty
12131221
#where_clause
12141222
{
1215-
let (grm, stable) = ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA);
1223+
let __data = __lrpar_parser_data();
1224+
let grm = __data.grm();
1225+
let stable = __data.stable();
12161226
#run_parser
12171227
}
12181228
})
@@ -1601,16 +1611,34 @@ where
16011611
}
16021612
}
16031613

1614+
/// Bundles `YaccGrammar` + `StateTable` so that generated parsers can hold
1615+
/// them in a `OnceLock` without naming `lrtable` directly.
///
/// Both fields are private; read access goes through [`ParserData::grm`] and
/// [`ParserData::stable`].
1616+
#[doc(hidden)]
1617+
pub struct ParserData<StorageT: Eq + Hash> {
1618+
// The grammar half of the bundle.
grm: YaccGrammar<StorageT>,
1619+
// The state table half of the bundle.
stable: StateTable<StorageT>,
1620+
}
1621+
1622+
impl<StorageT: Eq + Hash> ParserData<StorageT> {
1623+
/// Returns a shared reference to the bundled grammar.
pub fn grm(&self) -> &YaccGrammar<StorageT> {
1624+
&self.grm
1625+
}
1626+
1627+
/// Returns a shared reference to the bundled state table.
pub fn stable(&self) -> &StateTable<StorageT> {
1628+
&self.stable
1629+
}
1630+
}
1631+
16041632
/// This function is called by generated files; it exists so that generated files don't require a
16051633
/// direct dependency on bincode.
///
/// Decodes `grm_buf` into the grammar and `stable_buf` into the state table, both with
/// `bincode::config::standard()`. The second element of each decode result is discarded.
///
/// # Panics
///
/// Panics if either buffer fails to decode, because both decode results are unwrapped.
16061634
#[doc(hidden)]
1607-
pub fn _reconstitute<StorageT: Decode<()> + Hash + PrimInt + Unsigned + 'static>(
1635+
pub fn _reconstitute<StorageT: Decode<()> + Eq + Hash + PrimInt + Unsigned + 'static>(
16081636
grm_buf: &[u8],
16091637
stable_buf: &[u8],
1610-
) -> (YaccGrammar<StorageT>, StateTable<StorageT>) {
1638+
) -> ParserData<StorageT> {
16111639
let (grm, _) = decode_from_slice(grm_buf, bincode::config::standard()).unwrap();
16121640
let (stable, _) = decode_from_slice(stable_buf, bincode::config::standard()).unwrap();
1613-
(grm, stable)
1641+
ParserData { grm, stable }
16141642
}
16151643

16161644
/// An interface to the result of [CTParserBuilder::build()].

lrpar/src/lib/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ pub mod parser;
206206
pub mod test_utils;
207207

208208
pub use crate::{
209-
ctbuilder::{CTParser, CTParserBuilder, RustEdition, Visibility},
209+
ctbuilder::{CTParser, CTParserBuilder, ParserData, RustEdition, Visibility},
210210
lex_api::{LexError, Lexeme, Lexer, LexerTypes, NonStreamingLexer},
211211
parser::{LexParseError, ParseError, ParseRepair, RTParserBuilder, RecoveryKind},
212212
};

lrpar/src/lib/parser.rs

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ pub(super) struct Parser<
112112
{
113113
rcvry_kind: RecoveryKind,
114114
pub(super) grm: &'a YaccGrammar<StorageT>,
115-
pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
115+
pub(super) token_cost: TokenCostFn<'a, StorageT>,
116116
pub(super) stable: &'a StateTable<StorageT>,
117117
lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
118118
// In the long term, we should remove the `lexemes` field entirely, as the `NonStreamingLexer` API is
@@ -176,7 +176,7 @@ where
176176
let psr = Parser {
177177
rcvry_kind,
178178
grm,
179-
token_cost: Box::new(token_cost),
179+
token_cost,
180180
stable,
181181
lexer,
182182
lexemes,
@@ -274,7 +274,7 @@ where
274274
let psr = Parser {
275275
rcvry_kind,
276276
grm,
277-
token_cost: Box::new(token_cost),
277+
token_cost,
278278
stable,
279279
lexer,
280280
lexemes,
@@ -970,13 +970,10 @@ where
970970
fterm: &dyn Fn(LexerTypesT::LexemeT) -> Node,
971971
fnonterm: &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
972972
) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
973-
let mut lexemes = vec![];
974-
for e in lexer.iter().collect::<Vec<_>>() {
975-
match e {
976-
Ok(l) => lexemes.push(l),
977-
Err(e) => return (None, vec![e.into()]),
978-
}
979-
}
973+
let lexemes = match lexer.iter().collect() {
974+
Ok(lexemes) => lexemes,
975+
Err(e) => return (None, vec![e.into()]),
976+
};
980977
Parser::<
981978
StorageT,
982979
LexerTypesT,
@@ -1019,13 +1016,10 @@ where
10191016
actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
10201017
param: ParamT,
10211018
) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
1022-
let mut lexemes = vec![];
1023-
for e in lexer.iter().collect::<Vec<_>>() {
1024-
match e {
1025-
Ok(l) => lexemes.push(l),
1026-
Err(e) => return (None, vec![e.into()]),
1027-
}
1028-
}
1019+
let lexemes = match lexer.iter().collect() {
1020+
Ok(lexemes) => lexemes,
1021+
Err(e) => return (None, vec![e.into()]),
1022+
};
10291023
Parser::parse_actions(
10301024
self.recoverer,
10311025
self.grm,

0 commit comments

Comments
 (0)