Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.

Commit 7aa32b7

Browse files
authored
Merge pull request #285 from zesterer/perf
Reduced use of Marker for more performance wins
2 parents 7373b7c + 651d823 commit 7aa32b7

7 files changed

Lines changed: 70 additions & 77 deletions

File tree

benches/json.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,7 @@ mod chumsky_zero_copy {
100100

101101
let frac = just(b'.').then(digits.clone());
102102

103-
let exp = just(b'e')
104-
.or(just(b'E'))
103+
let exp = one_of(b"eE")
105104
.then(one_of(b"+-").or_not())
106105
.then(digits.clone());
107106

@@ -124,8 +123,7 @@ mod chumsky_zero_copy {
124123
just(b'r').to(b'\r'),
125124
just(b't').to(b'\t'),
126125
)))
127-
.ignored()
128-
.boxed();
126+
.ignored();
129127

130128
let string = none_of(b"\\\"")
131129
.ignored()

benches/lex.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ fn bench_lex(c: &mut Criterion) {
6363

6464
c.bench_function("lex_logos", |b| {
6565
b.iter(|| {
66-
assert!(black_box(logos::lexer(black_box(SAMPLE)))
67-
.all(|t| t != logos::Token::Error))
66+
assert!(black_box(logos::lexer(black_box(SAMPLE))).all(|t| t != logos::Token::Error))
6867
})
6968
});
7069
}
@@ -88,8 +87,7 @@ mod logos {
8887
str::from_utf8(lex.slice()).unwrap().parse().unwrap()
8988
}
9089

91-
#[derive(Logos)]
92-
#[derive(Debug, Clone, PartialEq)]
90+
#[derive(Logos, Debug, Clone, PartialEq)]
9391
pub enum Token<'a> {
9492
#[token("null")]
9593
Null,

src/zero_copy/combinator.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ where
241241
} else {
242242
let span = inp.span_since(before);
243243
Err(Located::at(
244-
inp.save(),
244+
inp.offset().into(),
245245
E::Error::expected_found(None, None, span),
246246
))
247247
}
@@ -397,7 +397,7 @@ where
397397
let span = inp.span_since(before);
398398
match (self.mapper)(out, span) {
399399
Ok(out) => Ok(M::bind(|| out)),
400-
Err(e) => Err(Located::at(inp.save(), e)),
400+
Err(e) => Err(Located::at(inp.offset().into(), e)),
401401
}
402402
})
403403
}
@@ -438,7 +438,7 @@ where
438438
let state = inp.state();
439439
match (self.mapper)(out, span, state) {
440440
Ok(out) => Ok(M::bind(|| out)),
441-
Err(e) => Err(Located::at(inp.save(), e)),
441+
Err(e) => Err(Located::at(inp.offset().into(), e)),
442442
}
443443
})
444444
}
@@ -875,7 +875,8 @@ impl RepeatedCfg {
875875
pub struct Repeated<A, OA, I: ?Sized, E> {
876876
pub(crate) parser: A,
877877
pub(crate) at_least: usize,
878-
pub(crate) at_most: Option<usize>,
878+
// Slightly evil: this should be `Option<usize>`, but we encode `!0` (i.e. `u64::MAX`) as 'no cap' because no real repetition count can reach it
879+
pub(crate) at_most: u64,
879880
pub(crate) phantom: PhantomData<(OA, E, I)>,
880881
}
881882

@@ -905,7 +906,7 @@ where
905906
/// Require that the pattern appear at most a maximum number of times.
906907
pub fn at_most(self, at_most: usize) -> Self {
907908
Self {
908-
at_most: Some(at_most),
909+
at_most: at_most as u64,
909910
..self
910911
}
911912
}
@@ -954,7 +955,7 @@ where
954955
pub fn exactly(self, exactly: usize) -> Self {
955956
Self {
956957
at_least: exactly,
957-
at_most: Some(exactly),
958+
at_most: exactly as u64,
958959
..self
959960
}
960961
}
@@ -1000,10 +1001,8 @@ where
10001001
inp: &mut InputRef<'a, '_, I, E>,
10011002
count: &mut Self::IterState<M>,
10021003
) -> Option<PResult<M, O, E::Error>> {
1003-
if let Some(at_most) = self.at_most {
1004-
if *count >= at_most {
1005-
return None;
1006-
}
1004+
if *count as u64 >= self.at_most {
1005+
return None;
10071006
}
10081007

10091008
let before = inp.save();
@@ -1038,13 +1037,11 @@ where
10381037
count: &mut Self::IterState<M>,
10391038
cfg: &Self::Config,
10401039
) -> Option<PResult<M, O, E::Error>> {
1041-
let at_most = cfg.at_most.or(self.at_most);
1040+
let at_most = cfg.at_most.map(|x| x as u64).unwrap_or(self.at_most);
10421041
let at_least = cfg.at_least.unwrap_or(self.at_least);
10431042

1044-
if let Some(at_most) = at_most {
1045-
if *count >= at_most {
1046-
return None;
1047-
}
1043+
if *count as u64 >= at_most {
1044+
return None;
10481045
}
10491046

10501047
let before = inp.save();
@@ -1070,7 +1067,8 @@ pub struct SeparatedBy<A, B, OA, OB, I: ?Sized, E> {
10701067
pub(crate) parser: A,
10711068
pub(crate) separator: B,
10721069
pub(crate) at_least: usize,
1073-
pub(crate) at_most: Option<usize>,
1070+
// Slightly evil: this should be `Option<usize>`, but we encode `!0` (i.e. `u64::MAX`) as 'no cap' because no real repetition count can reach it
1071+
pub(crate) at_most: u64,
10741072
pub(crate) allow_leading: bool,
10751073
pub(crate) allow_trailing: bool,
10761074
pub(crate) phantom: PhantomData<(OA, OB, E, I)>,
@@ -1142,7 +1140,7 @@ where
11421140
/// ````
11431141
pub fn at_most(self, at_most: usize) -> Self {
11441142
Self {
1145-
at_most: Some(at_most),
1143+
at_most: at_most as u64,
11461144
..self
11471145
}
11481146
}
@@ -1169,7 +1167,7 @@ where
11691167
pub fn exactly(self, exactly: usize) -> Self {
11701168
Self {
11711169
at_least: exactly,
1172-
at_most: Some(exactly),
1170+
at_most: exactly as u64,
11731171
..self
11741172
}
11751173
}
@@ -1253,7 +1251,7 @@ where
12531251
inp: &mut InputRef<'a, '_, I, E>,
12541252
state: &mut Self::IterState<M>,
12551253
) -> Option<PResult<M, OA, E::Error>> {
1256-
if self.at_most.map_or(false, |max| *state >= max) {
1254+
if *state as u64 >= self.at_most {
12571255
return None;
12581256
}
12591257

@@ -1433,7 +1431,7 @@ where
14331431
Ok(_) => {
14341432
let (at, tok) = inp.next();
14351433
Err(Located::at(
1436-
at,
1434+
at.into(),
14371435
E::Error::expected_found(None, tok, inp.span_since(before.offset)),
14381436
))
14391437
}

src/zero_copy/input.rs

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,11 @@ impl<I: Input + ?Sized> Clone for Marker<I> {
179179
/// Internal type representing an input as well as all the necessary context for parsing.
180180
pub struct InputRef<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> {
181181
input: &'a I,
182-
marker: Marker<I>,
182+
offset: I::Offset,
183+
errors: Vec<E::Error>,
183184
// TODO: Don't use a result, use something like `Cow` but that allows `E::State` to not be `Clone`
184185
state: Result<&'parse mut E::State, E::State>,
185186
ctx: E::Context,
186-
errors: Vec<E::Error>,
187187
}
188188

189189
impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> {
@@ -193,10 +193,7 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
193193
{
194194
Self {
195195
input,
196-
marker: Marker {
197-
offset: input.start(),
198-
err_count: 0,
199-
},
196+
offset: input.start(),
200197
state,
201198
ctx: E::Context::default(),
202199
errors: Vec::new(),
@@ -216,7 +213,7 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
216213

217214
let mut new_ctx = InputRef {
218215
input: self.input,
219-
marker: self.marker,
216+
offset: self.offset,
220217
state: match &mut self.state {
221218
Ok(state) => Ok(*state),
222219
Err(state) => Ok(state),
@@ -225,28 +222,31 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
225222
errors: mem::take(&mut self.errors),
226223
};
227224
let res = f(&mut new_ctx);
228-
self.marker = new_ctx.marker;
225+
self.offset = new_ctx.offset;
229226
self.errors = mem::take(&mut new_ctx.errors);
230227
res
231228
}
232229

233230
/// Get the input offset that is currently being pointed to.
234231
#[inline(always)]
235232
pub fn offset(&self) -> I::Offset {
236-
self.marker.offset
233+
self.offset
237234
}
238235

239236
/// Save off a [`Marker`] to the current position in the input
240237
#[inline(always)]
241238
pub fn save(&self) -> Marker<I> {
242-
self.marker
239+
Marker {
240+
offset: self.offset,
241+
err_count: self.errors.len(),
242+
}
243243
}
244244

245245
/// Reset the input state to the provided [`Marker`]
246246
#[inline(always)]
247247
pub fn rewind(&mut self, marker: Marker<I>) {
248248
self.errors.truncate(marker.err_count);
249-
self.marker = marker;
249+
self.offset = marker.offset;
250250
}
251251

252252
#[inline(always)]
@@ -264,11 +264,11 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
264264

265265
#[inline(always)]
266266
pub(crate) fn skip_while<F: FnMut(&I::Token) -> bool>(&mut self, mut f: F) {
267-
let mut offs = self.marker.offset;
267+
let mut offs = self.offset;
268268
loop {
269269
let (offset, token) = self.input.next(offs);
270270
if token.filter(&mut f).is_none() {
271-
self.marker.offset = offs;
271+
self.offset = offs;
272272
break;
273273
} else {
274274
offs = offset;
@@ -277,10 +277,10 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
277277
}
278278

279279
#[inline(always)]
280-
pub(crate) fn next(&mut self) -> (Marker<I>, Option<I::Token>) {
281-
let (offset, token) = self.input.next(self.marker.offset);
282-
self.marker.offset = offset;
283-
(self.marker, token)
280+
pub(crate) fn next(&mut self) -> (I::Offset, Option<I::Token>) {
281+
let (offset, token) = self.input.next(self.offset);
282+
self.offset = offset;
283+
(self.offset, token)
284284
}
285285

286286
/// Get the next token in the input. Returns `None` for EOI
@@ -290,7 +290,7 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
290290

291291
/// Peek the next token in the input. Returns `None` for EOI
292292
pub fn peek(&self) -> Option<I::Token> {
293-
self.input.next(self.marker.offset).1
293+
self.input.next(self.offset).1
294294
}
295295

296296
/// Skip the next token in the input.
@@ -320,13 +320,13 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
320320
where
321321
I: SliceInput,
322322
{
323-
self.input.slice_from(self.marker.offset..)
323+
self.input.slice_from(self.offset..)
324324
}
325325

326326
/// Return the span from the provided offset to the current position
327327
#[inline(always)]
328328
pub fn span_since(&self, before: I::Offset) -> I::Span {
329-
self.input.span(before..self.marker.offset)
329+
self.input.span(before..self.offset)
330330
}
331331

332332
#[inline(always)]
@@ -335,13 +335,12 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
335335
C: Char,
336336
I: StrInput<C>,
337337
{
338-
self.marker.offset += skip;
338+
self.offset += skip;
339339
}
340340

341341
#[inline(always)]
342342
pub(crate) fn emit(&mut self, error: E::Error) {
343343
self.errors.push(error);
344-
self.marker.err_count += 1;
345344
}
346345

347346
pub(crate) fn into_errs(self) -> Vec<E::Error> {

src/zero_copy/mod.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ pub mod prelude {
5959
pub use super::{
6060
error::{EmptyErr, Error as _, Rich, Simple},
6161
extra,
62-
primitive::{any, choice, empty, end, group, just, none_of, one_of, take_until, todo, map_ctx},
62+
primitive::{
63+
any, choice, empty, end, group, just, map_ctx, none_of, one_of, take_until, todo,
64+
},
6365
recovery::{nested_delimiters, skip_until},
6466
recursive::{recursive, Recursive},
6567
// select,
@@ -98,7 +100,7 @@ use self::{
98100
container::*,
99101
error::Error,
100102
extra::ParserExtra,
101-
input::{Input, InputRef, Marker, SliceInput, StrInput},
103+
input::{Input, InputRef, SliceInput, StrInput},
102104
prelude::*,
103105
recovery::RecoverWith,
104106
span::Span,
@@ -200,11 +202,8 @@ pub struct Located<E> {
200202
}
201203

202204
impl<E> Located<E> {
203-
pub fn at<I: Input + ?Sized>(mark: Marker<I>, err: E) -> Self {
204-
Self {
205-
pos: mark.offset.into(),
206-
err,
207-
}
205+
pub fn at(pos: usize, err: E) -> Self {
206+
Self { pos, err }
208207
}
209208

210209
fn at_pos(pos: usize, err: E) -> Self {
@@ -1266,7 +1265,7 @@ pub trait Parser<'a, I: Input + ?Sized, O, E: ParserExtra<'a, I> = extra::Defaul
12661265
Repeated {
12671266
parser: self,
12681267
at_least: 0,
1269-
at_most: None,
1268+
at_most: !0,
12701269
phantom: PhantomData,
12711270
}
12721271
}
@@ -1316,7 +1315,7 @@ pub trait Parser<'a, I: Input + ?Sized, O, E: ParserExtra<'a, I> = extra::Defaul
13161315
parser: self,
13171316
separator,
13181317
at_least: 0,
1319-
at_most: None,
1318+
at_most: !0,
13201319
allow_leading: false,
13211320
allow_trailing: false,
13221321
phantom: PhantomData,
@@ -2183,12 +2182,16 @@ fn regex_parser() {
21832182
.collect()
21842183
}
21852184
assert_eq!(
2186-
parser::<char>().parse("hello world this works").into_result(),
2185+
parser::<char>()
2186+
.parse("hello world this works")
2187+
.into_result(),
21872188
Ok(vec!["hello", "world", "this", "works"]),
21882189
);
21892190

21902191
assert_eq!(
2191-
parser::<u8>().parse(b"hello world this works" as &[_]).into_result(),
2192+
parser::<u8>()
2193+
.parse(b"hello world this works" as &[_])
2194+
.into_result(),
21922195
Ok(vec![
21932196
b"hello" as &[_],
21942197
b"world" as &[_],

0 commit comments

Comments
 (0)