Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.

Commit cf7b266

Browse files
authored
Merge pull request #289 from zesterer/perf
Perf improvements and minor tweaks
2 parents a1bbe4e + 67d9b45 commit cf7b266

5 files changed

Lines changed: 83 additions & 56 deletions

File tree

benches/json.rs

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,9 @@ mod chumsky_zero_copy {
8888

8989
pub fn json<'a>() -> impl Parser<'a, [u8], JsonZero<'a>> {
9090
recursive(|value| {
91-
let digits = one_of(b'0'..=b'9').repeated().slice();
91+
let digits = one_of(b'0'..=b'9').repeated();
9292

9393
let int = one_of(b'1'..=b'9')
94-
.repeated()
95-
.at_least(1)
9694
.then(one_of(b'0'..=b'9').repeated())
9795
.ignored()
9896
.or(just(b'0').ignored())
@@ -112,21 +110,18 @@ mod chumsky_zero_copy {
112110
.map_slice(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
113111
.boxed();
114112

115-
let escape = just(b'\\')
116-
.then(choice((
117-
just(b'\\'),
118-
just(b'/'),
119-
just(b'"'),
120-
just(b'b').to(b'\x08'),
121-
just(b'f').to(b'\x0C'),
122-
just(b'n').to(b'\n'),
123-
just(b'r').to(b'\r'),
124-
just(b't').to(b'\t'),
125-
)))
126-
.ignored();
113+
let escape = just(b'\\').ignore_then(choice((
114+
just(b'\\'),
115+
just(b'/'),
116+
just(b'"'),
117+
just(b'b').to(b'\x08'),
118+
just(b'f').to(b'\x0C'),
119+
just(b'n').to(b'\n'),
120+
just(b'r').to(b'\r'),
121+
just(b't').to(b'\t'),
122+
)));
127123

128124
let string = none_of(b"\\\"")
129-
.ignored()
130125
.or(escape)
131126
.repeated()
132127
.slice()
@@ -135,7 +130,7 @@ mod chumsky_zero_copy {
135130

136131
let array = value
137132
.clone()
138-
.separated_by(just(b',').padded())
133+
.separated_by(just(b','))
139134
.collect()
140135
.padded()
141136
.delimited_by(just(b'['), just(b']'))

benches/parser.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
21
use chumsky::zero_copy::prelude::*;
3-
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
2+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
43

54
fn bench_choice(c: &mut Criterion) {
65
let alphabet_choice = choice((
@@ -36,19 +35,25 @@ fn bench_choice(c: &mut Criterion) {
3635

3736
group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "A"), |b| {
3837
b.iter(|| {
39-
black_box(Parser::parse(&alphabet_choice, black_box("A"))).into_result().unwrap();
38+
black_box(Parser::parse(&alphabet_choice, black_box("A")))
39+
.into_result()
40+
.unwrap();
4041
})
4142
});
4243

4344
group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "Z"), |b| {
4445
b.iter(|| {
45-
black_box(alphabet_choice.parse(black_box("Z"))).into_result().unwrap();
46+
black_box(alphabet_choice.parse(black_box("Z")))
47+
.into_result()
48+
.unwrap();
4649
})
4750
});
4851

4952
group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "0"), |b| {
5053
b.iter(|| {
51-
black_box(alphabet_choice.parse(black_box("0"))).into_result().unwrap_err();
54+
black_box(alphabet_choice.parse(black_box("0")))
55+
.into_result()
56+
.unwrap_err();
5257
})
5358
});
5459
}

src/zero_copy/input.rs

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ pub trait Input {
2121
fn start(&self) -> Self::Offset;
2222

2323
/// Get the next offset from the provided one, and the next token if it exists
24-
fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>);
24+
///
25+
/// Safety: `offset` must be generated by either `Input::start` or a previous call to this function,
26+
/// on this input only.
27+
unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>);
2528

2629
/// Create a span from a start and end offset
2730
fn span(&self, range: Range<Self::Offset>) -> Self::Span;
@@ -55,7 +58,8 @@ impl Input for str {
5558
0
5659
}
5760

58-
fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
61+
#[inline]
62+
unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
5963
if offset < self.len() {
6064
let c = unsafe {
6165
self.get_unchecked(offset..)
@@ -69,6 +73,7 @@ impl Input for str {
6973
}
7074
}
7175

76+
#[inline]
7277
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
7378
range.into()
7479
}
@@ -79,9 +84,12 @@ impl StrInput<char> for str {}
7984
impl SliceInput for str {
8085
type Slice = str;
8186

87+
#[inline]
8288
fn slice(&self, range: Range<Self::Offset>) -> &Self::Slice {
8389
&self[range]
8490
}
91+
92+
#[inline]
8593
fn slice_from(&self, from: RangeFrom<Self::Offset>) -> &Self::Slice {
8694
&self[from]
8795
}
@@ -96,14 +104,17 @@ impl<T: Clone> Input for [T] {
96104
0
97105
}
98106

99-
fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
107+
#[inline]
108+
unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
100109
if let Some(tok) = self.get(offset) {
101110
(offset + 1, Some(tok.clone()))
102111
} else {
112+
// We actually don't care if the offset goes beyond the end of the slice, and this seems to be *slightly* faster
103113
(offset, None)
104114
}
105115
}
106116

117+
#[inline]
107118
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
108119
range.into()
109120
}
@@ -114,9 +125,12 @@ impl StrInput<u8> for [u8] {}
114125
impl<T: Clone> SliceInput for [T] {
115126
type Slice = [T];
116127

128+
#[inline]
117129
fn slice(&self, range: Range<Self::Offset>) -> &Self::Slice {
118130
&self[range]
119131
}
132+
133+
#[inline]
120134
fn slice_from(&self, from: RangeFrom<Self::Offset>) -> &Self::Slice {
121135
&self[from]
122136
}
@@ -135,7 +149,7 @@ impl<'a, Ctx: Clone, I: Input + ?Sized> Input for WithContext<'a, Ctx, I> {
135149
self.1.start()
136150
}
137151

138-
fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
152+
unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
139153
self.1.next(offset)
140154
}
141155

@@ -228,13 +242,13 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
228242
}
229243

230244
/// Get the input offset that is currently being pointed to.
231-
#[inline(always)]
245+
#[inline]
232246
pub fn offset(&self) -> I::Offset {
233247
self.offset
234248
}
235249

236250
/// Save off a [`Marker`] to the current position in the input
237-
#[inline(always)]
251+
#[inline]
238252
pub fn save(&self) -> Marker<I> {
239253
Marker {
240254
offset: self.offset,
@@ -243,30 +257,31 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
243257
}
244258

245259
/// Reset the input state to the provided [`Marker`]
246-
#[inline(always)]
260+
#[inline]
247261
pub fn rewind(&mut self, marker: Marker<I>) {
248262
self.errors.truncate(marker.err_count);
249263
self.offset = marker.offset;
250264
}
251265

252-
#[inline(always)]
266+
#[inline]
253267
pub(crate) fn state(&mut self) -> &mut E::State {
254268
match &mut self.state {
255269
Ok(state) => *state,
256270
Err(state) => state,
257271
}
258272
}
259273

260-
#[inline(always)]
274+
#[inline]
261275
pub(crate) fn ctx(&self) -> &E::Context {
262276
&self.ctx
263277
}
264278

265-
#[inline(always)]
279+
#[inline]
266280
pub(crate) fn skip_while<F: FnMut(&I::Token) -> bool>(&mut self, mut f: F) {
267281
let mut offs = self.offset;
268282
loop {
269-
let (offset, token) = self.input.next(offs);
283+
// SAFETY: offset was generated by previous call to `Input::next`
284+
let (offset, token) = unsafe { self.input.next(offs) };
270285
if token.filter(&mut f).is_none() {
271286
self.offset = offs;
272287
break;
@@ -276,9 +291,10 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
276291
}
277292
}
278293

279-
#[inline(always)]
294+
#[inline]
280295
pub(crate) fn next(&mut self) -> (I::Offset, Option<I::Token>) {
281-
let (offset, token) = self.input.next(self.offset);
296+
// SAFETY: offset was generated by previous call to `Input::next`
297+
let (offset, token) = unsafe { self.input.next(self.offset) };
282298
self.offset = offset;
283299
(self.offset, token)
284300
}
@@ -290,32 +306,33 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
290306

291307
/// Peek the next token in the input. Returns `None` for EOI
292308
pub fn peek(&self) -> Option<I::Token> {
293-
self.input.next(self.offset).1
309+
// SAFETY: offset was generated by previous call to `Input::next`
310+
unsafe { self.input.next(self.offset).1 }
294311
}
295312

296313
/// Skip the next token in the input.
297-
#[inline(always)]
314+
#[inline]
298315
pub fn skip(&mut self) {
299316
let _ = self.next();
300317
}
301318

302-
#[inline(always)]
319+
#[inline]
303320
pub(crate) fn slice(&self, range: Range<I::Offset>) -> &'a I::Slice
304321
where
305322
I: SliceInput,
306323
{
307324
self.input.slice(range)
308325
}
309326

310-
#[inline(always)]
327+
#[inline]
311328
pub(crate) fn slice_from(&self, from: RangeFrom<I::Offset>) -> &'a I::Slice
312329
where
313330
I: SliceInput,
314331
{
315332
self.input.slice_from(from)
316333
}
317334

318-
#[inline(always)]
335+
#[inline]
319336
pub(crate) fn slice_trailing(&self) -> &'a I::Slice
320337
where
321338
I: SliceInput,
@@ -324,12 +341,12 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
324341
}
325342

326343
/// Return the span from the provided [`Marker`] to the current position
327-
#[inline(always)]
344+
#[inline]
328345
pub fn span_since(&self, before: I::Offset) -> I::Span {
329346
self.input.span(before..self.offset)
330347
}
331348

332-
#[inline(always)]
349+
#[inline]
333350
pub(crate) fn skip_bytes<C>(&mut self, skip: usize)
334351
where
335352
C: Char,
@@ -338,7 +355,7 @@ impl<'a, 'parse, I: Input + ?Sized, E: ParserExtra<'a, I>> InputRef<'a, 'parse,
338355
self.offset += skip;
339356
}
340357

341-
#[inline(always)]
358+
#[inline]
342359
pub(crate) fn emit(&mut self, error: E::Error) {
343360
self.errors.push(error);
344361
}

src/zero_copy/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2021,6 +2021,14 @@ where
20212021
M::invoke(&*self.inner, inp)
20222022
}
20232023

2024+
fn boxed(self) -> Boxed<'a, I, O, E>
2025+
where
2026+
Self: Sized + 'a,
2027+
{
2028+
// Never double-box parsers
2029+
self
2030+
}
2031+
20242032
go_extra!(O);
20252033
}
20262034

src/zero_copy/primitive.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -834,24 +834,26 @@ where
834834
{
835835
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O, E::Error> {
836836
let before = inp.save();
837-
let res = self.parsers
838-
.iter()
839-
.try_fold(None::<Located<E::Error>>, |err, parser, | {
840-
match parser.go::<M>(inp) {
841-
Ok(out) => Err(out),
842-
Err(e) => {
843-
Ok(Some(match err {
837+
let res =
838+
self.parsers
839+
.iter()
840+
.try_fold(None::<Located<E::Error>>, |err, parser| {
841+
match parser.go::<M>(inp) {
842+
Ok(out) => Err(out),
843+
Err(e) => Ok(Some(match err {
844844
Some(err) => err.prioritize(e, |a, b| a.merge(b)),
845845
None => e,
846-
}))
846+
})),
847847
}
848-
}
849-
});
848+
});
850849

851850
match res {
852-
Ok(err) => Err(err.unwrap_or_else(
853-
|| Located::at(inp.offset().into(), E::Error::expected_found(None, None, inp.span_since(before.offset)))
854-
)),
851+
Ok(err) => Err(err.unwrap_or_else(|| {
852+
Located::at(
853+
inp.offset().into(),
854+
E::Error::expected_found(None, None, inp.span_since(before.offset)),
855+
)
856+
})),
855857
Err(out) => Ok(out),
856858
}
857859
}

0 commit comments

Comments
 (0)