Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.

Commit 32c97f9

Browse files
committed
Merge branch 'main' into fixes/recursive-leak
2 parents c35479f + 8658681 commit 32c97f9

37 files changed

Lines changed: 1751 additions & 1621 deletions

.github/workflows/rust.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ jobs:
2323
toolchain: nightly
2424
components: rustfmt, clippy
2525
- name: Run cargo check (all features)
26-
run: cargo check --tests --verbose --all-features
26+
run: cargo check --benches --examples --tests --verbose --all-features
2727
- name: Run cargo check (no features)
28-
run: cargo check --tests --verbose --no-default-features
28+
run: cargo check --benches --examples --tests --verbose --no-default-features
2929
- name: Run cargo clippy
30-
run: cargo clippy --verbose --all-features -- -D warnings
30+
run: cargo clippy --benches --examples --tests --verbose --all-features -- -D warnings
3131
- name: Run cargo fmt
3232
run: cargo fmt --verbose --check
3333
- name: Run cargo doc

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "chumsky"
3-
version = "1.0.0-alpha.4"
3+
version = "1.0.0-alpha.6"
44
description = "A parser library for humans with powerful error recovery"
55
authors = ["Joshua Barretto <joshua.s.barretto@gmail.com>", "Elijah Hartvigsen <elijah.reed@hartvigsen.xyz>", "Jakob Wiesmore <runetynan@gmail.com>"]
66
repository = "https://github.com/zesterer/chumsky"
@@ -59,6 +59,9 @@ regex = ["dep:regex-automata"]
5959
# Enable serde serialization support
6060
serde = ["dep:serde"]
6161

62+
# Enable dependencies only needed for generation of documentation on docs.rs
63+
docsrs = ["dep:vergen"]
64+
6265
# An alias of all features that work with the stable compiler.
6366
# Do not use this feature, its removal is not considered a breaking change and its behaviour may change.
6467
# If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list.
@@ -82,7 +85,7 @@ unicode-ident = "1.0.10"
8285
ariadne = "0.2"
8386
pom = "3.2"
8487
nom = "7.1"
85-
winnow = "0.5.0"
88+
winnow = "0.5.19"
8689
serde_json = { version = "1.0", features = ["preserve_order"] }
8790
ciborium = { version = "0.2" }
8891
criterion = "0.4.0"
@@ -94,7 +97,7 @@ lasso = "0.7"
9497
slotmap = "1.0"
9598

9699
[build-dependencies]
97-
vergen = { version = "=8.1.1", features = ["git", "gitoxide"] }
100+
vergen = { version = "=8.1.1", optional = true, features = ["git", "gitoxide"] }
98101

99102
[target.'cfg(unix)'.dev-dependencies]
100103
pprof = { version = "0.11", features = ["flamegraph", "criterion"] }

README2.md

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,28 @@
77

88
Chumsky is a parser combinator library for Rust that makes writing expressive, high-performance parsers easy.
99

10-
Although chumsky is designed primarily for user-facing parsers such as compilers, chumsky is just as much at home
11-
parsing binary protocols in a networking layer, configuration files, or any other form of complex input validation that
12-
you may need.
13-
1410
<a href = "https://www.github.com/zesterer/tao">
1511
<img src="https://raw.githubusercontent.com/zesterer/chumsky/master/misc/example.png" alt="Example usage with my own language, Tao"/>
1612
</a>
1713

14+
Although chumsky is designed primarily for user-facing parsers such as compilers, chumsky is just as much at home
15+
parsing binary protocols at the networking layer, configuration files, or any other form of complex input validation that
16+
you may need. It also has `no_std` support, making it suitable for embedded environments.
17+
1818
## Features
1919

2020
- 🪄 **Expressive combinators** that make writing your parser a joy
2121
- 🎛️ **Fully generic** across input, token, output, span, and error types
22-
- 📑 **Zero-copy parsing** minimises your parser's need to allocate
22+
- 📑 **Zero-copy parsing** minimises allocation by having outputs hold references/slices of the input
2323
- 🚦 **Flexible error recovery** strategies out of the box
2424
- 🚀 **Internal optimiser** leverages the power of [GATs](https://smallcultfollowing.com/babysteps/blog/2022/06/27/many-modes-a-gats-pattern/) to optimise your parser for you
2525
- 📖 **Text-oriented parsers** for text inputs (i.e. `&[u8]` and `&str`)
2626
- 👁️‍🗨️ **Context-free grammars** are fully supported, with support for context-sensitivity
2727
- 🔄 **Left recursion and memoization** have opt-in support
28-
- 🪺 **Nested inputs** such as token trees are fully supported
28+
- 🪺 **Nested inputs** such as token trees are fully supported both as inputs and outputs
2929
- 🏷️ **Pattern labelling** for dynamic, user-friendly error messages
30+
- 🗃️ **Caching** allows parsers to be created once and reused many times
31+
- ↔️ **Pratt parsing** support for unary and binary operators
3032

3133
*Note: Error diagnostic rendering is performed by [Ariadne](https://github.com/zesterer/ariadne)*
3234

@@ -39,19 +41,18 @@ See [`examples/brainfuck.rs`](https://github.com/zesterer/chumsky/blob/master/ex
3941
```rust
4042
use chumsky::prelude::*;
4143

42-
/// Define out output AST (Abstract Syntax Tree)
44+
/// An AST (Abstract Syntax Tree) for Brainfuck instructions
4345
#[derive(Clone)]
4446
enum Instr {
4547
Left, Right,
4648
Incr, Decr,
4749
Read, Write,
48-
// In Brainfuck, `[...]` blocks are loops
49-
Loop(Vec<Self>),
50+
Loop(Vec<Self>), // In Brainfuck, `[...]` loops contain sub-blocks of instructions
5051
}
5152

5253
/// A function that returns an instance of our Brainfuck parser
5354
fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Instr>> {
54-
// Our parser is recursive: each instruction can contain many instructions (via `[...]` blocks)
55+
// Brainfuck syntax is recursive: each block can contain many sub-blocks (via `[...]` loops)
5556
recursive(|bf| choice((
5657
// All of the basic instructions are just single characters
5758
just('<').to(Instr::Left),
@@ -87,6 +88,32 @@ Chumsky has [a tutorial](https://github.com/zesterer/chumsky/blob/master/tutoria
8788
parser and interpreter for a simple dynamic language with unary and binary operators, operator precedence, functions,
8889
let declarations, and calls.
8990

91+
## Cargo Features
92+
93+
Chumsky contains several optional features that extend the crate's functionality.
94+
95+
- `pratt`: enables the [pratt parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) combinator
96+
97+
- `regex`: enables the regex combinator
98+
99+
- `serde`: enables `serde` (de)serialization support for several types
100+
101+
- `either`: implements `Parser` for `either::Either`, allowing dynamic configuration of parsers at runtime
102+
103+
- `sync`: enables thread-safe features
104+
105+
- `extension`: enables the extension API, allowing you to write your own first-class combinators that integrate with and extend chumsky
106+
107+
- `memoization`: enables [memoization](https://en.wikipedia.org/wiki/Memoization#Parsers) features
108+
109+
- `spill-stack` (enabled by default): avoids stack overflows by spilling stack data to the heap
110+
111+
- `unstable`: enables experimental chumsky features
112+
113+
- `std` (enabled by default): support for standard library features
114+
115+
- `nightly`: enables support for features only supported by the nightly Rust compiler
116+
90117
## *What* is a parser combinator?
91118

92119
Parser combinators are a technique for implementing parsers by defining them in terms of other parsers. The resulting

benches/cbor.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::hint::black_box;
33

44
mod utils;
55

6-
static CBOR: &'static [u8] = include_bytes!("samples/sample.cbor");
6+
static CBOR: &[u8] = include_bytes!("samples/sample.cbor");
77

88
fn bench_cbor(c: &mut Criterion) {
99
// c.bench_function("cbor_nom", {
@@ -125,7 +125,8 @@ mod chumsky_zero_copy {
125125
};
126126
cfg.exactly(num)
127127
}))
128-
.map_slice(int_out);
128+
.to_slice()
129+
.map(int_out);
129130

130131
let uint = read_int.map(CborZero::Int);
131132
let nint = read_int.map(|i| CborZero::Int(-1 - i));
@@ -134,14 +135,16 @@ mod chumsky_zero_copy {
134135
any()
135136
.repeated()
136137
.configure(|cfg, ctx| cfg.exactly(*ctx as usize))
137-
.map_slice(CborZero::Bytes),
138+
.to_slice()
139+
.map(CborZero::Bytes),
138140
);
139141

140142
let str = read_int.ignore_with_ctx(
141143
any()
142144
.repeated()
143145
.configure(|cfg, ctx| cfg.exactly(*ctx as usize))
144-
.map_slice(|slice| CborZero::String(std::str::from_utf8(slice).unwrap())),
146+
.to_slice()
147+
.map(|slice| CborZero::String(std::str::from_utf8(slice).unwrap())),
145148
);
146149

147150
let array = read_int.ignore_with_ctx(

benches/json.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#![allow(clippy::result_large_err, clippy::type_complexity)]
2+
13
use criterion::{black_box, criterion_group, criterion_main, Criterion};
24

35
mod utils;
@@ -22,7 +24,7 @@ pub enum JsonZero<'a> {
2224
Object(Vec<(&'a [u8], JsonZero<'a>)>),
2325
}
2426

25-
static JSON: &'static [u8] = include_bytes!("samples/sample.json");
27+
static JSON: &[u8] = include_bytes!("samples/sample.json");
2628

2729
fn bench_json(c: &mut Criterion) {
2830
c.bench_function("json_nom", {
@@ -142,15 +144,16 @@ mod chumsky_zero_copy {
142144
.then(int)
143145
.then(frac.or_not())
144146
.then(exp.or_not())
145-
.map_slice(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
147+
.to_slice()
148+
.map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
146149
.boxed();
147150

148151
let escape = just(b'\\').then_ignore(one_of(b"\\/\"bfnrt"));
149152

150153
let string = none_of(b"\\\"")
151154
.or(escape)
152155
.repeated()
153-
.slice()
156+
.to_slice()
154157
.delimited_by(just(b'"'), just(b'"'))
155158
.boxed();
156159

@@ -197,14 +200,14 @@ mod pom {
197200
}
198201

199202
fn number() -> Parser<u8, f64> {
200-
let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0');
203+
let integer = (one_of(b"123456789") - one_of(b"0123456789").repeat(0..)) | sym(b'0');
201204
let frac = sym(b'.') + one_of(b"0123456789").repeat(1..);
202205
let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..);
203206
let number = sym(b'-').opt() + integer + frac.opt() + exp.opt();
204207
number
205208
.collect()
206209
.convert(str::from_utf8)
207-
.convert(|s| f64::from_str(&s))
210+
.convert(f64::from_str)
208211
}
209212

210213
fn string() -> Parser<u8, String> {
@@ -237,10 +240,10 @@ mod pom {
237240
(seq(b"null").map(|_| Json::Null)
238241
| seq(b"true").map(|_| Json::Bool(true))
239242
| seq(b"false").map(|_| Json::Bool(false))
240-
| number().map(|num| Json::Num(num))
241-
| string().map(|text| Json::Str(text))
242-
| array().map(|arr| Json::Array(arr))
243-
| object().map(|obj| Json::Object(obj)))
243+
| number().map(Json::Num)
244+
| string().map(Json::Str)
245+
| array().map(Json::Array)
246+
| object().map(Json::Object))
244247
- space()
245248
}
246249

@@ -344,15 +347,15 @@ mod nom {
344347
terminated(value, space)(i)
345348
}
346349

347-
pub fn json<'a>(i: &'a [u8]) -> IResult<&'a [u8], JsonZero, (&'a [u8], nom::error::ErrorKind)> {
350+
pub fn json(i: &[u8]) -> IResult<&[u8], JsonZero, (&[u8], nom::error::ErrorKind)> {
348351
root(i)
349352
}
350353
}
351354

352355
mod winnow {
353356
use winnow::{
354357
ascii::{digit0, digit1, escaped},
355-
combinator::separated0,
358+
combinator::separated,
356359
combinator::{alt, dispatch},
357360
combinator::{cut_err, fail, opt, peek},
358361
combinator::{preceded, separated_pair, terminated},
@@ -403,7 +406,7 @@ mod winnow {
403406
preceded(
404407
'[',
405408
cut_err(terminated(
406-
separated0(value, preceded(space, ',')),
409+
separated(0.., value, preceded(space, ',')),
407410
preceded(space, ']'),
408411
)),
409412
)
@@ -427,7 +430,7 @@ mod winnow {
427430
preceded(
428431
'{',
429432
cut_err(terminated(
430-
separated0(member, preceded(space, ',')),
433+
separated(0.., member, preceded(space, ',')),
431434
preceded(space, '}'),
432435
)),
433436
)

benches/lex.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ pub enum Token<'a> {
2626
Comma,
2727
}
2828

29-
static SAMPLE: &'static [u8] = include_bytes!("tokens.txt");
29+
static SAMPLE: &[u8] = include_bytes!("tokens.txt");
3030

3131
fn bench_lex(c: &mut Criterion) {
3232
c.bench_function("lex_chumsky_zero_copy", {
@@ -126,7 +126,7 @@ mod chumsky_zero_copy {
126126
use std::str;
127127

128128
pub fn parser<'a>() -> impl Parser<'a, &'a [u8], Vec<Token<'a>>> {
129-
let digits = one_of(b'0'..=b'9').repeated().slice();
129+
let digits = one_of(b'0'..=b'9').repeated().to_slice();
130130

131131
let int = one_of(b'1'..=b'9')
132132
.repeated()
@@ -148,7 +148,8 @@ mod chumsky_zero_copy {
148148
.then(int)
149149
.then(frac.or_not())
150150
.then(exp.or_not())
151-
.map_slice(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
151+
.to_slice()
152+
.map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
152153
.boxed();
153154

154155
let escape = just(b'\\')
@@ -169,11 +170,11 @@ mod chumsky_zero_copy {
169170
.ignored()
170171
.or(escape)
171172
.repeated()
172-
.slice()
173+
.to_slice()
173174
.delimited_by(just(b'"'), just(b'"'))
174175
.boxed();
175176

176-
let ident = text::ascii::ident().map_slice(Token::Ident);
177+
let ident = text::ascii::ident().to_slice().map(Token::Ident);
177178

178179
choice((
179180
just(b"null").to(Token::Null),

build.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,20 @@
11
use std::error::Error;
2+
#[cfg(feature = "docsrs")]
23
use vergen::EmitBuilder;
34

45
fn main() -> Result<(), Box<dyn Error>> {
6+
emit_git_metadata()?;
7+
Ok(())
8+
}
9+
10+
#[cfg(feature = "docsrs")]
11+
fn emit_git_metadata() -> Result<(), Box<dyn Error>> {
512
// Emit the instructions
613
EmitBuilder::builder().all_git().emit()?;
714
Ok(())
815
}
16+
17+
#[cfg(not(feature = "docsrs"))]
18+
fn emit_git_metadata() -> Result<(), Box<dyn Error>> {
19+
Ok(())
20+
}

examples/foo.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ enum Expr<'a> {
2727
},
2828
}
2929

30+
#[allow(clippy::let_and_return)]
3031
fn parser<'a>() -> impl Parser<'a, &'a str, Expr<'a>> {
3132
let ident = text::ascii::ident().padded();
3233

0 commit comments

Comments
 (0)