Skip to content

Commit 0f3c843

Browse files
committed
Merge rust-bitcoin#467: Upstream minurl::Url type for url parsing
7d0945e Add percent decoding to `query_pairs()` for parity with `url::Url` (Elias Rohrer) 1bdf361 Apply trimming and filtering to URL accessor methods (Elias Rohrer) 8664a2a Trim whitespace from URL input in `Url::parse` (Elias Rohrer) d47c77a Add a simple fuzz test for url parsing (Matt Corallo) 4fa9e80 Integrate percent encoding into `Url` type (Elias Rohrer) cb3ce89 Replace `HttpUrl` with new `Url` type (Elias Rohrer) 921e06c Add `Url` property and parity tests to bitreq (Elias Rohrer) 66337c0 Add `Url` type to bitreq (Elias Rohrer) Pull request description: Fixes rust-bitcoin#468. The `url` crate has a notoriously large dependency tree, which is why we want to avoid it as far as possible. However, we then found ourselves re-implementing several aspects of URL parsing in different places. To this end, I recently (mostly vibe-)coded the 0-dependency [`minurl`](https://crates.io/crates/minurl) crate (https://github.com/tnull/minurl) which is meant as a drop-in replacement for the popular `url` crate. To this end, we kept the `Url` API completely compatible, and even added parity tests ensuring both APIs return exactly the same output given the same input. While I'm generally fine maintaining this as a separate crate, it makes a lot of sense to have this live as part of `bitreq` and hence have it available everywhere in the ecosystem. Here I propose to upstream the `minurl::Url` type (and of course the corresponding test code). This also allows us to replace the ~half-done `http_url::HttpUrl` type. ACKs for top commit: TheBlueMatt: ACK 7d0945e needs some of the followups before we can release but this looks good as-is so far. Tree-SHA512: 9d48c556c620b385fe9f24d27154b0faf4528416a645ed4857bd494a00ae622a793f991bab9f68d8e85d11a48cd1766105c382acd943bcfd6f66ad319331fb8d
2 parents 61d1294 + 7d0945e commit 0f3c843

20 files changed

Lines changed: 3429 additions & 301 deletions

Cargo-minimal.lock

Lines changed: 256 additions & 5 deletions
Large diffs are not rendered by default.

Cargo-recent.lock

Lines changed: 256 additions & 5 deletions
Large diffs are not rendered by default.

bitreq/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ log = { version = "0.4.0", default-features = false, optional = true }
4242
[dev-dependencies]
4343
tiny_http = "0.12"
4444
tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "time"] }
45+
proptest = { version = "1", default-features = false, features = ["std"] }
46+
url = { version = "2.4" }
4547

4648
[package.metadata.docs.rs]
4749
all-features = true

bitreq/fuzz/Cargo.toml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
[package]
2+
name = "bitreq-fuzz"
3+
version = "0.0.1"
4+
authors = ["Automatically generated"]
5+
publish = false
6+
edition = "2021"
7+
8+
[package.metadata]
9+
cargo-fuzz = true
10+
11+
[features]
12+
afl_fuzz = ["afl"]
13+
honggfuzz_fuzz = ["honggfuzz"]
14+
libfuzzer_fuzz = ["libfuzzer-sys"]
15+
stdin_fuzz = []
16+
17+
[dependencies]
18+
bitreq = { path = ".." }
19+
url = "2.5"
20+
21+
afl = { version = "0.12", optional = true }
22+
honggfuzz = { version = "0.5", optional = true, default-features = false }
23+
libfuzzer-sys = { version = "0.4", optional = true }
24+
25+
# Prevent this from interfering with workspaces
26+
[workspace]
27+
members = ["."]
28+
29+
[profile.release]
30+
lto = true
31+
codegen-units = 1
32+
debug-assertions = true
33+
overflow-checks = true
34+
35+
# When testing a large fuzz corpus, -O1 offers a nice speedup
36+
[profile.dev]
37+
opt-level = 1
38+
39+
[lib]
40+
name = "bitreq_fuzz"
41+
path = "src/lib.rs"
42+
crate-type = ["rlib", "dylib", "staticlib"]
43+
44+
[lints.rust.unexpected_cfgs]
45+
level = "forbid"
46+
check-cfg = [
47+
"cfg(fuzzing)",
48+
]

bitreq/fuzz/README.md

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Fuzzing bitreq
2+
3+
This directory contains fuzzing infrastructure for the `bitreq` crate, specifically targeting the `Url` parser.
4+
5+
## Structure
6+
7+
- `src/url_parse.rs` - Fuzz target for the URL parser
8+
- `src/bin/target_template.txt` - Template for generating fuzz target binaries
9+
- `src/bin/gen_target.sh` - Script to generate target binaries from template
10+
- `ci-fuzz.sh` - CI script for running fuzzing tests
11+
12+
## Running Fuzz Tests
13+
14+
### With stdin (for quick testing)
15+
16+
```bash
17+
cd fuzz
18+
RUSTFLAGS="--cfg=fuzzing" cargo build --features stdin_fuzz --bin url_parse_target
19+
echo "http://example.com" | ./target/debug/url_parse_target
20+
```
21+
22+
### With honggfuzz (for comprehensive fuzzing)
23+
24+
```bash
25+
cd fuzz
26+
cargo install honggfuzz
27+
export RUSTFLAGS="--cfg=fuzzing"
28+
export HFUZZ_BUILD_ARGS="--features honggfuzz_fuzz"
29+
cargo hfuzz build
30+
HFUZZ_RUN_ARGS="--exit_upon_crash -v -n8 --run_time 30" cargo hfuzz run url_parse_target
31+
```
32+
33+
### With AFL (alternative fuzzer)
34+
35+
```bash
36+
cd fuzz
37+
cargo install afl
38+
export RUSTFLAGS="--cfg=fuzzing"
39+
cargo afl build --features afl_fuzz --bin url_parse_target
40+
cargo afl fuzz -i seeds -o findings target/debug/url_parse_target
41+
```
42+
43+
### Running CI Fuzzing
44+
45+
The `ci-fuzz.sh` script automates the fuzzing process with honggfuzz:
46+
47+
```bash
48+
cd fuzz
49+
./ci-fuzz.sh
50+
```
51+
52+
This will:
53+
1. Regenerate fuzz targets
54+
2. Install honggfuzz
55+
3. Build fuzz targets
56+
4. Run each target for 30 seconds
57+
5. Report any crashes found
58+
59+
## Running Tests
60+
61+
The fuzz targets include unit tests that can be run with:
62+
63+
```bash
64+
cd fuzz
65+
RUSTFLAGS="--cfg=fuzzing" cargo test
66+
```
67+
68+
## Adding New Fuzz Targets
69+
70+
1. Create a new module in `src/` (e.g., `src/my_target.rs`)
71+
2. Export it from `src/lib.rs`
72+
3. Add a `GEN_TEST my_target` line to `src/bin/gen_target.sh`
73+
4. Run `cd src/bin && ./gen_target.sh` to generate the target binary

bitreq/fuzz/ci-fuzz.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
2+
set -e
3+
set -x
4+
5+
# Regenerate targets to ensure they're up to date
6+
pushd src/bin
7+
rm -f *_target.rs
8+
./gen_target.sh
9+
[ "$(git diff)" != "" ] && exit 1
10+
popd
11+
12+
export RUSTFLAGS="--cfg=fuzzing"
13+
14+
cargo install --color always --force honggfuzz --no-default-features
15+
16+
# Because we're fuzzing relatively few iterations, the maximum possible
17+
# compiler optimizations aren't necessary, so we turn off LTO
18+
sed -i 's/lto = true//' Cargo.toml
19+
20+
export HFUZZ_BUILD_ARGS="--features honggfuzz_fuzz"
21+
22+
cargo --color always hfuzz build -j8
23+
for TARGET in src/bin/*_target.rs; do
24+
FILENAME=$(basename $TARGET)
25+
FILE="${FILENAME%.*}"
26+
HFUZZ_RUN_ARGS="--exit_upon_crash -v -n8 --run_time 30"
27+
export HFUZZ_RUN_ARGS
28+
cargo --color always hfuzz run $FILE
29+
if [ -f hfuzz_workspace/$FILE/HONGGFUZZ.REPORT.TXT ]; then
30+
cat hfuzz_workspace/$FILE/HONGGFUZZ.REPORT.TXT
31+
for CASE in hfuzz_workspace/$FILE/SIG*; do
32+
cat $CASE | xxd -p
33+
done
34+
exit 1
35+
fi
36+
done

bitreq/fuzz/src/bin/gen_target.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/sh
2+
3+
GEN_TEST() {
4+
cat target_template.txt | sed s/TARGET_NAME/$1/g > $1_target.rs
5+
}
6+
7+
GEN_TEST url_parse
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// This file is auto-generated by gen_target.sh based on target_template.txt
2+
// To modify it, modify target_template.txt and run gen_target.sh instead.
3+
4+
#![cfg_attr(feature = "libfuzzer_fuzz", no_main)]
5+
#![cfg_attr(rustfmt, rustfmt_skip)]
6+
7+
#[cfg(not(fuzzing))]
8+
compile_error!("Fuzz targets need cfg=fuzzing");
9+
10+
extern crate bitreq_fuzz;
11+
use bitreq_fuzz::TARGET_NAME::*;
12+
13+
#[cfg(feature = "afl")]
14+
#[macro_use] extern crate afl;
15+
#[cfg(feature = "afl")]
16+
fn main() {
17+
fuzz!(|data| {
18+
TARGET_NAME_run(data.as_ptr(), data.len());
19+
});
20+
}
21+
22+
#[cfg(feature = "honggfuzz")]
23+
#[macro_use] extern crate honggfuzz;
24+
#[cfg(feature = "honggfuzz")]
25+
fn main() {
26+
loop {
27+
fuzz!(|data| {
28+
TARGET_NAME_run(data.as_ptr(), data.len());
29+
});
30+
}
31+
}
32+
33+
#[cfg(feature = "libfuzzer_fuzz")]
34+
#[macro_use] extern crate libfuzzer_sys;
35+
#[cfg(feature = "libfuzzer_fuzz")]
36+
fuzz_target!(|data: &[u8]| {
37+
TARGET_NAME_run(data.as_ptr(), data.len());
38+
});
39+
40+
#[cfg(feature = "stdin_fuzz")]
41+
fn main() {
42+
use std::io::Read;
43+
44+
let mut data = Vec::with_capacity(8192);
45+
std::io::stdin().read_to_end(&mut data).unwrap();
46+
TARGET_NAME_run(data.as_ptr(), data.len());
47+
}
48+
49+
#[test]
50+
fn run_test_cases() {
51+
use std::fs;
52+
use std::io::Read;
53+
54+
{
55+
let data: Vec<u8> = vec![0];
56+
TARGET_NAME_run(data.as_ptr(), data.len());
57+
}
58+
if let Ok(tests) = fs::read_dir("test_cases/TARGET_NAME") {
59+
for test in tests {
60+
let mut data: Vec<u8> = Vec::new();
61+
let path = test.unwrap().path();
62+
fs::File::open(&path).unwrap().read_to_end(&mut data).unwrap();
63+
TARGET_NAME_run(data.as_ptr(), data.len());
64+
}
65+
}
66+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// This file is auto-generated by gen_target.sh based on target_template.txt
2+
// To modify it, modify target_template.txt and run gen_target.sh instead.
3+
4+
#![cfg_attr(feature = "libfuzzer_fuzz", no_main)]
5+
#![cfg_attr(rustfmt, rustfmt_skip)]
6+
7+
#[cfg(not(fuzzing))]
8+
compile_error!("Fuzz targets need cfg=fuzzing");
9+
10+
extern crate bitreq_fuzz;
11+
use bitreq_fuzz::url_parse::*;
12+
13+
#[cfg(feature = "afl")]
14+
#[macro_use] extern crate afl;
15+
#[cfg(feature = "afl")]
16+
fn main() {
17+
fuzz!(|data| {
18+
url_parse_run(data.as_ptr(), data.len());
19+
});
20+
}
21+
22+
#[cfg(feature = "honggfuzz")]
23+
#[macro_use] extern crate honggfuzz;
24+
#[cfg(feature = "honggfuzz")]
25+
fn main() {
26+
loop {
27+
fuzz!(|data| {
28+
url_parse_run(data.as_ptr(), data.len());
29+
});
30+
}
31+
}
32+
33+
#[cfg(feature = "libfuzzer_fuzz")]
34+
#[macro_use] extern crate libfuzzer_sys;
35+
#[cfg(feature = "libfuzzer_fuzz")]
36+
fuzz_target!(|data: &[u8]| {
37+
url_parse_run(data.as_ptr(), data.len());
38+
});
39+
40+
#[cfg(feature = "stdin_fuzz")]
41+
fn main() {
42+
use std::io::Read;
43+
44+
let mut data = Vec::with_capacity(8192);
45+
std::io::stdin().read_to_end(&mut data).unwrap();
46+
url_parse_run(data.as_ptr(), data.len());
47+
}
48+
49+
#[test]
50+
fn run_test_cases() {
51+
use std::fs;
52+
use std::io::Read;
53+
54+
{
55+
let data: Vec<u8> = vec![0];
56+
url_parse_run(data.as_ptr(), data.len());
57+
}
58+
if let Ok(tests) = fs::read_dir("test_cases/url_parse") {
59+
for test in tests {
60+
let mut data: Vec<u8> = Vec::new();
61+
let path = test.unwrap().path();
62+
fs::File::open(&path).unwrap().read_to_end(&mut data).unwrap();
63+
url_parse_run(data.as_ptr(), data.len());
64+
}
65+
}
66+
}

bitreq/fuzz/src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
2+
// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
4+
// You may not use this file except in accordance with one or both of these
5+
// licenses.
6+
7+
#![cfg_attr(rustfmt, rustfmt_skip)]
8+
9+
pub mod url_parse;

0 commit comments

Comments
 (0)