Skip to content

Commit 63056ec

Browse files
authored
feat!: refactor the Input implementors with automatic padding
- Padding and alignment are now handled automatically by the input types, allowing them to work safely without copying the entire input. The overhead is now limited to the padding, which is at most 256 bytes in total.
- [`BorrowedBytes`](https://docs.rs/rsonpath-lib/0.8.4/rsonpath/input/borrowed/struct.BorrowedBytes.html) is now safe to construct.
- [`OwnedBytes`](https://docs.rs/rsonpath-lib/0.8.4/rsonpath/input/owned/struct.OwnedBytes.html) no longer copies the entire source on construction.

Ref: #276
1 parent fa5445e commit 63056ec

53 files changed

Lines changed: 2835 additions & 1053 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/rust.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,13 @@ jobs:
5555
# This target needs special setup with MinGW.
5656
needs-mingw: x86
5757
can-run: true
58-
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
58+
# lld on Windows uses extreme amounts of memory for debuginfo=2
59+
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
5960
- os: windows-latest
6061
target_triple: i686-pc-windows-msvc
6162
can-run: true
62-
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
63+
# lld on Windows uses extreme amounts of memory for debuginfo=2
64+
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
6365
- os: ubuntu-latest
6466
target_triple:
6567
i686-unknown-linux-gnu
@@ -74,11 +76,13 @@ jobs:
7476
- os: windows-latest
7577
target_triple: x86_64-pc-windows-gnu
7678
can-run: true
77-
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
79+
# lld on Windows uses extreme amounts of memory for debuginfo=2
80+
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
7881
- os: windows-latest
7982
target_triple: x86_64-pc-windows-msvc
8083
can-run: true
81-
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
84+
# lld on Windows uses extreme amounts of memory for debuginfo=2
85+
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
8286
- os: ubuntu-latest
8387
target_triple: x86_64-unknown-linux-gnu
8488
can-run: true

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
"rustdoc",
6666
"RUSTFLAGS",
6767
"rustfmt",
68+
"Seekable",
6869
"SIMD",
6970
"smallvec",
7071
"snaks",

Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ lto = false
1919
# Release should be used for benching, but not actually distributed.
2020
[profile.release]
2121
lto = "thin"
22-
debug = 2
22+
debug = 1
2323

2424
# This is the profile used for final binaries distributed via package managers.
2525
# It prioritizes performance, and then binary size. We generally don't care about

Justfile

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,11 @@ test-engine: (gen-tests)
105105

106106
# Run the input tests on default features.
107107
test-input:
108-
cargo test --test input_implementation -q
108+
cargo test --test input_implementation_tests -q
109109

110110
# Run the query tests on default features.
111111
test-parser:
112-
cargo test --test query_parser -q
112+
cargo test --test query_parser_tests -q
113113

114114
# Run all tests, including real dataset tests, on the feature powerset of the project.
115115
test-full: (gen-tests)
@@ -241,12 +241,8 @@ commit msg:
241241
[private]
242242
hook-pre-commit:
243243
#!/bin/sh
244-
tmpdiff=$(mktemp -t pre-commit-hook-diff-XXXXXXXX.$$)
245244
just assert-benchmarks-committed
246-
git diff --full-index --binary > $tmpdiff
247-
git stash -q --keep-index
248-
(just verify-fmt && just verify-check); \
249-
git apply --whitespace=nowarn < $tmpdiff}} && git stash drop -q; rm $tmpdiff
245+
(just verify-fmt && just verify-check);
250246

251247
[private]
252248
@hook-post-checkout: checkout-benchmarks

book/src/lib/intro.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ _This part of the book is a work in progress._
55
```rust
66
# extern crate rsonpath;
77
use rsonpath::engine::{Compiler, Engine, RsonpathEngine};
8-
use rsonpath::input::OwnedBytes;
8+
use rsonpath::input::BorrowedBytes;
99
use rsonpath::query::JsonPathQuery;
1010

1111
# fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -29,7 +29,7 @@ let contents = r#"
2929
}
3030
}"#;
3131

32-
let input = OwnedBytes::new(&contents)?;
32+
let input = BorrowedBytes::new(contents.as_bytes());
3333
let engine = RsonpathEngine::compile_query(&query)?;
3434
let count = engine.count(&input)?;
3535

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Seeds for failure cases proptest has generated in the past. It is
2+
# automatically read and these particular cases re-run before any
3+
# novel cases are generated.
4+
#
5+
# It is recommended to check this file in to source control so that
6+
# everyone who runs the test benefits from these saved cases.
7+
cc 34c43f30d1bf155cf3ee0e13c9776fc11c4dac1f3f19aa602c4c50df5f0e7049 # shrinks to (input, expected) = (",", [Comma(0)])
8+
cc 24036ddd437694148c11fa1c274956e48ed3792ea084f3923deaa4225d030700 # shrinks to (input, expected) = (",", [Comma(0)])

crates/rsonpath-lib/src/classification/classifier_correctness_tests.rs

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,41 +3,47 @@ use crate::{
33
simd::{self, Simd},
44
structural::{BracketType, Structural, StructuralIterator},
55
},
6+
input::BorrowedBytes,
67
input::Input,
7-
input::OwnedBytes,
88
result::empty::EmptyRecorder,
99
FallibleIterator,
1010
};
1111

1212
use super::simd::config_simd;
1313

14-
fn classify_string(json: &str) -> Vec<Structural> {
14+
fn classify_string(json: &str) -> (Vec<Structural>, usize) {
1515
let simd = simd::configure();
1616

1717
config_simd!(simd => |simd| {
1818
let json_string = json.to_owned();
19-
let bytes = OwnedBytes::try_from(json_string).unwrap();
19+
let bytes = BorrowedBytes::new(json_string.as_bytes());
2020
let iter = bytes.iter_blocks(&EmptyRecorder);
2121
let quotes_classifier = simd.classify_quoted_sequences(iter);
2222
let mut structural_classifier = simd.classify_structural_characters(quotes_classifier);
2323
structural_classifier.turn_commas_on(0);
2424
structural_classifier.turn_colons_on(0);
2525

26-
structural_classifier.collect().unwrap()
26+
(structural_classifier.collect().unwrap(), bytes.leading_padding_len())
2727
})
2828
}
2929

30+
fn apply_offset(vec: &mut [Structural], offset: usize) {
31+
for x in vec {
32+
*x = x.offset(offset);
33+
}
34+
}
35+
3036
#[test]
3137
fn empty_string() {
32-
let result = classify_string("");
38+
let (result, _) = classify_string("");
3339

3440
assert_eq!(Vec::<Structural>::default(), result);
3541
}
3642

3743
#[test]
3844
fn json() {
3945
let json = r#"{"a": [1, 2, 3], "b": "string", "c": {"d": 42, "e": 17}}"#;
40-
let expected: &[Structural] = &[
46+
let expected: &mut [Structural] = &mut [
4147
Structural::Opening(BracketType::Curly, 0),
4248
Structural::Colon(4),
4349
Structural::Opening(BracketType::Square, 6),
@@ -56,23 +62,25 @@ fn json() {
5662
Structural::Closing(BracketType::Curly, 55),
5763
];
5864

59-
let result = classify_string(json);
65+
let (result, offset) = classify_string(json);
66+
apply_offset(expected, offset);
6067

6168
assert_eq!(expected, result);
6269
}
6370

6471
#[test]
6572
fn json_with_escapes() {
6673
let json = r#"{"a": "Hello, World!", "b": "\"{Hello, [World]!}\""}"#;
67-
let expected: &[Structural] = &[
74+
let expected: &mut [Structural] = &mut [
6875
Structural::Opening(BracketType::Curly, 0),
6976
Structural::Colon(4),
7077
Structural::Comma(21),
7178
Structural::Colon(26),
7279
Structural::Closing(BracketType::Curly, 51),
7380
];
7481

75-
let result = classify_string(json);
82+
let (result, offset) = classify_string(json);
83+
apply_offset(expected, offset);
7684

7785
assert_eq!(expected, result);
7886
}
@@ -82,7 +90,7 @@ fn reverse_exclamation_point() {
8290
let wtf = "¡";
8391
let expected: &[Structural] = &[];
8492

85-
let result = classify_string(wtf);
93+
let (result, _) = classify_string(wtf);
8694

8795
assert_eq!(expected, result);
8896
}
@@ -92,7 +100,7 @@ fn block_boundary() {
92100
use Structural::*;
93101

94102
let wtf = r##",,#;0a#0,#a#0#0aa ;a0 0a,"A"#a~A#0a~A##a0|a0#0aaa~ 0#;A|~|"a"A-|;#0 Aa,,"0","A"A0,,,,,,,,,,,,,,,"a",AA;#|#|a;AAA;a A~;aA;A##A#~a ,,,,,,0^A-AA0aa;- ~0,,,#;A;aA#A#0 a-, a;0aaa0|a 0aA -A#a,,,,"\\","##;
95-
let expected: &[Structural] = &[
103+
let expected: &mut [Structural] = &mut [
96104
Comma(0),
97105
Comma(1),
98106
Comma(8),
@@ -133,13 +141,14 @@ fn block_boundary() {
133141
Comma(193),
134142
];
135143

136-
let result = classify_string(wtf);
144+
let (result, offset) = classify_string(wtf);
145+
apply_offset(expected, offset);
137146

138147
assert_eq!(expected, result);
139148
}
140149

141150
mod prop_test {
142-
use super::{classify_string, BracketType, Structural};
151+
use super::{apply_offset, classify_string, BracketType, Structural};
143152
use proptest::{self, collection, prelude::*};
144153
use std::fmt::Debug;
145154

@@ -275,15 +284,17 @@ mod prop_test {
275284

276285
proptest! {
277286
#[test]
278-
fn classifies_correctly_ascii((input, expected) in input_string_ascii()) {
279-
let result = classify_string(&input);
287+
fn classifies_correctly_ascii((input, mut expected) in input_string_ascii()) {
288+
let (result, offset) = classify_string(&input);
289+
apply_offset(&mut expected, offset);
280290

281291
assert_eq!(expected, result);
282292
}
283293

284294
#[test]
285-
fn classifies_correctly_all((input, expected) in input_string_all()) {
286-
let result = classify_string(&input);
295+
fn classifies_correctly_all((input, mut expected) in input_string_all()) {
296+
let (result, offset) = classify_string(&input);
297+
apply_offset(&mut expected, offset);
287298

288299
assert_eq!(expected, result);
289300
}

crates/rsonpath-lib/src/classification/memmem.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,13 @@ pub(crate) trait MemmemImpl {
4242
type Classifier<'i, 'b, 'r, I, R>: Memmem<'i, 'b, 'r, I, BLOCK_SIZE>
4343
where
4444
I: Input + 'i,
45-
<I as Input>::BlockIterator<'i, 'r, BLOCK_SIZE, R>: 'b,
45+
<I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
4646
R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
4747
'i: 'r;
4848

4949
fn memmem<'i, 'b, 'r, I, R>(
5050
input: &'i I,
51-
iter: &'b mut <I as Input>::BlockIterator<'i, 'r, BLOCK_SIZE, R>,
51+
iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
5252
) -> Self::Classifier<'i, 'b, 'r, I, R>
5353
where
5454
I: Input,

0 commit comments

Comments
 (0)