Skip to content

Commit f77e71e

Browse files
authored
refactor: improve execution pipeline and TPCC backends (#313)
* refactor: optimizer normalization passes * refactor: normalization scheduling to avoid matcher scans * chore: output_columns reduces the number of Vecs constructed * refactor: derive column positions during binding instead of BindPosition fallback * refactor: avoid redundant operator clones in normalization rules * feat: add LMDB storage and fix scalar subquery semantics - add LMDB storage backend - restore scalar subqueries in WHERE to join-aware binding - enforce scalar subquery cardinality at execution time - return NULL for empty scalar subqueries and error on multi-row results * refactor: borrow range bounds and reuse delete buffers * refactor: make key encoding paths borrowed and reusable * refactor: storage buffers and trim optimizer column-reference overhead * refactor: column pruning to reuse borrowed column summaries * refactor: remove memo and choose physical impls directly * refactor: optimizer hint propagation and preindex implementation rules * refactor: optimizer annotate pass to merge post-rules and restore borrowed scan hints * optimizer: avoid schema diff in column pruning remap * optimizer: avoid recursive expr walks in column pruning * perf: prebind range comparison evaluators * refactor: new ExecArena-based executor model * fix: tpcc segfault caused by bump-backed executor drop order * refactor: reuse arena result tuple across execution pipeline * chore: uncheck for decode tuple primary key when query * refactor(tpcc): split backends and improve benchmark tooling * refactor(execution): unify executor node API and trim test helpers * chore: codefmt * refactor(stats): load sketches alongside histograms in StatisticsMeta * feat: improve normalization rules and add tpcc benchmark runner * chore(release): bump kite_sql and macros crate to 0.2.0 * feat(cargo): gate native backends behind rocksdb/lmdb features
1 parent fbdabef commit f77e71e

File tree

169 files changed

+13226
-6692
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

169 files changed

+13226
-6692
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ kite_sql_tpcc
2525
copy.csv
2626

2727
tests/data/row_20000.csv
28-
tests/data/distinct_rows.csv
28+
tests/data/distinct_rows.csv

Cargo.lock

Lines changed: 27 additions & 76 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
[package]
44
name = "kite_sql"
5-
version = "0.1.8"
5+
version = "0.2.0"
66
edition = "2021"
77
authors = ["Kould <kould2333@gmail.com>", "Xwg <loloxwg@gmail.com>"]
88
description = "SQL as a Function for Rust"
@@ -17,17 +17,19 @@ default-run = "kite_sql"
1717
[[bin]]
1818
name = "kite_sql"
1919
path = "src/bin/server.rs"
20-
required-features = ["net"]
20+
required-features = ["net", "rocksdb"]
2121

2222
[lib]
2323
doctest = false
2424
crate-type = ["cdylib", "rlib"]
2525

2626
[features]
27-
default = ["macros"]
27+
default = ["macros", "rocksdb"]
2828
macros = []
2929
orm = []
30-
net = ["dep:pgwire", "dep:async-trait", "dep:clap", "dep:env_logger", "dep:futures", "dep:log", "dep:tokio"]
30+
rocksdb = ["dep:rocksdb"]
31+
lmdb = ["dep:lmdb", "dep:lmdb-sys"]
32+
net = ["rocksdb", "dep:pgwire", "dep:async-trait", "dep:clap", "dep:env_logger", "dep:futures", "dep:log", "dep:tokio"]
3133
pprof = ["pprof/criterion", "pprof/flamegraph"]
3234
python = ["dep:pyo3"]
3335

@@ -55,13 +57,12 @@ recursive = { version = "0.1" }
5557
regex = { version = "1" }
5658
rust_decimal = { version = "1" }
5759
serde = { version = "1", features = ["derive", "rc"] }
58-
kite_sql_serde_macros = { version = "0.1.2", path = "kite_sql_serde_macros" }
60+
kite_sql_serde_macros = { version = "0.2.0", path = "kite_sql_serde_macros" }
5961
siphasher = { version = "1", features = ["serde"] }
6062
sqlparser = { version = "0.61", features = ["serde"] }
6163
thiserror = { version = "1" }
6264
typetag = { version = "0.2" }
6365
ulid = { version = "1", features = ["serde"] }
64-
genawaiter = { version = "0.99" }
6566

6667
# Feature: net
6768
async-trait = { version = "0.1", optional = true }
@@ -84,7 +85,9 @@ tempfile = { version = "3.10" }
8485
sqlite = { version = "0.34" }
8586

8687
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
87-
rocksdb = { version = "0.23" }
88+
rocksdb = { version = "0.23", optional = true }
89+
lmdb = { version = "0.8.0", optional = true }
90+
lmdb-sys = { version = "0.8.0", optional = true }
8891

8992
[target.'cfg(target_arch = "wasm32")'.dependencies]
9093
wasm-bindgen = { version = "0.2.106" }

Makefile

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@ CARGO ?= cargo
33
WASM_PACK ?= wasm-pack
44
SQLLOGIC_PATH ?= tests/slt/**/*.slt
55
PYO3_PYTHON ?= /usr/bin/python3.12
6+
TPCC_MEASURE_TIME ?= 15
7+
TPCC_NUM_WARE ?= 1
8+
TPCC_PPROF_OUTPUT ?= /tmp/tpcc_lmdb.svg
9+
TPCC_SQLITE_PROFILE ?= balanced
610

7-
.PHONY: test test-python test-wasm test-slt test-all wasm-build check tpcc tpcc-dual cargo-check build wasm-examples native-examples fmt clippy
11+
.PHONY: test test-python test-wasm test-slt test-all wasm-build check tpcc tpcc-kitesql-rocksdb tpcc-kitesql-lmdb tpcc-lmdb-flamegraph tpcc-sqlite tpcc-sqlite-practical tpcc-sqlite-balanced tpcc-dual cargo-check build wasm-examples native-examples fmt clippy
812

913
## Run default Rust tests in the current environment (non-WASM).
1014
test:
@@ -48,9 +52,31 @@ clippy:
4852
## Run formatting (check mode) and clippy linting together.
4953
check: fmt clippy
5054

51-
## Execute the TPCC workload example as a standalone command.
52-
tpcc:
53-
$(CARGO) run -p tpcc --release
55+
tpcc: tpcc-kitesql-lmdb
56+
57+
## Execute the TPCC workload on KiteSQL with RocksDB storage.
58+
tpcc-kitesql-rocksdb:
59+
$(CARGO) run -p tpcc --release -- --backend kitesql-rocksdb
60+
61+
## Execute the TPCC workload on KiteSQL with LMDB storage.
62+
tpcc-kitesql-lmdb:
63+
$(CARGO) run -p tpcc --release -- --backend kitesql-lmdb
64+
65+
## Execute TPCC on LMDB and emit a pprof flamegraph SVG.
66+
tpcc-lmdb-flamegraph:
67+
CARGO_PROFILE_RELEASE_DEBUG=true $(CARGO) run -p tpcc --release --features pprof -- --backend kitesql-lmdb --measure-time $(TPCC_MEASURE_TIME) --num-ware $(TPCC_NUM_WARE) --pprof-output $(TPCC_PPROF_OUTPUT)
68+
69+
## Execute the TPCC workload on SQLite with the profile selected by TPCC_SQLITE_PROFILE (default: balanced).
70+
tpcc-sqlite:
71+
$(CARGO) run -p tpcc --release -- --backend sqlite --sqlite-profile $(TPCC_SQLITE_PROFILE) --path kite_sql_tpcc.sqlite
72+
73+
## Execute the TPCC workload on SQLite with the practical profile.
74+
tpcc-sqlite-practical:
75+
$(MAKE) tpcc-sqlite TPCC_SQLITE_PROFILE=practical
76+
77+
## Execute the TPCC workload on SQLite with the balanced profile.
78+
tpcc-sqlite-balanced:
79+
$(MAKE) tpcc-sqlite TPCC_SQLITE_PROFILE=balanced
5480

5581
## Execute TPCC while mirroring every statement to an in-memory SQLite instance for validation.
5682
tpcc-dual:

README.md

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
## Introduction
2929
**KiteSQL** is a lightweight embedded relational database for Rust, inspired by **MyRocks** and **SQLite** and fully written in Rust. It is designed to work not only as a SQL engine, but also as a Rust-native data API that can be embedded directly into applications without relying on external services or heavyweight infrastructure.
3030

31-
KiteSQL supports direct SQL execution, typed ORM models, schema migration, and builder-style queries, so you can combine relational power with an API surface that feels natural in Rust.
31+
KiteSQL supports direct SQL execution, typed ORM models, schema migration, and builder-style queries, so you can combine relational power with an API surface that feels natural in Rust. On native targets, KiteSQL ships with both RocksDB-backed and LMDB-backed persistent storage builders, plus an in-memory builder for tests and temporary workloads.
3232

3333
## Key Features
3434
- A lightweight embedded SQL database fully rewritten in Rust
@@ -79,7 +79,8 @@ struct UserSummary {
7979
}
8080

8181
fn main() -> Result<(), DatabaseError> {
82-
let database = DataBaseBuilder::path("./data").build()?;
82+
let database = DataBaseBuilder::path("./data").build_rocksdb()?;
83+
// Or: let database = DataBaseBuilder::path("./data").build_lmdb()?;
8384

8485
database.migrate::<User>()?;
8586

@@ -128,8 +129,20 @@ fn main() -> Result<(), DatabaseError> {
128129
}
129130
```
130131

132+
## Storage Backends
133+
- `build_rocksdb()` opens a persistent RocksDB-backed database.
134+
- `build_lmdb()` opens a persistent LMDB-backed database.
135+
- `build_in_memory()` opens an in-memory database for tests, examples, and temporary workloads.
136+
- `build_optimistic()` is available on native targets when you specifically want optimistic transactions on top of RocksDB.
137+
- Cargo features:
138+
- `rocksdb` is enabled by default
139+
- `lmdb` is optional
140+
- `cargo check --no-default-features --features lmdb` builds an LMDB-only native configuration
141+
142+
On native targets, `LMDB` shines when reads dominate, while `RocksDB` is usually the stronger choice when writes do.
143+
131144
👉**more examples**
132-
- [hello_word](examples/hello_world.rs)
145+
- [hello_world](examples/hello_world.rs)
133146
- [transaction](examples/transaction.rs)
134147

135148

@@ -149,7 +162,7 @@ console.log(rows.map((r) => r.values.map((v) => v.Int32 ?? v)));
149162
150163
## Python (PyO3)
151164
- Enable bindings with Cargo feature `python`.
152-
- Constructor is explicit: `Database(path)`; in-memory usage is `Database.in_memory()`.
165+
- Constructor is explicit: `Database(path, backend="rocksdb")`; use `backend="lmdb"` to open LMDB. In-memory usage is `Database.in_memory()`.
153166
- Minimal usage:
154167
```python
155168
import kite_sql
@@ -162,25 +175,24 @@ for row in db.run("select * from demo"):
162175
```
163176
164177
## TPC-C
165-
Run `make tpcc` (or `cargo run -p tpcc --release`) to execute the benchmark against the default KiteSQL storage.
178+
Run `make tpcc` (or `cargo run -p tpcc --release`) to execute the benchmark against the default KiteSQL storage. Use `--backend kitesql-rocksdb` or `--backend kitesql-lmdb` to compare the two persistent backends directly.
166179
Run `make tpcc-dual` to mirror every TPCC statement to an in-memory SQLite database alongside KiteSQL and assert the two engines return identical results; this target runs for 60 seconds (`--measure-time 60`). Use `cargo run -p tpcc --release -- --backend dual --measure-time <secs>` for a custom duration.
167180
168181
- i9-13900HX
169182
- 32.0 GB
170183
- KIOXIA-EXCERIA PLUS G3 SSD
171184
- Tip: TPC-C currently supports only a single thread
172185
173-
All cases have been fully optimized.
174-
```shell
175-
<90th Percentile RT (MaxRT)>
176-
New-Order : 0.002 (0.005)
177-
Payment : 0.001 (0.013)
178-
Order-Status : 0.002 (0.006)
179-
Delivery : 0.010 (0.023)
180-
Stock-Level : 0.002 (0.017)
181-
<TpmC>
182-
27226 Tpmc
183-
```
186+
Recent 720-second local comparison on the machine above:
187+
188+
| Backend | TpmC | New-Order p90 | Payment p90 | Order-Status p90 | Delivery p90 | Stock-Level p90 |
189+
| --- | ---: | ---: | ---: | ---: | ---: | ---: |
190+
| KiteSQL LMDB | 53510 | 0.001s | 0.001s | 0.001s | 0.002s | 0.001s |
191+
| KiteSQL RocksDB | 32248 | 0.001s | 0.001s | 0.002s | 0.011s | 0.003s |
192+
| SQLite balanced | 36273 | 0.001s | 0.001s | 0.001s | 0.001s | 0.001s |
193+
| SQLite practical | 35516 | 0.001s | 0.001s | 0.001s | 0.001s | 0.001s |
194+
195+
The detailed raw outputs for these runs are recorded in [tpcc/README.md](tpcc/README.md).
184196
#### 👉[check more](tpcc/README.md)
185197
186198
## Roadmap

benchmarks/query_benchmark.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
use criterion::{criterion_group, criterion_main, Criterion};
1616
use indicatif::{ProgressBar, ProgressStyle};
17-
use kite_sql::db::{DataBaseBuilder, ResultIter};
17+
use kite_sql::db::DataBaseBuilder;
1818
use kite_sql::errors::DatabaseError;
1919
#[cfg(unix)]
2020
use pprof::criterion::{Output, PProfProfiler};
@@ -38,7 +38,7 @@ fn query_cases() -> Vec<(&'static str, &'static str)> {
3838
}
3939

4040
fn init_kitesql_query_bench() -> Result<(), DatabaseError> {
41-
let database = DataBaseBuilder::path(QUERY_BENCH_KITE_SQL_PATH).build()?;
41+
let database = DataBaseBuilder::path(QUERY_BENCH_KITE_SQL_PATH).build_rocksdb()?;
4242
database
4343
.run("create table t1 (c1 int primary key, c2 int)")?
4444
.done()?;
@@ -104,7 +104,7 @@ fn query_on_execute(c: &mut Criterion) {
104104
init_kitesql_query_bench().unwrap();
105105
}
106106
let database = DataBaseBuilder::path(QUERY_BENCH_KITE_SQL_PATH)
107-
.build()
107+
.build_rocksdb()
108108
.unwrap();
109109
println!("Table initialization completed");
110110

0 commit comments

Comments
 (0)