Skip to content

Commit 383e565

Browse files
committed
feat: improve normalization rules and add tpcc benchmark runner
1 parent 0d7e0ef commit 383e565

12 files changed

Lines changed: 683 additions & 89 deletions

File tree

Makefile

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ PYO3_PYTHON ?= /usr/bin/python3.12
66
TPCC_MEASURE_TIME ?= 15
77
TPCC_NUM_WARE ?= 1
88
TPCC_PPROF_OUTPUT ?= /tmp/tpcc_lmdb.svg
9+
TPCC_SQLITE_PROFILE ?= balanced
910

10-
.PHONY: test test-python test-wasm test-slt test-all wasm-build check tpcc tpcc-kitesql-rocksdb tpcc-kitesql-lmdb tpcc-lmdb-flamegraph tpcc-sqlite-practical tpcc-dual cargo-check build wasm-examples native-examples fmt clippy
11+
.PHONY: test test-python test-wasm test-slt test-all wasm-build check tpcc tpcc-kitesql-rocksdb tpcc-kitesql-lmdb tpcc-lmdb-flamegraph tpcc-sqlite tpcc-sqlite-practical tpcc-sqlite-balanced tpcc-dual cargo-check build wasm-examples native-examples fmt clippy
1112

1213
## Run default Rust tests in the current environment (non-WASM).
1314
test:
@@ -65,9 +66,17 @@ tpcc-kitesql-lmdb:
6566
tpcc-lmdb-flamegraph:
6667
CARGO_PROFILE_RELEASE_DEBUG=true $(CARGO) run -p tpcc --release --features pprof -- --backend kitesql-lmdb --measure-time $(TPCC_MEASURE_TIME) --num-ware $(TPCC_NUM_WARE) --pprof-output $(TPCC_PPROF_OUTPUT)
6768

69+
## Execute the TPCC workload on SQLite with the profile selected by TPCC_SQLITE_PROFILE (default: balanced).
70+
tpcc-sqlite:
71+
$(CARGO) run -p tpcc --release -- --backend sqlite --sqlite-profile $(TPCC_SQLITE_PROFILE) --path kite_sql_tpcc.sqlite
72+
6873
## Execute the TPCC workload on SQLite with the practical profile.
6974
tpcc-sqlite-practical:
70-
$(CARGO) run -p tpcc --release -- --backend sqlite --sqlite-profile practical --path kite_sql_tpcc.sqlite
75+
$(MAKE) tpcc-sqlite TPCC_SQLITE_PROFILE=practical
76+
77+
## Execute the TPCC workload on SQLite with the balanced profile.
78+
tpcc-sqlite-balanced:
79+
$(MAKE) tpcc-sqlite TPCC_SQLITE_PROFILE=balanced
7180

7281
## Execute TPCC while mirroring every statement to an in-memory SQLite instance for validation.
7382
tpcc-dual:

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,10 @@ Recent 720-second local comparison on the machine above:
183183
184184
| Backend | TpmC | New-Order p90 | Payment p90 | Order-Status p90 | Delivery p90 | Stock-Level p90 |
185185
| --- | ---: | ---: | ---: | ---: | ---: | ---: |
186+
| KiteSQL LMDB | 53510 | 0.001s | 0.001s | 0.001s | 0.002s | 0.001s |
187+
| KiteSQL RocksDB | 32248 | 0.001s | 0.001s | 0.002s | 0.011s | 0.003s |
188+
| SQLite balanced | 36273 | 0.001s | 0.001s | 0.001s | 0.001s | 0.001s |
186189
| SQLite practical | 35516 | 0.001s | 0.001s | 0.001s | 0.001s | 0.001s |
187-
| KiteSQL LMDB | 29171 | 0.001s | 0.001s | 0.001s | 0.015s | 0.002s |
188190
189191
The detailed raw outputs for these runs are recorded in [tpcc/README.md](tpcc/README.md).
190192
#### 👉[check more](tpcc/README.md)

scripts/run_tpcc_matrix.sh

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#!/usr/bin/env bash
2+
3+
# Abort on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail
4+
5+
# Repository root: the parent of the directory holding this script. All paths
# below are derived from it, and the script runs from there.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
6+
cd "$ROOT_DIR"
7+
8+
# Settings, each overridable through the TPCC_* environment variable named in
# its expansion. An empty MAIN_MEASURE_TIME means no --measure-time override.
NUM_WARE="${TPCC_NUM_WARE:-1}"
9+
MAX_DUPLICATE_RETRY="${TPCC_DUPLICATE_RETRY:-1}"
10+
MAIN_MEASURE_TIME="${TPCC_MAIN_MEASURE_TIME:-}"
11+
STAMP="${TPCC_RESULT_STAMP:-$(date +%Y-%m-%d_%H-%M-%S)}"
12+
RESULT_DIR="${TPCC_RESULT_DIR:-$ROOT_DIR/tpcc/results/$STAMP}"
13+
LOG_DIR="$RESULT_DIR/logs"
14+
TMP_DIR="$ROOT_DIR/target/tpcc-run-data"
15+
BINARY="$ROOT_DIR/target/release/tpcc"
16+
SUMMARY_FILE="$RESULT_DIR/summary.md"
17+
18+
mkdir -p "$LOG_DIR" "$TMP_DIR"
19+
20+
# The script never builds the benchmark itself; refuse to start without a
# pre-built, executable release binary.
if [[ ! -x "$BINARY" ]]; then
21+
echo "missing binary: $BINARY" >&2
22+
echo "build it first with: cargo build -p tpcc --release" >&2
23+
exit 1
24+
fi
25+
26+
# Print the TpmC figure from a tpcc log: the first field of the line that
# follows the first "<TpmC>" marker.
#
# Arguments:
#   $1  path to the log file
# Returns:
#   0 when a value was printed; non-zero (printing nothing) when the marker or
#   the value after it is missing, so `extract_tpmc "$log" || echo -` really
#   falls back to "-".
extract_tpmc() {
  local log_file="$1"
  awk '
    /<TpmC>/ {
      # getline returns 0 at EOF; do not print the marker line in that case.
      if ((getline) > 0 && $1 != "") {
        print $1
        found = 1
      }
      exit
    }
    END { exit !found }
  ' "$log_file"
}
30+
31+
# Print the 90th-percentile response time for one transaction type.
#
# Scans the log for the "<90th Percentile RT (MaxRT)>" header, then prints the
# third whitespace-separated field of the first later line containing the
# label.
#
# Arguments:
#   $1  path to the log file
#   $2  transaction label to look for (matched as a plain substring)
# Returns:
#   0 when a value was printed; non-zero (printing nothing) when the header,
#   the label or the third field is missing, so callers can fall back with
#   `extract_p90 "$log" "$label" || echo -`.
extract_p90() {
  local log_file="$1"
  local label="$2"
  awk -v label="$label" '
    /<90th Percentile RT \(MaxRT\)>/ { in_block = 1; next }
    in_block && index($0, label) {
      # Default field splitting already ignores leading blanks, so no
      # explicit trimming is needed before reading $3.
      if ($3 != "") {
        print $3
        found = 1
      }
      exit
    }
    END { exit !found }
  ' "$log_file"
}
43+
44+
# Succeed when the given log contains a duplicate-key style error message.
#
# Arguments:
#   $1  path to the log file to inspect
#
# Uses grep rather than ripgrep: when `rg` is not installed the check would
# fail with "command not found", which the caller cannot tell apart from
# "no match", silently disabling retries.
should_retry_duplicate() {
  local log_file="$1"
  grep -Eq "UNIQUE constraint failed|duplicate key|primary key|Duplicate" "$log_file"
}
48+
49+
# Run one benchmark variant, retrying duplicate-key failures from a clean
# database, then append a row for it to the Markdown summary table.
#
# Arguments:
#   $1   variant name (also the log file stem under $LOG_DIR)
#   $2   measure-time label shown in the summary
#   $3   database path; removed before every attempt and after the run
#   $4+  command to execute
#
# Reads LOG_DIR, SUMMARY_FILE and MAX_DUPLICATE_RETRY. A failing variant is
# recorded with status "failed"; it does not abort the script.
run_variant() {
  local name="$1"
  local measure_label="$2"
  local db_path="$3"
  shift 3
  local -a cmd=("$@")
  local log_file="$LOG_DIR/$name.log"
  # Output of the current attempt only. Retry detection and failure notes must
  # not look at earlier attempts accumulated in $log_file, otherwise an
  # unrelated failure after a duplicate-key retry is misclassified.
  local attempt_log="$log_file.attempt"
  local status="ok"
  local notes="-"
  local attempts=0
  local max_attempts=$((MAX_DUPLICATE_RETRY + 1))
  local cmd_status

  : > "$log_file"

  while (( attempts < max_attempts )); do
    attempts=$((attempts + 1))
    rm -rf "$db_path"

    {
      printf '## Attempt %s\n' "$attempts"
      printf '$'
      printf ' %q' "${cmd[@]}"
      printf '\n\n'
    } >> "$log_file"

    # `|| cmd_status=$?` keeps errexit from aborting on a failed run without
    # toggling `set -e` off and on.
    cmd_status=0
    "${cmd[@]}" > "$attempt_log" 2>&1 || cmd_status=$?
    cat "$attempt_log" >> "$log_file"

    if (( cmd_status == 0 )); then
      break
    fi

    if (( attempts < max_attempts )) && should_retry_duplicate "$attempt_log"; then
      notes="retry after duplicate-key failure"
      printf '\n[runner] duplicate-key style failure detected, retrying %s from scratch\n\n' "$name" >> "$log_file"
      continue
    fi

    status="failed"
    # Flatten the last lines of output into one cell: join lines, collapse
    # whitespace runs (portable BRE interval instead of a GNU-only `\+`), trim.
    notes="$(tail -n 5 "$attempt_log" | tr '\n' ' ' | sed 's/[[:space:]]\{1,\}/ /g; s/^ //; s/ $//')"
    # A raw "|" would start a new column in the Markdown table.
    notes="${notes//|/\\|}"
    notes="${notes:--}"
    break
  done

  local tpmc=""
  local new_order=""
  local payment=""
  local order_status=""
  local delivery=""
  local stock_level=""

  if [[ "$status" == "ok" ]]; then
    tpmc="$(extract_tpmc "$log_file" || true)"
    new_order="$(extract_p90 "$log_file" "New-Order" || true)"
    payment="$(extract_p90 "$log_file" "Payment" || true)"
    order_status="$(extract_p90 "$log_file" "Order-Status" || true)"
    delivery="$(extract_p90 "$log_file" "Delivery" || true)"
    stock_level="$(extract_p90 "$log_file" "Stock-Level" || true)"
  fi

  # Show "-" for anything that was skipped or could not be extracted; testing
  # for empty output works whether or not the extractors signal a miss through
  # their exit status.
  tpmc="${tpmc:--}"
  new_order="${new_order:--}"
  payment="${payment:--}"
  order_status="${order_status:--}"
  delivery="${delivery:--}"
  stock_level="${stock_level:--}"

  printf '| %s | %s | %s | %s | %s | %s | %s | %s | %s | %s | %s | [%s](./logs/%s.log) |\n' \
    "$name" \
    "$status" \
    "$attempts" \
    "$measure_label" \
    "$tpmc" \
    "$new_order" \
    "$payment" \
    "$order_status" \
    "$delivery" \
    "$stock_level" \
    "$notes" \
    "$name" \
    "$name" \
    >> "$SUMMARY_FILE"

  rm -rf "$db_path"
  rm -f "$attempt_log"

  cat "$log_file"
}
130+
131+
# Start the summary with run metadata and the table header; run_variant
# appends one row per variant.
cat > "$SUMMARY_FILE" <<EOF
# TPCC Run Summary

- Timestamp: $STAMP
- Warehouses: ${NUM_WARE}
- Duplicate-key retries per variant: ${MAX_DUPLICATE_RETRY}
- Main variants measure time: ${MAIN_MEASURE_TIME:-tpcc default (720s)}
- Binary: \`$BINARY\`

| Variant | Status | Attempts | Measure Time | TpmC | New-Order p90 | Payment p90 | Order-Status p90 | Delivery p90 | Stock-Level p90 | Notes | Raw Log |
| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | --- |
EOF

# Optional --measure-time override shared by every variant; left empty when
# TPCC_MAIN_MEASURE_TIME is not set.
main_measure_args=()
main_measure_label="tpcc default (720s)"
if [[ -n "$MAIN_MEASURE_TIME" ]]; then
  main_measure_args=(--measure-time "$MAIN_MEASURE_TIME")
  main_measure_label="${MAIN_MEASURE_TIME}s"
fi

# NOTE: ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A plain
# "${arr[@]}" aborts with "unbound variable" under `set -u` on bash < 4.4
# (for example the stock macOS bash 3.2), and the array is empty by default.
run_variant \
  "kitesql-lmdb" \
  "$main_measure_label" \
  "$TMP_DIR/kitesql-lmdb" \
  "$BINARY" --backend kitesql-lmdb ${main_measure_args[@]+"${main_measure_args[@]}"} --num-ware "$NUM_WARE" --path "$TMP_DIR/kitesql-lmdb"

run_variant \
  "kitesql-rocksdb" \
  "$main_measure_label" \
  "$TMP_DIR/kitesql-rocksdb" \
  "$BINARY" --backend kitesql-rocksdb ${main_measure_args[@]+"${main_measure_args[@]}"} --num-ware "$NUM_WARE" --path "$TMP_DIR/kitesql-rocksdb"

run_variant \
  "sqlite-balanced" \
  "$main_measure_label" \
  "$TMP_DIR/sqlite-balanced.sqlite" \
  "$BINARY" --backend sqlite --sqlite-profile balanced ${main_measure_args[@]+"${main_measure_args[@]}"} --num-ware "$NUM_WARE" --path "$TMP_DIR/sqlite-balanced.sqlite"

run_variant \
  "sqlite-practical" \
  "$main_measure_label" \
  "$TMP_DIR/sqlite-practical.sqlite" \
  "$BINARY" --backend sqlite --sqlite-profile practical ${main_measure_args[@]+"${main_measure_args[@]}"} --num-ware "$NUM_WARE" --path "$TMP_DIR/sqlite-practical.sqlite"

# Scratch databases are not part of the results.
rm -rf "$TMP_DIR"

printf '\nGenerated summary: %s\n' "$SUMMARY_FILE"

src/binder/select.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ impl VisitorMut<'_> for SplitScopePositionRebinder<'_> {
9393
if let Some(left_position) = self
9494
.left_schema
9595
.iter()
96-
.position(|candidate| candidate.summary() == column.summary())
96+
.position(|candidate| candidate.same_column(column))
9797
{
9898
*position = left_position;
9999
} else if let Some(right_position) = self
100100
.right_schema
101101
.iter()
102-
.position(|candidate| candidate.summary() == column.summary())
102+
.position(|candidate| candidate.same_column(column))
103103
{
104104
*position = right_position;
105105
}

src/catalog/column.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ pub struct ColumnSummary {
6666
}
6767

6868
impl ColumnRef {
69+
/// Returns `true` when `self` and `other` have equal `summary()` values.
///
/// This is looser than `==` on `ColumnRef`: the unit test in this module
/// builds two columns that differ in nullability and logical type, and they
/// are unequal under `==` yet count as the same column here.
pub(crate) fn same_column(&self, other: &ColumnRef) -> bool {
70+
self.summary() == other.summary()
71+
}
72+
6973
pub(crate) fn nullable_for_join(&self, nullable: bool) -> Option<ColumnRef> {
7074
if self.nullable == nullable {
7175
return None;
@@ -201,6 +205,39 @@ impl ColumnCatalog {
201205
}
202206
}
203207

208+
#[cfg(all(test, not(target_arch = "wasm32")))]
209+
mod tests {
210+
use super::{ColumnCatalog, ColumnDesc, ColumnRef};
211+
use crate::errors::DatabaseError;
212+
use crate::types::LogicalType;
213+
214+
#[test]
215+
fn test_same_column_ignores_nullable_and_desc() -> Result<(), DatabaseError> {
216+
let mut left = ColumnCatalog::new(
217+
"c1".to_string(),
218+
false,
219+
ColumnDesc::new(LogicalType::Integer, None, false, None)?,
220+
);
221+
let mut right = ColumnCatalog::new(
222+
"c1".to_string(),
223+
true,
224+
ColumnDesc::new(LogicalType::Bigint, None, false, None)?,
225+
);
226+
let left_ref = ColumnRef::from(left.clone());
227+
let right_ref = ColumnRef::from(right.clone());
228+
229+
assert_ne!(left_ref, right_ref);
230+
assert!(left_ref.same_column(&right_ref));
231+
232+
left.set_name("c2".to_string());
233+
right.set_name("c3".to_string());
234+
let left_ref = ColumnRef::from(left);
235+
let right_ref = ColumnRef::from(right);
236+
assert!(!left_ref.same_column(&right_ref));
237+
Ok(())
238+
}
239+
}
240+
204241
/// The descriptor of a column.
205242
#[derive(Debug, Clone, PartialEq, Eq, Hash, ReferenceSerialization)]
206243
pub struct ColumnDesc {

src/expression/mod.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,20 @@ impl ScalarExpression {
240240
ScalarExpression::ColumnRef { column, position }
241241
}
242242

243+
pub(crate) fn eq_ignore_colref_pos(&self, other: &ScalarExpression) -> bool {
244+
match (self.unpack_alias_ref(), other.unpack_alias_ref()) {
245+
(
246+
ScalarExpression::ColumnRef {
247+
column: lhs_column, ..
248+
},
249+
ScalarExpression::ColumnRef {
250+
column: rhs_column, ..
251+
},
252+
) => lhs_column.same_column(rhs_column),
253+
(lhs, rhs) => lhs == rhs,
254+
}
255+
}
256+
243257
pub fn unpack_alias(self) -> ScalarExpression {
244258
if let ScalarExpression::Alias {
245259
alias: AliasType::Expr(expr),
@@ -809,6 +823,38 @@ mod test {
809823
use std::sync::Arc;
810824
use tempfile::TempDir;
811825

826+
#[test]
827+
fn test_eq_ignore_colref_pos() -> Result<(), DatabaseError> {
828+
let left = ScalarExpression::column_expr(
829+
ColumnRef::from(ColumnCatalog::new(
830+
"c1".to_string(),
831+
false,
832+
ColumnDesc::new(LogicalType::Integer, None, false, None)?,
833+
)),
834+
0,
835+
);
836+
let right = ScalarExpression::column_expr(
837+
ColumnRef::from(ColumnCatalog::new(
838+
"c1".to_string(),
839+
true,
840+
ColumnDesc::new(LogicalType::Bigint, None, false, None)?,
841+
)),
842+
2,
843+
);
844+
let different = ScalarExpression::column_expr(
845+
ColumnRef::from(ColumnCatalog::new(
846+
"c2".to_string(),
847+
false,
848+
ColumnDesc::new(LogicalType::Integer, None, false, None)?,
849+
)),
850+
0,
851+
);
852+
853+
assert!(left.eq_ignore_colref_pos(&right));
854+
assert!(!left.eq_ignore_colref_pos(&different));
855+
Ok(())
856+
}
857+
812858
#[test]
813859
fn test_serialization() -> Result<(), DatabaseError> {
814860
fn fn_assert(

0 commit comments

Comments
 (0)