Skip to content

Commit 8677d9f

Browse files
authored
Merge pull request #525 from tidesdb/art-build-times
build times article rocksdb, tidesdb
2 parents a660c34 + a2a22d3 commit 8677d9f

8 files changed

Lines changed: 413 additions & 0 deletions

File tree

astro.config.mjs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,11 @@ export default defineConfig({
167167
{
168168
label: 'Articles',
169169
items: [
170+
{
171+
172+
label: 'Build Comparison TidesDB v9.3.6 & RocksDB v11.1.1',
173+
link: 'articles/build-comparison-tidesdb-v9-3-6-rocksdb-v11-1-1'
174+
},
170175
{
171176

172177
label: 'sysbench Analysis on TideSQL v4.5.6 & InnoDB in MariaDB v11.8.6',
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env bash
2+
#
3+
# benchmark.sh - Fair build-time comparison: TidesDB vs RocksDB
4+
#
5+
# Measures ONLY the time to compile each project's core static library, from a
6+
# clean build tree, all cores, Release. Clones are NOT timed. Configure (cmake
7+
# generate) is NOT timed. We time only `cmake --build --target <lib>`.
8+
#
9+
# Decisions (see README.md for rationale):
10+
# * Scope : core library target only (tidesdb / rocksdb), nothing else
11+
# * Artifact : STATIC lib for both (removes shared-vs-static asymmetry)
12+
# * Build type: Release for both
13+
# * Parallel : -j$(nproc) for both (override with JOBS=1 for raw work)
14+
# * Runs : 3, median reported (fresh build tree each run)
15+
# * Flags : each project keeps its own defaults (e.g. RocksDB -march=native)
16+
#
17+
# Everything below is env-overridable, e.g.:
18+
# RUNS=5 JOBS=1 GENERATOR="Unix Makefiles" ./benchmark.sh
19+
#
20+
set -euo pipefail

# configuration -- every value below can be overridden from the environment
TIDESDB_TAG=${TIDESDB_TAG:-v9.3.6}
ROCKSDB_TAG=${ROCKSDB_TAG:-v11.1.1}
RUNS=${RUNS:-3}
# nproc is GNU coreutils only; fall back to getconf, then to a single job.
JOBS=${JOBS:-$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)}
GENERATOR=${GENERATOR:-Ninja}
BUILD_TYPE=${BUILD_TYPE:-Release}

# RUNS and JOBS must be positive integers: RUNS=0 would leave no timings to
# take a median of, and a non-numeric value only fails much later inside the
# run loop or cmake. The die() helper is not defined yet at this point, so the
# error is reported inline.
for cfg_name in RUNS JOBS; do
  if ! [[ "${!cfg_name}" =~ ^[1-9][0-9]*$ ]]; then
    printf 'ERROR: %s must be a positive integer (got "%s")\n' \
      "$cfg_name" "${!cfg_name}" >&2
    exit 1
  fi
done
unset cfg_name

# All paths are derived from the directory that contains this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPOS="$ROOT/repos"
RESULTS="$ROOT/results"
LOGS="$RESULTS/logs"
CSV="$RESULTS/build_times.csv"
META="$RESULTS/environment.txt"
36+
37+
# helpers
38+
# ANSI colour sequences, stored with literal backslashes. They are expanded
# either by printf's %b (helpers below) or when embedded in a printf format.
c_blue='\033[1;34m'; c_dim='\033[2m'; c_grn='\033[1;32m'; c_red='\033[1;31m'; c_rst='\033[0m'

# log <msg...> - print a highlighted "==>" section line on stdout.
log() { printf '%b==>%b %s\n' "$c_blue" "$c_rst" "$*"; }

# sub <msg...> - print a dimmed detail line on stdout.
sub() { printf '%b %s%b\n' "$c_dim" "$*" "$c_rst"; }

# die <msg...> - print an error on stderr and exit the script with status 1.
die() { printf '%bERROR:%b %s\n' "$c_red" "$c_rst" "$*" >&2; exit 1; }

# now - print the current time as fractional seconds since the epoch.
# bash >= 5 exposes EPOCHREALTIME, which avoids spawning date(1) right next to
# the timed region and does not need GNU date's %N. Its radix character follows
# the locale, so a comma is normalised to a dot for awk.
now() {
  if [[ -n "${EPOCHREALTIME:-}" ]]; then
    printf '%s\n' "${EPOCHREALTIME/,/.}"
  else
    date +%s.%N
  fi
}

# secs <start> <end> - print end - start with millisecond precision.
secs() { awk -v a="$1" -v b="$2" 'BEGIN{printf "%.3f", b-a}'; }
44+
45+
# clone a tag shallowly if not already present (NOT timed)
46+
# clone_repo <url> <tag> <dir>
# Shallow-clone <tag> of <url> into <dir> unless <dir> already contains a git
# checkout. An existing checkout is reused only when its HEAD is the commit
# that <tag> resolves to; otherwise the script aborts, because it would build
# one version while reporting another in the results.
clone_repo() {
  local url="$1" tag="$2" dir="$3"
  local name head_sha tag_sha
  name="$(basename "$dir")"

  if [ ! -d "$dir/.git" ]; then
    log "Cloning ${name} @ ${tag} (not timed)"
    git clone --quiet --depth 1 --branch "$tag" "$url" "$dir" \
      || die "clone failed for $url @ $tag"
    return 0
  fi

  # "|| var=" keeps a failed lookup from tripping set -e; an empty value is
  # treated as a mismatch below.
  head_sha="$(git -C "$dir" rev-parse -q --verify 'HEAD^{commit}' 2>/dev/null)" || head_sha=""
  tag_sha="$(git -C "$dir" rev-parse -q --verify "${tag}^{commit}" 2>/dev/null)" || tag_sha=""
  if [ -z "$head_sha" ] || [ "$head_sha" != "$tag_sha" ]; then
    die "$dir is not checked out at $tag; remove it and re-run"
  fi
  sub "already cloned: ${name} @ ${tag}"
}
56+
57+
# bench <name> <src_dir> <target> <extra cmake args...>
58+
# Re-configures + rebuilds from scratch RUNS times; times only the build step.
59+
# bench <name> <src_dir> <target> [extra cmake args...]
# For each of RUNS runs: delete the build tree, configure it (not timed), then
# build <target> (timed) and append "<name>,<run>,<seconds>" to $CSV.
# Reads the globals RUNS, JOBS, GENERATOR, BUILD_TYPE, LOGS and CSV.
# Configure and build output goes to per-run log files under $LOGS; on failure
# the relevant log (or its last 40 lines) is printed and the script dies.
bench() {
  local name="$1" src="$2" target="$3"; shift 3
  local -a extra=("$@")
  local build="$src/_bench_build"
  local run cfg_log build_log t0 t1 elapsed

  log "Benchmarking ${name} (target: ${target}, ${RUNS} run(s), -j${JOBS}, ${BUILD_TYPE})"
  for ((run = 1; run <= RUNS; run++)); do
    cfg_log="$LOGS/${name}_configure_run${run}.log"
    build_log="$LOGS/${name}_build_run${run}.log"

    # fresh build tree each run; :? refuses to expand an empty path
    rm -rf -- "${build:?}"

    # configure (NOT timed). The ${extra[@]+...} form keeps an empty argument
    # list from tripping `set -u` on bash older than 4.4.
    cmake -S "$src" -B "$build" -G "$GENERATOR" \
      -DCMAKE_BUILD_TYPE="$BUILD_TYPE" ${extra[@]+"${extra[@]}"} \
      > "$cfg_log" 2>&1 \
      || { cat "$cfg_log"; die "$name configure failed"; }

    sync  # flush dirty pages so each run starts comparably (page cache stays warm)

    # build the library target (TIMED)
    t0="$(now)"
    cmake --build "$build" --target "$target" -j "$JOBS" \
      > "$build_log" 2>&1 \
      || { tail -n 40 "$build_log"; die "$name build failed"; }
    t1="$(now)"

    elapsed="$(secs "$t0" "$t1")"
    printf "%s,%d,%s\n" "$name" "$run" "$elapsed" >> "$CSV"
    sub "run ${run}/${RUNS}: ${elapsed}s"
  done
}
88+
89+
# print median of the runs for a project (reads CSV)
90+
# median_of <project>
# Print the median of the "seconds" column of $CSV for rows whose first field
# equals <project>, formatted to three decimals, or "n/a" when there are none.
# Every stage runs under LC_ALL=C so that sort -n and awk always treat "." as
# the decimal point, whatever the caller's locale is.
median_of() {
  local name="$1"
  LC_ALL=C awk -F, -v n="$name" '$1 == n { print $3 }' "$CSV" \
    | LC_ALL=C sort -n \
    | LC_ALL=C awk '
        { v[NR] = $1 }
        END {
          if (NR == 0) { print "n/a"; exit }
          if (NR % 2) printf "%.3f", v[(NR + 1) / 2]
          else        printf "%.3f", (v[NR / 2] + v[NR / 2 + 1]) / 2
        }'
}
98+
99+
# main
100+
# Preflight: fail early with a clear message when a required tool is missing.
command -v git >/dev/null || die "git not found"
command -v cmake >/dev/null || die "cmake not found"
# Every Ninja-based generator ("Ninja", "Ninja Multi-Config") needs the ninja
# binary, so match on the prefix instead of the exact name.
case "$GENERATOR" in
  Ninja*)
    command -v ninja >/dev/null \
      || die "ninja not found (or set GENERATOR=\"Unix Makefiles\")"
    ;;
esac

# Create the output directories and start a fresh CSV (header row only).
mkdir -p "$REPOS" "$LOGS"
echo "project,run,seconds" > "$CSV"
106+
107+
# record environment for reproducibility / the article
108+
# Print a summary of the host, toolchain and benchmark settings, one
# "label : value" line each. tee shows it on stdout and also writes it to $META.
{
109+
echo "Build-time benchmark environment"
110+
echo "date : $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
111+
echo "host kernel : $(uname -srm)"
112+
echo "cpu cores : $(nproc)"
113+
# free's stderr is discarded, so the value is simply blank where free is unavailable
echo "memory : $(free -h 2>/dev/null | awk '/^Mem:/{print $2}')"
114+
echo "cmake : $(cmake --version | head -1)"
115+
echo "generator : $GENERATOR"
116+
# the ninja line is emitted only when ninja is on PATH
command -v ninja >/dev/null && echo "ninja : $(ninja --version)"
117+
# CC / CXX from the environment take precedence; otherwise cc / c++ are queried
echo "cc : $(${CC:-cc} --version | head -1)"
118+
echo "cxx : $(${CXX:-c++} --version | head -1)"
119+
echo "build type : $BUILD_TYPE"
120+
echo "parallel jobs : $JOBS"
121+
echo "runs (median) : $RUNS"
122+
echo "tidesdb tag : $TIDESDB_TAG"
123+
echo "rocksdb tag : $ROCKSDB_TAG"
124+
echo "scope : core static library target only (tidesdb / rocksdb)"
125+
} | tee "$META"
126+
echo
127+
128+
# 1. clone (not timed)
129+
tidesdb_src="$REPOS/tidesdb"
rocksdb_src="$REPOS/rocksdb"

clone_repo "https://github.com/tidesdb/tidesdb.git" "$TIDESDB_TAG" "$tidesdb_src"
clone_repo "https://github.com/facebook/rocksdb.git" "$ROCKSDB_TAG" "$rocksdb_src"
echo

# 2. benchmark each (force STATIC lib for an apples-to-apples artifact)
tidesdb_flags=(-DBUILD_SHARED_LIBS=OFF)

# FAIL_ON_WARNINGS=OFF drops RocksDB's default -Werror. Necessary: GCC 12 emits a
# spurious -Wrestrict in options_type.h that aborts the stock build. Also fairer --
# TidesDB's default build has no -Werror. It changes neither what is compiled nor
# the optimization level, so the measured build time is unaffected.
rocksdb_flags=(
  -DROCKSDB_BUILD_SHARED=OFF
  -DFAIL_ON_WARNINGS=OFF
)

bench "tidesdb" "$tidesdb_src" "tidesdb" "${tidesdb_flags[@]}"
echo
bench "rocksdb" "$rocksdb_src" "rocksdb" "${rocksdb_flags[@]}"
echo
145+
146+
# 3. summary
147+
tds_med="$(median_of tidesdb)"
rdb_med="$(median_of rocksdb)"
log "Median build time (lower is faster)"
printf " ${c_grn}TidesDB %s${c_rst} : %ss\n" "$TIDESDB_TAG" "$tds_med"
printf " ${c_grn}RocksDB %s${c_rst} : %ss\n" "$ROCKSDB_TAG" "$rdb_med"
# median_of prints "n/a" when a project has no rows. Without the "+ 0" awk
# compares that value as a STRING ("n/a" > "0" is true), lets the guard pass
# and then divides by zero, which aborts the script under set -e. Adding 0
# forces a numeric comparison, so a missing median just skips the ratio line.
if awk -v t="$tds_med" -v r="$rdb_med" 'BEGIN { exit !(t + 0 > 0 && r + 0 > 0) }'; then
  ratio="$(awk -v t="$tds_med" -v r="$rdb_med" 'BEGIN { printf "%.1f", r / t }')"
  sub "RocksDB takes ~${ratio}x as long as TidesDB to build its core library."
fi
echo
sub "raw timings : $CSV"
sub "environment : $META"
sub "build logs : $LOGS/"
159+
160+
# 4. plot
161+
# Plotting is optional: it is skipped without python3, and a failing plot.py
# is reported but does not fail the benchmark.
if command -v python3 >/dev/null; then
  log "Plotting -> $RESULTS/build_comparison.png"
  # A real if/else instead of `a && b || c`, so the "skipped" message can only
  # ever be triggered by plot.py itself failing.
  if python3 "$ROOT/plot.py" "$CSV" "$RESULTS/build_comparison.png" "$TIDESDB_TAG" "$ROCKSDB_TAG"; then
    sub "wrote $RESULTS/build_comparison.png"
  else
    sub "plot skipped (see error above)"
  fi
else
  sub "python3 not found - skipping plot"
fi
58.5 KB
Loading
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
project,run,seconds
2+
tidesdb,1,5.387
3+
tidesdb,2,5.429
4+
tidesdb,3,5.425
5+
rocksdb,1,132.374
6+
rocksdb,2,134.515
7+
rocksdb,3,134.838
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Build-time benchmark environment
2+
date : 2026-06-12 01:12:27 UTC
3+
host kernel : Linux 6.2.0-39-generic x86_64
4+
cpu cores : 16
5+
memory : 46Gi
6+
cmake : cmake version 3.25.1
7+
generator : Ninja
8+
ninja : 1.11.1
9+
cc : cc (Ubuntu 12.3.0-1ubuntu1~23.04) 12.3.0
10+
cxx : c++ (Ubuntu 12.3.0-1ubuntu1~23.04) 12.3.0
11+
build type : Release
12+
parallel jobs : 16
13+
runs (median) : 3
14+
tidesdb tag : v9.3.6
15+
rocksdb tag : v11.1.1
16+
scope : core static library target only (tidesdb / rocksdb)
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/env python3
2+
"""Simple bar-chart plotter for the TidesDB, RocksDB build-time benchmark.
3+
4+
Usage:
5+
python3 plot.py <build_times.csv> <out.png> [tidesdb_tag] [rocksdb_tag]
6+
7+
Reads a CSV with header `project,run,seconds`, plots the MEDIAN build time per
8+
project as a bar (with min/max whiskers for honesty), in each project's brand
9+
colour, and writes a PNG.
10+
"""
11+
import csv
12+
import statistics
13+
import sys
14+
15+
import matplotlib
16+
matplotlib.use("Agg") # headless
17+
import matplotlib.pyplot as plt
18+
19+
# brand colours requested
20+
COLORS = {"tidesdb": "#183CD4", "rocksdb": "#F2B603"}
21+
22+
23+
def main() -> int:
    """Plot the median build time per project from a timings CSV.

    Arguments are taken from ``sys.argv``:
    ``<build_times.csv> <out.png> [tidesdb_tag] [rocksdb_tag]``.

    Returns:
        0 once the PNG has been written; 1 when the CSV cannot be read, is
        malformed, or contains no rows; 2 when fewer than two arguments were
        given (the module docstring is printed as usage).
    """
    if len(sys.argv) < 3:
        print(__doc__)
        return 2
    csv_path, out_path = sys.argv[1], sys.argv[2]
    tags = {
        "tidesdb": sys.argv[3] if len(sys.argv) > 3 else "",
        "rocksdb": sys.argv[4] if len(sys.argv) > 4 else "",
    }

    # collect timings per project, preserving first-seen order
    times: dict[str, list[float]] = {}
    try:
        with open(csv_path, newline="") as fh:
            for row in csv.DictReader(fh):
                times.setdefault(row["project"], []).append(float(row["seconds"]))
    except (OSError, KeyError, TypeError, ValueError) as exc:
        # Unreadable file, missing column, short row (value is None) or a
        # non-numeric "seconds" field: report it the same way as the empty
        # case below instead of dying with a traceback.
        print(f"cannot read timings from {csv_path}: {exc!r}", file=sys.stderr)
        return 1

    if not times:
        print(f"no data in {csv_path}", file=sys.stderr)
        return 1

    projects = list(times.keys())
    medians = [statistics.median(times[p]) for p in projects]
    lows = [min(times[p]) for p in projects]
    highs = [max(times[p]) for p in projects]
    # asymmetric whiskers: distance from median down to min / up to max
    yerr = [[m - lo for m, lo in zip(medians, lows)],
            [hi - m for m, hi in zip(medians, highs)]]
    colors = [COLORS.get(p, "#888888") for p in projects]

    # display name, plus the version tag on a second line when one was given
    labels = [f"{p.replace('tidesdb','TidesDB').replace('rocksdb','RocksDB')}"
              + (f"\n{tags[p]}" if tags.get(p) else "") for p in projects]

    fig, ax = plt.subplots(figsize=(7, 5.5))
    bars = ax.bar(labels, medians, color=colors, width=0.55,
                  yerr=yerr, capsize=8, ecolor="#555555",
                  edgecolor="white", linewidth=1.2, zorder=3)

    runs = max(len(v) for v in times.values())
    stat = "median" if runs > 1 else "single run"
    ax.set_ylabel("Build time (seconds), lower is faster", fontsize=11)
    ax.set_title(f"Core library build time: TidesDB vs RocksDB\n"
                 f"static lib, Release, all cores, {stat} of {runs} run(s)",
                 fontsize=13, fontweight="bold")
    ax.grid(axis="y", linestyle="--", alpha=0.4, zorder=0)
    ax.set_axisbelow(True)
    for spine in ("top", "right"):
        ax.spines[spine].set_visible(False)
    # headroom above the tallest whisker for the value labels and annotation
    ax.set_ylim(0, max(highs) * 1.18)

    # value labels on top of each bar
    for bar, m in zip(bars, medians):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                f"{m:.1f}s", ha="center", va="bottom",
                fontsize=12, fontweight="bold")

    # speed-ratio annotation when exactly two projects
    if len(projects) == 2 and min(medians) > 0:
        slow, fast = max(medians), min(medians)
        ax.text(0.5, 0.94, f"{slow/fast:.1f}x difference",
                transform=ax.transAxes, ha="center", fontsize=10,
                color="#444444", style="italic")

    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    print(f"wrote {out_path}")
    return 0
89+
90+
91+
if __name__ == "__main__":
92+
raise SystemExit(main())
638 KB
Loading

0 commit comments

Comments
 (0)