|
| 1 | +#!/usr/bin/env bash |
| 2 | +# |
| 3 | +# benchmark.sh - Fair build-time comparison: TidesDB vs RocksDB |
| 4 | +# |
| 5 | +# Measures ONLY the time to compile each project's core static library, from a |
| 6 | +# clean build tree, all cores, Release. Clones are NOT timed. Configure (cmake |
| 7 | +# generate) is NOT timed. We time only `cmake --build --target <lib>`. |
| 8 | +# |
| 9 | +# Decisions (see README.md for rationale): |
| 10 | +# * Scope : core library target only (tidesdb / rocksdb), nothing else |
| 11 | +# * Artifact : STATIC lib for both (removes shared-vs-static asymmetry) |
| 12 | +# * Build type: Release for both |
| 13 | +# * Parallel : -j$(nproc) for both (override with JOBS=1 for raw work) |
| 14 | +# * Runs : 3, median reported (fresh build tree each run) |
| 15 | +# * Flags : each project keeps its own defaults (e.g. RocksDB -march=native) |
| 16 | +# |
| 17 | +# Everything below is env-overridable, e.g.: |
| 18 | +# RUNS=5 JOBS=1 GENERATOR="Unix Makefiles" ./benchmark.sh |
| 19 | +# |
set -euo pipefail

# configuration -- every knob below can be overridden from the environment
TIDESDB_TAG=${TIDESDB_TAG:-v9.3.6}
ROCKSDB_TAG=${ROCKSDB_TAG:-v11.1.1}
RUNS=${RUNS:-3}
GENERATOR=${GENERATOR:-Ninja}
BUILD_TYPE=${BUILD_TYPE:-Release}

# JOBS defaults to the number of online CPUs. nproc is not available on every
# system, and a failing command substitution in a plain assignment aborts the
# script under `set -e`, so try getconf / sysctl next and settle for 1.
if [ -z "${JOBS:-}" ]; then
  JOBS="$(nproc 2>/dev/null \
    || getconf _NPROCESSORS_ONLN 2>/dev/null \
    || sysctl -n hw.ncpu 2>/dev/null \
    || echo 1)"
fi

# RUNS and JOBS must be positive integers: RUNS=0 would record no samples (so
# there is no median to report) and a non-numeric value only fails much later.
# `die` is not defined yet at this point, hence the inline error.
for _cfg_name in RUNS JOBS; do
  if ! [[ "${!_cfg_name}" =~ ^0*[1-9][0-9]*$ ]]; then
    printf 'ERROR: %s must be a positive integer (got "%s")\n' \
      "$_cfg_name" "${!_cfg_name}" >&2
    exit 1
  fi
done
unset _cfg_name

# Strip leading zeros so later arithmetic never reads the values as octal.
RUNS=$((10#$RUNS))
JOBS=$((10#$JOBS))
| 29 | + |
# Absolute directory that holds this script; every other path hangs off it.
# CDPATH is emptied for the cd because, when the script is invoked through a
# relative path and the user has CDPATH set, cd may print the directory it
# selected -- that text would be captured into ROOT together with pwd's output.
ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPOS="$ROOT/repos"               # shallow clones of both projects
RESULTS="$ROOT/results"           # everything the benchmark writes
LOGS="$RESULTS/logs"              # per-run configure/build logs
CSV="$RESULTS/build_times.csv"    # raw timings: project,run,seconds
META="$RESULTS/environment.txt"   # snapshot of the build environment
| 36 | + |
# helpers
# ANSI colour sequences, kept in backslash-escaped form; printf expands them
# (through %b here, or when they are embedded in a format string).
c_blue='\033[1;34m'
c_dim='\033[2m'
c_grn='\033[1;32m'
c_rst='\033[0m'

# log <words...> : top-level progress line, prefixed with a blue "==>".
log() {
  printf '%b==>%b %s\n' "$c_blue" "$c_rst" "$*"
}

# sub <words...> : dimmed detail line shown beneath a log line.
sub() {
  printf '%b %s%b\n' "$c_dim" "$*" "$c_rst"
}

# die <words...> : print a red "ERROR:" line on stderr and exit with status 1.
die() {
  printf '\033[1;31mERROR:%b %s\n' "$c_rst" "$*" >&2
  exit 1
}
# now : print the current time as fractional epoch seconds.
# Bash 5+ exposes the clock as EPOCHREALTIME, which avoids forking `date` right
# next to the timed region; its decimal separator follows the locale, so a
# comma is normalised to a dot for awk. On older shells fall back to
# `date +%s.%N`. A `date` without %N support leaves a non-numeric residue in
# the output; in that case print whole seconds instead of a malformed value.
now() {
  if [ -n "${EPOCHREALTIME:-}" ]; then
    printf '%s\n' "${EPOCHREALTIME/,/.}"
    return 0
  fi
  local stamp
  stamp="$(date +%s.%N)"
  case "$stamp" in
    *[!0-9.]*) date +%s ;;
    *) printf '%s\n' "$stamp" ;;
  esac
}

# secs <start> <end> : print end - start with millisecond precision.
secs() {
  awk -v a="$1" -v b="$2" 'BEGIN { printf "%.3f", b - a }'
}
| 44 | + |
# clone_repo <url> <tag> <dir>
# Shallow-clone <tag> of <url> into <dir> unless a clone is already there
# (NOT timed).
#
# An existing clone is reused only when its HEAD is the commit that <tag>
# (looked up as a local tag, then as a local branch) points to. Otherwise the
# script stops: reusing a checkout left over from a run with a different
# *_TAG value would benchmark one version while reporting another.
clone_repo() {
  local url="$1" tag="$2" dir="$3"
  local name head ref want
  name="$(basename "$dir")"

  if [ ! -d "$dir/.git" ]; then
    log "Cloning $name @ $tag (not timed)"
    git clone --quiet --depth 1 --branch "$tag" "$url" "$dir" \
      || die "clone failed for $url @ $tag"
    return 0
  fi

  head="$(git -C "$dir" rev-parse --quiet --verify 'HEAD^{commit}' 2>/dev/null)" \
    || die "cannot read HEAD of existing clone: $dir"

  for ref in "refs/tags/$tag" "refs/heads/$tag"; do
    # A ref that does not exist locally is simply not a match.
    want="$(git -C "$dir" rev-parse --quiet --verify "${ref}^{commit}" 2>/dev/null)" \
      || continue
    if [ "$want" = "$head" ]; then
      sub "already cloned: $name @ $tag"
      return 0
    fi
  done

  die "$dir exists but is not checked out at $tag; remove it (or check out $tag) and re-run"
}
| 56 | + |
# bench <name> <src_dir> <target> [extra cmake args...]
# Runs RUNS clean builds of <target>. Every run deletes the build tree,
# re-configures with cmake (NOT timed), then times only `cmake --build` and
# appends "<name>,<run>,<seconds>" to $CSV.
# Globals read: RUNS JOBS GENERATOR BUILD_TYPE LOGS CSV
# Exits through die() when a configure or build step fails, after showing the
# relevant log.
bench() {
  local name="$1" src="$2" target="$3"
  shift 3
  local extra=("$@")
  local build="$src/_bench_build"
  # Declared local so neither the loop counter nor the scratch values leak into
  # the caller's scope.
  local run cfg_log build_log t0 t1 elapsed

  log "Benchmarking ${name} (target: ${target}, ${RUNS} run(s), -j${JOBS}, ${BUILD_TYPE})"
  for run in $(seq 1 "$RUNS"); do
    cfg_log="$LOGS/${name}_configure_run${run}.log"
    build_log="$LOGS/${name}_build_run${run}.log"

    # Fresh build tree for every run; :? refuses to expand an empty path.
    rm -rf -- "${build:?}"

    # configure (NOT timed)
    # ${extra[@]+...} expands to nothing for an empty array; a bare
    # "${extra[@]}" is an unbound-variable error under `set -u` on Bash < 4.4.
    if ! cmake -S "$src" -B "$build" -G "$GENERATOR" \
      -DCMAKE_BUILD_TYPE="$BUILD_TYPE" ${extra[@]+"${extra[@]}"} \
      > "$cfg_log" 2>&1; then
      cat "$cfg_log"
      die "$name configure failed"
    fi

    sync # flush dirty pages so each run starts comparably (page cache stays warm)

    # build the library target (TIMED)
    t0="$(now)"
    if ! cmake --build "$build" --target "$target" -j "$JOBS" \
      > "$build_log" 2>&1; then
      tail -n 40 "$build_log"
      die "$name build failed"
    fi
    t1="$(now)"

    elapsed="$(secs "$t0" "$t1")"
    printf '%s,%d,%s\n' "$name" "$run" "$elapsed" >> "$CSV"
    sub "run ${run}/${RUNS}: ${elapsed}s"
  done
}
| 88 | + |
# median_of <project>
# Print the median of the seconds column recorded in $CSV for <project>, with
# three decimals, or "n/a" when the project has no rows.
median_of() {
  local project="$1"
  awk -F, -v want="$project" '$1 == want { print $3 }' "$CSV" \
    | sort -n \
    | awk '
        { sample[++count] = $1 }
        END {
          if (count == 0) {
            print "n/a"
            exit
          }
          mid = int((count + 1) / 2)
          if (count % 2 == 1) {
            printf "%.3f", sample[mid]
          } else {
            printf "%.3f", (sample[mid] + sample[mid + 1]) / 2
          }
        }'
}
| 98 | + |
# main
# Stop early when a required tool is missing. ninja is only needed when it is
# the selected generator.
if ! command -v git >/dev/null; then
  die "git not found"
fi
if ! command -v cmake >/dev/null; then
  die "cmake not found"
fi
if [ "$GENERATOR" = "Ninja" ] && ! command -v ninja >/dev/null; then
  die "ninja not found (or set GENERATOR=\"Unix Makefiles\")"
fi

# Create the output directories and start a fresh CSV holding only the header.
mkdir -p "$REPOS" "$LOGS"
printf '%s\n' "project,run,seconds" > "$CSV"
| 106 | + |
# record environment for reproducibility / the article
# The same text goes to the terminal and, through tee, to $META.
{
  printf '%s\n' "Build-time benchmark environment"
  printf '%s\n' "date : $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
  printf '%s\n' "host kernel : $(uname -srm)"
  printf '%s\n' "cpu cores : $(nproc)"
  # free's errors are discarded, so the value is simply empty where it is absent
  printf '%s\n' "memory : $(free -h 2>/dev/null | awk '/^Mem:/{print $2}')"
  printf '%s\n' "cmake : $(cmake --version | head -n 1)"
  printf '%s\n' "generator : $GENERATOR"
  if command -v ninja >/dev/null; then
    printf '%s\n' "ninja : $(ninja --version)"
  fi
  # CC / CXX are left unquoted on purpose so a value such as "ccache gcc" splits
  printf '%s\n' "cc : $(${CC:-cc} --version | head -n 1)"
  printf '%s\n' "cxx : $(${CXX:-c++} --version | head -n 1)"
  printf '%s\n' "build type : $BUILD_TYPE"
  printf '%s\n' "parallel jobs : $JOBS"
  printf '%s\n' "runs (median) : $RUNS"
  printf '%s\n' "tidesdb tag : $TIDESDB_TAG"
  printf '%s\n' "rocksdb tag : $ROCKSDB_TAG"
  printf '%s\n' "scope : core static library target only (tidesdb / rocksdb)"
} | tee "$META"
echo
| 127 | + |
# 1. clone (not timed)
clone_repo "https://github.com/tidesdb/tidesdb.git" "$TIDESDB_TAG" "$REPOS/tidesdb"
clone_repo "https://github.com/facebook/rocksdb.git" "$ROCKSDB_TAG" "$REPOS/rocksdb"
echo

# 2. benchmark each (force STATIC lib for an apples-to-apples artifact)
tidesdb_cmake_args=(
  -DBUILD_SHARED_LIBS=OFF
)
rocksdb_cmake_args=(
  -DROCKSDB_BUILD_SHARED=OFF
  # FAIL_ON_WARNINGS=OFF drops RocksDB's default -Werror. Necessary: GCC 12
  # emits a spurious -Wrestrict in options_type.h that aborts the stock build.
  # Also fairer -- TidesDB's default build has no -Werror. It changes neither
  # what is compiled nor the optimization level, so the measured build time is
  # unaffected.
  -DFAIL_ON_WARNINGS=OFF
)

bench "tidesdb" "$REPOS/tidesdb" "tidesdb" "${tidesdb_cmake_args[@]}"
echo
bench "rocksdb" "$REPOS/rocksdb" "rocksdb" "${rocksdb_cmake_args[@]}"
echo
| 145 | + |
# 3. summary
tds_med="$(median_of tidesdb)"
rdb_med="$(median_of rocksdb)"
log "Median build time (lower is faster)"
printf ' %bTidesDB %s%b : %ss\n' "$c_grn" "$TIDESDB_TAG" "$c_rst" "$tds_med"
printf ' %bRocksDB %s%b : %ss\n' "$c_grn" "$ROCKSDB_TAG" "$c_rst" "$rdb_med"

# Print the ratio only when both medians are real positive numbers. median_of
# prints "n/a" when a project has no samples; awk would compare that to 0 as a
# string (and find it greater), then fail with a division by zero in r/t.
# Adding 0 forces a numeric comparison, so "n/a" counts as 0 and is skipped.
if awk -v t="$tds_med" -v r="$rdb_med" \
  'BEGIN { exit !((t + 0) > 0 && (r + 0) > 0) }'; then
  ratio="$(awk -v t="$tds_med" -v r="$rdb_med" 'BEGIN { printf "%.1f", r / t }')"
  sub "RocksDB takes ~${ratio}x as long as TidesDB to build its core library."
fi
echo
sub "raw timings : $CSV"
sub "environment : $META"
sub "build logs : $LOGS/"
| 159 | + |
# 4. plot
# Optional step: runs plot.py (next to this script) on the CSV when python3 is
# available. A plotting failure is reported but does not fail the benchmark.
# Written as if/else rather than `cmd && ok || fail`, so the "skipped" message
# depends only on the exit status of plot.py.
if command -v python3 >/dev/null; then
  log "Plotting -> $RESULTS/build_comparison.png"
  if python3 "$ROOT/plot.py" "$CSV" "$RESULTS/build_comparison.png" \
    "$TIDESDB_TAG" "$ROCKSDB_TAG"; then
    sub "wrote $RESULTS/build_comparison.png"
  else
    sub "plot skipped (see error above)"
  fi
else
  sub "python3 not found - skipping plot"
fi
0 commit comments