Skip to content

Commit 64f8d83

Browse files
committed
benchmarks: add time-series microbenchmarks for serialize, varint, timestamps, bind
Standalone benchmark covering the hot paths for time-series write/read workloads. Establishes baselines before optimization:
- DateType.serialize (datetime 2025): ~1020 ns/call
- DateType.deserialize (2025): ~695 ns/call
- varint_pack (medium): ~643 ns/call
- varint_unpack (medium): ~1086 ns/call
- MonotonicTimestampGenerator: ~374 ns/call
- BoundStatement.bind (5-col): ~4027 ns/call
1 parent 8e6c4d4 commit 64f8d83

1 file changed

Lines changed: 235 additions & 0 deletions

File tree

benchmarks/bench_timeseries.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Microbenchmarks for time-series write and read hot paths.
4+
5+
Covers:
6+
- DateType.serialize / deserialize
7+
- varint_pack / varint_unpack
8+
- MonotonicTimestampGenerator
9+
- BoundStatement.bind() for a typical time-series schema
10+
11+
All results in nanoseconds per call. Run with:
12+
python benchmarks/bench_timeseries.py
13+
"""
14+
15+
import datetime
16+
import struct
17+
import sys
18+
import threading
19+
import time
20+
import timeit
21+
import uuid
22+
23+
# ---------------------------------------------------------------------------
24+
# Helpers
25+
# ---------------------------------------------------------------------------
26+
27+
WARMUP = 50_000
ITERATIONS = 500_000


def bench(label, stmt, setup="pass", number=ITERATIONS, warmup=WARMUP):
    """Time *stmt* under *setup*, print one result line, return ns/call.

    Args:
        label: Text shown at the start of the printed result line.
        stmt: Source of the statement(s) to time.
        setup: Source run by ``timeit`` once before each timing run; the
            names it binds are visible to *stmt*.
        number: How many timed executions of *stmt* to perform (>= 1).
        warmup: How many untimed executions to perform first; values
            below 1 skip the warmup.

    Returns:
        Mean time per execution of *stmt*, in nanoseconds.

    Raises:
        ValueError: If *number* is less than 1.
    """
    if number < 1:
        # Protects the division below and rejects runs that time nothing.
        raise ValueError(f"number must be >= 1, got {number!r}")

    # Timer.timeit() executes *setup* itself at the start of every call, so
    # the setup source is handed to the timer only and not exec'd separately.
    timer = timeit.Timer(stmt, setup, globals={})

    # Warm up through the timer so the exact compiled statement that is
    # measured below is the one that gets exercised beforehand.
    if warmup > 0:
        timer.timeit(number=warmup)

    raw = timer.timeit(number=number)
    ns = raw / number * 1e9
    print(f" {label:.<60s} {ns:>9.1f} ns/call")
    return ns
45+
46+
47+
# ---------------------------------------------------------------------------
48+
# DateType.serialize / deserialize
49+
# ---------------------------------------------------------------------------
50+
51+
52+
def bench_datetype():
    """Run the DateType.serialize and DateType.deserialize benchmarks.

    Each case is timed through ``bench``; the fixture source below is the
    setup code that defines the values being serialized. Deserialize cases
    reuse the same fixtures plus two pre-serialized payloads.
    """
    fixtures = """\
from cassandra.cqltypes import DateType
import datetime
dt_now = datetime.datetime(2025, 4, 5, 12, 0, 0, 123456)
dt_epoch = datetime.datetime(1970, 1, 1, 0, 0, 1, 0)
dt_far = datetime.datetime(2300, 1, 1, 0, 0, 0, 1000)
d_only = datetime.date(2025, 4, 5)
ts_int = 1712318400000
"""
    # (label, name of the fixture variable passed to serialize)
    serialize_cases = (
        ("serialize datetime (2025)", "dt_now"),
        ("serialize datetime (epoch)", "dt_epoch"),
        ("serialize datetime (2300)", "dt_far"),
        ("serialize date object", "d_only"),
        ("serialize raw int timestamp", "ts_int"),
    )
    print("\n=== DateType.serialize ===")
    for label, value_name in serialize_cases:
        bench(label, f"DateType.serialize({value_name}, 4)", fixtures)

    # Payloads are produced in setup so only deserialize itself is timed.
    packed_fixtures = fixtures + """\
packed_now = DateType.serialize(dt_now, 4)
packed_far = DateType.serialize(dt_far, 4)
"""
    deserialize_cases = (
        ("deserialize (2025)", "packed_now"),
        ("deserialize (2300)", "packed_far"),
    )
    print("\n=== DateType.deserialize ===")
    for label, packed_name in deserialize_cases:
        bench(label, f"DateType.deserialize({packed_name}, 4)", packed_fixtures)
79+
80+
81+
# ---------------------------------------------------------------------------
82+
# varint_pack / varint_unpack
83+
# ---------------------------------------------------------------------------
84+
85+
86+
def bench_varint():
    """Run the varint_pack and varint_unpack benchmarks.

    The same five integers (zero, small, medium, large, negative) are used
    for both directions; unpack cases operate on values packed in setup.
    """
    # Order matters only for the order of the printed result lines.
    value_names = ("zero", "small", "medium", "large", "negative")

    base_setup = """\
from cassandra.marshal import varint_pack, varint_unpack
small = 42
medium = 2**62
large = 2**127
negative = -(2**62)
zero = 0
"""
    print("\n=== varint_pack ===")
    for name in value_names:
        bench(f"varint_pack {name}", f"varint_pack({name})", base_setup)

    # Pre-pack every value in setup so the timed statement is unpack only.
    unpack_setup = base_setup + """\
packed_small = varint_pack(small)
packed_medium = varint_pack(medium)
packed_large = varint_pack(large)
packed_negative = varint_pack(negative)
packed_zero = varint_pack(zero)
"""
    print("\n=== varint_unpack ===")
    for name in value_names:
        bench(f"varint_unpack {name}", f"varint_unpack(packed_{name})", unpack_setup)
118+
119+
120+
# ---------------------------------------------------------------------------
121+
# MonotonicTimestampGenerator
122+
# ---------------------------------------------------------------------------
123+
124+
125+
def bench_timestamp_generator(n_threads=4):
    """Benchmark MonotonicTimestampGenerator single-threaded and contended.

    The single-thread case goes through ``bench``. The contended case starts
    *n_threads* workers that all call one shared generator, splitting
    ``ITERATIONS`` calls evenly between them, and prints the per-thread cost
    per call and the aggregate call rate.

    Args:
        n_threads: Number of worker threads for the contended case (>= 1).

    Raises:
        ValueError: If *n_threads* is less than 1.
        RuntimeError: If a worker thread exits without reporting its timing.
    """
    if n_threads < 1:
        raise ValueError(f"n_threads must be >= 1, got {n_threads!r}")

    print("\n=== MonotonicTimestampGenerator (single-thread) ===")
    setup = """\
from cassandra.timestamps import MonotonicTimestampGenerator
gen = MonotonicTimestampGenerator()
"""
    bench("generator call", "gen()", setup)

    print(f"\n=== MonotonicTimestampGenerator ({n_threads}-thread contention) ===")
    from cassandra.timestamps import MonotonicTimestampGenerator

    gen = MonotonicTimestampGenerator()
    calls_per_thread = ITERATIONS // n_threads
    # +1 party so the main thread releases all workers together.
    start_gate = threading.Barrier(n_threads + 1)

    # One (start_ns, end_ns) pair per worker, appended when it finishes.
    spans = []

    def worker():
        start_gate.wait()
        t0 = time.perf_counter_ns()
        for _ in range(calls_per_thread):
            gen()
        spans.append((t0, time.perf_counter_ns()))

    threads = [threading.Thread(target=worker) for _ in range(n_threads)]
    for t in threads:
        t.start()
    start_gate.wait()  # release all workers
    # join() alone waits for completion; a second barrier would block the
    # main thread forever if any worker died before reaching it.
    for t in threads:
        t.join()

    if len(spans) != n_threads:
        raise RuntimeError(
            f"{n_threads - len(spans)} worker thread(s) failed before finishing"
        )

    total_calls = n_threads * calls_per_thread
    # Wall time runs from the earliest worker start to the latest worker end.
    # The longest single worker duration would understate it, because the
    # workers do not all begin timing at the same instant.
    wall_ns = max(end for _, end in spans) - min(start for start, _ in spans)
    ns_per_call = wall_ns / calls_per_thread  # per-thread throughput
    label = f"contended ({n_threads} threads, per-thread)"
    print(f" {label:.<60s} {ns_per_call:>9.1f} ns/call")
    throughput = total_calls / (wall_ns / 1e9)
    print(f" {'aggregate throughput':.<60s} {throughput:>9.0f} calls/sec")
165+
166+
167+
# ---------------------------------------------------------------------------
168+
# BoundStatement.bind() — typical time-series schema
169+
# ---------------------------------------------------------------------------
170+
171+
172+
def bench_bind():
    """Run the BoundStatement.bind benchmark for a 5-column row.

    The fixture source builds a mocked prepared statement describing the
    columns (ts, sensor_id, value, quality, tag) and one row of values. The
    timed statement constructs a BoundStatement and binds that row, so both
    construction and binding are included in the reported figure.
    """
    fixture = """\
import datetime
from cassandra.query import BoundStatement, PreparedStatement
from cassandra.cqltypes import (
    DateType, Int32Type, DoubleType, FloatType, UTF8Type,
)
from cassandra.protocol import ProtocolVersion
from unittest.mock import MagicMock

# Build a mock PreparedStatement with 5 columns:
# (ts timestamp, sensor_id int, value double, quality float, tag text)
col_types = [DateType, Int32Type, DoubleType, FloatType, UTF8Type]
col_names = ['ts', 'sensor_id', 'value', 'quality', 'tag']

col_meta = []
for name, ctype in zip(col_names, col_types):
    cm = MagicMock()
    cm.name = name
    cm.keyspace_name = 'ks'
    cm.table_name = 'metrics'
    cm.type = ctype
    col_meta.append(cm)

ps = MagicMock(spec=PreparedStatement)
ps.column_metadata = col_meta
ps.routing_key_indexes = None
ps.protocol_version = 4
ps.column_encryption_policy = None
ps.serial_consistency_level = None
ps.retry_policy = None
ps.consistency_level = None
ps.fetch_size = None
ps.custom_payload = None
ps.is_idempotent = False

dt = datetime.datetime(2025, 4, 5, 12, 0, 0, 123456)
row = [dt, 42, 3.14159, 0.95, 'sensor-alpha-001']
"""
    # Statement under test: a fresh BoundStatement per iteration, then bind.
    bind_stmt = """\
bs = BoundStatement(ps)
bs.bind(row)
"""
    print("\n=== BoundStatement.bind (time-series schema) ===")
    bench("bind 5-col time-series row", bind_stmt, setup=fixture)
220+
221+
222+
# ---------------------------------------------------------------------------
223+
# Main
224+
# ---------------------------------------------------------------------------
225+
226+
if __name__ == "__main__":
    # Header: interpreter version and the per-benchmark iteration count.
    for banner in (
        f"Python {sys.version}",
        f"Iterations per benchmark: {ITERATIONS:,}",
    ):
        print(banner)

    # Run every benchmark suite in the order its results should be printed.
    for suite in (
        bench_datetype,
        bench_varint,
        bench_timestamp_generator,
        bench_bind,
    ):
        suite()

    print("\nDone.")

0 commit comments

Comments
 (0)