Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
use flake
layout python
49 changes: 49 additions & 0 deletions benchmarks2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SDK Benchmarks

Simple Go-style benchmarks for measuring SDK overhead.

## Quick Start (Automated)

```bash
./run_benchmark.sh [duration_seconds]
```

This automatically:
1. Starts the delay server
2. Runs baseline benchmark (SDK disabled)
3. Runs benchmark with SDK enabled
4. Prints comparison with overhead percentages

## Manual Mode

```bash
# Terminal 1: Start delay server
python delay_server.py

# Terminal 2: Run baseline (SDK disabled)
TUSK_DRIFT_MODE=DISABLED python app.py

# Terminal 3: Run benchmark
python benchmark.py --url=http://localhost:8080

# Terminal 2: Restart with SDK enabled
# Ctrl+C, then:
TUSK_DRIFT_MODE=RECORD python app.py

# Terminal 3: Run benchmark again
python benchmark.py --url=http://localhost:8080
```

## Output

```
Benchmark_Sort 1000 1234567 ns/op 810.37 ops/s
Benchmark_Downstream 500 20123456 ns/op 49.69 ops/s
```

## For Node SDK

Use the same `benchmark.py` against Node test server:
```bash
python benchmark.py --url=http://localhost:3000
```
41 changes: 41 additions & 0 deletions benchmarks2/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""Test app for SDK benchmarks. Respects TUSK_DRIFT_MODE env var."""
import logging
import os
import requests as req
from flask import Flask, request, jsonify

# Disable Flask/Werkzeug request logging
logging.getLogger('werkzeug').setLevel(logging.ERROR)

# SDK initialization based on env var: any value other than DISABLED
# (e.g. RECORD) loads and initializes the Tusk Drift SDK so its overhead
# is included in the benchmarked request path.
MODE = os.environ.get('TUSK_DRIFT_MODE', 'DISABLED')
if MODE != 'DISABLED':
    # Imported lazily so the baseline (DISABLED) run never pays SDK import cost.
    from drift import TuskDrift
    TuskDrift.initialize(
        api_key="benchmark-key",  # dummy key; benchmarks don't talk to a real backend
        env="benchmark",
        log_level="error",
    )

app = Flask(__name__)
# Downstream dependency called by /api/downstream; overridable for non-local setups.
DELAY_SERVER = os.environ.get('DELAY_SERVER', 'http://127.0.0.1:9999')

@app.route('/health')
def health():
    """Liveness endpoint polled by the benchmark runner before it starts timing."""
    return 'ok'

@app.route('/api/sort', methods=['POST'])
def api_sort():
    """Sort the JSON body's 'data' list — pure in-process CPU work, no I/O."""
    payload = request.json['data']
    return jsonify({'sorted': sorted(payload)})

@app.route('/api/downstream', methods=['POST'])
def api_downstream():
    """Call the delay server to simulate a downstream dependency, then acknowledge."""
    ms = request.json.get('delay_ms', 10)
    req.get(f'{DELAY_SERVER}/delay?ms={ms}')
    return jsonify({'status': 'ok'})

if __name__ == '__main__':
    # Threaded so concurrent benchmark requests don't serialize on one worker.
    listen_port = int(os.environ.get('PORT', 8080))
    app.run(host='127.0.0.1', port=listen_port, threaded=True)
113 changes: 113 additions & 0 deletions benchmarks2/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Benchmark runner for SDK performance testing.
Works with any HTTP server implementing /api/sort and /api/downstream.

Usage:
python benchmark.py --url=http://localhost:8080 [--duration=5] [--baseline=baseline.txt]
"""
import argparse
import random
import time
import requests

def benchmark(name, func, duration_sec):
    """Invoke *func* repeatedly until *duration_sec* seconds have elapsed.

    Returns a tuple (iterations, ns_per_op, ops_per_sec). *name* is accepted
    for symmetry with the printed output but is not used here.
    """
    started = time.perf_counter_ns()
    stop_at = started + (duration_sec * 1_000_000_000)
    count = 0

    while time.perf_counter_ns() < stop_at:
        func()
        count += 1

    total_ns = time.perf_counter_ns() - started
    # Guard both divisions: zero iterations (deadline already past) and a
    # zero-width elapsed window must not raise.
    per_op = total_ns // count if count > 0 else 0
    rate = count / (total_ns / 1_000_000_000) if total_ns > 0 else 0
    return count, per_op, rate

def print_result(name, iterations, ns_per_op, ops_per_sec):
    """Emit one Go-style benchmark result line."""
    line = f"{name:<25} {iterations:>8} {ns_per_op:>15} ns/op {ops_per_sec:>12.2f} ops/s"
    print(line)

def parse_results(filepath):
    """Parse a benchmark output file into a dict of {benchmark_name: ops_per_sec}.

    Expects lines in the Go-style format emitted by print_result:
        Benchmark_Sort  1000  1234567 ns/op  810.37 ops/s
    Non-benchmark lines are ignored.
    """
    results = {}
    with open(filepath) as f:
        for line in f:
            if line.startswith('Benchmark_'):
                parts = line.split()
                name = parts[0]
                # Tokens: [name, iterations, ns_per_op, 'ns/op', ops_s, 'ops/s'].
                # The ops/s NUMBER is at index 4; index 5 is the unit label,
                # which the previous code passed to float() (ValueError).
                ops_s = float(parts[4])
                results[name] = ops_s
    return results

def print_comparison(baseline, current):
    """Print a table comparing baseline vs current ops/s with a percentage diff."""
    rule = "=" * 70
    print("\n" + rule)
    print("COMPARISON (negative = slower with SDK)")
    print(rule)
    print(f"{'Benchmark':<25} {'Baseline':>12} {'Current':>12} {'Diff':>12}")
    print("-" * 70)
    for bench, base_ops in baseline.items():
        curr_ops = current.get(bench)
        # Skip benchmarks missing from the current run or with a degenerate baseline.
        if curr_ops is None or base_ops <= 0:
            continue
        diff_pct = (curr_ops - base_ops) / base_ops * 100
        print(f"{bench:<25} {base_ops:>10.2f}/s {curr_ops:>10.2f}/s {diff_pct:>+10.1f}%")

def main():
    """CLI entry point: wait for the server, run both benchmarks, compare to baseline.

    Returns 1 if the target server never answers /health, otherwise None.
    """
    parser = argparse.ArgumentParser(description='Benchmark SDK overhead')
    parser.add_argument('--url', required=True, help='Base URL of test server')
    parser.add_argument('--duration', type=int, default=5, help='Seconds per benchmark')
    parser.add_argument('--baseline', help='Baseline results file to compare against')
    args = parser.parse_args()

    base_url = args.url.rstrip('/')
    session = requests.Session()

    # Poll /health (up to ~5s total) so benchmarks never race server startup.
    print(f"Connecting to {base_url}...")
    ready = False
    for _ in range(50):
        try:
            session.get(f'{base_url}/health', timeout=0.5)
            ready = True
            break
        except Exception:
            time.sleep(0.1)
    if not ready:
        print("ERROR: Server not responding")
        return 1

    print(f"Running benchmarks (duration={args.duration}s per test)...\n")

    results = {}

    def run_case(name, do_request):
        # Run one benchmark case, print its Go-style line, and record ops/s.
        iters, ns_op, ops_s = benchmark(name, do_request, args.duration)
        print_result(name, iters, ns_op, ops_s)
        results[name] = ops_s

    # CPU-bound case: the server sorts a shuffled 1000-element list per request.
    test_data = list(range(1000))
    random.shuffle(test_data)
    run_case('Benchmark_Sort',
             lambda: session.post(f'{base_url}/api/sort', json={'data': test_data}).raise_for_status())

    # I/O-bound case: the server fans out to the delay server (10 ms per request).
    run_case('Benchmark_Downstream',
             lambda: session.post(f'{base_url}/api/downstream', json={'delay_ms': 10}).raise_for_status())

    # Optional overhead comparison against a previously saved baseline run.
    if args.baseline:
        baseline = parse_results(args.baseline)
        print_comparison(baseline, results)

if __name__ == '__main__':
    # Propagate main()'s return value (1 when the server never responds) as the
    # process exit code; previously it was computed but discarded, so the
    # script exited 0 even on failure — breaking `set -e` callers like
    # run_benchmark.sh.
    raise SystemExit(main())
22 changes: 22 additions & 0 deletions benchmarks2/delay_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Simple delay server for downstream benchmarks."""
import logging
import time
from flask import Flask, request

# Silence per-request werkzeug access logs so they don't pollute benchmark output.
logging.getLogger('werkzeug').setLevel(logging.ERROR)

app = Flask(__name__)

@app.route('/delay')
def delay():
    """Block for ?ms= milliseconds (default 10), then acknowledge."""
    millis = int(request.args.get('ms', 10))
    time.sleep(millis / 1000)
    return 'ok'

@app.route('/health')
def health():
    """Readiness probe for the benchmark orchestration script."""
    return 'ok'

if __name__ == '__main__':
    # Threaded so overlapping delay requests from the app under test can sleep concurrently.
    app.run(host='127.0.0.1', port=9999, threaded=True)
51 changes: 51 additions & 0 deletions benchmarks2/run_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
# Automated benchmark runner - compares SDK disabled vs enabled performance.
#
# Usage: ./run_benchmark.sh [duration_seconds]
#
# Flow: start the delay server, benchmark the app with the SDK disabled
# (saving the output as a baseline), restart the app with the SDK in RECORD
# mode, and benchmark again with a comparison against the saved baseline.

set -e

DURATION=${1:-5}
# Run from the directory containing this script so the python files resolve.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

# Kill both background servers on any exit path (success, error, or Ctrl+C).
cleanup() {
    echo "Cleaning up..."
    kill $DELAY_PID 2>/dev/null || true
    kill $APP_PID 2>/dev/null || true
}
trap cleanup EXIT

# Start delay server
echo "Starting delay server..."
python delay_server.py &
DELAY_PID=$!
sleep 1

# Run baseline (SDK disabled)
echo ""
echo "============================================================"
echo "BASELINE (SDK DISABLED)"
echo "============================================================"

TUSK_DRIFT_MODE=DISABLED python app.py &
APP_PID=$!
sleep 1

# tee saves the baseline so the SDK run below can compute overhead deltas.
python benchmark.py --url=http://localhost:8080 --duration="$DURATION" | tee /tmp/baseline.txt

# Stop the baseline app and wait for it to release port 8080 before the
# SDK-enabled instance binds the same port.
kill $APP_PID 2>/dev/null || true
wait $APP_PID 2>/dev/null || true
sleep 0.5

# Run with SDK enabled (with baseline comparison)
echo ""
echo "============================================================"
echo "WITH SDK (TUSK_DRIFT_MODE=RECORD)"
echo "============================================================"

TUSK_DRIFT_MODE=RECORD python app.py &
APP_PID=$!
sleep 1

python benchmark.py --url=http://localhost:8080 --duration="$DURATION" --baseline=/tmp/baseline.txt
27 changes: 27 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
  # Dev-shell flake: provides a Python 3 environment for the benchmark scripts.
  description = "A very basic flake";

  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
  };

  outputs =
    {
      self,
      nixpkgs,
    }:
    let
      # Platforms this flake supports.
      x86-linux = "x86_64-linux";
      arm-linux = "aarch64-linux";
      arm-macos = "aarch64-darwin";

      defaultSystems = [
        x86-linux
        arm-linux
        arm-macos
      ];
      # eachSystem fun systems: attrset mapping each system name to `fun pkgs`,
      # where pkgs is nixpkgs instantiated for that system.
      eachSystem =
        fun: systems:
        nixpkgs.lib.genAttrs systems (
          system:
          let
            pkgs = import nixpkgs {
              inherit system;
            };
          in
          fun pkgs
        );
    in
    {
      # Shell with a bare python3; add packages inside withPackages as needed.
      devShell = eachSystem (
        pkgs:
        with pkgs;
        mkShell {
          buildInputs = [
            (python3.withPackages (p: with p; [ ]))
          ];
        }
      ) defaultSystems;
    };

}