
Commit 02e2a23

chore: add radar benchmark suite (#267)
This PR adds to cslib the benchmark suite that radar has already been using for a while. It can also be run standalone by following the instructions in `scripts/bench/README.md`. Radar will automatically switch to this suite once it is merged. There have been some small changes to bring the benchmark suite in line with the mathlib benchmark suite, but nothing that should affect the results.
1 parent 465167a commit 02e2a23

11 files changed

Lines changed: 411 additions & 0 deletions


scripts/bench/README.md

Lines changed: 25 additions & 0 deletions
# Cslib benchmark suite

This directory contains the cslib benchmark suite.
It is built around [radar](https://github.com/leanprover/radar),
and benchmark results can be viewed
on the [Lean FRO radar instance](https://radar.lean-lang.org/repos/cslib).

To execute the entire suite, run `scripts/bench/run` in the repo root.
To execute an individual benchmark, run `scripts/bench/<benchmark>/run` in the repo root.
All scripts output their measurements into the file `measurements.jsonl`.

Radar sums any duplicated measurements with matching metric names.
To post-process the `measurements.jsonl` file this way in-place,
run `scripts/bench/combine.py` in the repo root after executing the benchmark suite.

The `*.py` symlinks exist only so the Python files are a bit nicer to edit
in text editors that rely on the file extension.

## Adding a benchmark

To add a benchmark to the suite, follow these steps:

1. Create a new folder containing a `run` script (see the sketch below) and a `README.md` file describing the benchmark,
   as well as any other files required for the benchmark.
2. Edit `scripts/bench/run` to call the `run` script of your new benchmark.
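For illustration, a new benchmark's `run` script only needs to append measurement lines to `measurements.jsonl`. Here is a minimal hypothetical sketch (the folder name `demo` and the metric `demo//answer` are made up for this example):

```python
#!/usr/bin/env python3
# Hypothetical scripts/bench/demo/run: append one measurement line in the
# same JSON-lines format the other benchmarks use ("metric", "value", and
# optionally "unit").
import json
from pathlib import Path

with open(Path() / "measurements.jsonl", "a+") as f:
    f.write(json.dumps({"metric": "demo//answer", "value": 42.0}) + "\n")
```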

scripts/bench/build/README.md

Lines changed: 24 additions & 0 deletions
# The `build` benchmark

This benchmark executes a complete build of cslib and collects global and per-module metrics.

The following metrics are collected by a wrapper around the entire build process:

- `build//instructions`
- `build//maxrss`
- `build//task-clock`
- `build//wall-clock`

The following metrics are collected from `lean --profile` and summed across all modules:

- `build/profile/<name>//wall-clock`

The following metrics are collected from `lakeprof report`:

- `build/lakeprof/longest build path//wall-clock`
- `build/lakeprof/longest rebuild path//wall-clock`

The following metrics are collected individually for each module:

- `build/module/<name>//lines`
- `build/module/<name>//instructions`
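As a concrete illustration, a hypothetical module `Cslib.Foo` would contribute lines like these to `measurements.jsonl` (all values made up):

```jsonl
{"metric": "build/module/Cslib.Foo//lines", "value": 312}
{"metric": "build/module/Cslib.Foo//instructions", "value": 1234567890.0}
{"metric": "build/profile/elaboration//wall-clock", "value": 0.42, "unit": "s"}
```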
Lines changed: 94 additions & 0 deletions
#!/usr/bin/env python3
# Fake `lean` executable: scripts/bench/build/run points lake at this script
# via LAKE_OVERRIDE_LEAN/LEAN. For each module it counts source lines and
# re-invokes the real `lean` (with profiling enabled) under measure.py.

import argparse
import json
import re
import subprocess
import sys
from pathlib import Path

NAME = "build"
REPO = Path()
BENCH = REPO / "scripts" / "bench"
OUTFILE = REPO / "measurements.jsonl"


def save_result(metric: str, value: float, unit: str | None = None) -> None:
    data = {"metric": metric, "value": value}
    if unit is not None:
        data["unit"] = unit
    with open(OUTFILE, "a+") as f:
        f.write(f"{json.dumps(data)}\n")


def run(*command: str) -> None:
    result = subprocess.run(command)
    if result.returncode != 0:
        sys.exit(result.returncode)


def run_stderr(*command: str) -> str:
    result = subprocess.run(command, capture_output=True, encoding="utf-8")
    if result.returncode != 0:
        print(result.stdout, end="", file=sys.stdout)
        print(result.stderr, end="", file=sys.stderr)
        sys.exit(result.returncode)
    return result.stderr


def get_module(setup: Path) -> str:
    with open(setup) as f:
        return json.load(f)["name"]


def count_lines(module: str, path: Path) -> None:
    with open(path) as f:
        lines = sum(1 for _ in f)
    save_result(f"{NAME}/module/{module}//lines", lines)


def run_lean(module: str) -> None:
    stderr = run_stderr(
        f"{BENCH}/measure.py",
        *("-t", f"{NAME}/module/{module}"),
        *("-m", "instructions"),
        "--",
        *("lean", "--profile", "-Dprofiler.threshold=9999999"),
        *sys.argv[1:],
    )

    for line in stderr.splitlines():
        # Output of `lean --profile`
        # See timeit.cpp for the time format
        if match := re.fullmatch(r"\t(.*) ([\d.]+)(m?s)", line):
            name = match.group(1)
            seconds = float(match.group(2))
            if match.group(3) == "ms":
                seconds = seconds / 1000
            save_result(f"{NAME}/profile/{name}//wall-clock", seconds, "s")


def main() -> None:
    if sys.argv[1:] == ["--print-prefix"]:
        print(Path(__file__).resolve().parent.parent)
        return

    if sys.argv[1:] == ["--githash"]:
        run("lean", "--githash")
        return

    parser = argparse.ArgumentParser()
    parser.add_argument("lean", type=Path)
    parser.add_argument("--setup", type=Path)
    args, _ = parser.parse_known_args()

    lean: Path = args.lean
    setup: Path = args.setup

    module = get_module(setup)
    count_lines(module, lean)
    run_lean(module)


if __name__ == "__main__":
    main()
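To make the profile parsing in `run_lean` concrete, here is a small self-contained sketch that applies the same regex to a made-up `lean --profile` stderr line (the phase name and timing are invented):

```python
import re

# A made-up line in the format `lean --profile` prints on stderr:
# a tab, a phase name, and a time with an "ms" or "s" suffix.
line = "\telaboration 123.456ms"

if match := re.fullmatch(r"\t(.*) ([\d.]+)(m?s)", line):
    seconds = float(match.group(2))
    if match.group(3) == "ms":
        seconds /= 1000
    print(match.group(1), seconds)  # elaboration 0.123456
```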
Lines changed: 1 addition & 0 deletions

lean

scripts/bench/build/run

Lines changed: 17 additions & 0 deletions
#!/usr/bin/env bash
set -euxo pipefail

BENCH="scripts/bench"

# Prepare build
lake exe cache get

# Run build
LAKE_OVERRIDE_LEAN=true LEAN=$(realpath "$BENCH/build/fake-root/bin/lean") \
    "$BENCH/measure.py" -t build \
    -m instructions -m maxrss -m task-clock -m wall-clock -- \
    lakeprof record lake build --no-cache

# Analyze lakeprof data
lakeprof report -pj | jq -c '{metric: "build/lakeprof/longest build path//wall-clock", value: .[-1][2], unit: "s"}' >> measurements.jsonl
lakeprof report -rj | jq -c '{metric: "build/lakeprof/longest rebuild path//wall-clock", value: .[-1][2], unit: "s"}' >> measurements.jsonl
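Since `measure.py` (shown further down in this commit) is a generic wrapper, it can also be invoked on its own. A hypothetical sketch, assuming a Linux machine where `perf` is installed and usable (the topic `demo` and the toy command `sleep 1` are made up):

```python
# Hypothetical standalone invocation of scripts/bench/measure.py, mirroring
# how build/run drives it above.
import subprocess

subprocess.run(
    [
        "scripts/bench/measure.py",
        "-t", "demo",                        # made-up topic prefix
        "-m", "wall-clock", "-m", "maxrss",  # metrics defined in measure.py
        "--",
        "sleep", "1",                        # toy command to measure
    ],
    check=True,
)
# measurements.jsonl then gains lines such as:
# {"metric": "demo//wall-clock", "value": 1.0, "unit": "s"}
# {"metric": "demo//maxrss", "value": 2048000, "unit": "B"}
```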

scripts/bench/combine.py

Lines changed: 31 additions & 0 deletions
#!/usr/bin/env python3

import argparse
import json
from pathlib import Path

OUTFILE = Path() / "measurements.jsonl"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=f"Combine duplicated measurements in {OUTFILE.name} the way radar does, by summing their values."
    )
    args = parser.parse_args()

    values: dict[str, float] = {}
    units: dict[str, str | None] = {}

    with open(OUTFILE, "r") as f:
        for line in f:
            data = json.loads(line)
            metric = data["metric"]
            values[metric] = values.get(metric, 0) + data["value"]
            units[metric] = data.get("unit")

    with open(OUTFILE, "w") as f:
        for metric, value in values.items():
            unit = units.get(metric)
            data = {"metric": metric, "value": value}
            if unit is not None:
                data["unit"] = unit
            f.write(f"{json.dumps(data)}\n")

scripts/bench/measure.py

Lines changed: 160 additions & 0 deletions
#!/usr/bin/env python3

import argparse
import json
import os
import resource
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path

OUTFILE = Path() / "measurements.jsonl"


@dataclass
class PerfMetric:
    event: str
    factor: float = 1
    unit: str | None = None


@dataclass
class RusageMetric:
    name: str
    factor: float = 1
    unit: str | None = None


PERF_METRICS = {
    "task-clock": PerfMetric("task-clock", factor=1e-9, unit="s"),
    "wall-clock": PerfMetric("duration_time", factor=1e-9, unit="s"),
    "instructions": PerfMetric("instructions"),
}

PERF_UNITS = {
    "msec": 1e-3,
    "ns": 1e-9,
}

RUSAGE_METRICS = {
    "maxrss": RusageMetric("ru_maxrss", factor=1000, unit="B"),  # KiB on linux
}

ALL_METRICS = {**PERF_METRICS, **RUSAGE_METRICS}


def measure_perf(cmd: list[str], events: list[str]) -> dict[str, tuple[float, str]]:
    with tempfile.NamedTemporaryFile() as tmp:
        cmd = [
            *["perf", "stat", "-j", "-o", tmp.name],
            *[arg for event in events for arg in ["-e", event]],
            *["--", *cmd],
        ]

        # Execute command
        env = os.environ.copy()
        env["LC_ALL"] = "C"  # or else perf may output syntactically invalid json
        result = subprocess.run(cmd, env=env)
        if result.returncode != 0:
            sys.exit(result.returncode)

        # Collect results
        perf = {}
        for line in tmp:
            data = json.loads(line)
            if "event" in data and "counter-value" in data:
                perf[data["event"]] = float(data["counter-value"]), data["unit"]

        return perf


@dataclass
class Result:
    category: str
    value: float
    unit: str | None

    def fmt(self, topic: str) -> str:
        metric = f"{topic}//{self.category}"
        if self.unit is None:
            return json.dumps({"metric": metric, "value": self.value})
        return json.dumps({"metric": metric, "value": self.value, "unit": self.unit})


def measure(cmd: list[str], metrics: list[str]) -> list[Result]:
    # Check args
    unknown_metrics = []
    for metric in metrics:
        if metric not in RUSAGE_METRICS and metric not in PERF_METRICS:
            unknown_metrics.append(metric)
    if unknown_metrics:
        raise Exception(f"unknown metrics: {', '.join(unknown_metrics)}")

    # Prepare perf events
    events: list[str] = []
    for metric in metrics:
        if info := PERF_METRICS.get(metric):
            events.append(info.event)

    # Measure
    perf = measure_perf(cmd, events)
    rusage = resource.getrusage(resource.RUSAGE_CHILDREN)

    # Extract results
    results = []
    for metric in metrics:
        if info := PERF_METRICS.get(metric):
            if info.event in perf:
                value, unit = perf[info.event]
            else:
                # Without the corresponding permissions,
                # we only get access to the userspace versions of the counters.
                value, unit = perf[f"{info.event}:u"]

            value *= PERF_UNITS.get(unit, info.factor)
            results.append(Result(metric, value, info.unit))

        if info := RUSAGE_METRICS.get(metric):
            value = getattr(rusage, info.name) * info.factor
            results.append(Result(metric, value, info.unit))

    return results


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=f"Measure resource usage of a command using perf and rusage. The results are appended to {OUTFILE.name}.",
    )
    parser.add_argument(
        "-t",
        "--topic",
        action="append",
        default=[],
        help="topic prefix for the metrics",
    )
    parser.add_argument(
        "-m",
        "--metric",
        action="append",
        default=[],
        help=f"metrics to measure. Can be specified multiple times. Available metrics: {', '.join(sorted(ALL_METRICS))}",
    )
    parser.add_argument(
        "cmd",
        nargs="*",
        help="command to measure the resource usage of",
    )
    args = parser.parse_args()

    topics: list[str] = args.topic
    metrics: list[str] = args.metric
    cmd: list[str] = args.cmd

    results = measure(cmd, metrics)

    with open(OUTFILE, "a+") as f:
        for result in results:
            for topic in topics:
                f.write(f"{result.fmt(topic)}\n")

scripts/bench/run

Lines changed: 10 additions & 0 deletions
#!/usr/bin/env bash
set -euo pipefail

BENCH="scripts/bench"

echo "Running benchmark: build"
"$BENCH/build/run"

echo "Running benchmark: size"
"$BENCH/size/run"

scripts/bench/size/README.md

Lines changed: 8 additions & 0 deletions
# The `size` benchmark

This benchmark measures a few deterministic values:

- `size/.lean//files`
- `size/.lean//lines`
- `size/.olean//files`
- `size/.olean//bytes`
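The `size/run` script itself is not included in the excerpt above. Below is a minimal hypothetical sketch of how such deterministic values could be counted, assuming `.lean` sources live in the repo root and `.olean` artifacts under `.lake` (both assumptions for this example):

```python
# Hypothetical sketch of the size metrics (not the actual size/run script).
from pathlib import Path

lean_files = sorted(Path(".").rglob("*.lean"))
olean_files = sorted(Path(".lake").rglob("*.olean"))  # assumed Lake build dir

lean_lines = 0
for path in lean_files:
    with open(path) as f:
        lean_lines += sum(1 for _ in f)

print(len(lean_files), lean_lines)  # size/.lean//files, size/.lean//lines
print(len(olean_files), sum(p.stat().st_size for p in olean_files))
# size/.olean//files, size/.olean//bytes
```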
