Skip to content

Commit e6e7a67

Browse files
committed
feat: add roofline analysis (RooflineAnalyzer, measure_peaks)
Adds measurement primitives for compute-vs-memory-bound layer analysis:

- measure_peaks(): empirical probe of peak FLOPs/s (matmul) and streaming bandwidth (cache-defeating memcpy). Pins TF32 off by default for honest fp32 peaks; caches per (device, dtype, sizes).
- HardwarePeaks: dataclass with peak_flops, peak_bandwidth, ridge_point, and the flags under which they were measured.
- RooflineAnalyzer: per-layer profiler with .profile() / .summary() / .plot(). Single-pass hooks measure FLOPs (analytical for Conv and Linear), bytes (weights + input + output, Williams 2009), and time. Classifies each layer as memory-bound or compute-bound; layers outside Conv/Linear land in an "undefined" bucket with a warning.
- Plotly log-log roofline with transparent background and the project teal palette.

Per the measurement-only contract, fasterbench exposes numbers and the plot; compression decisions belong in fasterrecipes.

Includes:

- API notebook nbs/analysis/roofline.ipynb with inline unit tests (hand-computed Conv2d flops/bytes, cache test, Linear stack) and #|slow integration tests (ResNet-18 CPU with synthetic peaks, CUDA smoke test guarded by is_available()).
- Tutorial nbs/tutorials/roofline.ipynb showing hardware peaks, ResNet-18 profiling, and AI shift across input resolutions.
- Sidebar + index.ipynb re-exports.
1 parent 4c06d71 commit e6e7a67

10 files changed

Lines changed: 1182 additions & 44 deletions

File tree

fasterbench/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from .compute import ComputeMetrics, compute_compute
2020
from .memory import MemoryMetrics, compute_memory, compute_memory_multi
2121
from .energy import EnergyMetrics, compute_energy, compute_energy_multi
22+
from fasterbench.roofline import (
23+
HardwarePeaks, RooflinePoint, measure_peaks, clear_peaks_cache, RooflineAnalyzer,
24+
)
2225
from .plot import create_radar_plot, SPECS
2326
from .utils import parse_metric_value
2427

@@ -38,6 +41,8 @@
3841
'MemoryMetrics', 'compute_memory', 'compute_memory_multi',
3942
# Energy
4043
'EnergyMetrics', 'compute_energy', 'compute_energy_multi',
44+
# Roofline
45+
'HardwarePeaks', 'RooflinePoint', 'measure_peaks', 'clear_peaks_cache', 'RooflineAnalyzer',
4146
# Plot
4247
'create_radar_plot', 'SPECS',
4348
# Report

fasterbench/_modidx.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,36 @@
130130
'fasterbench.report._generate_css': ('analysis/report.html#_generate_css', 'fasterbench/report.py'),
131131
'fasterbench.report._improvement_indicator': ( 'analysis/report.html#_improvement_indicator',
132132
'fasterbench/report.py')},
133+
'fasterbench.roofline': { 'fasterbench.roofline.HardwarePeaks': ( 'analysis/roofline.html#hardwarepeaks',
134+
'fasterbench/roofline.py'),
135+
'fasterbench.roofline.HardwarePeaks.as_dict': ( 'analysis/roofline.html#hardwarepeaks.as_dict',
136+
'fasterbench/roofline.py'),
137+
'fasterbench.roofline.RooflineAnalyzer': ( 'analysis/roofline.html#rooflineanalyzer',
138+
'fasterbench/roofline.py'),
139+
'fasterbench.roofline.RooflineAnalyzer.__init__': ( 'analysis/roofline.html#rooflineanalyzer.__init__',
140+
'fasterbench/roofline.py'),
141+
'fasterbench.roofline.RooflineAnalyzer.plot': ( 'analysis/roofline.html#rooflineanalyzer.plot',
142+
'fasterbench/roofline.py'),
143+
'fasterbench.roofline.RooflineAnalyzer.profile': ( 'analysis/roofline.html#rooflineanalyzer.profile',
144+
'fasterbench/roofline.py'),
145+
'fasterbench.roofline.RooflineAnalyzer.results': ( 'analysis/roofline.html#rooflineanalyzer.results',
146+
'fasterbench/roofline.py'),
147+
'fasterbench.roofline.RooflineAnalyzer.summary': ( 'analysis/roofline.html#rooflineanalyzer.summary',
148+
'fasterbench/roofline.py'),
149+
'fasterbench.roofline.RooflinePoint': ( 'analysis/roofline.html#rooflinepoint',
150+
'fasterbench/roofline.py'),
151+
'fasterbench.roofline.RooflinePoint.as_dict': ( 'analysis/roofline.html#rooflinepoint.as_dict',
152+
'fasterbench/roofline.py'),
153+
'fasterbench.roofline._layer_flops': ( 'analysis/roofline.html#_layer_flops',
154+
'fasterbench/roofline.py'),
155+
'fasterbench.roofline._pinned_benchmark_flags': ( 'analysis/roofline.html#_pinned_benchmark_flags',
156+
'fasterbench/roofline.py'),
157+
'fasterbench.roofline._setup_roofline_hooks': ( 'analysis/roofline.html#_setup_roofline_hooks',
158+
'fasterbench/roofline.py'),
159+
'fasterbench.roofline.clear_peaks_cache': ( 'analysis/roofline.html#clear_peaks_cache',
160+
'fasterbench/roofline.py'),
161+
'fasterbench.roofline.measure_peaks': ( 'analysis/roofline.html#measure_peaks',
162+
'fasterbench/roofline.py')},
133163
'fasterbench.size': { 'fasterbench.size.SizeMetrics': ('metrics/size.html#sizemetrics', 'fasterbench/size.py'),
134164
'fasterbench.size.SizeMetrics.as_dict': ('metrics/size.html#sizemetrics.as_dict', 'fasterbench/size.py'),
135165
'fasterbench.size.compute_size': ('metrics/size.html#compute_size', 'fasterbench/size.py'),

0 commit comments

Comments
 (0)