Skip to content

Commit 9939891

Browse files
committed
add profiler, update tutorials, clean code base
1 parent e988736 commit 9939891

29 files changed

Lines changed: 3394 additions & 10104 deletions

README.md

Lines changed: 93 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -39,104 +39,151 @@ from fasterbench import benchmark
3939

4040
# Load your model
4141
model = resnet18()
42-
43-
# Create sample input
4442
dummy_input = torch.randn(1, 3, 224, 224)
4543

4644
# Run comprehensive benchmarks
47-
results = benchmark(model, dummy_input)
45+
result = benchmark(model, dummy_input, metrics=["size", "speed", "compute"])
46+
47+
# Typed access (IDE autocomplete!)
48+
print(f"Size: {result.size.size_mib:.2f} MiB")
49+
print(f"Params: {result.size.num_params:,}")
50+
print(f"CPU Latency: {result.speed['cpu'].mean_ms:.2f} ms")
51+
print(f"MACs: {result.compute.macs_m}M")
4852

49-
# Print results
50-
for metric, value in results.items():
51-
print(f"{metric}: {value}")
53+
# Backward-compatible dict access still works
54+
print(result["size_disk_bytes"])
5255
```
5356

5457
## Features
5558

56-
### All-in-one Benchmarking
59+
### BenchmarkResult: Typed Access + Backward Compatibility
5760

58-
Get comprehensive metrics with a single function call:
61+
The
62+
[`benchmark()`](https://FasterAI-Labs.github.io/fasterbench/benchmark.html#benchmark)
63+
function returns a
64+
[`BenchmarkResult`](https://FasterAI-Labs.github.io/fasterbench/benchmark.html#benchmarkresult)
65+
object with both typed attribute access and dict-like access:
5966

6067
``` python
61-
# Measure all metrics
62-
results = benchmark(model, dummy_input)
68+
result = benchmark(model, dummy_input, metrics=["size", "speed", "compute"])
69+
70+
# Typed access - IDE autocomplete and type checking
71+
result.size.size_mib # 44.59
72+
result.size.num_params # 11689512
73+
result.speed["cpu"].mean_ms # 45.23
74+
result.speed["cpu"].throughput_s # 22.1
75+
result.compute.macs_m # 1819.066
76+
result.compute.macs_available # True (False if MACs couldn't be computed)
77+
78+
# Dict access - backward compatible with existing code
79+
result["size_size_mib"] # 44.59
80+
result["speed_cpu_mean_ms"] # 45.23
81+
for key, value in result.items():
82+
print(f"{key}: {value}")
83+
```
84+
85+
### Human-Readable Summary
6386

64-
# Or select specific metrics
65-
results = benchmark(model, dummy_input, metrics=["size", "speed"])
87+
Get a quick overview with formatted output:
88+
89+
``` python
90+
result.summary() # prints directly
6691
```
6792

68-
### Size Metrics
93+
═══ Size ════════════════════════════════════
94+
Disk: 44.59 MiB
95+
Params: 11.69M
96+
═══ Speed ═══════════════════════════════════
97+
cpu: 45.23 ms │ 22.1 inf/s │ p99: 48.12 ms
98+
═══ Compute ═════════════════════════════════
99+
MACs: 1819.1 M
100+
Params: 11.69 M
69101

70-
Evaluate model size characteristics:
102+
### Export to DataFrame or JSON
71103

72104
``` python
73-
from fasterbench import compute_size
105+
# Convert to pandas DataFrame for analysis
106+
df = result.to_dataframe()
107+
df.to_csv("benchmark_results.csv")
74108

75-
size_metrics = compute_size(model)
76-
print(f"Disk Size: {size_metrics.size_mib:.2f} MiB")
77-
print(f"Parameters: {size_metrics.num_params:,}")
109+
# Serialize to JSON
110+
json_str = result.to_json()
111+
112+
# Get formatted string (for logging, etc.)
113+
summary_str = str(result)
78114
```
79115

80-
### Speed Metrics
116+
### Selective Metrics
81117

82-
Measure inference performance across devices:
118+
Only compute what you need:
83119

84120
``` python
85-
from fasterbench import compute_speed_multi
121+
# Fast: just size and compute (no timed benchmarking runs)
122+
result = benchmark(model, dummy_input, metrics=["size", "compute"])
86123

87-
speed_metrics = compute_speed_multi(model, dummy_input)
88-
for device, metrics in speed_metrics.items():
89-
print(f"{device} latency (P50): {metrics.p50_ms:.2f} ms")
90-
print(f"{device} throughput: {metrics.throughput_s:.2f} inferences/sec")
124+
# Full benchmark on specific devices
125+
result = benchmark(model, dummy_input,
126+
speed_devices=["cpu", "cuda"],
127+
memory_devices=["cuda"])
91128
```
92129

93-
### Compute Metrics
130+
### Individual Metric Functions
94131

95-
Quantify computational complexity:
132+
For fine-grained control, use the individual compute functions:
96133

97134
``` python
98-
from fasterbench import compute_compute
99-
100-
compute_metrics = compute_compute(model, dummy_input)
101-
print(f"MACs: {compute_metrics.macs_m} million")
135+
from fasterbench import compute_size, compute_speed_multi, compute_compute
136+
137+
# Size metrics
138+
size = compute_size(model)
139+
print(f"Disk Size: {size.size_mib:.2f} MiB")
140+
print(f"Parameters: {size.num_params:,}")
141+
142+
# Speed metrics across devices
143+
speed = compute_speed_multi(model, dummy_input)
144+
for device, metrics in speed.items():
145+
print(f"{device}: {metrics.mean_ms:.2f}ms, {metrics.throughput_s:.1f} inf/s")
146+
147+
# Compute metrics (MACs)
148+
compute = compute_compute(model, dummy_input)
149+
if compute.macs_available:
150+
print(f"MACs: {compute.macs_m}M")
102151
```
103152

104153
### Memory Metrics
105154

106-
Profile memory usage:
155+
Profile memory usage on CPU and GPU:
107156

108157
``` python
109158
from fasterbench import compute_memory_multi
110159

111-
memory_metrics = compute_memory_multi(model, dummy_input)
112-
for device, metrics in memory_metrics.items():
113-
print(f"{device} peak memory: {metrics.peak_mib:.2f} MiB")
160+
memory = compute_memory_multi(model, dummy_input)
161+
for device, metrics in memory.items():
162+
print(f"{device} peak: {metrics.peak_mib:.2f} MiB")
114163
```
115164

116165
### Energy Metrics
117166

118-
Measure environmental impact:
167+
Measure power consumption and carbon footprint (requires `codecarbon`):
119168

120169
``` python
121170
from fasterbench import compute_energy_multi
122171

123-
# Requires codecarbon package
124-
energy_metrics = compute_energy_multi(model, dummy_input)
125-
for device, metrics in energy_metrics.items():
126-
print(f"{device} power usage: {metrics.mean_watts:.2f} W")
127-
print(f"{device} CO2: {metrics.co2_eq_g:.6f} g CO₂-eq per inference")
172+
energy = compute_energy_multi(model, dummy_input)
173+
for device, metrics in energy.items():
174+
print(f"{device}: {metrics.mean_watts:.1f}W, {metrics.co2_eq_g:.4f}g CO₂/inf")
128175
```
129176

130-
### Thread Count Optimization
177+
### Thread Sweep for CPU Optimization
131178

132-
Find the optimal number of CPU threads:
179+
Find the optimal thread count for CPU inference:
133180

134181
``` python
135182
from fasterbench import sweep_threads
136183

137-
thread_results = sweep_threads(model, dummy_input, thread_counts=[1, 2, 4, 8, 16])
138-
for result in thread_results:
139-
print(f"Threads: {result['threads']}, Latency: {result['mean_ms']:.2f} ms")
184+
results = sweep_threads(model, dummy_input, thread_counts=[1, 2, 4, 8])
185+
for r in results:
186+
print(f"{r['threads']} threads: {r['mean_ms']:.2f}ms")
140187
```
141188

142189
### Visualize Results

fasterbench/__init__.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,43 @@
11
__version__ = "0.0.6"
2+
"""Comprehensive benchmarking toolkit for deep learning models"""
3+
4+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/index.ipynb.
5+
6+
# %% auto #0
7+
__all__ = []
8+
9+
# %% ../nbs/index.ipynb #8b6f8c52
10+
from .benchmark import benchmark, BenchmarkResult
11+
from .size import SizeMetrics, compute_size, get_model_size, get_num_parameters
12+
from fasterbench.speed import (
13+
SpeedMetrics, compute_speed, compute_speed_multi,
14+
sweep_threads, sweep_latency, sweep_batch_sizes,
15+
)
16+
from .profiling import profile_layers, LayerProfiler
17+
from .compute import ComputeMetrics, compute_compute
18+
from .memory import MemoryMetrics, compute_memory, compute_memory_multi
19+
from .energy import EnergyMetrics, compute_energy, compute_energy_multi
20+
from .plot import create_radar_plot, SPECS
21+
from .utils import parse_metric_value
22+
23+
__all__ = [
24+
# Main entry point
25+
'benchmark', 'BenchmarkResult',
26+
# Size
27+
'SizeMetrics', 'compute_size', 'get_model_size', 'get_num_parameters',
28+
# Speed
29+
'SpeedMetrics', 'compute_speed', 'compute_speed_multi',
30+
'sweep_threads', 'sweep_latency', 'sweep_batch_sizes',
31+
# Profiling
32+
'profile_layers', 'LayerProfiler',
33+
# Compute
34+
'ComputeMetrics', 'compute_compute',
35+
# Memory
36+
'MemoryMetrics', 'compute_memory', 'compute_memory_multi',
37+
# Energy
38+
'EnergyMetrics', 'compute_energy', 'compute_energy_multi',
39+
# Plot
40+
'create_radar_plot', 'SPECS',
41+
# Utils
42+
'parse_metric_value',
43+
]

fasterbench/_modidx.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,42 @@
55
'doc_host': 'https://FasterAI-Labs.github.io',
66
'git_url': 'https://github.com/FasterAI-Labs/fasterbench',
77
'lib_path': 'fasterbench'},
8-
'syms': { 'fasterbench.benchmark': {'fasterbench.benchmark.benchmark': ('benchmark.html#benchmark', 'fasterbench/benchmark.py')},
8+
'syms': { 'fasterbench.benchmark': { 'fasterbench.benchmark.BenchmarkResult': ( 'benchmark.html#benchmarkresult',
9+
'fasterbench/benchmark.py'),
10+
'fasterbench.benchmark.BenchmarkResult.__getitem__': ( 'benchmark.html#benchmarkresult.__getitem__',
11+
'fasterbench/benchmark.py'),
12+
'fasterbench.benchmark.BenchmarkResult.__iter__': ( 'benchmark.html#benchmarkresult.__iter__',
13+
'fasterbench/benchmark.py'),
14+
'fasterbench.benchmark.BenchmarkResult.__len__': ( 'benchmark.html#benchmarkresult.__len__',
15+
'fasterbench/benchmark.py'),
16+
'fasterbench.benchmark.BenchmarkResult.__repr__': ( 'benchmark.html#benchmarkresult.__repr__',
17+
'fasterbench/benchmark.py'),
18+
'fasterbench.benchmark.BenchmarkResult.__str__': ( 'benchmark.html#benchmarkresult.__str__',
19+
'fasterbench/benchmark.py'),
20+
'fasterbench.benchmark.BenchmarkResult._format_summary': ( 'benchmark.html#benchmarkresult._format_summary',
21+
'fasterbench/benchmark.py'),
22+
'fasterbench.benchmark.BenchmarkResult.as_dict': ( 'benchmark.html#benchmarkresult.as_dict',
23+
'fasterbench/benchmark.py'),
24+
'fasterbench.benchmark.BenchmarkResult.summary': ( 'benchmark.html#benchmarkresult.summary',
25+
'fasterbench/benchmark.py'),
26+
'fasterbench.benchmark.BenchmarkResult.to_dataframe': ( 'benchmark.html#benchmarkresult.to_dataframe',
27+
'fasterbench/benchmark.py'),
28+
'fasterbench.benchmark.BenchmarkResult.to_json': ( 'benchmark.html#benchmarkresult.to_json',
29+
'fasterbench/benchmark.py'),
30+
'fasterbench.benchmark._fmt_params': ('benchmark.html#_fmt_params', 'fasterbench/benchmark.py'),
31+
'fasterbench.benchmark._section': ('benchmark.html#_section', 'fasterbench/benchmark.py'),
32+
'fasterbench.benchmark.benchmark': ('benchmark.html#benchmark', 'fasterbench/benchmark.py')},
933
'fasterbench.compute': { 'fasterbench.compute.ComputeMetrics': ('compute.html#computemetrics', 'fasterbench/compute.py'),
1034
'fasterbench.compute.ComputeMetrics.as_dict': ( 'compute.html#computemetrics.as_dict',
1135
'fasterbench/compute.py'),
36+
'fasterbench.compute.ComputeMetrics.macs_available': ( 'compute.html#computemetrics.macs_available',
37+
'fasterbench/compute.py'),
1238
'fasterbench.compute.compute_compute': ('compute.html#compute_compute', 'fasterbench/compute.py')},
1339
'fasterbench.core': { 'fasterbench.core._bytes_to_mib': ('core.html#_bytes_to_mib', 'fasterbench/core.py'),
1440
'fasterbench.core._device_ctx': ('core.html#_device_ctx', 'fasterbench/core.py'),
15-
'fasterbench.core._sync': ('core.html#_sync', 'fasterbench/core.py')},
41+
'fasterbench.core._sync': ('core.html#_sync', 'fasterbench/core.py'),
42+
'fasterbench.core._validate_benchmark_params': ( 'core.html#_validate_benchmark_params',
43+
'fasterbench/core.py')},
1644
'fasterbench.energy': { 'fasterbench.energy.EnergyMetrics': ('energy.html#energymetrics', 'fasterbench/energy.py'),
1745
'fasterbench.energy.EnergyMetrics.as_dict': ( 'energy.html#energymetrics.as_dict',
1846
'fasterbench/energy.py'),
@@ -29,6 +57,27 @@
2957
'fasterbench/memory.py')},
3058
'fasterbench.plot': { 'fasterbench.plot._parse': ('plot.html#_parse', 'fasterbench/plot.py'),
3159
'fasterbench.plot.create_radar_plot': ('plot.html#create_radar_plot', 'fasterbench/plot.py')},
60+
'fasterbench.profiling': { 'fasterbench.profiling.LayerProfiler': ('profiling.html#layerprofiler', 'fasterbench/profiling.py'),
61+
'fasterbench.profiling.LayerProfiler.__init__': ( 'profiling.html#layerprofiler.__init__',
62+
'fasterbench/profiling.py'),
63+
'fasterbench.profiling.LayerProfiler._profile_compute': ( 'profiling.html#layerprofiler._profile_compute',
64+
'fasterbench/profiling.py'),
65+
'fasterbench.profiling.LayerProfiler._profile_memory': ( 'profiling.html#layerprofiler._profile_memory',
66+
'fasterbench/profiling.py'),
67+
'fasterbench.profiling.LayerProfiler._profile_size': ( 'profiling.html#layerprofiler._profile_size',
68+
'fasterbench/profiling.py'),
69+
'fasterbench.profiling.LayerProfiler._profile_speed': ( 'profiling.html#layerprofiler._profile_speed',
70+
'fasterbench/profiling.py'),
71+
'fasterbench.profiling.LayerProfiler.profile': ( 'profiling.html#layerprofiler.profile',
72+
'fasterbench/profiling.py'),
73+
'fasterbench.profiling.LayerProfiler.summary': ( 'profiling.html#layerprofiler.summary',
74+
'fasterbench/profiling.py'),
75+
'fasterbench.profiling.LayerProfiler.top': ( 'profiling.html#layerprofiler.top',
76+
'fasterbench/profiling.py'),
77+
'fasterbench.profiling._output_bytes': ('profiling.html#_output_bytes', 'fasterbench/profiling.py'),
78+
'fasterbench.profiling._tensor_bytes': ('profiling.html#_tensor_bytes', 'fasterbench/profiling.py'),
79+
'fasterbench.profiling.profile_layers': ( 'profiling.html#profile_layers',
80+
'fasterbench/profiling.py')},
3281
'fasterbench.size': { 'fasterbench.size.SizeMetrics': ('size.html#sizemetrics', 'fasterbench/size.py'),
3382
'fasterbench.size.SizeMetrics.as_dict': ('size.html#sizemetrics.as_dict', 'fasterbench/size.py'),
3483
'fasterbench.size.compute_size': ('size.html#compute_size', 'fasterbench/size.py'),
@@ -40,6 +89,7 @@
4089
'fasterbench.speed._stats': ('speed.html#_stats', 'fasterbench/speed.py'),
4190
'fasterbench.speed.compute_speed': ('speed.html#compute_speed', 'fasterbench/speed.py'),
4291
'fasterbench.speed.compute_speed_multi': ('speed.html#compute_speed_multi', 'fasterbench/speed.py'),
92+
'fasterbench.speed.sweep_batch_sizes': ('speed.html#sweep_batch_sizes', 'fasterbench/speed.py'),
4393
'fasterbench.speed.sweep_latency': ('speed.html#sweep_latency', 'fasterbench/speed.py'),
4494
'fasterbench.speed.sweep_threads': ('speed.html#sweep_threads', 'fasterbench/speed.py')},
4595
'fasterbench.utils': {'fasterbench.utils.parse_metric_value': ('utils.html#parse_metric_value', 'fasterbench/utils.py')}}}

0 commit comments

Comments
 (0)