Skip to content

Commit 8224d86

Browse files
committed
Changed: Refactors benchmarks to compare against other crates
Renames and updates the benchmarking suite to compare against multiple linear algebra crates, including faer, in addition to nalgebra, providing a broader performance context. Adds faer implementations and benchmarks. Updates data and plots.
1 parent 71908f2 commit 8224d86

13 files changed

Lines changed: 880 additions & 157 deletions

Cargo.lock

Lines changed: 496 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@ keywords = ["linear-algebra", "geometry", "const-generics"]
1717
[dev-dependencies]
1818
approx = "0.5.1"
1919
criterion = { version = "0.8.1", features = ["html_reports"] }
20+
faer = { version = "0.23.2", default-features = false, features = ["std", "linalg"] }
2021
nalgebra = "0.34.1"
2122
pastey = "0.2.0"
2223
proptest = "1.9.0"
2324

2425
[[bench]]
25-
name = "vs_nalgebra"
26+
name = "vs_linalg"
2627
harness = false
2728

2829
[lints.rust]

README.md

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ while keeping the API intentionally small and explicit.
3434

3535
## 🚫 Anti-goals
3636

37-
- Comprehensive: use [`nalgebra`](https://crates.io/crates/nalgebra) if you need a full-featured library
3837
- Bare-metal performance: see [`blas-src`](https://crates.io/crates/blas-src), [`lapack-src`](https://crates.io/crates/lapack-src), [`openblas-src`](https://crates.io/crates/openblas-src)
38+
- Comprehensive: use [`nalgebra`](https://crates.io/crates/nalgebra) if you need a full-featured library
39+
- Large matrices/dimensions with parallelism: use [`faer`](https://crates.io/crates/faer) if you need this
3940

4041
## 🔢 Scalar types
4142

@@ -112,25 +113,25 @@ just commit-check # lint + all tests + examples
112113

113114
For the full set of developer commands, see `just --list` and `WARP.md`.
114115

115-
## 📊 Benchmarks (vs nalgebra)
116+
## 📊 Benchmarks (vs nalgebra/faer)
116117

117-
![LU solve (factor + solve): median time vs dimension](docs/assets/bench/vs_nalgebra_lu_solve_median.svg)
118+
![LU solve (factor + solve): median time vs dimension](docs/assets/bench/vs_linalg_lu_solve_median.svg)
118119

119-
Raw data: [docs/assets/bench/vs_nalgebra_lu_solve_median.csv](docs/assets/bench/vs_nalgebra_lu_solve_median.csv)
120+
Raw data: [docs/assets/bench/vs_linalg_lu_solve_median.csv](docs/assets/bench/vs_linalg_lu_solve_median.csv)
120121

121-
Summary (median time; lower is better). “la-stack vs nalgebra” is the % time reduction relative to nalgebra (positive = la-stack faster):
122+
Summary (median time; lower is better). The “la-stack vs nalgebra/faer” columns show the % time reduction relative to each baseline (positive = la-stack faster):
122123

123124
<!-- BENCH_TABLE:lu_solve:median:new:BEGIN -->
124-
| D | la-stack median (ns) | nalgebra median (ns) | la-stack vs nalgebra |
125-
|---:|--------------------:|--------------------:|---------------------:|
126-
| 2 | 2.125 | 19.172 | +88.9% |
127-
| 3 | 13.562 | 24.082 | +43.7% |
128-
| 4 | 28.365 | 55.434 | +48.8% |
129-
| 5 | 48.567 | 76.793 | +36.8% |
130-
| 8 | 141.935 | 182.628 | +22.3% |
131-
| 16 | 642.935 | 605.115 | -6.3% |
132-
| 32 | 2,761.816 | 2,505.691 | -10.2% |
133-
| 64 | 17,009.208 | 14,696.410 | -15.7% |
125+
| D | la-stack median (ns) | nalgebra median (ns) | faer median (ns) | la-stack vs nalgebra | la-stack vs faer |
126+
|---:|--------------------:|--------------------:|----------------:|---------------------:|----------------:|
127+
| 2 | 2.065 | 18.375 | 160.418 | +88.8% | +98.7% |
128+
| 3 | 13.457 | 23.377 | 198.440 | +42.4% | +93.2% |
129+
| 4 | 27.750 | 54.267 | 228.744 | +48.9% | +87.9% |
130+
| 5 | 46.317 | 73.840 | 291.623 | +37.3% | +84.1% |
131+
| 8 | 138.183 | 177.982 | 389.006 | +22.4% | +64.5% |
132+
| 16 | 629.427 | 591.505 | 893.672 | -6.4% | +29.6% |
133+
| 32 | 2,688.216 | 2,503.157 | 2,908.436 | -7.4% | +7.6% |
134+
| 64 | 16,771.962 | 14,860.016 | 12,485.424 | -12.9% | -34.3% |
134135
<!-- BENCH_TABLE:lu_solve:median:new:END -->
135136

136137
## 📄 License

WARP.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ When making changes in this repo, prioritize (in order):
3535
- `src/lu.rs`: `Lu<const D: usize>` factorization with partial pivoting (`solve_vec`, `det`)
3636
- A minimal `justfile` exists for common workflows (see `just --list`).
3737
- The public API re-exports these items from `src/lib.rs`.
38-
- Dev-only benchmarks live in `benches/vs_nalgebra.rs` (Criterion + nalgebra comparison).
38+
- Dev-only benchmarks live in `benches/vs_linalg.rs` (Criterion + nalgebra/faer comparison).
3939

4040
## Publishing note
4141

Lines changed: 140 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,58 @@
1-
//! Benchmark comparison between la-stack and nalgebra.
1+
//! Benchmark comparison between la-stack and other Rust linear algebra crates.
22
//!
33
//! Goal: like-for-like comparisons of the operations la-stack supports across several
44
//! fixed dimensions.
55
//!
66
//! Notes:
7-
//! - Determinant is benchmarked via LU on both sides (nalgebra uses closed-forms for 1×1/2×2/3×3).
8-
//! - Matrix infinity norm is the maximum absolute row sum on both sides.
7+
//! - Determinant is benchmarked via LU on all sides (nalgebra uses closed-forms for 1×1/2×2/3×3).
8+
//! - Matrix infinity norm is the maximum absolute row sum on all sides.
99
1010
use criterion::Criterion;
11+
use faer::linalg::solvers::Solve;
12+
use faer::perm::PermRef;
1113
use pastey::paste;
1214
use std::hint::black_box;
1315

16+
fn faer_perm_sign(p: PermRef<'_, usize>) -> f64 {
17+
// Sign(det(P)) for a permutation matrix P is +1 for even permutations, -1 for odd.
18+
// Parity can be computed from the number of cycles:
19+
// sign = (-1)^(n - cycles)
20+
let (forward, _inverse) = p.arrays();
21+
let n = forward.len();
22+
23+
let mut seen = vec![false; n];
24+
let mut cycles = 0usize;
25+
26+
for start in 0..n {
27+
if seen[start] {
28+
continue;
29+
}
30+
cycles += 1;
31+
32+
let mut i = start;
33+
while !seen[i] {
34+
seen[i] = true;
35+
i = forward[i];
36+
}
37+
}
38+
39+
if (n - cycles).is_multiple_of(2) {
40+
1.0
41+
} else {
42+
-1.0
43+
}
44+
}
45+
46+
fn faer_det_from_partial_piv_lu(lu: &faer::linalg::solvers::PartialPivLu<f64>) -> f64 {
47+
// For PA = LU with unit-lower L, det(A) = det(P) * det(U).
48+
let u = lu.U();
49+
let mut det = 1.0;
50+
for i in 0..u.nrows() {
51+
det *= u[(i, i)];
52+
}
53+
det * faer_perm_sign(lu.P())
54+
}
55+
1456
#[inline]
1557
#[allow(clippy::cast_precision_loss)] // D, r, c are small integers, precision loss is not an issue.
1658
fn matrix_entry<const D: usize>(r: usize, c: usize) -> f64 {
@@ -81,7 +123,7 @@ fn nalgebra_inf_norm<const D: usize>(m: &nalgebra::SMatrix<f64, D, D>) -> f64 {
81123
max_row_sum
82124
}
83125

84-
macro_rules! gen_vs_nalgebra_benches_for_dim {
126+
macro_rules! gen_vs_linalg_benches_for_dim {
85127
($c:expr, $d:literal) => {
86128
paste! {{
87129
// Isolate each dimension's inputs to keep types and captures clean.
@@ -96,11 +138,17 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
96138
let nv1 = nalgebra::SVector::<f64, $d>::from_fn(|i, _| vector_entry(i, 0.0));
97139
let nv2 = nalgebra::SVector::<f64, $d>::from_fn(|i, _| vector_entry(i, 1.0));
98140

141+
let fa = faer::Mat::<f64>::from_fn($d, $d, |r, c| matrix_entry::<$d>(r, c));
142+
let frhs = faer::Mat::<f64>::from_fn($d, 1, |i, _| vector_entry(i, 0.0));
143+
let fv1 = faer::Mat::<f64>::from_fn($d, 1, |i, _| vector_entry(i, 0.0));
144+
let fv2 = faer::Mat::<f64>::from_fn($d, 1, |i, _| vector_entry(i, 1.0));
145+
99146
// Precompute LU once for solve-only / det-only benchmarks.
100147
let a_lu = a
101148
.lu(la_stack::DEFAULT_PIVOT_TOL)
102149
.expect("matrix should be non-singular");
103150
let na_lu = na.clone().lu();
151+
let fa_lu = fa.partial_piv_lu();
104152

105153
let mut [<group_d $d>] = ($c).benchmark_group(concat!("d", stringify!($d)));
106154

@@ -123,6 +171,14 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
123171
});
124172
});
125173

174+
[<group_d $d>].bench_function("faer_det_via_lu", |bencher| {
175+
bencher.iter(|| {
176+
let lu = black_box(&fa).partial_piv_lu();
177+
let det = faer_det_from_partial_piv_lu(&lu);
178+
black_box(det);
179+
});
180+
});
181+
126182
// === LU factorization ===
127183
[<group_d $d>].bench_function("la_stack_lu", |bencher| {
128184
bencher.iter(|| {
@@ -140,6 +196,13 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
140196
});
141197
});
142198

199+
[<group_d $d>].bench_function("faer_lu", |bencher| {
200+
bencher.iter(|| {
201+
let lu = black_box(&fa).partial_piv_lu();
202+
black_box(lu);
203+
});
204+
});
205+
143206
// === LU solve (factor + solve) ===
144207
[<group_d $d>].bench_function("la_stack_lu_solve", |bencher| {
145208
bencher.iter(|| {
@@ -163,6 +226,14 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
163226
});
164227
});
165228

229+
[<group_d $d>].bench_function("faer_lu_solve", |bencher| {
230+
bencher.iter(|| {
231+
let lu = black_box(&fa).partial_piv_lu();
232+
let x = lu.solve(black_box(&frhs));
233+
black_box(x);
234+
});
235+
});
236+
166237
// === Solve using a precomputed LU ===
167238
[<group_d $d>].bench_function("la_stack_solve_from_lu", |bencher| {
168239
bencher.iter(|| {
@@ -182,6 +253,13 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
182253
});
183254
});
184255

256+
[<group_d $d>].bench_function("faer_solve_from_lu", |bencher| {
257+
bencher.iter(|| {
258+
let x = fa_lu.solve(black_box(&frhs));
259+
black_box(x);
260+
});
261+
});
262+
185263
// === Determinant from a precomputed LU ===
186264
[<group_d $d>].bench_function("la_stack_det_from_lu", |bencher| {
187265
bencher.iter(|| {
@@ -197,6 +275,13 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
197275
});
198276
});
199277

278+
[<group_d $d>].bench_function("faer_det_from_lu", |bencher| {
279+
bencher.iter(|| {
280+
let det = faer_det_from_partial_piv_lu(&fa_lu);
281+
black_box(det);
282+
});
283+
});
284+
200285
// === Vector dot product ===
201286
[<group_d $d>].bench_function("la_stack_dot", |bencher| {
202287
bencher.iter(|| {
@@ -212,6 +297,18 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
212297
});
213298
});
214299

300+
[<group_d $d>].bench_function("faer_dot", |bencher| {
301+
bencher.iter(|| {
302+
let mut sum = 0.0;
303+
let a = black_box(&fv1);
304+
let b = black_box(&fv2);
305+
for i in 0..$d {
306+
sum += a[(i, 0)] * b[(i, 0)];
307+
}
308+
black_box(sum);
309+
});
310+
});
311+
215312
// === Vector norm squared ===
216313
[<group_d $d>].bench_function("la_stack_norm2_sq", |bencher| {
217314
bencher.iter(|| {
@@ -227,6 +324,18 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
227324
});
228325
});
229326

327+
[<group_d $d>].bench_function("faer_norm2_sq", |bencher| {
328+
bencher.iter(|| {
329+
let mut sum = 0.0;
330+
let v = black_box(&fv1);
331+
for i in 0..$d {
332+
let x = v[(i, 0)];
333+
sum += x * x;
334+
}
335+
black_box(sum);
336+
});
337+
});
338+
230339
// === Matrix infinity norm (max absolute row sum) ===
231340
[<group_d $d>].bench_function("la_stack_inf_norm", |bencher| {
232341
bencher.iter(|| {
@@ -242,6 +351,25 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
242351
});
243352
});
244353

354+
[<group_d $d>].bench_function("faer_inf_norm", |bencher| {
355+
bencher.iter(|| {
356+
let m = black_box(&fa);
357+
let mut max_row_sum = 0.0;
358+
359+
for r in 0..$d {
360+
let mut row_sum = 0.0;
361+
for c in 0..$d {
362+
row_sum += m[(r, c)].abs();
363+
}
364+
if row_sum > max_row_sum {
365+
max_row_sum = row_sum;
366+
}
367+
}
368+
369+
black_box(max_row_sum);
370+
});
371+
});
372+
245373
[<group_d $d>].finish();
246374
}
247375
}}
@@ -251,15 +379,15 @@ macro_rules! gen_vs_nalgebra_benches_for_dim {
251379
fn main() {
252380
let mut c = Criterion::default().configure_from_args();
253381

254-
gen_vs_nalgebra_benches_for_dim!(&mut c, 2);
255-
gen_vs_nalgebra_benches_for_dim!(&mut c, 3);
256-
gen_vs_nalgebra_benches_for_dim!(&mut c, 4);
257-
gen_vs_nalgebra_benches_for_dim!(&mut c, 5);
382+
gen_vs_linalg_benches_for_dim!(&mut c, 2);
383+
gen_vs_linalg_benches_for_dim!(&mut c, 3);
384+
gen_vs_linalg_benches_for_dim!(&mut c, 4);
385+
gen_vs_linalg_benches_for_dim!(&mut c, 5);
258386

259-
gen_vs_nalgebra_benches_for_dim!(&mut c, 8);
260-
gen_vs_nalgebra_benches_for_dim!(&mut c, 16);
261-
gen_vs_nalgebra_benches_for_dim!(&mut c, 32);
262-
gen_vs_nalgebra_benches_for_dim!(&mut c, 64);
387+
gen_vs_linalg_benches_for_dim!(&mut c, 8);
388+
gen_vs_linalg_benches_for_dim!(&mut c, 16);
389+
gen_vs_linalg_benches_for_dim!(&mut c, 32);
390+
gen_vs_linalg_benches_for_dim!(&mut c, 64);
263391

264392
c.final_summary();
265393
}

cspell.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"words": [
66
"acgetchell",
77
"blas",
8+
"capsys",
89
"Clippy",
910
"clippy",
1011
"codacy",
@@ -16,6 +17,8 @@
1617
"f128",
1718
"f32",
1819
"f64",
20+
"faer",
21+
"frhs",
1922
"generics",
2023
"Getchell",
2124
"gnuplot",
@@ -24,6 +27,7 @@
2427
"keepends",
2528
"laerror",
2629
"lapack",
30+
"linalg",
2731
"linespoints",
2832
"logscale",
2933
"lu",
@@ -38,6 +42,7 @@
3842
"nonfinite",
3943
"noplot",
4044
"nrhs",
45+
"nrows",
4146
"openblas",
4247
"pastey",
4348
"patchlevel",
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
D,la_stack,la_lo,la_hi,nalgebra,na_lo,na_hi,faer,fa_lo,fa_hi
2+
2,2.064783337073873,2.057689704502174,2.068888221867889,18.37497049638361,18.331137225709913,18.44905068359342,160.41828167015206,159.771806584937,161.23095742945685
3+
3,13.456732290936305,13.447517145467188,13.49236442840063,23.37662094350666,23.266710970814643,23.477406862396062,198.4402652520574,197.38808795625323,199.0284125157543
4+
4,27.75044852748178,27.599588184050052,27.785226320734317,54.266619121715074,54.15885008672231,54.36197439817226,228.74410991553628,227.56318439122288,230.055154703485
5+
5,46.31708492242545,46.21936017779927,46.502000712392494,73.83977618264294,73.57429654893484,74.07010889514254,291.6230709464669,290.6976886943525,292.8926041338858
6+
8,138.18343752426807,137.71388876876108,138.5654231809486,177.9820086523135,177.7122781945933,178.24448790485158,389.0063341187339,387.97176861931916,389.80891746278604
7+
16,629.4267672715603,626.9209242618742,638.6291503313327,591.5050977290701,590.798294714918,592.3790737673589,893.6719342969343,891.1105343477684,898.4784384384384
8+
32,2688.2164628623186,2684.227603602204,2691.7876890359166,2503.1570307509674,2500.668872475772,2506.5409929308526,2908.435890151515,2905.222288277013,2916.704963235294
9+
64,16771.9616225278,16754.37058346066,16817.485164835165,14860.015805946792,14773.547535211268,14956.477776495036,12485.423971036585,12471.988315217392,12501.272443181819

0 commit comments

Comments
 (0)