Skip to content

Commit adddc64

Browse files
committed
Replace simba with fearless_simd
This crate is better-maintained and more flexible.
1 parent 93f7793 commit adddc64

3 files changed

Lines changed: 85 additions & 77 deletions

File tree

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@ all-features = true
1111

1212
[features]
1313
parry = ["parry3d-f64", "lru-slab"]
14+
fearless_simd = ["dep:fearless_simd"]
1415

1516
[dependencies]
1617
na = { package = "nalgebra", version = "0.34.1" }
1718
slab = "0.4.2"
1819
hashbrown = "0.16"
1920
parry3d-f64 = { version = "0.25.0", optional = true }
20-
num-traits = "0.2.19"
2121
lru-slab = { version = "0.1.1", optional = true }
22+
fearless_simd = { version = "0.3.0", optional = true }
2223

2324
[dev-dependencies]
2425
criterion = "0.7.0"

src/chunk.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
use na::{RealField, SimdRealField};
1+
#[cfg(feature = "fearless_simd")]
2+
use fearless_simd::Simd;
3+
use na::RealField;
24

5+
#[cfg(feature = "fearless_simd")]
36
use crate::cubemap::SampleIterSimd;
47
use crate::cubemap::{Coords, Edge, Face, SampleIter};
58

@@ -138,12 +141,9 @@ impl Chunk {
138141
///
139142
/// Because this returns data in batches of `S::f32s::N`, a few excess values will be
140143
/// computed at the end for any `resolution` whose square is not a multiple of the batch size.
141-
pub fn samples_ps<S>(&self, resolution: u32) -> SampleIterSimd<S>
142-
where
143-
S: SimdRealField + Copy,
144-
S::Element: RealField + Copy,
145-
{
146-
self.coords.samples_ps(self.resolution(), resolution)
144+
#[cfg(feature = "fearless_simd")]
145+
pub fn samples_ps<S: Simd>(&self, simd: S, resolution: u32) -> SampleIterSimd<S> {
146+
self.coords.samples_ps(simd, self.resolution(), resolution)
147147
}
148148

149149
/// Compute the direction identified by a [0..1]^2 vector on this chunk
@@ -187,7 +187,6 @@ impl ExactSizeIterator for Path {
187187
mod test {
188188
use super::*;
189189
use approx::*;
190-
use na::SimdValue;
191190

192191
#[test]
193192
fn neighbors() {
@@ -372,15 +371,16 @@ mod test {
372371
}
373372

374373
#[test]
374+
#[cfg(feature = "fearless_simd")]
375375
fn simd_samples() {
376-
type S = simba::simd::WideF32x4;
376+
use fearless_simd::{Fallback, SimdBase};
377377

378378
let chunk = Chunk::root(Face::Pz);
379379

380380
let mut samples = chunk.samples(5);
381-
for coords in chunk.samples_ps::<S>(5) {
382-
let [x, y, z] = coords.map(<[f32; S::LANES]>::from);
383-
for i in 0..S::LANES {
381+
for coords in chunk.samples_ps(Fallback::new(), 5) {
382+
let [x, y, z] = coords.map(<[f32; <Fallback as Simd>::f32s::N]>::from);
383+
for i in 0..<Fallback as Simd>::f32s::N {
384384
let reference = if let Some(v) = samples.next() {
385385
v
386386
} else {

src/cubemap.rs

Lines changed: 71 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
use std::cmp::Ordering;
2-
use std::marker::PhantomData;
32
use std::ops::{Index, IndexMut, Neg};
43
use std::{alloc, fmt, mem, ptr};
54

6-
use na::{ComplexField, RealField, SimdRealField};
7-
use num_traits::identities::One;
5+
#[cfg(feature = "fearless_simd")]
6+
use fearless_simd::{Simd, SimdBase, SimdElement, SimdFloat};
7+
use na::{RealField, SimdRealField};
88

99
/// A dense, fixed-resolution, warped cube map
1010
///
@@ -651,17 +651,19 @@ impl Coords {
651651
///
652652
/// Because this returns data in batches of `S::f32s::N`, a few excess values will be
653653
/// computed at the end for any `resolution` whose square is not a multiple of the batch size.
654-
pub fn samples_ps<S>(&self, face_resolution: u32, chunk_resolution: u32) -> SampleIterSimd<S>
655-
where
656-
S: SimdRealField + Copy,
657-
S::Element: RealField + Copy,
658-
{
654+
#[cfg(feature = "fearless_simd")]
655+
pub fn samples_ps<S: Simd>(
656+
&self,
657+
simd: S,
658+
face_resolution: u32,
659+
chunk_resolution: u32,
660+
) -> SampleIterSimd<S> {
659661
SampleIterSimd {
660662
coords: *self,
661663
face_resolution,
662664
chunk_resolution,
663665
index: 0,
664-
_simd: PhantomData,
666+
simd,
665667
}
666668
}
667669
}
@@ -677,6 +679,14 @@ pub(crate) fn warp<N: SimdRealField + Copy>(x: N) -> N {
677679
* x
678680
}
679681

682+
/// See `warp`
683+
#[cfg(feature = "fearless_simd")]
684+
fn warp_ps<S: Simd, N: RealField + SimdElement, F: SimdFloat<N, S>>(x: F) -> F {
685+
let x2 = x * x;
686+
x * ((x2 * na::convert::<_, N>(0.123894434214) + na::convert::<_, N>(0.130546850193)) * x2
687+
+ na::convert::<_, N>(0.745558715593))
688+
}
689+
680690
/// Derivative of `warp`
681691
fn dwarp<N: RealField + Copy>(x: N) -> N {
682692
let x2 = x * x;
@@ -804,98 +814,83 @@ impl ExactSizeIterator for SampleIter {
804814
///
805815
/// Hand-vectorized, returning batches of each dimension in a separate register.
806816
#[derive(Debug)]
817+
#[cfg(feature = "fearless_simd")]
807818
pub struct SampleIterSimd<S> {
808819
coords: Coords,
809820
face_resolution: u32,
810821
chunk_resolution: u32,
811822
index: u32,
812-
_simd: PhantomData<fn() -> S>,
823+
simd: S,
813824
}
814825

815-
impl<S> Iterator for SampleIterSimd<S>
816-
where
817-
S: SimdRealField + Copy,
818-
S::Element: RealField + Copy,
819-
{
820-
type Item = [S; 3];
826+
#[cfg(feature = "fearless_simd")]
827+
impl<S: Simd> Iterator for SampleIterSimd<S> {
828+
type Item = [S::f32s; 3];
821829
fn next(&mut self) -> Option<Self::Item> {
822830
if self.index >= self.chunk_resolution * self.chunk_resolution {
823831
return None;
824832
}
825833
{
826-
let edge_length = Coords::edge_length::<S::Element>(self.face_resolution);
827-
let origin_on_face_x = na::convert::<_, S::Element>(self.coords.x as f32)
828-
.mul_add(edge_length, -S::Element::one());
829-
let origin_on_face_y = na::convert::<_, S::Element>(self.coords.y as f32)
830-
.mul_add(edge_length, -S::Element::one());
834+
let edge_length = Coords::edge_length::<f32>(self.face_resolution);
835+
let origin_on_face_x = (self.coords.x as f32).mul_add(edge_length, -1.0);
836+
let origin_on_face_y = (self.coords.y as f32).mul_add(edge_length, -1.0);
831837
let max = self.chunk_resolution - 1;
832838
let (offset_x, offset_y) = if max == 0 {
833-
let v = S::splat(na::convert::<_, S::Element>(0.5) * edge_length);
839+
let v = S::f32s::splat(self.simd, 0.5 * edge_length);
834840
(v, v)
835841
} else {
836-
let step = edge_length / na::convert(max as f32);
837-
let mut xs = S::zero();
838-
for i in 0..S::LANES {
839-
xs.replace(
840-
i,
841-
na::convert(((self.index + i as u32) % self.chunk_resolution) as f32),
842-
);
842+
let step = edge_length / max as f32;
843+
let mut xs = S::f32s::splat(self.simd, 0.0);
844+
for (i, x) in xs.as_mut_slice().iter_mut().enumerate() {
845+
*x = ((self.index + i as u32) % self.chunk_resolution) as f32;
843846
}
844-
let mut ys = S::zero();
845-
for i in 0..S::LANES {
846-
ys.replace(
847-
i,
848-
na::convert(((self.index + i as u32) / self.chunk_resolution) as f32),
849-
);
847+
let mut ys = S::f32s::splat(self.simd, 0.0);
848+
for (i, y) in ys.as_mut_slice().iter_mut().enumerate() {
849+
*y = ((self.index + i as u32) / self.chunk_resolution) as f32;
850850
}
851-
(xs * S::splat(step), ys * S::splat(step))
851+
(xs * step, ys * step)
852852
};
853-
let pos_on_face_x = S::splat(origin_on_face_x) + offset_x;
854-
let pos_on_face_y = S::splat(origin_on_face_y) + offset_y;
853+
let pos_on_face_x = offset_x + origin_on_face_x;
854+
let pos_on_face_y = offset_y + origin_on_face_y;
855855

856-
let warped_x = warp(pos_on_face_x);
857-
let warped_y = warp(pos_on_face_y);
856+
let warped_x = warp_ps(pos_on_face_x);
857+
let warped_y = warp_ps(pos_on_face_y);
858858

859-
let len = warped_y
860-
.simd_mul_add(warped_y, warped_x.simd_mul_add(warped_x, S::one()))
861-
.simd_sqrt();
859+
let len = warped_y.madd(warped_y, warped_x.madd(warped_x, 1.0)).sqrt();
862860
let dir_x = warped_x / len;
863861
let dir_y = warped_y / len;
864-
let dir_z = len.simd_recip();
862+
let dir_z = S::f32s::splat(self.simd, 1.0) / len;
865863

866-
let basis = self.coords.face.basis::<S::Element>();
864+
let basis = self.coords.face.basis::<f32>();
867865
let basis = basis.matrix();
868-
let x = S::splat(basis.m11).simd_mul_add(
866+
let x = S::f32s::splat(self.simd, basis.m11).madd(
869867
dir_x,
870-
S::splat(basis.m12).simd_mul_add(dir_y, S::splat(basis.m13) * dir_z),
868+
S::f32s::splat(self.simd, basis.m12).madd(dir_y, dir_z * basis.m13),
871869
);
872-
let y = S::splat(basis.m21).simd_mul_add(
870+
let y = S::f32s::splat(self.simd, basis.m21).madd(
873871
dir_x,
874-
S::splat(basis.m22).simd_mul_add(dir_y, S::splat(basis.m23) * dir_z),
872+
S::f32s::splat(self.simd, basis.m22).madd(dir_y, dir_z * basis.m23),
875873
);
876-
let z = S::splat(basis.m31).simd_mul_add(
874+
let z = S::f32s::splat(self.simd, basis.m31).madd(
877875
dir_x,
878-
S::splat(basis.m32).simd_mul_add(dir_y, S::splat(basis.m33) * dir_z),
876+
S::f32s::splat(self.simd, basis.m32).madd(dir_y, dir_z * basis.m33),
879877
);
880878

881-
self.index += S::LANES as u32;
879+
self.index += S::f32s::N as u32;
882880
Some([x, y, z])
883881
}
884882
}
885883

886884
fn size_hint(&self) -> (usize, Option<usize>) {
887885
let total = self.chunk_resolution * self.chunk_resolution;
888886
let remaining = (total - self.index) as usize;
889-
let x = remaining.div_ceil(S::LANES);
887+
let x = remaining.div_ceil(S::f32s::N);
890888
(x, Some(x))
891889
}
892890
}
893891

894-
impl<S> ExactSizeIterator for SampleIterSimd<S>
895-
where
896-
S: SimdRealField + Copy,
897-
S::Element: RealField + Copy,
898-
{
892+
#[cfg(feature = "fearless_simd")]
893+
impl<S: Simd> ExactSizeIterator for SampleIterSimd<S> {
899894
fn len(&self) -> usize {
900895
self.size_hint().0
901896
}
@@ -1131,20 +1126,32 @@ mod test {
11311126
}
11321127

11331128
#[test]
1129+
#[cfg(feature = "fearless_simd")]
11341130
fn simd_samples_consistent() {
1131+
use fearless_simd::Fallback;
1132+
11351133
const COORDS: Coords = Coords {
11361134
x: 0,
11371135
y: 0,
11381136
face: Face::Py,
11391137
};
11401138
const FACE_RES: u32 = 1;
11411139
const CHUNK_RES: u32 = 17;
1142-
let scalar = COORDS.samples(FACE_RES, CHUNK_RES);
1143-
let simd = COORDS.samples_ps::<f32>(FACE_RES, CHUNK_RES);
1144-
assert_eq!(simd.len(), scalar.len());
1145-
for (scalar, [x, y, z]) in scalar.zip(simd) {
1146-
dbg!(x, y, z);
1147-
assert_abs_diff_eq!(scalar, na::Unit::new_unchecked(na::Vector3::new(x, y, z)));
1140+
let mut scalar = COORDS.samples(FACE_RES, CHUNK_RES);
1141+
let simd = COORDS.samples_ps(Fallback::new(), FACE_RES, CHUNK_RES);
1142+
assert_eq!(
1143+
simd.len(),
1144+
scalar.len().div_ceil(<Fallback as Simd>::f32s::N)
1145+
);
1146+
for vs in simd {
1147+
for i in 0..<Fallback as Simd>::f32s::N {
1148+
let [x, y, z] = vs.map(|c| c.as_slice()[i]);
1149+
let Some(scalar) = scalar.next() else {
1150+
break;
1151+
};
1152+
let simd = na::Unit::new_unchecked(na::Vector3::new(x, y, z));
1153+
assert_abs_diff_eq!(scalar, simd);
1154+
}
11481155
}
11491156
}
11501157

0 commit comments

Comments
 (0)