Skip to content

Commit 0c6de71

Browse files
committed
Add SIMD performance testing and correctness framework
This commit introduces a comprehensive testing framework for SIMD operations in BepuPhysics2, enabling incremental optimization and verification.

Key features:
- Software reference implementations for correctness testing
- xUnit tests comparing SIMD implementations against scalar references
- Performance benchmarking harness measuring CPU time (ns/iteration)
- Comprehensive inventory of 25+ SIMD functions in the codebase

Identified optimization opportunities:
- 13+ functions missing ARM64/NEON support
- Most functions lack AVX512 (Vector512) implementations
- Critical functions in constraint solving and body operations

Currently tested functions:
- FastReciprocal / FastReciprocalSquareRoot (MathHelper)
- CreateTrailingMaskForCountInBundle / CreateMaskForCountInBundle
- GetFirstSetLaneIndex / GetLastSetLaneCount

The framework is designed to be easily extensible for testing new SIMD implementations and validating optimizations.

Usage:
  dotnet test           # Run correctness tests
  dotnet run benchmark  # Run performance benchmarks

See SimdTests/README.md for detailed documentation.
1 parent cfb5daa commit 0c6de71

7 files changed

Lines changed: 1077 additions & 0 deletions

File tree

SimdTests/CorrectnessTests.cs

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
using System;
2+
using System.Numerics;
3+
using BepuUtilities;
4+
using Xunit;
5+
6+
namespace SimdTests
7+
{
8+
/// <summary>
9+
/// Correctness tests that verify SIMD implementations match scalar reference implementations.
10+
/// </summary>
11+
public class CorrectnessTests
12+
{
13+
private const float Epsilon = 1e-5f; // Tolerance for floating-point comparisons
14+
15+
/// <summary>
/// Asserts that each lane of <paramref name="actual"/> differs from the matching lane of
/// <paramref name="expected"/> by less than <c>Epsilon</c> (absolute difference).
/// </summary>
/// <param name="expected">Lane values the result should match.</param>
/// <param name="actual">Lane values produced by the code under test.</param>
/// <param name="message">Context text placed at the start of the failure message.</param>
private static void AssertVectorsEqual(Vector<float> expected, Vector<float> actual, string message)
{
    var lane = 0;
    while (lane < Vector<float>.Count)
    {
        var want = expected[lane];
        var got = actual[lane];
        var withinTolerance = MathF.Abs(want - got) < Epsilon;

        Assert.True(withinTolerance, $"{message} - Mismatch at index {lane}: expected {want}, got {got}");
        lane++;
    }
}
28+
29+
/// <summary>
/// Asserts that two integer vectors hold exactly the same value in every lane.
/// </summary>
/// <param name="expected">Lane values the result should match.</param>
/// <param name="actual">Lane values produced by the code under test.</param>
/// <param name="message">Context text placed at the start of the failure message.</param>
private static void AssertIntVectorsEqual(Vector<int> expected, Vector<int> actual, string message)
{
    for (int i = 0; i < Vector<int>.Count; i++)
    {
        // Assert.True is used so the caller's context string and the failing lane index
        // both end up in the failure output.
        Assert.True(
            expected[i] == actual[i],
            $"{message} - Mismatch at index {i}: expected {expected[i]}, got {actual[i]}"
        );
    }
}
39+
40+
/// <summary>
/// Compares MathHelper.FastReciprocal with the reference implementation on 100 seeded random
/// vectors, allowing a relative error below 0.1% in each lane.
/// </summary>
[Fact]
public void TestFastReciprocal_RandomValues()
{
    // A fixed seed makes the generated inputs identical on every run.
    var rng = new Random(12345);

    for (int test = 0; test < 100; test++)
    {
        // Each lane is drawn as NextDouble() * 99.9 + 0.1, which keeps inputs away from zero.
        var lanes = new float[Vector<float>.Count];
        for (int lane = 0; lane < Vector<float>.Count; lane++)
        {
            lanes[lane] = (float)(rng.NextDouble() * 99.9 + 0.1);
        }

        var input = new Vector<float>(lanes);
        var expected = ReferenceImplementations.FastReciprocal_Reference(input);
        var actual = MathHelper.FastReciprocal(input);

        // The comparison is relative rather than absolute: each lane may deviate by less than 0.1%.
        for (int lane = 0; lane < Vector<float>.Count; lane++)
        {
            var reference = expected[lane];
            var measured = actual[lane];
            var relativeError = MathF.Abs((reference - measured) / reference);
            var acceptable = relativeError < 0.001f;

            Assert.True(
                acceptable,
                $"FastReciprocal mismatch at test {test}, index {lane}: expected {reference}, got {measured}, relative error {relativeError}"
            );
        }
    }
}
69+
70+
/// <summary>
/// Compares MathHelper.FastReciprocal with the reference implementation on a few fixed inputs
/// (1, 2, 0.5 and 100, each broadcast to every lane), allowing a relative error below 0.1%.
/// </summary>
[Fact]
public void TestFastReciprocal_SpecialValues()
{
    // Test with specific values
    var testValues = new[]
    {
        new Vector<float>(1.0f),
        new Vector<float>(2.0f),
        new Vector<float>(0.5f),
        new Vector<float>(100.0f),
    };

    foreach (var input in testValues)
    {
        var expected = ReferenceImplementations.FastReciprocal_Reference(input);
        var actual = MathHelper.FastReciprocal(input);

        for (int i = 0; i < Vector<float>.Count; i++)
        {
            var relativeError = MathF.Abs((expected[i] - actual[i]) / expected[i]);

            // Report the input and lane on failure, in the same way as the random-value test,
            // so a failing case can be identified without a debugger.
            Assert.True(
                relativeError < 0.001f, // 0.1% relative error tolerance
                $"FastReciprocal mismatch for input {input[i]}, index {i}: expected {expected[i]}, got {actual[i]}, relative error {relativeError}"
            );
        }
    }
}
94+
95+
/// <summary>
/// Compares MathHelper.FastReciprocalSquareRoot with the reference implementation on 100 seeded
/// random vectors, allowing a relative error below 0.1% in each lane.
/// </summary>
[Fact]
public void TestFastReciprocalSquareRoot_RandomValues()
{
    // A fixed seed makes the generated inputs identical on every run.
    var rng = new Random(12345);

    for (int test = 0; test < 100; test++)
    {
        // Each lane is drawn as NextDouble() * 99.9 + 0.1, so every input is positive.
        var lanes = new float[Vector<float>.Count];
        for (int lane = 0; lane < Vector<float>.Count; lane++)
        {
            lanes[lane] = (float)(rng.NextDouble() * 99.9 + 0.1);
        }

        var input = new Vector<float>(lanes);
        var expected = ReferenceImplementations.FastReciprocalSquareRoot_Reference(input);
        var actual = MathHelper.FastReciprocalSquareRoot(input);

        for (int lane = 0; lane < Vector<float>.Count; lane++)
        {
            var reference = expected[lane];
            var measured = actual[lane];
            var relativeError = MathF.Abs((reference - measured) / reference);
            var acceptable = relativeError < 0.001f;

            Assert.True(
                acceptable,
                $"FastReciprocalSquareRoot mismatch at test {test}, index {lane}: expected {reference}, got {measured}, relative error {relativeError}"
            );
        }
    }
}
123+
124+
/// <summary>
/// Checks BundleIndexing.CreateTrailingMaskForCountInBundle against the reference implementation
/// for every count from 0 through <c>Vector&lt;int&gt;.Count</c> inclusive.
/// </summary>
[Fact]
public void TestCreateTrailingMaskForCountInBundle()
{
    var maxCount = Vector<int>.Count;
    var laneCount = 0;
    while (laneCount <= maxCount)
    {
        var referenceMask = ReferenceImplementations.CreateTrailingMaskForCountInBundle_Reference(laneCount);
        var simdMask = BundleIndexing.CreateTrailingMaskForCountInBundle(laneCount);
        var context = $"CreateTrailingMaskForCountInBundle with count={laneCount}";

        AssertIntVectorsEqual(referenceMask, simdMask, context);
        laneCount++;
    }
}
136+
137+
/// <summary>
/// Checks BundleIndexing.CreateMaskForCountInBundle against the reference implementation
/// for every count from 0 through <c>Vector&lt;int&gt;.Count</c> inclusive.
/// </summary>
[Fact]
public void TestCreateMaskForCountInBundle()
{
    var maxCount = Vector<int>.Count;
    var laneCount = 0;
    while (laneCount <= maxCount)
    {
        var referenceMask = ReferenceImplementations.CreateMaskForCountInBundle_Reference(laneCount);
        var simdMask = BundleIndexing.CreateMaskForCountInBundle(laneCount);
        var context = $"CreateMaskForCountInBundle with count={laneCount}";

        AssertIntVectorsEqual(referenceMask, simdMask, context);
        laneCount++;
    }
}
149+
150+
/// <summary>
/// Checks BundleIndexing.GetFirstSetLaneIndex against both hand-written expectations and the
/// reference implementation for a handful of mask patterns.
/// </summary>
[Fact]
public void TestGetFirstSetLaneIndex()
{
    // Each scenario pairs a mask with the index the reference is expected to report.
    // All set lanes sit in the first four positions, so CreateMask trimming the pattern
    // to a narrower vector does not change the expected index.
    (Vector<int> Mask, int ExpectedIndex)[] scenarios =
    {
        (CreateMask(new[] { -1, 0, 0, 0, 0, 0, 0, 0 }), 0), // First lane set
        (CreateMask(new[] { 0, -1, 0, 0, 0, 0, 0, 0 }), 1), // Second lane set
        (CreateMask(new[] { 0, 0, 0, -1, 0, 0, 0, 0 }), 3), // Middle lane set
        (CreateMask(new[] { 0, 0, 0, 0, 0, 0, 0, 0 }), -1), // No lanes set
        (CreateMask(new[] { -1, -1, -1, -1, -1, -1, -1, -1 }), 0), // All lanes set
    };

    foreach (var scenario in scenarios)
    {
        var referenceIndex = ReferenceImplementations.GetFirstSetLaneIndex_Reference(scenario.Mask);
        var simdIndex = BundleIndexing.GetFirstSetLaneIndex(scenario.Mask);

        // First validate the reference itself, then the SIMD path against the reference.
        Assert.Equal(scenario.ExpectedIndex, referenceIndex);
        Assert.Equal(referenceIndex, simdIndex);
    }
}
172+
173+
/// <summary>
/// Checks BundleIndexing.GetLastSetLaneCount against both hand-written expectations and the
/// reference implementation for a handful of mask patterns.
/// </summary>
/// <remarks>
/// The masks are built relative to <c>Vector&lt;int&gt;.Count</c> instead of being written out as
/// eight literal lanes. Literal eight-lane patterns with the set lanes at the end are truncated
/// on 4-lane hardware and zero-padded on 16-lane hardware, which invalidates the hand-written
/// expectations on those targets.
/// NOTE(review): the expected counts keep this test's original reading of GetLastSetLaneCount
/// (the length of the run of set lanes at the end of the vector) — confirm against the
/// BundleIndexing contract.
/// </remarks>
[Fact]
public void TestGetLastSetLaneCount()
{
    int laneCount = Vector<int>.Count;

    // Builds a mask with -1 in each listed lane and 0 in every other lane.
    Vector<int> MaskWithLanes(params int[] setLanes)
    {
        var lanes = new int[Vector<int>.Count];
        foreach (var lane in setLanes)
        {
            lanes[lane] = -1;
        }
        return new Vector<int>(lanes);
    }

    // Test various mask patterns
    var testCases = new[]
    {
        (Vector<int>.Zero, 0), // No lanes set
        (MaskWithLanes(laneCount - 1), 1), // Last lane set
        (MaskWithLanes(laneCount - 2, laneCount - 1), 2), // Last two lanes set
        (new Vector<int>(-1), laneCount), // All lanes set
        (MaskWithLanes(0, laneCount - 2, laneCount - 1), 2), // Leading lane set, gap, then last two lanes
    };

    foreach (var (mask, expectedCount) in testCases)
    {
        var expected = ReferenceImplementations.GetLastSetLaneCount_Reference(mask);
        var actual = BundleIndexing.GetLastSetLaneCount(mask);

        // First validate the reference itself, then the SIMD path against the reference.
        Assert.Equal(expectedCount, expected);
        Assert.Equal(expected, actual);
    }
}
195+
196+
/// <summary>
/// Builds a <c>Vector&lt;int&gt;</c> from the leading entries of <paramref name="values"/>.
/// Entries beyond <c>Vector&lt;int&gt;.Count</c> are ignored, and lanes with no corresponding
/// entry are left at zero.
/// </summary>
/// <param name="values">Lane values in lane order; may be shorter or longer than the vector.</param>
/// <returns>A vector holding the copied lane values.</returns>
private static Vector<int> CreateMask(int[] values)
{
    var lanes = new int[Vector<int>.Count];
    var copied = Math.Min(values.Length, Vector<int>.Count);
    Array.Copy(values, lanes, copied);
    return new Vector<int>(lanes);
}
208+
}
209+
}

SimdTests/PerformanceHarness.cs

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
using System;
2+
using System.Diagnostics;
3+
using System.Numerics;
4+
using System.Runtime.CompilerServices;
5+
using BepuUtilities;
6+
7+
namespace SimdTests
8+
{
9+
/// <summary>
10+
/// Performance testing harness for SIMD operations.
11+
/// Measures CPU time spent executing functions many times.
12+
/// </summary>
13+
public class PerformanceHarness
14+
{
15+
private const int WarmupIterations = 1000;
16+
private const int BenchmarkIterations = 10_000_000;
17+
18+
/// <summary>
/// Times repeated invocations of <paramref name="function"/> and reports the mean cost of one call.
/// </summary>
/// <typeparam name="T">Return type of the measured delegate; the returned value is discarded.</typeparam>
/// <param name="function">Delegate to invoke during warmup and measurement.</param>
/// <param name="name">Label for the benchmark; not used inside this method.</param>
/// <param name="iterations">Number of timed invocations.</param>
/// <returns>Elapsed stopwatch time in nanoseconds divided by <paramref name="iterations"/>.</returns>
private static double BenchmarkFunction<T>(Func<T> function, string name, int iterations = BenchmarkIterations)
{
    // Untimed warmup calls before measurement starts.
    for (int warmup = 0; warmup < WarmupIterations; warmup++)
    {
        _ = function();
    }

    // Collect, drain finalizers, and collect again before the timed loop starts.
    GC.Collect();
    GC.WaitForPendingFinalizers();
    GC.Collect();

    var timer = Stopwatch.StartNew();
    for (int run = 0; run < iterations; run++)
    {
        _ = function();
    }
    timer.Stop();

    return timer.Elapsed.TotalNanoseconds / iterations;
}
44+
45+
/// <summary>
/// Forwards to MathHelper.FastReciprocal. Marked NoInlining so the call stays a separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<float> TestFastReciprocal(Vector<float> input) =>
    MathHelper.FastReciprocal(input);
50+
51+
/// <summary>
/// Forwards to ReferenceImplementations.FastReciprocal_Reference. Marked NoInlining so the call
/// stays a separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<float> TestFastReciprocal_Reference(Vector<float> input) =>
    ReferenceImplementations.FastReciprocal_Reference(input);
56+
57+
/// <summary>
/// Forwards to MathHelper.FastReciprocalSquareRoot. Marked NoInlining so the call stays a
/// separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<float> TestFastReciprocalSquareRoot(Vector<float> input) =>
    MathHelper.FastReciprocalSquareRoot(input);
62+
63+
/// <summary>
/// Forwards to ReferenceImplementations.FastReciprocalSquareRoot_Reference. Marked NoInlining so
/// the call stays a separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<float> TestFastReciprocalSquareRoot_Reference(Vector<float> input) =>
    ReferenceImplementations.FastReciprocalSquareRoot_Reference(input);
68+
69+
/// <summary>
/// Forwards to BundleIndexing.CreateTrailingMaskForCountInBundle. Marked NoInlining so the call
/// stays a separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<int> TestCreateTrailingMaskForCountInBundle(int count) =>
    BundleIndexing.CreateTrailingMaskForCountInBundle(count);
74+
75+
/// <summary>
/// Forwards to ReferenceImplementations.CreateTrailingMaskForCountInBundle_Reference. Marked
/// NoInlining so the call stays a separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<int> TestCreateTrailingMaskForCountInBundle_Reference(int count) =>
    ReferenceImplementations.CreateTrailingMaskForCountInBundle_Reference(count);
80+
81+
/// <summary>
/// Forwards to BundleIndexing.CreateMaskForCountInBundle. Marked NoInlining so the call stays a
/// separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<int> TestCreateMaskForCountInBundle(int count) =>
    BundleIndexing.CreateMaskForCountInBundle(count);
86+
87+
/// <summary>
/// Forwards to ReferenceImplementations.CreateMaskForCountInBundle_Reference. Marked NoInlining
/// so the call stays a separate method.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector<int> TestCreateMaskForCountInBundle_Reference(int count) =>
    ReferenceImplementations.CreateMaskForCountInBundle_Reference(count);
92+
93+
/// <summary>
/// Prints a header describing the vector width, instruction-set support flags and iteration
/// count, runs each benchmark in turn, and prints a closing footer.
/// </summary>
public static void RunAllBenchmarks()
{
    const string rule = "==============================================";

    // Header: environment details that affect how the numbers should be read.
    Console.WriteLine(rule);
    Console.WriteLine("SIMD Performance Benchmark Suite");
    Console.WriteLine(rule);
    Console.WriteLine($"Vector<float>.Count: {Vector<float>.Count}");
    Console.WriteLine($"Vector<int>.Count: {Vector<int>.Count}");
    Console.WriteLine($"System.Runtime.Intrinsics.X86.Avx.IsSupported: {System.Runtime.Intrinsics.X86.Avx.IsSupported}");
    Console.WriteLine($"System.Runtime.Intrinsics.X86.Avx2.IsSupported: {System.Runtime.Intrinsics.X86.Avx2.IsSupported}");
    Console.WriteLine($"System.Runtime.Intrinsics.X86.Sse.IsSupported: {System.Runtime.Intrinsics.X86.Sse.IsSupported}");
    Console.WriteLine($"System.Runtime.Intrinsics.Arm.AdvSimd.IsSupported: {System.Runtime.Intrinsics.Arm.AdvSimd.IsSupported}");
    Console.WriteLine($"Iterations per benchmark: {BenchmarkIterations:N0}");
    Console.WriteLine(rule + "\n");

    // The benchmarks run in a fixed order, one after another.
    BenchmarkFastReciprocal();
    BenchmarkFastReciprocalSquareRoot();
    BenchmarkCreateTrailingMaskForCountInBundle();
    BenchmarkCreateMaskForCountInBundle();

    // Footer.
    Console.WriteLine("\n" + rule);
    Console.WriteLine("Benchmark Complete");
    Console.WriteLine(rule);
}
116+
117+
/// <summary>
/// Times the SIMD and reference FastReciprocal wrappers on a vector with 2.5 in every lane and
/// prints both timings plus their ratio.
/// </summary>
private static void BenchmarkFastReciprocal()
{
    Console.WriteLine("Benchmarking: FastReciprocal");

    var operand = new Vector<float>(2.5f);
    var simdNs = BenchmarkFunction(() => TestFastReciprocal(operand), "FastReciprocal (SIMD)");
    var referenceNs = BenchmarkFunction(() => TestFastReciprocal_Reference(operand), "FastReciprocal (Reference)");

    // Speedup is the reference time divided by the SIMD time.
    var speedup = referenceNs / simdNs;

    Console.WriteLine($" SIMD: {simdNs:F2} ns/iteration");
    Console.WriteLine($" Reference: {referenceNs:F2} ns/iteration");
    Console.WriteLine($" Speedup: {speedup:F2}x");
    Console.WriteLine();
}
130+
131+
/// <summary>
/// Times the SIMD and reference FastReciprocalSquareRoot wrappers on a vector with 4.0 in every
/// lane and prints both timings plus their ratio.
/// </summary>
private static void BenchmarkFastReciprocalSquareRoot()
{
    Console.WriteLine("Benchmarking: FastReciprocalSquareRoot");

    var operand = new Vector<float>(4.0f);
    var simdNs = BenchmarkFunction(() => TestFastReciprocalSquareRoot(operand), "FastReciprocalSquareRoot (SIMD)");
    var referenceNs = BenchmarkFunction(() => TestFastReciprocalSquareRoot_Reference(operand), "FastReciprocalSquareRoot (Reference)");

    // Speedup is the reference time divided by the SIMD time.
    var speedup = referenceNs / simdNs;

    Console.WriteLine($" SIMD: {simdNs:F2} ns/iteration");
    Console.WriteLine($" Reference: {referenceNs:F2} ns/iteration");
    Console.WriteLine($" Speedup: {speedup:F2}x");
    Console.WriteLine();
}
144+
145+
/// <summary>
/// Times the SIMD and reference CreateTrailingMaskForCountInBundle wrappers with a count of half
/// the vector width and prints both timings plus their ratio.
/// </summary>
private static void BenchmarkCreateTrailingMaskForCountInBundle()
{
    Console.WriteLine("Benchmarking: CreateTrailingMaskForCountInBundle");

    var halfWidth = Vector<int>.Count / 2;
    var simdNs = BenchmarkFunction(() => TestCreateTrailingMaskForCountInBundle(halfWidth), "CreateTrailingMaskForCountInBundle (SIMD)");
    var referenceNs = BenchmarkFunction(() => TestCreateTrailingMaskForCountInBundle_Reference(halfWidth), "CreateTrailingMaskForCountInBundle (Reference)");

    // Speedup is the reference time divided by the SIMD time.
    var speedup = referenceNs / simdNs;

    Console.WriteLine($" SIMD: {simdNs:F2} ns/iteration");
    Console.WriteLine($" Reference: {referenceNs:F2} ns/iteration");
    Console.WriteLine($" Speedup: {speedup:F2}x");
    Console.WriteLine();
}
158+
159+
/// <summary>
/// Times the SIMD and reference CreateMaskForCountInBundle wrappers with a count of half the
/// vector width and prints both timings plus their ratio.
/// </summary>
private static void BenchmarkCreateMaskForCountInBundle()
{
    Console.WriteLine("Benchmarking: CreateMaskForCountInBundle");

    var halfWidth = Vector<int>.Count / 2;
    var simdNs = BenchmarkFunction(() => TestCreateMaskForCountInBundle(halfWidth), "CreateMaskForCountInBundle (SIMD)");
    var referenceNs = BenchmarkFunction(() => TestCreateMaskForCountInBundle_Reference(halfWidth), "CreateMaskForCountInBundle (Reference)");

    // Speedup is the reference time divided by the SIMD time.
    var speedup = referenceNs / simdNs;

    Console.WriteLine($" SIMD: {simdNs:F2} ns/iteration");
    Console.WriteLine($" Reference: {referenceNs:F2} ns/iteration");
    Console.WriteLine($" Speedup: {speedup:F2}x");
    Console.WriteLine();
}
172+
}
173+
}

0 commit comments

Comments
 (0)