Skip to content

Commit 3741320

Browse files
committed
wip
wip2 wip3
1 parent 58e735a commit 3741320

2 files changed

Lines changed: 207 additions & 1 deletion

File tree

src/Box2D.NET/B2CTZs.cs

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,80 @@
22
// SPDX-FileCopyrightText: 2025 Ikpil Choi(ikpil@naver.com)
33
// SPDX-License-Identifier: MIT
44

5+
using System;
56
using System.Runtime.CompilerServices;
67

78
namespace Box2D.NET
89
{
910
public static class B2CTZs
1011
{
12+
// Lookup for the 32-bit trailing-zero count: indexed by the top five bits of
// (isolated lowest set bit * 0x077CB531), each entry is that bit's position.
private static readonly byte[] SampleMultiplyDeBruijnBitPosition =
{
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
};

// Lookup for the 64-bit trailing-zero count: indexed by the top six bits of
// (isolated lowest set bit * 0x045FBAC7992A70DA).
private static readonly byte[] SampleMultiplyDeBruijnBitPosition64 =
{
    63, 0, 1, 52, 2, 6, 53, 26,
    3, 37, 40, 7, 33, 54, 47, 27,
    61, 4, 38, 45, 43, 41, 21, 8,
    23, 34, 58, 55, 48, 17, 28, 10,
    62, 51, 5, 25, 36, 39, 32, 46,
    60, 44, 42, 20, 22, 57, 16, 9,
    50, 24, 35, 31, 59, 19, 56, 15,
    49, 30, 18, 14, 29, 13, 12, 11,
};

// Lookup for the 32-bit leading-zero count: indexed by the top five bits of
// (value with every bit below its highest set bit filled in * 0x07C4ACDD).
private static readonly byte[] SampleClzTable =
{
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0,
};
36+
37+
38+
/// <summary>
/// Counts the trailing zero bits of a 32-bit value using a De Bruijn multiply-and-lookup.
/// </summary>
/// <param name="block">Value to inspect.</param>
/// <returns>
/// The index of the lowest set bit (0-31), or 32 when <paramref name="block"/> is 0.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint b2CTZ32_DeRrujin(uint block)
{
    if (block == 0)
    {
        return 32;
    }

    // The multiply below is meant to wrap around; say so explicitly so the method
    // also works when the project is compiled with overflow checking enabled.
    unchecked
    {
        // Isolate the lowest set bit. Written as ~x + 1 rather than -x because unary
        // minus on a uint widens to long in C#, which would turn this into a 64-bit
        // multiply followed by a narrowing cast.
        uint lowestBit = block & (~block + 1u);

        // The top five bits of the product are unique for each of the 32 possible
        // single-bit inputs and select the matching table entry.
        uint idx = (lowestBit * 0x077CB531u) >> 27;
        return SampleMultiplyDeBruijnBitPosition[idx];
    }
}
48+
49+
/// <summary>
/// Counts the leading zero bits of a 32-bit value using bit smearing plus a
/// multiply-and-lookup.
/// </summary>
/// <param name="block">Value to inspect.</param>
/// <returns>
/// The number of zero bits above the highest set bit (0-31), or 32 when
/// <paramref name="block"/> is 0.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint b2CLZ32(uint block)
{
    if (block == 0)
    {
        return 32;
    }

    // Copy the highest set bit into every lower position, so the value depends
    // only on where that highest bit was.
    block |= block >> 1;
    block |= block >> 2;
    block |= block >> 4;
    block |= block >> 8;
    block |= block >> 16;

    // The multiply is meant to wrap around; mark it unchecked so it does not throw
    // when the project is compiled with overflow checking enabled.
    unchecked
    {
        // The top five bits of the product select the table entry for this bit position.
        uint idx = (block * 0x07C4ACDDu) >> 27;
        return SampleClzTable[idx];
    }
}
64+
65+
/// <summary>
/// Counts the trailing zero bits of a 64-bit value using a De Bruijn multiply-and-lookup.
/// </summary>
/// <param name="x">Value to inspect.</param>
/// <returns>
/// The index of the lowest set bit (0-63), or 64 when <paramref name="x"/> is 0.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint b2CTZ64_DeBrujin(ulong x)
{
    if (x == 0)
    {
        return 64;
    }

    // The multiply below is meant to wrap around; mark it unchecked so it does not
    // throw when the project is compiled with overflow checking enabled.
    unchecked
    {
        // x & -x isolates the lowest set bit, but unary minus is not defined for
        // ulong, so the two's-complement negation is spelled out as ~x + 1.
        ulong isolated = x & (~x + 1UL);

        // The top six bits of the product are unique for each of the 64 possible
        // single-bit inputs and select the matching table entry.
        int index = (int)((isolated * 0x045FBAC7992A70DAUL) >> 58);
        return SampleMultiplyDeBruijnBitPosition64[index];
    }
}
78+
1179
// uint에 대해 trailing zero count (CTZ)
1280
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1381
public static uint b2CTZ32(uint block)
@@ -25,7 +93,7 @@ public static uint b2CTZ32(uint block)
2593

2694
// uint에 대해 leading zero count (CLZ)
2795
[MethodImpl(MethodImplOptions.AggressiveInlining)]
28-
public static uint b2CLZ32(uint value)
96+
public static uint b2CLZ32_Old(uint value)
2997
{
3098
if (value == 0) return 32;
3199
uint count = 0;
@@ -54,6 +122,7 @@ public static uint b2CTZ64(ulong block)
54122
return count;
55123
}
56124

125+
57126
[MethodImpl(MethodImplOptions.AggressiveInlining)]
58127
public static int b2PopCount64(ulong block)
59128
{
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
using System;
2+
using System.Numerics;
3+
using BenchmarkDotNet.Attributes;
4+
5+
namespace Box2D.NET.Benchmark.Box2D.NET.Core.Benchmark;
6+
7+
/*
8+
9+
BenchmarkDotNet v0.15.2, Windows 11 (10.0.26100.4652/24H2/2024Update/HudsonValley)
10+
AMD Ryzen 7 5800X 3.80GHz, 1 CPU, 16 logical and 8 physical cores
11+
.NET SDK 9.0.101
12+
[Host] : .NET 8.0.15 (8.0.1525.16413), X64 RyuJIT AVX2
13+
DefaultJob : .NET 8.0.15 (8.0.1525.16413), X64 RyuJIT AVX2
14+
15+
16+
| Method | Mean | Error | StdDev | Allocated |
17+
|-------------------- |----------:|----------:|----------:|----------:|
18+
| CTZ32 | 4.404 us | 0.0198 us | 0.0166 us | - |
19+
| CTZ32_DeBrujin | 6.896 us | 0.1373 us | 0.1925 us | - |
20+
| CTZ32_BitOperations | 6.544 us | 0.0325 us | 0.0272 us | - |
21+
| CLZ32 | 13.590 us | 0.2677 us | 0.2749 us | - |
22+
| CLZ32_BitOperations | 4.594 us | 0.0914 us | 0.1250 us | - |
23+
| CLZ32_Old | 13.247 us | 0.2080 us | 0.1844 us | - |
24+
| CTZ64_DeBrujin | 8.215 us | 0.1581 us | 0.2267 us | - |
25+
| CTZ64_BitOperations | 6.577 us | 0.0437 us | 0.0408 us | - |
26+
| CTZ64 | 4.474 us | 0.0727 us | 0.0680 us | - |
27+
28+
*/
29+
30+
/// <summary>
/// Micro-benchmarks comparing the hand-written bit-scan helpers in <c>B2CTZs</c> with the
/// <see cref="BitOperations"/> equivalents. Every benchmark walks the same
/// <see cref="Size"/> pre-generated inputs and returns the sum of the results so the
/// measured work has an observable output.
/// </summary>
/// <remarks>
/// Inputs are non-zero and are generated from a constant seed, so every run measures the
/// same data. The positions of the lowest and highest set bits are spread over the whole
/// word. An earlier version of this setup forced bit 0 on, which made every
/// trailing-zero count 0; timings taken with that version only cover that single case.
/// </remarks>
[MemoryDiagnoser]
public class BitOpBenchmarks
{
    private const int Size = 10_000;

    // Constant seed so the generated inputs are identical from run to run.
    private const int Seed = 20250101;

    private static uint[] _randomU32;
    private static ulong[] _randomU64;

    /// <summary>
    /// Fills the shared input arrays once; later calls find them populated and return.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        if (_randomU32 != null)
        {
            return;
        }

        var rnd = new Random(Seed);
        var values32 = new uint[Size];
        var values64 = new ulong[Size];

        for (int i = 0; i < Size; i++)
        {
            // A 32-bit window never sets bits 32..63, so the narrowing cast is lossless.
            values32[i] = (uint)NextNonZero(rnd, 32);
            values64[i] = NextNonZero(rnd, 64);
        }

        _randomU32 = values32;
        _randomU64 = values64;
    }

    /// <summary>
    /// Produces a non-zero value whose lowest and highest set bits sit at randomly chosen
    /// positions inside the low <paramref name="bitWidth"/> bits, with random bits between.
    /// </summary>
    /// <param name="rnd">Random source to draw from.</param>
    /// <param name="bitWidth">Number of usable low bits; expected to be 32 or 64.</param>
    /// <returns>A value with at least one bit set and no bit at or above <paramref name="bitWidth"/>.</returns>
    private static ulong NextNonZero(Random rnd, int bitWidth)
    {
        int first = rnd.Next(bitWidth);
        int second = rnd.Next(bitWidth);
        int lowest = Math.Min(first, second);
        int highest = Math.Max(first, second);

        // Random.Next() only yields 31 bits, so take raw bytes to get all 64.
        Span<byte> raw = stackalloc byte[sizeof(ulong)];
        rnd.NextBytes(raw);
        ulong noise = BitConverter.ToUInt64(raw);

        // Mask covering bit positions lowest..highest inclusive.
        ulong upToHighest = ulong.MaxValue >> (63 - highest);
        ulong fromLowest = ulong.MaxValue << lowest;
        ulong window = upToHighest & fromLowest;

        // Pin both end bits so the trailing/leading zero counts are exactly the chosen ones.
        return (noise & window) | (1UL << lowest) | (1UL << highest);
    }

    /// <summary>Sums <c>B2CTZs.b2CTZ32</c> over the 32-bit inputs.</summary>
    [Benchmark]
    public uint CTZ32()
    {
        uint sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += B2CTZs.b2CTZ32(_randomU32[i]);
        }

        return sum;
    }

    /// <summary>Sums <c>B2CTZs.b2CTZ32_DeRrujin</c> over the 32-bit inputs.</summary>
    [Benchmark]
    public uint CTZ32_DeBrujin()
    {
        uint sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += B2CTZs.b2CTZ32_DeRrujin(_randomU32[i]);
        }

        return sum;
    }

    /// <summary>Sums <see cref="BitOperations.TrailingZeroCount(uint)"/> over the 32-bit inputs.</summary>
    [Benchmark]
    public int CTZ32_BitOperations()
    {
        int sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += BitOperations.TrailingZeroCount(_randomU32[i]);
        }

        return sum;
    }

    /// <summary>Sums <c>B2CTZs.b2CLZ32</c> over the 32-bit inputs.</summary>
    [Benchmark]
    public uint CLZ32()
    {
        uint sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += B2CTZs.b2CLZ32(_randomU32[i]);
        }

        return sum;
    }

    /// <summary>Sums <see cref="BitOperations.LeadingZeroCount(uint)"/> over the 32-bit inputs.</summary>
    [Benchmark]
    public int CLZ32_BitOperations()
    {
        int sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += BitOperations.LeadingZeroCount(_randomU32[i]);
        }

        return sum;
    }

    /// <summary>Sums <c>B2CTZs.b2CLZ32_Old</c> over the 32-bit inputs.</summary>
    [Benchmark]
    public uint CLZ32_Old()
    {
        uint sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += B2CTZs.b2CLZ32_Old(_randomU32[i]);
        }

        return sum;
    }

    /// <summary>Sums <c>B2CTZs.b2CTZ64_DeBrujin</c> over the 64-bit inputs.</summary>
    [Benchmark]
    public uint CTZ64_DeBrujin()
    {
        uint sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += B2CTZs.b2CTZ64_DeBrujin(_randomU64[i]);
        }

        return sum;
    }

    /// <summary>Sums <see cref="BitOperations.TrailingZeroCount(ulong)"/> over the 64-bit inputs.</summary>
    [Benchmark]
    public int CTZ64_BitOperations()
    {
        int sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += BitOperations.TrailingZeroCount(_randomU64[i]);
        }

        return sum;
    }

    /// <summary>Sums <c>B2CTZs.b2CTZ64</c> over the 64-bit inputs.</summary>
    [Benchmark]
    public uint CTZ64()
    {
        uint sum = 0;
        for (int i = 0; i < Size; i++)
        {
            sum += B2CTZs.b2CTZ64(_randomU64[i]);
        }

        return sum;
    }
}

0 commit comments

Comments
 (0)