Skip to content

Commit 43764b3

Browse files
authored
Add InterlockedAnd tests (#1184)
This PR adds tests for `InterlockedAnd`. It adds 32-bit and 64-bit signed/unsigned integer tests. Fixes #849.
1 parent bb12b97 commit 43764b3

5 files changed

Lines changed: 824 additions & 0 deletions
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#--- source.hlsl
2+
3+
// This test exercises InterlockedAnd against non-resource (groupshared)
4+
// destinations. A single threadgroup of 32 threads concurrently updates
5+
// shared counters, so the test actually exercises atomic behavior.
6+
//
7+
// Both the 2-argument and 3-argument overloads are covered for int and uint.
8+
//
9+
// Atomicity is verified by starting a counter at 0xFFFFFFFF and having each
10+
// of 32 threads atomically clear its own unique bit (AND with ~(1 << tid)).
11+
// If any read-modify-write were non-atomic, some thread's bit clear would
12+
// be lost and the final value would be non-zero. With true atomicity, the
13+
// counter must end at exactly 0.
14+
//
15+
// For the 3-argument form we additionally verify, per-thread, that the
16+
// returned "original value" still had this thread's bit set when the AND
17+
// was performed -- this must always be true under atomic semantics, since
18+
// no other thread ever clears that bit.
19+
20+
// Output buffers read back and compared by the test pipeline.
RWStructuredBuffer<uint> OutOrigBitSet : register(u0); // one flag per thread
RWStructuredBuffer<uint> OutFinal : register(u1);      // final counter values

groupshared uint CounterU;       // 3-arg form, unsigned: bit-clear test
groupshared uint CounterUNoOrig; // 2-arg form, unsigned: bit-clear test
groupshared int CounterI;        // 3-arg form, signed: bit-clear test on -1
groupshared uint MaskedU;        // deterministic mask test (all threads
                                 // AND with the same constant)

groupshared uint OrigBitSet[32]; // per-thread: was my bit still set in every
                                 // original value the 3-arg form returned to
                                 // me (unsigned AND signed)?

// Entry point: one group of 32 threads. Each thread atomically clears its own
// bit in the shared counters, records whether the original values returned by
// the 3-argument overloads still contained that bit, and thread 0 publishes
// the final counter values.
[numthreads(32, 1, 1)]
void main(uint3 GTID : SV_GroupThreadID) {
  // Thread 0 seeds the shared counters; every thread resets its own flag.
  if (GTID.x == 0) {
    CounterU = 0xFFFFFFFFu;
    CounterUNoOrig = 0xFFFFFFFFu;
    CounterI = -1; // 0xFFFFFFFF as int
    MaskedU = 0xAAAAAAAAu;
  }
  OrigBitSet[GTID.x] = 0;
  // All threads must observe the seeded values before the first atomic.
  GroupMemoryBarrierWithGroupSync();

  uint ThreadBit = 1u << GTID.x;
  uint ThreadMask = ~ThreadBit;

  // 3-argument form, unsigned: capture the original value and check that our
  // bit was still set in it.
  uint OrigU;
  InterlockedAnd(CounterU, ThreadMask, OrigU);
  bool SawBitU = (OrigU & ThreadBit) != 0u;

  // 3-argument form, signed: same check. The comparison is done on the
  // unsigned bit pattern so that bit 31 (the sign bit) is handled like any
  // other bit.
  int OrigI;
  InterlockedAnd(CounterI, (int)ThreadMask, OrigI);
  bool SawBitI = ((uint)OrigI & ThreadBit) != 0u;

  // The flag is 1 only if BOTH 3-argument calls returned an original value
  // that still had this thread's bit set.
  OrigBitSet[GTID.x] = (SawBitU && SawBitI) ? 1u : 0u;

  // 2-argument form: no original captured.
  InterlockedAnd(CounterUNoOrig, ThreadMask);

  // 2-argument form: every thread ANDs with the same constant. Result
  // is deterministic regardless of ordering: 0xAAAAAAAA & 0x0F0F0F0F.
  InterlockedAnd(MaskedU, 0x0F0F0F0Fu);

  // Wait for every thread's atomics before reading the final values.
  GroupMemoryBarrierWithGroupSync();

  OutOrigBitSet[GTID.x] = OrigBitSet[GTID.x];

  if (GTID.x == 0) {
    OutFinal[0] = CounterU;       // 0
    OutFinal[1] = (uint)CounterI; // 0
    OutFinal[2] = CounterUNoOrig; // 0
    OutFinal[3] = MaskedU;        // 0xAAAAAAAA & 0x0F0F0F0F = 0x0A0A0A0A
  }
}
73+
74+
//--- pipeline.yaml
75+
76+
---
77+
Shaders:
78+
- Stage: Compute
79+
Entry: main
80+
Buffers:
81+
- Name: OutOrigBitSet
82+
Format: UInt32
83+
Stride: 4
84+
FillSize: 128
85+
- Name: ExpectedOrigBitSet
86+
Format: UInt32
87+
Stride: 4
88+
Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
90+
- Name: OutFinal
91+
Format: UInt32
92+
Stride: 4
93+
FillSize: 16
94+
- Name: ExpectedFinal
95+
Format: UInt32
96+
Stride: 4
97+
Data: [ 0, 0, 0, 0x0A0A0A0A ]
98+
Results:
99+
- Result: TestOrigBitSet
100+
Rule: BufferExact
101+
Actual: OutOrigBitSet
102+
Expected: ExpectedOrigBitSet
103+
- Result: TestFinal
104+
Rule: BufferExact
105+
Actual: OutFinal
106+
Expected: ExpectedFinal
107+
DescriptorSets:
108+
- Resources:
109+
- Name: OutOrigBitSet
110+
Kind: RWStructuredBuffer
111+
DirectXBinding:
112+
Register: 0
113+
Space: 0
114+
VulkanBinding:
115+
Binding: 0
116+
- Name: OutFinal
117+
Kind: RWStructuredBuffer
118+
DirectXBinding:
119+
Register: 1
120+
Space: 0
121+
VulkanBinding:
122+
Binding: 1
123+
...
124+
#--- end
125+
126+
# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
127+
# XFAIL: Clang
128+
129+
# RUN: split-file %s %t
130+
# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
131+
# RUN: %offloader %t/pipeline.yaml %t.o
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#--- source.hlsl
2+
3+
// This test exercises InterlockedAnd against non-resource (groupshared)
4+
// destinations using 64-bit integer types. A single threadgroup of 64
5+
// threads concurrently updates shared counters, so the test actually
6+
// exercises atomic behavior.
7+
//
8+
// Both the 2-argument and 3-argument overloads are covered for int64_t
9+
// and uint64_t.
10+
//
11+
// Atomicity is verified by starting a counter at 0xFFFFFFFFFFFFFFFF and
12+
// having each of 64 threads atomically clear its own unique bit
13+
// (AND with ~(1ull << tid)). If any read-modify-write were non-atomic,
14+
// some thread's bit clear would be lost and the final value would be
15+
// non-zero. With true atomicity, the counter must end at exactly 0.
16+
//
17+
// For the 3-argument form we additionally verify, per-thread, that the
18+
// returned "original value" still had this thread's bit set when the AND
19+
// was performed -- this must always be true under atomic semantics, since
20+
// no other thread ever clears that bit.
21+
22+
// Output buffers read back and compared by the test pipeline.
RWStructuredBuffer<uint> OutOrigBitSet : register(u0); // one flag per thread
RWStructuredBuffer<uint64_t> OutFinal : register(u1);  // final counter values

groupshared uint64_t CounterU;       // 3-arg form, unsigned: bit-clear test
groupshared uint64_t CounterUNoOrig; // 2-arg form, unsigned: bit-clear test
groupshared int64_t CounterI;        // 3-arg form, signed: bit-clear test on -1
groupshared uint64_t MaskedU;        // deterministic mask test (all threads
                                     // AND with the same constant)

groupshared uint OrigBitSet[64]; // per-thread: was my bit still set in every
                                 // original value the 3-arg form returned to
                                 // me (unsigned AND signed)?

// Entry point: one group of 64 threads. Each thread atomically clears its own
// bit in the shared 64-bit counters, records whether the original values
// returned by the 3-argument overloads still contained that bit, and thread 0
// publishes the final counter values.
[numthreads(64, 1, 1)]
void main(uint3 GTID : SV_GroupThreadID) {
  // Thread 0 seeds the shared counters; every thread resets its own flag.
  if (GTID.x == 0) {
    CounterU = 0xFFFFFFFFFFFFFFFFull;
    CounterUNoOrig = 0xFFFFFFFFFFFFFFFFull;
    CounterI = (int64_t)-1; // all bits set
    MaskedU = 0xAAAAAAAAAAAAAAAAull;
  }
  OrigBitSet[GTID.x] = 0;
  // All threads must observe the seeded values before the first atomic.
  GroupMemoryBarrierWithGroupSync();

  uint64_t ThreadBit = 1ull << GTID.x;
  uint64_t ThreadMask = ~ThreadBit;

  // 3-argument form, unsigned: capture the original value and check that our
  // bit was still set in it.
  uint64_t OrigU;
  InterlockedAnd(CounterU, ThreadMask, OrigU);
  bool SawBitU = (OrigU & ThreadBit) != 0ull;

  // 3-argument form, signed: same check. The comparison is done on the
  // unsigned bit pattern so that bit 63 (the sign bit) is handled like any
  // other bit.
  int64_t OrigI;
  InterlockedAnd(CounterI, (int64_t)ThreadMask, OrigI);
  bool SawBitI = ((uint64_t)OrigI & ThreadBit) != 0ull;

  // The flag is 1 only if BOTH 3-argument calls returned an original value
  // that still had this thread's bit set.
  OrigBitSet[GTID.x] = (SawBitU && SawBitI) ? 1u : 0u;

  // 2-argument form: no original captured.
  InterlockedAnd(CounterUNoOrig, ThreadMask);

  // 2-argument form: every thread ANDs with the same constant. Result
  // is deterministic regardless of ordering. The mask exercises both the
  // low and high 32-bit halves.
  InterlockedAnd(MaskedU, 0x0F0F0F0F0F0F0F0Full);

  // Wait for every thread's atomics before reading the final values.
  GroupMemoryBarrierWithGroupSync();

  OutOrigBitSet[GTID.x] = OrigBitSet[GTID.x];

  if (GTID.x == 0) {
    OutFinal[0] = CounterU;           // 0
    OutFinal[1] = (uint64_t)CounterI; // 0
    OutFinal[2] = CounterUNoOrig;     // 0
    OutFinal[3] = MaskedU;            // 0x0A0A0A0A0A0A0A0A
  }
}
76+
77+
//--- pipeline.yaml
78+
79+
---
80+
Shaders:
81+
- Stage: Compute
82+
Entry: main
83+
Buffers:
84+
- Name: OutOrigBitSet
85+
Format: UInt32
86+
Stride: 4
87+
FillSize: 256
88+
- Name: ExpectedOrigBitSet
89+
Format: UInt32
90+
Stride: 4
91+
Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
95+
- Name: OutFinal
96+
Format: UInt64
97+
Stride: 8
98+
FillSize: 32
99+
- Name: ExpectedFinal
100+
Format: UInt64
101+
Stride: 8
102+
Data: [ 0, 0, 0, 0x0A0A0A0A0A0A0A0A ]
103+
Results:
104+
- Result: TestOrigBitSet
105+
Rule: BufferExact
106+
Actual: OutOrigBitSet
107+
Expected: ExpectedOrigBitSet
108+
- Result: TestFinal
109+
Rule: BufferExact
110+
Actual: OutFinal
111+
Expected: ExpectedFinal
112+
DescriptorSets:
113+
- Resources:
114+
- Name: OutOrigBitSet
115+
Kind: RWStructuredBuffer
116+
DirectXBinding:
117+
Register: 0
118+
Space: 0
119+
VulkanBinding:
120+
Binding: 0
121+
- Name: OutFinal
122+
Kind: RWStructuredBuffer
123+
DirectXBinding:
124+
Register: 1
125+
Space: 0
126+
VulkanBinding:
127+
Binding: 1
128+
...
129+
#--- end
130+
131+
# Unimplemented: https://github.com/llvm/llvm-project/issues/99125
132+
# XFAIL: Clang
133+
134+
# Bug: https://github.com/llvm/offload-test-suite/issues/1164
135+
# XFAIL: Metal && DXC
136+
137+
# REQUIRES: Int64 && Int64GroupSharedAtomics && SM_6_6
138+
# RUN: split-file %s %t
139+
# RUN: %dxc_target -HV 202x -T cs_6_6 -Fo %t.o %t/source.hlsl
140+
# RUN: %offloader %t/pipeline.yaml %t.o

0 commit comments

Comments
 (0)