Skip to content

Commit b457504

Browse files
authored
Add InterlockedOr tests (#1195)
This PR adds tests for the InterlockedOr HLSL function. It covers all existing overloads of the function, including the overloads declared as members of resources. It also fixes texture element type processing, which previously had no case for UInt32 element types. Fixes #101
1 parent a315dd1 commit b457504

11 files changed

Lines changed: 927 additions & 1 deletion

lib/API/DX/DXFeatures.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ D3D_FEATURE_ENUM(directx::MeshShaderTier, MeshShaderTier)
3939
D3D_FEATURE_BOOL(Native16BitShaderOpsSupported)
4040
D3D_FEATURE_BOOL(Int64ShaderOps)
4141
D3D_FEATURE_BOOL(AtomicInt64OnGroupSharedSupported)
42+
D3D_FEATURE_BOOL(AtomicInt64OnTypedResourceSupported)
4243
D3D_FEATURE_BOOL(DoublePrecisionFloatShaderOps)
4344
D3D_FEATURE_BOOL(WaveOps)
4445
#undef D3D_FEATURE_BOOL

lib/API/DX/Device.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ static DXGI_FORMAT getDXFormat(DataFormat Format, int Channels) {
107107
switch (Format) {
108108
case DataFormat::Int32:
109109
DXFormats(SINT) break;
110+
case DataFormat::UInt32:
111+
DXFormats(UINT) break;
110112
case DataFormat::Float32:
111113
DXFormats(FLOAT) break;
112114
case DataFormat::UInt64:

lib/API/VK/Device.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,19 @@ class VulkanDevice : public offloadtest::Device {
12521252
VkPhysicalDeviceVulkan14Features Features14{};
12531253
Features14.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_4_FEATURES;
12541254
#endif
1255+
#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME
1256+
// Opt-in extension features: query and enable
1257+
// VK_EXT_shader_image_atomic_int64 when the device advertises it so that
1258+
// tests using 64-bit atomics on RWBuffer / RWTexture (SPIR-V image storage
1259+
// class) can run.
1260+
const auto AvailableExts = queryDeviceExtensions(PhysicalDevice);
1261+
const bool HasShaderImageAtomicInt64Ext = isExtensionSupported(
1262+
AvailableExts, VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME);
1263+
VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT
1264+
FeaturesImageAtomicInt64{};
1265+
FeaturesImageAtomicInt64.sType =
1266+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT;
1267+
#endif
12551268

12561269
Features.pNext = &Features11;
12571270
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0))
@@ -1261,6 +1274,24 @@ class VulkanDevice : public offloadtest::Device {
12611274
#ifdef VK_VERSION_1_4
12621275
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0))
12631276
Features13.pNext = &Features14;
1277+
#endif
1278+
#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME
1279+
// Attach the extension features struct to the tail of the version-gated
1280+
// chain so vkGetPhysicalDeviceFeatures2 populates it and vkCreateDevice
1281+
// sees it enabled.
1282+
if (HasShaderImageAtomicInt64Ext) {
1283+
#ifdef VK_VERSION_1_4
1284+
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0))
1285+
Features14.pNext = &FeaturesImageAtomicInt64;
1286+
else
1287+
#endif
1288+
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0))
1289+
Features13.pNext = &FeaturesImageAtomicInt64;
1290+
else if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0))
1291+
Features12.pNext = &FeaturesImageAtomicInt64;
1292+
else
1293+
Features11.pNext = &FeaturesImageAtomicInt64;
1294+
}
12641295
#endif
12651296
vkGetPhysicalDeviceFeatures2(PhysicalDevice, &Features);
12661297

@@ -1290,6 +1321,16 @@ class VulkanDevice : public offloadtest::Device {
12901321
DeviceInfo.pEnabledFeatures = &Features.features;
12911322
DeviceInfo.pNext = Features.pNext;
12921323

1324+
#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME
1325+
llvm::SmallVector<const char *, 1> EnabledDeviceExtensions;
1326+
if (HasShaderImageAtomicInt64Ext &&
1327+
FeaturesImageAtomicInt64.shaderImageInt64Atomics)
1328+
EnabledDeviceExtensions.push_back(
1329+
VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME);
1330+
DeviceInfo.enabledExtensionCount = EnabledDeviceExtensions.size();
1331+
DeviceInfo.ppEnabledExtensionNames = EnabledDeviceExtensions.data();
1332+
#endif
1333+
12931334
VkDevice Device = VK_NULL_HANDLE;
12941335
if (auto Err = VK::toError(
12951336
vkCreateDevice(PhysicalDevice, &DeviceInfo, nullptr, &Device),
@@ -2197,6 +2238,14 @@ class VulkanDevice : public offloadtest::Device {
21972238
VkPhysicalDeviceVulkan14Features Features14{};
21982239
Features14.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_4_FEATURES;
21992240
#endif
2241+
#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME
2242+
VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT
2243+
FeaturesImageAtomicInt64{};
2244+
FeaturesImageAtomicInt64.sType =
2245+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT;
2246+
const bool HasShaderImageAtomicInt64Ext = isExtensionSupported(
2247+
DeviceExtensions, VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME);
2248+
#endif
22002249

22012250
Features.pNext = &Features11;
22022251
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0))
@@ -2206,6 +2255,27 @@ class VulkanDevice : public offloadtest::Device {
22062255
#ifdef VK_VERSION_1_4
22072256
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0))
22082257
Features13.pNext = &Features14;
2258+
#endif
2259+
#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME
2260+
// Append the VK_EXT_shader_image_atomic_int64 features struct to the
2261+
// pNext chain, but only if the device advertises the extension --
2262+
// otherwise drivers may reject the unknown sType. The chain above is
2263+
// built version-by-version (11 -> 12 -> 13 -> 14), so the correct
2264+
// attachment point is whichever Features1X struct is currently the
2265+
// tail for this device's apiVersion.
2266+
if (HasShaderImageAtomicInt64Ext) {
2267+
#ifdef VK_VERSION_1_4
2268+
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0))
2269+
Features14.pNext = &FeaturesImageAtomicInt64;
2270+
else
2271+
#endif
2272+
if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0))
2273+
Features13.pNext = &FeaturesImageAtomicInt64;
2274+
else if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0))
2275+
Features12.pNext = &FeaturesImageAtomicInt64;
2276+
else
2277+
Features11.pNext = &FeaturesImageAtomicInt64;
2278+
}
22092279
#endif
22102280
vkGetPhysicalDeviceFeatures2(PhysicalDevice, &Features);
22112281

@@ -2239,6 +2309,12 @@ class VulkanDevice : public offloadtest::Device {
22392309
Caps.insert( \
22402310
std::make_pair(#Name, makeCapability<bool>(#Name, Features14.Name)));
22412311
#endif
2312+
#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME
2313+
#define VULKAN_EXT_SHADER_IMAGE_ATOMIC_INT64_FEATURE_BOOL(Name) \
2314+
Caps.insert(std::make_pair( \
2315+
#Name, makeCapability<bool>(#Name, HasShaderImageAtomicInt64Ext && \
2316+
FeaturesImageAtomicInt64.Name)));
2317+
#endif
22422318
#include "VKFeatures.def"
22432319
}
22442320

lib/API/VK/VKFeatures.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,8 @@ VULKAN14_FEATURE_BOOL(hostImageCopy)
187187
VULKAN14_FEATURE_BOOL(pushDescriptor)
188188
#undef VULKAN14_FEATURE_BOOL
189189
#endif
190+
191+
#ifdef VULKAN_EXT_SHADER_IMAGE_ATOMIC_INT64_FEATURE_BOOL
192+
VULKAN_EXT_SHADER_IMAGE_ATOMIC_INT64_FEATURE_BOOL(shaderImageInt64Atomics)
193+
#undef VULKAN_EXT_SHADER_IMAGE_ATOMIC_INT64_FEATURE_BOOL
194+
#endif

test/Feature/HLSLLib/InterlockedAdd.int64.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ DescriptorSets:
152152
...
153153
#--- end
154154

155-
# REQUIRES: Int64 && Int64GroupSharedAtomics
155+
# REQUIRES: Int64 && Int64GroupSharedAtomics && SM_6_6
156156

157157
# Unimplemented: https://github.com/llvm/llvm-project/issues/99122
158158
# XFAIL: Clang
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#--- source.hlsl
2+
3+
// This test exercises InterlockedOr against non-resource (groupshared)
4+
// destinations. A single threadgroup of 32 threads concurrently updates
5+
// shared counters, so the test actually exercises atomic behavior.
6+
//
7+
// Both the 2-argument and 3-argument overloads are covered for int and uint.
8+
//
9+
// Atomicity is verified by starting a counter at 0 and having each of 32
10+
// threads atomically set its own unique bit (OR with (1 << tid)). If any
11+
// read-modify-write were non-atomic, some thread's bit set would be lost
12+
// and the final value would not be all-ones. With true atomicity, the
13+
// counter must end at exactly 0xFFFFFFFF.
14+
//
15+
// For the unsigned 3-argument form we additionally verify, per-thread, that the
16+
// returned "original value" had this thread's bit clear when the OR was
17+
// performed -- this must always be true under atomic semantics, since no
18+
// other thread ever sets that bit.
19+
20+
RWStructuredBuffer<uint> OutOrigBitClear : register(u0);
21+
RWStructuredBuffer<uint> OutFinal : register(u1);
22+
23+
groupshared uint CounterU; // 3-arg form, unsigned: bit-set test
24+
groupshared uint CounterUNoOrig; // 2-arg form, unsigned: bit-set test
25+
groupshared int CounterI; // 3-arg form, signed: bit-set test on 0
26+
groupshared uint MaskedU; // deterministic mask test (all threads
27+
// OR with the same constant)
28+
29+
groupshared uint OrigBitClear[32]; // per-thread: was my bit clear in the
30+
// original value I observed?
31+
32+
[numthreads(32, 1, 1)]
33+
void main(uint3 GTID : SV_GroupThreadID) {
34+
if (GTID.x == 0) {
35+
CounterU = 0u;
36+
CounterUNoOrig = 0u;
37+
CounterI = 0;
38+
MaskedU = 0xAAAAAAAAu;
39+
}
40+
OrigBitClear[GTID.x] = 0;
41+
GroupMemoryBarrierWithGroupSync();
42+
43+
uint ThreadBit = 1u << GTID.x;
44+
45+
// 3-argument form: capture original, then check our bit was clear in it.
46+
uint OrigU;
47+
InterlockedOr(CounterU, ThreadBit, OrigU);
48+
OrigBitClear[GTID.x] = ((OrigU & ThreadBit) == 0u) ? 1u : 0u;
49+
50+
// 3-argument form, signed. OrigI is captured but its value is not checked.
51+
int OrigI;
52+
InterlockedOr(CounterI, (int)ThreadBit, OrigI);
53+
54+
// 2-argument form: no original captured.
55+
InterlockedOr(CounterUNoOrig, ThreadBit);
56+
57+
// 2-argument form: every thread ORs with the same constant. Result
58+
// is deterministic regardless of ordering: 0xAAAAAAAA | 0x0F0F0F0F.
59+
InterlockedOr(MaskedU, 0x0F0F0F0Fu);
60+
61+
GroupMemoryBarrierWithGroupSync();
62+
63+
OutOrigBitClear[GTID.x] = OrigBitClear[GTID.x];
64+
65+
if (GTID.x == 0) {
66+
OutFinal[0] = CounterU; // 0xFFFFFFFF
67+
OutFinal[1] = (uint)CounterI; // 0xFFFFFFFF
68+
OutFinal[2] = CounterUNoOrig; // 0xFFFFFFFF
69+
OutFinal[3] = MaskedU; // 0xAAAAAAAA | 0x0F0F0F0F = 0xAFAFAFAF
70+
}
71+
}
72+
73+
//--- pipeline.yaml
74+
75+
---
76+
Shaders:
77+
- Stage: Compute
78+
Entry: main
79+
Buffers:
80+
- Name: OutOrigBitClear
81+
Format: UInt32
82+
Stride: 4
83+
FillSize: 128
84+
- Name: ExpectedOrigBitClear
85+
Format: UInt32
86+
Stride: 4
87+
Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
88+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
89+
- Name: OutFinal
90+
Format: UInt32
91+
Stride: 4
92+
FillSize: 16
93+
- Name: ExpectedFinal
94+
Format: UInt32
95+
Stride: 4
96+
Data: [ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xAFAFAFAF ]
97+
Results:
98+
- Result: TestOrigBitClear
99+
Rule: BufferExact
100+
Actual: OutOrigBitClear
101+
Expected: ExpectedOrigBitClear
102+
- Result: TestFinal
103+
Rule: BufferExact
104+
Actual: OutFinal
105+
Expected: ExpectedFinal
106+
DescriptorSets:
107+
- Resources:
108+
- Name: OutOrigBitClear
109+
Kind: RWStructuredBuffer
110+
DirectXBinding:
111+
Register: 0
112+
Space: 0
113+
VulkanBinding:
114+
Binding: 0
115+
- Name: OutFinal
116+
Kind: RWStructuredBuffer
117+
DirectXBinding:
118+
Register: 1
119+
Space: 0
120+
VulkanBinding:
121+
Binding: 1
122+
...
123+
#--- end
124+
125+
# Unimplemented: https://github.com/llvm/llvm-project/issues/99126
126+
# XFAIL: Clang
127+
128+
# RUN: split-file %s %t
129+
# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
130+
# RUN: %offloader %t/pipeline.yaml %t.o

0 commit comments

Comments
 (0)