diff --git a/test/WaveOps/QuadReadAcrossDiagonal.int64.test b/test/WaveOps/QuadReadAcrossDiagonal.int64.test index f14147866..e699493d2 100644 --- a/test/WaveOps/QuadReadAcrossDiagonal.int64.test +++ b/test/WaveOps/QuadReadAcrossDiagonal.int64.test @@ -232,7 +232,7 @@ DescriptorSets: # REQUIRES: Int64 -# Bug: https://github.com/llvm/offload-test-suite/issues/988 +# Bug: https://github.com/llvm/offload-test-suite/issues/959 # XFAIL: Metal # RUN: split-file %s %t diff --git a/test/WaveOps/QuadReadAcrossX.int64.test b/test/WaveOps/QuadReadAcrossX.int64.test index eb50f3281..1737bb5f5 100644 --- a/test/WaveOps/QuadReadAcrossX.int64.test +++ b/test/WaveOps/QuadReadAcrossX.int64.test @@ -232,8 +232,7 @@ DescriptorSets: # REQUIRES: Int64 -# Bug: https://github.com/llvm/offload-test-suite/issues/988 -# Bug: https://github.com/llvm/offload-test-suite/issues/989 +# Bug: https://github.com/llvm/offload-test-suite/issues/959 # XFAIL: Metal # RUN: split-file %s %t diff --git a/test/WaveOps/QuadReadAcrossY.int64.test b/test/WaveOps/QuadReadAcrossY.int64.test index e461d9c53..65903c436 100644 --- a/test/WaveOps/QuadReadAcrossY.int64.test +++ b/test/WaveOps/QuadReadAcrossY.int64.test @@ -232,7 +232,7 @@ DescriptorSets: # REQUIRES: Int64 -# Bug: https://github.com/llvm/offload-test-suite/issues/989 +# Bug: https://github.com/llvm/offload-test-suite/issues/959 # XFAIL: Metal # RUN: split-file %s %t diff --git a/test/WaveOps/QuadReadLaneAt.32.test b/test/WaveOps/QuadReadLaneAt.32.test new file mode 100644 index 000000000..ede0ccca5 --- /dev/null +++ b/test/WaveOps/QuadReadLaneAt.32.test @@ -0,0 +1,373 @@ +#--- source.hlsl +// ints +StructuredBuffer In : register(t0); +RWStructuredBuffer Out1 : register(u1); +RWStructuredBuffer Out2 : register(u2); +RWStructuredBuffer Out3 : register(u3); +RWStructuredBuffer Out4 : register(u4); + +// uints +StructuredBuffer UIn : register(t5); +RWStructuredBuffer UOut1 : register(u6); +RWStructuredBuffer UOut2 : register(u7); +RWStructuredBuffer 
UOut3 : register(u8); +RWStructuredBuffer UOut4 : register(u9); + +// floats +StructuredBuffer FIn : register(t10); +RWStructuredBuffer FOut1 : register(u11); +RWStructuredBuffer FOut2 : register(u12); +RWStructuredBuffer FOut3 : register(u13); +RWStructuredBuffer FOut4 : register(u14); + +[numthreads(2,2,1)] +void main(uint3 dtid : SV_DispatchThreadID) { + uint index = dtid.y * 2 + dtid.x; + + // int case + int4 v = In[index]; +#ifdef __spirv__ + // SPIR-V requires the lane index to be a compile-time constant (or + // dynamically uniform in SPIR-V 1.5+), so we pass a literal. + int scalar = QuadReadLaneAt(v.x, 2); + int2 vec2 = QuadReadLaneAt(v.xy, 2); + int3 vec3 = QuadReadLaneAt(v.xyz, 2); + int4 vec4 = QuadReadLaneAt(v, 2); +#else + // DXIL permits a non-uniform (per-lane) lane index. Demonstrate this with + // an identity shuffle where each lane passes its own `index`, then + // broadcast lane 2's values so the result matches the SPIR-V path. + int4 id = QuadReadLaneAt(v, index); + int scalar = QuadReadLaneAt(id.x, 2); + int2 vec2 = QuadReadLaneAt(id.xy, 2); + int3 vec3 = QuadReadLaneAt(id.xyz, 2); + int4 vec4 = QuadReadLaneAt(id, 2); +#endif + + Out1[index].x = scalar; + Out2[index].xy = vec2; + Out3[index].xyz = vec3; + Out4[index] = vec4; + + // uint case + uint4 uv = UIn[index]; +#ifdef __spirv__ + uint uscalar = QuadReadLaneAt(uv.x, 2); + uint2 uvec2 = QuadReadLaneAt(uv.xy, 2); + uint3 uvec3 = QuadReadLaneAt(uv.xyz, 2); + uint4 uvec4 = QuadReadLaneAt(uv, 2); +#else + uint4 uid = QuadReadLaneAt(uv, index); + uint uscalar = QuadReadLaneAt(uid.x, 2); + uint2 uvec2 = QuadReadLaneAt(uid.xy, 2); + uint3 uvec3 = QuadReadLaneAt(uid.xyz, 2); + uint4 uvec4 = QuadReadLaneAt(uid, 2); +#endif + + UOut1[index].x = uscalar; + UOut2[index].xy = uvec2; + UOut3[index].xyz = uvec3; + UOut4[index] = uvec4; + + // float case + float4 fv = FIn[index]; +#ifdef __spirv__ + float fscalar = QuadReadLaneAt(fv.x, 2); + float2 fvec2 = QuadReadLaneAt(fv.xy, 2); + float3 fvec3 = 
QuadReadLaneAt(fv.xyz, 2); + float4 fvec4 = QuadReadLaneAt(fv, 2); +#else + float4 fid = QuadReadLaneAt(fv, index); + float fscalar = QuadReadLaneAt(fid.x, 2); + float2 fvec2 = QuadReadLaneAt(fid.xy, 2); + float3 fvec3 = QuadReadLaneAt(fid.xyz, 2); + float4 fvec4 = QuadReadLaneAt(fid, 2); +#endif + + FOut1[index].x = fscalar; + FOut2[index].xy = fvec2; + FOut3[index].xyz = fvec3; + FOut4[index] = fvec4; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: In + Format: Int32 + Stride: 16 + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] + - Name: Out1 + Format: Int32 + Stride: 16 + FillSize: 64 + - Name: Out2 + Format: Int32 + Stride: 16 + FillSize: 64 + - Name: Out3 + Format: Int32 + Stride: 16 + FillSize: 64 + - Name: Out4 + Format: Int32 + Stride: 16 + FillSize: 64 + - Name: ExpectedOut1 + Format: Int32 + Stride: 16 + Data: [ 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0 ] + - Name: ExpectedOut2 + Format: Int32 + Stride: 16 + Data: [ 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0 ] + - Name: ExpectedOut3 + Format: Int32 + Stride: 16 + Data: [ 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0 ] + - Name: ExpectedOut4 + Format: Int32 + Stride: 16 + Data: [ 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ] + - Name: UIn + Format: UInt32 + Stride: 16 + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] + - Name: UOut1 + Format: UInt32 + Stride: 16 + FillSize: 64 + - Name: UOut2 + Format: UInt32 + Stride: 16 + FillSize: 64 + - Name: UOut3 + Format: UInt32 + Stride: 16 + FillSize: 64 + - Name: UOut4 + Format: UInt32 + Stride: 16 + FillSize: 64 + - Name: UExpectedOut1 + Format: UInt32 + Stride: 16 + Data: [ 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0 ] + - Name: UExpectedOut2 + Format: UInt32 + Stride: 16 + Data: [ 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0 ] + - Name: UExpectedOut3 + Format: UInt32 + Stride: 16 + Data: [ 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0, 9, 
10, 11, 0 ] + - Name: UExpectedOut4 + Format: UInt32 + Stride: 16 + Data: [ 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ] + - Name: FIn + Format: Float32 + Stride: 16 + Data: [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 ] + - Name: FOut1 + Format: Float32 + Stride: 16 + FillSize: 64 + - Name: FOut2 + Format: Float32 + Stride: 16 + FillSize: 64 + - Name: FOut3 + Format: Float32 + Stride: 16 + FillSize: 64 + - Name: FOut4 + Format: Float32 + Stride: 16 + FillSize: 64 + - Name: FExpectedOut1 + Format: Float32 + Stride: 16 + Data: [ 9.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0 ] + - Name: FExpectedOut2 + Format: Float32 + Stride: 16 + Data: [ 9.0, 10.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0 ] + - Name: FExpectedOut3 + Format: Float32 + Stride: 16 + Data: [ 9.0, 10.0, 11.0, 0.0, 9.0, 10.0, 11.0, 0.0, 9.0, 10.0, 11.0, 0.0, 9.0, 10.0, 11.0, 0.0 ] + - Name: FExpectedOut4 + Format: Float32 + Stride: 16 + Data: [ 9.0, 10.0, 11.0, 12.0, 9.0, 10.0, 11.0, 12.0, 9.0, 10.0, 11.0, 12.0, 9.0, 10.0, 11.0, 12.0 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: UExpectedOut1 + Rule: BufferExact + Actual: UOut1 + Expected: UExpectedOut1 + - Result: UExpectedOut2 + Rule: BufferExact + Actual: UOut2 + Expected: UExpectedOut2 + - Result: UExpectedOut3 + Rule: BufferExact + Actual: UOut3 + Expected: UExpectedOut3 + - Result: UExpectedOut4 + Rule: BufferExact + Actual: UOut4 + Expected: UExpectedOut4 + - Result: FExpectedOut1 + Rule: BufferExact + Actual: FOut1 + Expected: FExpectedOut1 + - Result: FExpectedOut2 + Rule: BufferExact + Actual: FOut2 + Expected: 
FExpectedOut2 + - Result: FExpectedOut3 + Rule: BufferExact + Actual: FOut3 + Expected: FExpectedOut3 + - Result: FExpectedOut4 + Rule: BufferExact + Actual: FOut4 + Expected: FExpectedOut4 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: UOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + - Name: FIn + Kind: StructuredBuffer + DirectXBinding: + Register: 10 + Space: 0 + VulkanBinding: + Binding: 10 + - Name: FOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 11 + Space: 0 + VulkanBinding: + Binding: 11 + - Name: FOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 12 + Space: 0 + VulkanBinding: + Binding: 12 + - Name: FOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 13 + Space: 0 + VulkanBinding: + Binding: 13 + - Name: FOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 14 + Space: 0 + VulkanBinding: + Binding: 14 + +... 
+#--- end + +# Currently not supported in Clang, ongoing PR +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/QuadReadLaneAt.convergence.test b/test/WaveOps/QuadReadLaneAt.convergence.test new file mode 100644 index 000000000..c27b200e8 --- /dev/null +++ b/test/WaveOps/QuadReadLaneAt.convergence.test @@ -0,0 +1,85 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +RWStructuredBuffer Out : register(u1); + +[numthreads(2, 2, 1)] +void main(uint3 dtid : SV_DispatchThreadID) { + uint index = dtid.y * 2 + dtid.x; + float value = In[index]; + // Exercises QuadReadLaneAt under divergent control flow; each branch only reads a lane that is active in that branch, to avoid UB. + + if(index < 2) { + // This reads lane 0 which is (0, 0), active in this branch +#ifdef __spirv__ + // SPIR-V requires the lane index to be a compile-time constant (or + // dynamically uniform in SPIR-V 1.5+). + float value_quad_l = QuadReadLaneAt(value, 0); +#else + // DXIL permits a non-uniform (per-lane) lane index. Do a non-uniform + // identity shuffle (each active lane reads its own lane) and then + // broadcast lane 0's value so the result matches the SPIR-V path. 
+ float id = QuadReadLaneAt(value, index); + float value_quad_l = QuadReadLaneAt(id, 0); +#endif + Out[index] = value - value_quad_l; + } else { + // This reads lane 3 which is (1, 1), active in this branch +#ifdef __spirv__ + float value_quad_l = QuadReadLaneAt(value, 3); +#else + float id = QuadReadLaneAt(value, index); + float value_quad_l = QuadReadLaneAt(id, 3); +#endif + Out[index] = value + value_quad_l; + } +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: In + Format: Float32 + Stride: 4 + Data: [ 1.0, 10.0, 2.0, 20.0 ] + - Name: Out + Format: Float32 + Stride: 4 + FillSize: 16 + - Name: ExpectedOut + Format: Float32 + Stride: 4 + Data: [ 0.0, 9.0, 22.0, 40.0 ] +Results: + - Result: ExpectedOut + Rule: BufferExact + Actual: Out + Expected: ExpectedOut +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# Currently not supported in Clang, ongoing PR +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/QuadReadLaneAt.fp16.test b/test/WaveOps/QuadReadLaneAt.fp16.test new file mode 100644 index 000000000..fe695c1aa --- /dev/null +++ b/test/WaveOps/QuadReadLaneAt.fp16.test @@ -0,0 +1,145 @@ +#--- source.hlsl +StructuredBuffer In: register(t0); +RWStructuredBuffer Out1 : register(u1); // test scalar +RWStructuredBuffer Out2 : register(u2); // test half2 +RWStructuredBuffer Out3 : register(u3); // test half3 +RWStructuredBuffer Out4 : register(u4); // test half4 + +[numthreads(2,2,1)] +void main(uint3 dtid : SV_DispatchThreadID) { + uint index = dtid.y * 2 + dtid.x; + half4 v = In[index]; + +#ifdef __spirv__ + // SPIR-V requires the lane index to be a compile-time constant (or + // dynamically uniform in SPIR-V 1.5+), so we pass a literal. + half scalar = QuadReadLaneAt(v.x, 2); + half2 vec2 = QuadReadLaneAt(v.xy, 2); + half3 vec3 = QuadReadLaneAt(v.xyz, 2); + half4 vec4 = QuadReadLaneAt(v, 2); +#else + // DXIL permits a non-uniform (per-lane) lane index. Demonstrate this with + // an identity shuffle where each lane passes its own `index`, then + // broadcast lane 2's values so the result matches the SPIR-V path. 
+ half4 id = QuadReadLaneAt(v, index); + half scalar = QuadReadLaneAt(id.x, 2); + half2 vec2 = QuadReadLaneAt(id.xy, 2); + half3 vec3 = QuadReadLaneAt(id.xyz, 2); + half4 vec4 = QuadReadLaneAt(id, 2); +#endif + + Out1[index].x = scalar; + Out2[index].xy = vec2; + Out3[index].xyz = vec3; + Out4[index] = vec4; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: In + Format: Float16 + Stride: 8 + Data: [ 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4880, 0x4900, 0x4980, 0x4a00, 0x4a80, 0x4b00, 0x4b80, 0x4c00 ] + - Name: Out1 + Format: Float16 + Stride: 8 + FillSize: 32 + - Name: Out2 + Format: Float16 + Stride: 8 + FillSize: 32 + - Name: Out3 + Format: Float16 + Stride: 8 + FillSize: 32 + - Name: Out4 + Format: Float16 + Stride: 8 + FillSize: 32 + - Name: ExpectedOut1 + Format: Float16 + Stride: 8 + Data: [ 0x4880, 0x0, 0x0, 0x0, 0x4880, 0x0, 0x0, 0x0, 0x4880, 0x0, 0x0, 0x0, 0x4880, 0x0, 0x0, 0x0 ] + - Name: ExpectedOut2 + Format: Float16 + Stride: 8 + Data: [ 0x4880, 0x4900, 0x0, 0x0, 0x4880, 0x4900, 0x0, 0x0, 0x4880, 0x4900, 0x0, 0x0, 0x4880, 0x4900, 0x0, 0x0 ] + - Name: ExpectedOut3 + Format: Float16 + Stride: 8 + Data: [ 0x4880, 0x4900, 0x4980, 0x0, 0x4880, 0x4900, 0x4980, 0x0, 0x4880, 0x4900, 0x4980, 0x0, 0x4880, 0x4900, 0x4980, 0x0 ] + - Name: ExpectedOut4 + Format: Float16 + Stride: 8 + Data: [ 0x4880, 0x4900, 0x4980, 0x4a00, 0x4880, 0x4900, 0x4980, 0x4a00, 0x4880, 0x4900, 0x4980, 0x4a00, 0x4880, 0x4900, 0x4980, 0x4a00 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + +... +#--- end + +# REQUIRES: Half + +# Currently not supported in Clang, ongoing PR +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/QuadReadLaneAt.fp64.test b/test/WaveOps/QuadReadLaneAt.fp64.test new file mode 100644 index 000000000..e25e72510 --- /dev/null +++ b/test/WaveOps/QuadReadLaneAt.fp64.test @@ -0,0 +1,145 @@ +#--- source.hlsl +StructuredBuffer In: register(t0); +RWStructuredBuffer Out1 : register(u1); // test scalar +RWStructuredBuffer Out2 : register(u2); // test double2 +RWStructuredBuffer Out3 : register(u3); // test double3 +RWStructuredBuffer Out4 : register(u4); // test double4 + +[numthreads(2,2,1)] +void main(uint3 dtid : SV_DispatchThreadID) { + uint index = dtid.y * 2 + dtid.x; + double4 v = In[index]; + +#ifdef __spirv__ + // SPIR-V requires the lane index to be a compile-time constant (or + // dynamically uniform in SPIR-V 1.5+), so we pass a literal. + double scalar = QuadReadLaneAt(v.x, 2); + double2 vec2 = QuadReadLaneAt(v.xy, 2); + double3 vec3 = QuadReadLaneAt(v.xyz, 2); + double4 vec4 = QuadReadLaneAt(v, 2); +#else + // DXIL permits a non-uniform (per-lane) lane index. Demonstrate this with + // an identity shuffle where each lane passes its own `index`, then + // broadcast lane 2's values so the result matches the SPIR-V path. 
+ double4 id = QuadReadLaneAt(v, index); + double scalar = QuadReadLaneAt(id.x, 2); + double2 vec2 = QuadReadLaneAt(id.xy, 2); + double3 vec3 = QuadReadLaneAt(id.xyz, 2); + double4 vec4 = QuadReadLaneAt(id, 2); +#endif + + Out1[index].x = scalar; + Out2[index].xy = vec2; + Out3[index].xyz = vec3; + Out4[index] = vec4; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: In + Format: Float64 + Stride: 32 + Data: [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 ] + - Name: Out1 + Format: Float64 + Stride: 32 + FillSize: 128 + - Name: Out2 + Format: Float64 + Stride: 32 + FillSize: 128 + - Name: Out3 + Format: Float64 + Stride: 32 + FillSize: 128 + - Name: Out4 + Format: Float64 + Stride: 32 + FillSize: 128 + - Name: ExpectedOut1 + Format: Float64 + Stride: 32 + Data: [ 9.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0 ] + - Name: ExpectedOut2 + Format: Float64 + Stride: 32 + Data: [ 9.0, 10.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0 ] + - Name: ExpectedOut3 + Format: Float64 + Stride: 32 + Data: [ 9.0, 10.0, 11.0, 0.0, 9.0, 10.0, 11.0, 0.0, 9.0, 10.0, 11.0, 0.0, 9.0, 10.0, 11.0, 0.0 ] + - Name: ExpectedOut4 + Format: Float64 + Stride: 32 + Data: [ 9.0, 10.0, 11.0, 12.0, 9.0, 10.0, 11.0, 12.0, 9.0, 10.0, 11.0, 12.0, 9.0, 10.0, 11.0, 12.0 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + +... +#--- end + +# REQUIRES: Double + +# Currently not supported in Clang, ongoing PR +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/QuadReadLaneAt.int16.test b/test/WaveOps/QuadReadLaneAt.int16.test new file mode 100644 index 000000000..7c4104807 --- /dev/null +++ b/test/WaveOps/QuadReadLaneAt.int16.test @@ -0,0 +1,261 @@ +#--- source.hlsl +// ints +StructuredBuffer In: register(t0); +RWStructuredBuffer Out1 : register(u1); // test scalar +RWStructuredBuffer Out2 : register(u2); // test int16_t2 +RWStructuredBuffer Out3 : register(u3); // test int16_t3 +RWStructuredBuffer Out4 : register(u4); // test int16_t4 + +// uints +StructuredBuffer UIn: register(t5); +RWStructuredBuffer UOut1 : register(u6); // test scalar +RWStructuredBuffer UOut2 : register(u7); // test uint16_t2 +RWStructuredBuffer UOut3 : register(u8); // test uint16_t3 +RWStructuredBuffer UOut4 : register(u9); // test uint16_t4 + +[numthreads(2,2,1)] +void main(uint3 dtid : SV_DispatchThreadID) { + uint index = dtid.y * 2 + dtid.x; + + // int case + int16_t4 v = In[index]; +#ifdef __spirv__ + // SPIR-V requires the lane index to be a compile-time constant (or + // dynamically uniform in SPIR-V 1.5+), so we pass a literal. + int16_t scalar = QuadReadLaneAt(v.x, 2); + int16_t2 vec2 = QuadReadLaneAt(v.xy, 2); + int16_t3 vec3 = QuadReadLaneAt(v.xyz, 2); + int16_t4 vec4 = QuadReadLaneAt(v, 2); +#else + // DXIL permits a non-uniform (per-lane) lane index. 
Demonstrate this with + // an identity shuffle where each lane passes its own `index`, then + // broadcast lane 2's values so the result matches the SPIR-V path. + int16_t4 id = QuadReadLaneAt(v, index); + int16_t scalar = QuadReadLaneAt(id.x, 2); + int16_t2 vec2 = QuadReadLaneAt(id.xy, 2); + int16_t3 vec3 = QuadReadLaneAt(id.xyz, 2); + int16_t4 vec4 = QuadReadLaneAt(id, 2); +#endif + + Out1[index].x = scalar; + Out2[index].xy = vec2; + Out3[index].xyz = vec3; + Out4[index] = vec4; + + // uint case + uint16_t4 uv = UIn[index]; +#ifdef __spirv__ + uint16_t uscalar = QuadReadLaneAt(uv.x, 2); + uint16_t2 uvec2 = QuadReadLaneAt(uv.xy, 2); + uint16_t3 uvec3 = QuadReadLaneAt(uv.xyz, 2); + uint16_t4 uvec4 = QuadReadLaneAt(uv, 2); +#else + uint16_t4 uid = QuadReadLaneAt(uv, index); + uint16_t uscalar = QuadReadLaneAt(uid.x, 2); + uint16_t2 uvec2 = QuadReadLaneAt(uid.xy, 2); + uint16_t3 uvec3 = QuadReadLaneAt(uid.xyz, 2); + uint16_t4 uvec4 = QuadReadLaneAt(uid, 2); +#endif + + UOut1[index].x = uscalar; + UOut2[index].xy = uvec2; + UOut3[index].xyz = uvec3; + UOut4[index] = uvec4; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: In + Format: Int16 + Stride: 8 + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] + - Name: Out1 + Format: Int16 + Stride: 8 + FillSize: 32 + - Name: Out2 + Format: Int16 + Stride: 8 + FillSize: 32 + - Name: Out3 + Format: Int16 + Stride: 8 + FillSize: 32 + - Name: Out4 + Format: Int16 + Stride: 8 + FillSize: 32 + - Name: ExpectedOut1 + Format: Int16 + Stride: 8 + Data: [ 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0 ] + - Name: ExpectedOut2 + Format: Int16 + Stride: 8 + Data: [ 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0 ] + - Name: ExpectedOut3 + Format: Int16 + Stride: 8 + Data: [ 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0 ] + - Name: ExpectedOut4 + Format: Int16 + Stride: 8 + Data: [ 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ] + - Name: UIn + 
Format: UInt16 + Stride: 8 + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] + - Name: UOut1 + Format: UInt16 + Stride: 8 + FillSize: 32 + - Name: UOut2 + Format: UInt16 + Stride: 8 + FillSize: 32 + - Name: UOut3 + Format: UInt16 + Stride: 8 + FillSize: 32 + - Name: UOut4 + Format: UInt16 + Stride: 8 + FillSize: 32 + - Name: UExpectedOut1 + Format: UInt16 + Stride: 8 + Data: [ 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0 ] + - Name: UExpectedOut2 + Format: UInt16 + Stride: 8 + Data: [ 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0 ] + - Name: UExpectedOut3 + Format: UInt16 + Stride: 8 + Data: [ 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0 ] + - Name: UExpectedOut4 + Format: UInt16 + Stride: 8 + Data: [ 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: UExpectedOut1 + Rule: BufferExact + Actual: UOut1 + Expected: UExpectedOut1 + - Result: UExpectedOut2 + Rule: BufferExact + Actual: UOut2 + Expected: UExpectedOut2 + - Result: UExpectedOut3 + Rule: BufferExact + Actual: UOut3 + Expected: UExpectedOut3 + - Result: UExpectedOut4 + Rule: BufferExact + Actual: UOut4 + Expected: UExpectedOut4 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - 
Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: UOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + +... +#--- end + +# REQUIRES: Int16 + +# Currently not supported in Clang, ongoing PR +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/QuadReadLaneAt.int64.test b/test/WaveOps/QuadReadLaneAt.int64.test new file mode 100644 index 000000000..ba6eb614f --- /dev/null +++ b/test/WaveOps/QuadReadLaneAt.int64.test @@ -0,0 +1,264 @@ +#--- source.hlsl +// ints +StructuredBuffer In: register(t0); +RWStructuredBuffer Out1 : register(u1); // test scalar +RWStructuredBuffer Out2 : register(u2); // test int64_t2 +RWStructuredBuffer Out3 : register(u3); // test int64_t3 +RWStructuredBuffer Out4 : register(u4); // test int64_t4 + +// uints +StructuredBuffer UIn: register(t5); +RWStructuredBuffer UOut1 : register(u6); // test scalar +RWStructuredBuffer UOut2 : register(u7); // test uint64_t2 +RWStructuredBuffer UOut3 : register(u8); // test uint64_t3 +RWStructuredBuffer UOut4 : register(u9); // test uint64_t4 + +[numthreads(2,2,1)] +void main(uint3 dtid : SV_DispatchThreadID) { + uint index = dtid.y * 2 + dtid.x; + + // int case + int64_t4 v = In[index]; +#ifdef __spirv__ + // SPIR-V requires the lane index to be a compile-time constant (or + // dynamically uniform in 
SPIR-V 1.5+), so we pass a literal. + int64_t scalar = QuadReadLaneAt(v.x, 2); + int64_t2 vec2 = QuadReadLaneAt(v.xy, 2); + int64_t3 vec3 = QuadReadLaneAt(v.xyz, 2); + int64_t4 vec4 = QuadReadLaneAt(v, 2); +#else + // DXIL permits a non-uniform (per-lane) lane index. Demonstrate this with + // an identity shuffle where each lane passes its own `index`, then + // broadcast lane 2's values so the result matches the SPIR-V path. + int64_t4 id = QuadReadLaneAt(v, index); + int64_t scalar = QuadReadLaneAt(id.x, 2); + int64_t2 vec2 = QuadReadLaneAt(id.xy, 2); + int64_t3 vec3 = QuadReadLaneAt(id.xyz, 2); + int64_t4 vec4 = QuadReadLaneAt(id, 2); +#endif + + Out1[index].x = scalar; + Out2[index].xy = vec2; + Out3[index].xyz = vec3; + Out4[index] = vec4; + + // uint case + uint64_t4 uv = UIn[index]; +#ifdef __spirv__ + uint64_t uscalar = QuadReadLaneAt(uv.x, 2); + uint64_t2 uvec2 = QuadReadLaneAt(uv.xy, 2); + uint64_t3 uvec3 = QuadReadLaneAt(uv.xyz, 2); + uint64_t4 uvec4 = QuadReadLaneAt(uv, 2); +#else + uint64_t4 uid = QuadReadLaneAt(uv, index); + uint64_t uscalar = QuadReadLaneAt(uid.x, 2); + uint64_t2 uvec2 = QuadReadLaneAt(uid.xy, 2); + uint64_t3 uvec3 = QuadReadLaneAt(uid.xyz, 2); + uint64_t4 uvec4 = QuadReadLaneAt(uid, 2); +#endif + + UOut1[index].x = uscalar; + UOut2[index].xy = uvec2; + UOut3[index].xyz = uvec3; + UOut4[index] = uvec4; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: In + Format: Int64 + Stride: 32 + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] + - Name: Out1 + Format: Int64 + Stride: 32 + FillSize: 128 + - Name: Out2 + Format: Int64 + Stride: 32 + FillSize: 128 + - Name: Out3 + Format: Int64 + Stride: 32 + FillSize: 128 + - Name: Out4 + Format: Int64 + Stride: 32 + FillSize: 128 + - Name: ExpectedOut1 + Format: Int64 + Stride: 32 + Data: [ 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0 ] + - Name: ExpectedOut2 + Format: Int64 + Stride: 32 + Data: [ 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 
0, 9, 10, 0, 0 ] + - Name: ExpectedOut3 + Format: Int64 + Stride: 32 + Data: [ 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0 ] + - Name: ExpectedOut4 + Format: Int64 + Stride: 32 + Data: [ 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ] + - Name: UIn + Format: UInt64 + Stride: 32 + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] + - Name: UOut1 + Format: UInt64 + Stride: 32 + FillSize: 128 + - Name: UOut2 + Format: UInt64 + Stride: 32 + FillSize: 128 + - Name: UOut3 + Format: UInt64 + Stride: 32 + FillSize: 128 + - Name: UOut4 + Format: UInt64 + Stride: 32 + FillSize: 128 + - Name: UExpectedOut1 + Format: UInt64 + Stride: 32 + Data: [ 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0 ] + - Name: UExpectedOut2 + Format: UInt64 + Stride: 32 + Data: [ 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0, 9, 10, 0, 0 ] + - Name: UExpectedOut3 + Format: UInt64 + Stride: 32 + Data: [ 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0, 9, 10, 11, 0 ] + - Name: UExpectedOut4 + Format: UInt64 + Stride: 32 + Data: [ 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: UExpectedOut1 + Rule: BufferExact + Actual: UOut1 + Expected: UExpectedOut1 + - Result: UExpectedOut2 + Rule: BufferExact + Actual: UOut2 + Expected: UExpectedOut2 + - Result: UExpectedOut3 + Rule: BufferExact + Actual: UOut3 + Expected: UExpectedOut3 + - Result: UExpectedOut4 + Rule: BufferExact + Actual: UOut4 + Expected: UExpectedOut4 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: UOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + +... +#--- end + +# REQUIRES: Int64 + +# Currently not supported in Clang, ongoing PR +# XFAIL: Clang + +# Bug: https://github.com/llvm/offload-test-suite/issues/959 +# XFAIL: Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o