diff --git a/include/API/AccelerationStructure.h b/include/API/AccelerationStructure.h index 616865ecc..844e8e0ac 100644 --- a/include/API/AccelerationStructure.h +++ b/include/API/AccelerationStructure.h @@ -55,6 +55,8 @@ struct AccelerationStructureInstance { float Transform[3][4] = {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}}; uint32_t InstanceID = 0; uint8_t InstanceMask = 0xFF; + // 24-bit; high bits are truncated by the backend to match DXR's bitfield. + uint32_t InstanceContributionToHitGroupIndex = 0; AccelerationStructure *BLAS = nullptr; }; diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 9cf0e5f77..c138457c2 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -515,6 +515,7 @@ struct InstanceDesc { float Transform[12] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}; uint32_t InstanceID = 0; uint8_t InstanceMask = 0xFF; + uint32_t InstanceContributionToHitGroupIndex = 0; }; struct TLASDesc { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 0050b91f0..2d49704b6 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -3065,7 +3065,8 @@ llvm::Error DXComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { // silent narrowing. 
NI.InstanceID = Inst.InstanceID & 0xFFFFFFu; NI.InstanceMask = Inst.InstanceMask; - NI.InstanceContributionToHitGroupIndex = 0; + NI.InstanceContributionToHitGroupIndex = + Inst.InstanceContributionToHitGroupIndex & 0xFFFFFFu; NI.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_NONE; auto *BLASPtr = llvm::cast(Inst.BLAS); NI.AccelerationStructure = BLASPtr->getGPUVirtualAddress(); diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index a66f7b49e..e94de4dc2 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -200,6 +200,8 @@ llvm::Error offloadtest::buildPipelineAccelerationStructures( memcpy(Inst.Transform, I.Transform, sizeof(I.Transform)); Inst.InstanceID = I.InstanceID; Inst.InstanceMask = I.InstanceMask; + Inst.InstanceContributionToHitGroupIndex = + I.InstanceContributionToHitGroupIndex; Inst.BLAS = It->second; Req.Instances.push_back(Inst); } diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 9eb449f34..b605ae77e 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -1381,7 +1381,11 @@ class MTLDevice : public offloadtest::Device { // `InstanceContributionToHitGroupIndex`). 
const uint32_t InstCount = static_cast(R.first->TLASPtr->Instances.size()); - llvm::SmallVector Contributions(InstCount, 0); + llvm::SmallVector Contributions; + Contributions.reserve(InstCount); + for (const auto &Inst : R.first->TLASPtr->Instances) + Contributions.push_back(Inst.InstanceContributionToHitGroupIndex & + 0xFFFFFFu); const BufferCreateDesc Desc{MemoryLocation::GpuToCpu, BufferUsage::Storage}; auto ContribBufOrErr = createBufferWithData( diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 7adc393b9..35f7d7d8b 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -4397,7 +4397,8 @@ llvm::Error VKComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { memcpy(&NI.transform.matrix, Inst.Transform, sizeof(Inst.Transform)); NI.instanceCustomIndex = Inst.InstanceID & 0xFFFFFFu; NI.mask = Inst.InstanceMask; - NI.instanceShaderBindingTableRecordOffset = 0; + NI.instanceShaderBindingTableRecordOffset = + Inst.InstanceContributionToHitGroupIndex & 0xFFFFFFu; NI.flags = 0; auto *BLASPtr = llvm::cast(Inst.BLAS); NI.accelerationStructureReference = BLASPtr->getDeviceAddress(); diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 3940a550d..5bd9b0355 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -668,6 +668,8 @@ void MappingTraits::mapping( uint32_t Mask = D.InstanceMask; I.mapOptional("InstanceMask", Mask, 255u); D.InstanceMask = static_cast(Mask); + I.mapOptional("InstanceContributionToHitGroupIndex", + D.InstanceContributionToHitGroupIndex, 0u); } void MappingTraits::mapping(IO &I, diff --git a/test/Feature/InlineRT/instance-contribution.test b/test/Feature/InlineRT/instance-contribution.test new file mode 100644 index 000000000..6eda7cae0 --- /dev/null +++ b/test/Feature/InlineRT/instance-contribution.test @@ -0,0 +1,93 @@ +#--- source.hlsl + +[[vk::binding(0, 0)]] RaytracingAccelerationStructure Scene : register(t0); +[[vk::binding(1, 0)]] RWStructuredBuffer Output : register(u0); + 
+[numthreads(3,1,1)] +void main(uint3 tid : SV_DispatchThreadID) { + // Three instances of the same triangle BLAS tiled along x at x = -4, 0, +4 + // with distinct InstanceContributionToHitGroupIndex values. Each lane fires + // straight down at its own instance, so the value returned by + // CommittedInstanceContributionToHitGroupIndex() must match that instance. + RayDesc Ray; + Ray.Origin = float3((float(tid.x) - 1.0) * 4.0, 0, 1); + Ray.Direction = float3(0, 0, -1); + Ray.TMin = 0.0; + Ray.TMax = 100.0; + RayQuery Q; + Q.TraceRayInline(Scene, RAY_FLAG_NONE, 0xFF, Ray); + Q.Proceed(); + Output[tid.x] = Q.CommittedStatus() == COMMITTED_TRIANGLE_HIT + ? Q.CommittedInstanceContributionToHitGroupIndex() + : 0xFFFFFFFF; +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: Vertices + Format: Float32 + Stride: 12 + Data: [ 0.0, 1.0, 0.0, -1.0, -1.0, 0.0, 1.0, -1.0, 0.0 ] + - Name: Output + Format: UInt32 + Stride: 4 + FillSize: 12 + - Name: Expected + Format: UInt32 + Stride: 4 + # 24-bit field: highest legal value is 0xFFFFFF. Pick three distinct + # values across the range, including one that exercises the top bits.
+ Data: [ 7, 99, 16777215 ] +AccelerationStructures: + BLAS: + - Name: TriangleBLAS + Triangles: + - VertexBuffer: Vertices + VertexFormat: RGB32Float + VertexStride: 12 + VertexCount: 3 + TLAS: + - Name: Scene + Instances: + - BLAS: TriangleBLAS + Transform: [1, 0, 0, -4, 0, 1, 0, 0, 0, 0, 1, 0] + InstanceContributionToHitGroupIndex: 7 + - BLAS: TriangleBLAS + Transform: [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0] + InstanceContributionToHitGroupIndex: 99 + - BLAS: TriangleBLAS + Transform: [1, 0, 0, 4, 0, 1, 0, 0, 0, 0, 1, 0] + InstanceContributionToHitGroupIndex: 16777215 +DescriptorSets: + - Resources: + - Name: Scene + Kind: AccelerationStructure + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Output + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 1 +Results: + - Result: InstanceContribution + Rule: BufferExact + Actual: Output + Expected: Expected +... +#--- end + +# REQUIRES: acceleration-structure +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o