diff --git a/include/API/Device.h b/include/API/Device.h index 259923a7f..37a10c386 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -327,9 +327,12 @@ createBufferWithData(Device &Dev, std::string Name, // TLAS handles come in pre-allocated because the caller's binding loop // stamps the AS pointer into descriptor bundles before this helper runs; // BLAS handles are allocated inline since BLASes aren't user-bindable. -// BLAS and TLAS builds get separate `Enc.batchBuildAS()` calls so the -// implicit BLAS-write → TLAS-read barrier sits between them. Outputs -// (`OutBLAS`, `OutInputBuffers`) must outlive command-buffer submission. +// `PreallocatedTLASes` is keyed by `TLASDesc::Name`; each map value is a +// vector of `TLASDesc::ArraySize` handles (one per descriptor-array +// element). BLAS and TLAS builds get separate `Enc.batchBuildAS()` calls +// so the implicit BLAS-write → TLAS-read barrier sits between them. +// Outputs (`OutBLAS`, `OutInputBuffers`) must outlive command-buffer +// submission. // // TODO: `Pipeline` belongs to the test framework, not the rendering backend // API. This helper lives here only because `executeProgram` is still on @@ -337,7 +340,8 @@ createBufferWithData(Device &Dev, std::string Name, llvm::Error buildPipelineAccelerationStructures( Device &Dev, ComputeEncoder &Enc, Pipeline &P, llvm::SmallVectorImpl> &OutBLAS, - const llvm::StringMap> + const llvm::StringMap< + llvm::SmallVector>> &PreallocatedTLASes, llvm::SmallVectorImpl> &OutInputBuffers); diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 9cf0e5f77..20fdd121d 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -338,11 +338,7 @@ struct Resource { return isByteAddressBuffer() ? 4 : BufferPtr->getElementSize(); } - uint32_t getArraySize() const { - if (isSampler() || isAccelerationStructure()) - return 1; - return BufferPtr->ArraySize; - } + uint32_t getArraySize() const; // out-of-line: needs complete TLASDesc. uint32_t size() const { assert(!isSampler() && !isAccelerationStructure() && @@ -519,7 +515,11 @@ struct InstanceDesc { struct TLASDesc { std::string Name; - llvm::SmallVector Instances; + uint32_t ArraySize = 1; + // Outer vector has ArraySize entries (one per descriptor-array element); + // inner vector lists the instances for that element. Mirrors + // CPUBuffer::Data's ArraySize-driven layout. + llvm::SmallVector, 1> Instances; }; struct AccelerationStructureDescs { @@ -527,6 +527,14 @@ struct AccelerationStructureDescs { llvm::SmallVector TLAS; }; +inline uint32_t Resource::getArraySize() const { + if (isSampler()) + return 1; + if (isAccelerationStructure()) + return TLASPtr->ArraySize; + return BufferPtr->ArraySize; +} + struct Pipeline { ShaderPipelineKind Kind; llvm::SmallVector Shaders; diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 0050b91f0..f07258e97 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -1129,8 +1129,11 @@ class DXDevice : public offloadtest::Device { // Parallel-indexed to `P.AccelStructs.BLAS`. llvm::SmallVector> BLASes; - // Keyed by `TLASDesc::Name`. - llvm::StringMap> TLASes; + // Keyed by `TLASDesc::Name`; each value holds `TLASDesc::ArraySize` + // handles (one per descriptor-array element). + llvm::StringMap< + llvm::SmallVector>> + TLASes; // Vertex/index buffers consumed during AS builds; must outlive submission. llvm::SmallVector> ASInputBuffers; }; @@ -2254,11 +2257,9 @@ class DXDevice : public offloadtest::Device { return HeapIdx; } - llvm::Expected> createAS(Resource &R) { - assert(R.TLASPtr && "AS resource must be resolved to a TLAS"); - assert(R.getArraySize() == 1 && "AS arrays not yet supported"); - auto SizesOrErr = - getTLASBuildSizes(static_cast(R.TLASPtr->Instances.size())); + llvm::Expected> + createAS(uint32_t InstanceCount) { + auto SizesOrErr = getTLASBuildSizes(InstanceCount); if (!SizesOrErr) return SizesOrErr.takeError(); return createTLAS(*SizesOrErr); @@ -2266,18 +2267,25 @@ class DXDevice : public offloadtest::Device { llvm::Error createBuffers(Pipeline &P, InvocationState &IS) { auto CreateBuffer = - [&IS, + [&P, &IS, this](Resource &R, llvm::SmallVectorImpl &Resources) -> llvm::Error { if (R.isAccelerationStructure()) { - auto ASOrErr = createAS(R); - if (!ASOrErr) - return ASOrErr.takeError(); + assert(R.TLASPtr && "AS resource must be resolved to a TLAS"); + const TLASDesc &TD = *R.TLASPtr; ResourceBundle Bundle; - Bundle.emplace_back( - llvm::cast(ASOrErr->get())); - auto Inserted = - IS.TLASes.try_emplace(R.TLASPtr->Name, std::move(*ASOrErr)); + llvm::SmallVector> Handles; + Handles.reserve(TD.ArraySize); + for (uint32_t Elt = 0; Elt < TD.ArraySize; ++Elt) { + auto ASOrErr = + createAS(static_cast(TD.Instances[Elt].size())); + if (!ASOrErr) + return ASOrErr.takeError(); + Bundle.emplace_back( + llvm::cast(ASOrErr->get())); + Handles.push_back(std::move(*ASOrErr)); + } + auto Inserted = IS.TLASes.try_emplace(TD.Name, std::move(Handles)); assert(Inserted.second && "TLAS bound to multiple resources NYI"); (void)Inserted; Resources.push_back(std::make_pair(&R, std::move(Bundle))); diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index a66f7b49e..41199b5f4 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -98,7 +98,8 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, llvm::Error offloadtest::buildPipelineAccelerationStructures( Device &Dev, ComputeEncoder &Enc, Pipeline &P, llvm::SmallVectorImpl> &OutBLAS, - const llvm::StringMap> + const llvm::StringMap< + llvm::SmallVector>> &PreallocatedTLASes, llvm::SmallVectorImpl> &OutInputBuffers) { if (P.AccelStructs.BLAS.empty() && P.AccelStructs.TLAS.empty()) @@ -179,33 +180,41 @@ llvm::Error offloadtest::buildPipelineAccelerationStructures( // Separate `batchBuildAS()` from the BLAS batch so the BLAS-write → // TLAS-read barrier between them is implicit. llvm::SmallVector TLASRequests; - TLASRequests.reserve(PreallocatedTLASes.size()); for (const TLASDesc &TD : P.AccelStructs.TLAS) { auto ASIt = PreallocatedTLASes.find(TD.Name); if (ASIt == PreallocatedTLASes.end()) continue; // TLAS declared but not bound to any resource. - TLASBuildRequest Req; - Req.AS = ASIt->second.get(); - Req.Instances.reserve(TD.Instances.size()); - for (const auto &I : TD.Instances) { - auto It = BLASesByName.find(I.BLAS); - if (It == BLASesByName.end()) - return llvm::createStringError(std::errc::invalid_argument, - "TLAS '%s' references unknown BLAS '%s'", - TD.Name.c_str(), I.BLAS.c_str()); - - AccelerationStructureInstance Inst; - static_assert(sizeof(Inst.Transform) == sizeof(I.Transform), - "Transform layout mismatch"); - memcpy(Inst.Transform, I.Transform, sizeof(I.Transform)); - Inst.InstanceID = I.InstanceID; - Inst.InstanceMask = I.InstanceMask; - Inst.BLAS = It->second; - Req.Instances.push_back(Inst); + const auto &Handles = ASIt->second; + assert(Handles.size() == TD.ArraySize && + "PreallocatedTLASes entry size must equal TLASDesc::ArraySize"); + assert(TD.Instances.size() == TD.ArraySize && + "TLASDesc::Instances must have ArraySize entries (one per element)"); + for (uint32_t Elt = 0; Elt < TD.ArraySize; ++Elt) { + TLASBuildRequest Req; + Req.AS = Handles[Elt].get(); + const auto &EltInstances = TD.Instances[Elt]; + Req.Instances.reserve(EltInstances.size()); + for (const auto &I : EltInstances) { + auto It = BLASesByName.find(I.BLAS); + if (It == BLASesByName.end()) + return llvm::createStringError( + std::errc::invalid_argument, + "TLAS '%s' element %u references unknown BLAS '%s'", + TD.Name.c_str(), Elt, I.BLAS.c_str()); + + AccelerationStructureInstance Inst; + static_assert(sizeof(Inst.Transform) == sizeof(I.Transform), + "Transform layout mismatch"); + memcpy(Inst.Transform, I.Transform, sizeof(I.Transform)); + Inst.InstanceID = I.InstanceID; + Inst.InstanceMask = I.InstanceMask; + Inst.BLAS = It->second; + Req.Instances.push_back(Inst); + } + if (auto Err = validateTLASBuildRequest(Req)) + return Err; + TLASRequests.push_back(std::move(Req)); } - if (auto Err = validateTLASBuildRequest(Req)) - return Err; - TLASRequests.push_back(std::move(Req)); } llvm::SmallVector TLASBatch; diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 9eb449f34..021ef187c 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -962,8 +962,11 @@ class MTLDevice : public offloadtest::Device { // Parallel-indexed to `P.AccelStructs.BLAS`. llvm::SmallVector> BLASes; - // Keyed by `TLASDesc::Name`. - llvm::StringMap> TLASes; + // Keyed by `TLASDesc::Name`; each value holds `TLASDesc::ArraySize` + // handles (one per descriptor-array element). + llvm::StringMap< + llvm::SmallVector>> + TLASes; // Vertex/index buffers consumed during AS builds; must outlive submission. llvm::SmallVector> ASInputBuffers; // Per-AS header + contributions buffers; resident at dispatch. @@ -1302,11 +1305,9 @@ class MTLDevice : public offloadtest::Device { return HeapIdx; } - llvm::Expected> createAS(Resource &R) { - assert(R.TLASPtr && "AS resource must be resolved to a TLAS"); - assert(R.getArraySize() == 1 && "AS arrays not yet supported"); - auto SizesOrErr = - getTLASBuildSizes(static_cast(R.TLASPtr->Instances.size())); + llvm::Expected> + createAS(uint32_t InstanceCount) { + auto SizesOrErr = getTLASBuildSizes(InstanceCount); if (!SizesOrErr) return SizesOrErr.takeError(); return createTLAS(*SizesOrErr); @@ -1314,18 +1315,25 @@ class MTLDevice : public offloadtest::Device { llvm::Error createBuffers(Pipeline &P, InvocationState &IS) { auto CreateBuffer = - [&IS, + [&P, &IS, this](Resource &R, llvm::SmallVectorImpl &Resources) -> llvm::Error { if (R.isAccelerationStructure()) { - auto ASOrErr = createAS(R); - if (!ASOrErr) - return ASOrErr.takeError(); + assert(R.TLASPtr && "AS resource must be resolved to a TLAS"); + const TLASDesc &TD = *R.TLASPtr; ResourceBundle Bundle; - Bundle.emplace_back( - llvm::cast(ASOrErr->get())); - auto Inserted = - IS.TLASes.try_emplace(R.TLASPtr->Name, std::move(*ASOrErr)); + llvm::SmallVector> Handles; + Handles.reserve(TD.ArraySize); + for (uint32_t Elt = 0; Elt < TD.ArraySize; ++Elt) { + auto ASOrErr = + createAS(static_cast(TD.Instances[Elt].size())); + if (!ASOrErr) + return ASOrErr.takeError(); + Bundle.emplace_back( + llvm::cast(ASOrErr->get())); + Handles.push_back(std::move(*ASOrErr)); + } + auto Inserted = IS.TLASes.try_emplace(TD.Name, std::move(Handles)); assert(Inserted.second && "TLAS bound to multiple resources NYI"); (void)Inserted; Resources.emplace_back(&R, std::move(Bundle)); @@ -1373,43 +1381,50 @@ class MTLDevice : public offloadtest::Device { uint32_t HeapIndex = 0; for (auto &T : IS.DescTables) { for (auto &R : T.Resources) { - if (MetalAccelerationStructure *MTLAS = R.second[0].AS) { + if (R.first->isAccelerationStructure()) { // The Metal shader converter binds the AS indirectly through an // `IRRaytracingAccelerationStructureGPUHeader` buffer carrying the // AS's `gpuResourceID` and a pointer to an instance-contributions // array (one `uint32` per instance, equivalent to D3D12's // `InstanceContributionToHitGroupIndex`). - const uint32_t InstCount = - static_cast(R.first->TLASPtr->Instances.size()); - llvm::SmallVector Contributions(InstCount, 0); - const BufferCreateDesc Desc{MemoryLocation::GpuToCpu, - BufferUsage::Storage}; - auto ContribBufOrErr = createBufferWithData( - *IS.CB->Dev, "AS-Contributions", Desc, Contributions.data(), - InstCount * sizeof(uint32_t), nullptr, nullptr); - if (!ContribBufOrErr) - return ContribBufOrErr.takeError(); - auto *MTLContrib = llvm::cast(ContribBufOrErr->get()); - auto HeaderBufOrErr = IS.CB->Dev->createBuffer( - "AS-Header", Desc, - sizeof(IRRaytracingAccelerationStructureGPUHeader)); - if (!HeaderBufOrErr) - return HeaderBufOrErr.takeError(); - auto *MTLHeader = llvm::cast(HeaderBufOrErr->get()); - IRRaytracingSetAccelerationStructure( - static_cast(MTLHeader->Buf->contents()), - MTLAS->AccelStruct->gpuResourceID(), - static_cast(MTLContrib->Buf->contents()), - MTLContrib->Buf->gpuAddress(), Contributions.data(), InstCount); - - IRDescriptorTableSetAccelerationStructure( - IS.DescHeap->getEntryHandle(HeapIndex), - MTLHeader->Buf->gpuAddress()); - - // The shader dereferences the contributions buffer through the - // header, so both must be resident at dispatch. - IS.ASDescriptorBuffers.push_back(std::move(*HeaderBufOrErr)); - IS.ASDescriptorBuffers.push_back(std::move(*ContribBufOrErr)); + const TLASDesc &TD = *R.first->TLASPtr; + assert(R.second.size() == TD.ArraySize && + "AS bundle must hold one ResourceSet per array element"); + for (uint32_t Elt = 0; Elt < TD.ArraySize; ++Elt) { + auto *MTLAS = + llvm::cast(R.second[Elt].AS); + const uint32_t InstCount = + static_cast(TD.Instances[Elt].size()); + llvm::SmallVector Contributions(InstCount, 0); + const BufferCreateDesc Desc{MemoryLocation::GpuToCpu, + BufferUsage::Storage}; + auto ContribBufOrErr = createBufferWithData( + *IS.CB->Dev, "AS-Contributions", Desc, Contributions.data(), + InstCount * sizeof(uint32_t), nullptr, nullptr); + if (!ContribBufOrErr) + return ContribBufOrErr.takeError(); + auto *MTLContrib = llvm::cast(ContribBufOrErr->get()); + auto HeaderBufOrErr = IS.CB->Dev->createBuffer( + "AS-Header", Desc, + sizeof(IRRaytracingAccelerationStructureGPUHeader)); + if (!HeaderBufOrErr) + return HeaderBufOrErr.takeError(); + auto *MTLHeader = llvm::cast(HeaderBufOrErr->get()); + IRRaytracingSetAccelerationStructure( + static_cast(MTLHeader->Buf->contents()), + MTLAS->AccelStruct->gpuResourceID(), + static_cast(MTLContrib->Buf->contents()), + MTLContrib->Buf->gpuAddress(), Contributions.data(), InstCount); + + IRDescriptorTableSetAccelerationStructure( + IS.DescHeap->getEntryHandle(HeapIndex + Elt), + MTLHeader->Buf->gpuAddress()); + + // The shader dereferences the contributions buffer through the + // header, so both must be resident at dispatch. + IS.ASDescriptorBuffers.push_back(std::move(*HeaderBufOrErr)); + IS.ASDescriptorBuffers.push_back(std::move(*ContribBufOrErr)); + } HeapIndex += R.first->getArraySize(); continue; } @@ -1481,7 +1496,8 @@ class MTLDevice : public offloadtest::Device { for (auto &AS : IS.BLASes) MarkASResident(AS); for (auto &Entry : IS.TLASes) - MarkASResident(Entry.second); + for (auto &AS : Entry.second) + MarkASResident(AS); for (auto &B : IS.ASDescriptorBuffers) NativeEncoder->useResource(llvm::cast(B.get())->Buf, MTL::ResourceUsageRead); diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 7adc393b9..ff837c7d9 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -1279,8 +1279,11 @@ class VulkanDevice : public offloadtest::Device { // Parallel-indexed to `P.AccelStructs.BLAS`. llvm::SmallVector> BLASes; - // Keyed by `TLASDesc::Name`. - llvm::StringMap> TLASes; + // Keyed by `TLASDesc::Name`; each value holds `TLASDesc::ArraySize` + // handles (one per descriptor-array element). + llvm::StringMap< + llvm::SmallVector>> + TLASes; // Vertex/index buffers consumed during AS builds; must outlive submission. llvm::SmallVector> ASInputBuffers; }; @@ -2987,11 +2990,9 @@ class VulkanDevice : public offloadtest::Device { return ResourceRef(Host, ImageRef{0, Sampler, 0}); } - llvm::Expected> createAS(Resource &R) { - assert(R.TLASPtr && "AS resource must be resolved to a TLAS"); - assert(R.getArraySize() == 1 && "AS arrays not yet supported"); - auto SizesOrErr = - getTLASBuildSizes(static_cast(R.TLASPtr->Instances.size())); + llvm::Expected> + createAS(uint32_t InstanceCount) { + auto SizesOrErr = getTLASBuildSizes(InstanceCount); if (!SizesOrErr) return SizesOrErr.takeError(); return createTLAS(*SizesOrErr); @@ -3114,15 +3115,23 @@ class VulkanDevice : public offloadtest::Device { for (auto &D : P.Sets) { for (auto &R : D.Resources) { if (R.isAccelerationStructure()) { - auto ASOrErr = createAS(R); - if (!ASOrErr) - return ASOrErr.takeError(); - auto *VkAS = llvm::cast(ASOrErr->get()); + assert(R.TLASPtr && "AS resource must be resolved to a TLAS"); + const TLASDesc &TD = *R.TLASPtr; ResourceBundle Bundle{getDescriptorType(R.Kind), 0, nullptr}; - Bundle.ResourceRefs.push_back(ResourceRef{VkAS}); + llvm::SmallVector> Handles; + Handles.reserve(TD.ArraySize); + for (uint32_t Elt = 0; Elt < TD.ArraySize; ++Elt) { + auto ASOrErr = + createAS(static_cast(TD.Instances[Elt].size())); + if (!ASOrErr) + return ASOrErr.takeError(); + auto *VkAS = + llvm::cast(ASOrErr->get()); + Bundle.ResourceRefs.push_back(ResourceRef{VkAS}); + Handles.push_back(std::move(*ASOrErr)); + } IS.Resources.push_back(std::move(Bundle)); - auto Inserted = - IS.TLASes.try_emplace(R.TLASPtr->Name, std::move(*ASOrErr)); + auto Inserted = IS.TLASes.try_emplace(TD.Name, std::move(Handles)); assert(Inserted.second && "TLAS bound to multiple resources NYI"); (void)Inserted; continue; @@ -3296,14 +3305,19 @@ class VulkanDevice : public offloadtest::Device { for (uint32_t RIdx = 0; RIdx < P.Sets[SetIdx].Resources.size(); ++RIdx, ++OverallResIdx) { const Resource &R = P.Sets[SetIdx].Resources[RIdx]; - if (VulkanAccelerationStructure *VkAS = - IS.Resources[OverallResIdx].ResourceRefs[0].AS) { + if (R.isAccelerationStructure()) { + const auto &Refs = IS.Resources[OverallResIdx].ResourceRefs; + assert(Refs.size() == R.getArraySize() && + "AS bundle must hold one ResourceRef per array element"); const size_t HandleStart = ASHandles.size(); - ASHandles.push_back(VkAS->AccelStruct); + for (const auto &Ref : Refs) { + assert(Ref.AS && "AS ResourceRef missing AS handle"); + ASHandles.push_back(Ref.AS->AccelStruct); + } VkWriteDescriptorSetAccelerationStructureKHR ASWrite = {}; ASWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; - ASWrite.accelerationStructureCount = 1; + ASWrite.accelerationStructureCount = R.getArraySize(); ASWrite.pAccelerationStructures = &ASHandles[HandleStart]; ASInfos.push_back(ASWrite); @@ -3312,10 +3326,11 @@ class VulkanDevice : public offloadtest::Device { WDS.pNext = &ASInfos.back(); WDS.dstSet = IS.DescriptorSets[SetIdx]; WDS.dstBinding = R.VKBinding->Binding; - WDS.descriptorCount = 1; + WDS.descriptorCount = R.getArraySize(); WDS.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; llvm::outs() << "Updating AS Descriptor [" << OverallResIdx << "] { " - << SetIdx << ", " << RIdx << " }\n"; + << SetIdx << ", " << RIdx + << " } count = " << R.getArraySize() << "\n"; WriteDescriptors.push_back(WDS); continue; } diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 3940a550d..64c7a6a23 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -191,16 +191,18 @@ void MappingTraits::mapping(IO &I, // Resolve BLAS name references in TLAS instance descriptions. for (auto &T : P.AccelStructs.TLAS) { - for (auto &Inst : T.Instances) { - for (int Idx = 0, E = P.AccelStructs.BLAS.size(); Idx < E; ++Idx) { - if (P.AccelStructs.BLAS[Idx].Name == Inst.BLAS) { - Inst.BLASIdx = Idx; - break; + for (auto &Elt : T.Instances) { + for (auto &Inst : Elt) { + for (int Idx = 0, E = P.AccelStructs.BLAS.size(); Idx < E; ++Idx) { + if (P.AccelStructs.BLAS[Idx].Name == Inst.BLAS) { + Inst.BLASIdx = Idx; + break; + } } + if (Inst.BLASIdx < 0) + I.setError(Twine("TLAS '") + T.Name + "': referenced BLAS '" + + Inst.BLAS + "' not found!"); } - if (Inst.BLASIdx < 0) - I.setError(Twine("TLAS '") + T.Name + "': referenced BLAS '" + - Inst.BLAS + "' not found!"); } } } @@ -673,7 +675,34 @@ void MappingTraits::mapping( void MappingTraits::mapping(IO &I, offloadtest::TLASDesc &D) { I.mapRequired("Name", D.Name); + I.mapOptional("ArraySize", D.ArraySize, 1u); + + if (I.outputting()) { + if (D.ArraySize == 1) { + // single-element output: emit a flat `Instances:` list (parallel to + // CPUBuffer's single-element `Data: [...]` form). + I.mapRequired("Instances", D.Instances.front()); + } else { + I.mapRequired("Instances", D.Instances); + } + return; + } + + if (D.ArraySize == 1) { + // single-element input: read a flat `Instances:` list. + llvm::SmallVector Insts; + I.mapRequired("Instances", Insts); + D.Instances.push_back(std::move(Insts)); + return; + } + + // array input: read a list-of-lists; validate length matches ArraySize. I.mapRequired("Instances", D.Instances); + if (D.Instances.size() != D.ArraySize) { + I.setError(llvm::Twine("Expected ") + llvm::Twine(D.ArraySize) + + " instance lists, found " + llvm::Twine(D.Instances.size())); + return; + } } void MappingTraits::mapping( diff --git a/test/Feature/InlineRT/tlas-array.test b/test/Feature/InlineRT/tlas-array.test new file mode 100644 index 000000000..5bec02363 --- /dev/null +++ b/test/Feature/InlineRT/tlas-array.test @@ -0,0 +1,92 @@ +#--- source.hlsl + +// Verifies binding an array of `RaytracingAccelerationStructure`s. +// `Scenes[0]` and `Scenes[1]` each carry a single triangle instance with a +// distinct `InstanceID`; the shader queries each in turn and writes the hit's +// `CommittedInstanceID()` into the corresponding slot of `Output`. + +[[vk::binding(0, 0)]] RaytracingAccelerationStructure Scenes[2] : register(t0); +[[vk::binding(1, 0)]] RWStructuredBuffer Output : register(u0); + +[numthreads(1,1,1)] +void main() { + RayDesc Ray; + Ray.Origin = float3(0, 0, 1); + Ray.Direction = float3(0, 0, -1); + Ray.TMin = 0.0; + Ray.TMax = 100.0; + for (uint i = 0; i < 2; ++i) { + RayQuery Q; + Q.TraceRayInline(Scenes[i], RAY_FLAG_NONE, 0xFF, Ray); + Q.Proceed(); + Output[i] = Q.CommittedStatus() == COMMITTED_TRIANGLE_HIT + ? Q.CommittedInstanceID() + : 0xFFFFFFFF; + } +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: Vertices + Format: Float32 + Stride: 12 + Data: [ 0.0, 1.0, 0.0, -1.0, -1.0, 0.0, 1.0, -1.0, 0.0 ] + - Name: Output + Format: UInt32 + Stride: 4 + FillSize: 8 + - Name: Expected + Format: UInt32 + Stride: 4 + Data: [ 10, 20 ] +AccelerationStructures: + BLAS: + - Name: TriangleBLAS + Triangles: + - VertexBuffer: Vertices + VertexFormat: RGB32Float + VertexStride: 12 + VertexCount: 3 + TLAS: + - Name: Scenes + ArraySize: 2 + Instances: + - - BLAS: TriangleBLAS + InstanceID: 10 + Transform: [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0] + - - BLAS: TriangleBLAS + InstanceID: 20 + Transform: [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0] +DescriptorSets: + - Resources: + - Name: Scenes + Kind: AccelerationStructure + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Output + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 1 +Results: + - Result: TLASArray + Rule: BufferExact + Actual: Output + Expected: Expected +... +#--- end + +# REQUIRES: acceleration-structure +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o