diff --git a/include/API/AccelerationStructure.h b/include/API/AccelerationStructure.h new file mode 100644 index 000000000..1856f8e84 --- /dev/null +++ b/include/API/AccelerationStructure.h @@ -0,0 +1,171 @@ +//===- AccelerationStructure.h - RT Acceleration Structure Types ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_ACCELERATIONSTRUCTURE_H +#define OFFLOADTEST_API_ACCELERATIONSTRUCTURE_H + +#include "API/API.h" +#include "API/Buffer.h" +#include "API/Resources.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Error.h" + +#include +#include + +namespace offloadtest { + +struct AccelerationStructureSizes { + uint64_t ResultDataMaxSizeInBytes = 0; + uint64_t ScratchDataSizeInBytes = 0; + uint64_t UpdateScratchDataSizeInBytes = 0; +}; + +struct TriangleGeometryDesc { + Buffer *VertexBuffer = nullptr; + uint64_t VertexBufferOffset = 0; + uint32_t VertexCount = 0; + uint32_t VertexStride = 0; + Format VertexFormat = Format::RGB32Float; + Buffer *IndexBuffer = nullptr; + uint64_t IndexBufferOffset = 0; + uint32_t IndexCount = 0; + IndexFormat IdxFormat = IndexFormat::Uint32; + bool Opaque = true; +}; + +struct AABBGeometryDesc { + Buffer *AABBBuffer = nullptr; + uint64_t AABBBufferOffset = 0; + uint32_t AABBCount = 0; + uint32_t AABBStride = 24; + bool Opaque = true; +}; + +class AccelerationStructure; + +struct AccelerationStructureInstance { + float Transform[3][4] = {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}}; + uint32_t InstanceID = 0; + uint8_t InstanceMask = 0xFF; + AccelerationStructure *BLAS = nullptr; +}; + +struct BLASBuildRequest { + // DXR / Vulkan / Metal all forbid mixing triangle and AABB geometry in a + // single BLAS, so the geometry list is held as a variant — the invalid + // mixed-geometry state is unrepresentable. + std::variant, + llvm::SmallVector> + Geometry; + AccelerationStructureSizes Sizes; +}; + +struct TLASBuildRequest { + llvm::SmallVector Instances; + AccelerationStructureSizes Sizes; +}; + +inline llvm::Error validateGeometryDesc(const TriangleGeometryDesc &D) { + if (!D.VertexBuffer) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: VertexBuffer is null."); + if (!isPositionCompatible(D.VertexFormat)) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: VertexFormat '%s' is not position-compatible.", + getFormatName(D.VertexFormat).data()); + if (D.VertexStride < getFormatSizeInBytes(D.VertexFormat)) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: VertexStride (%u) must be >= format size (%u).", + D.VertexStride, getFormatSizeInBytes(D.VertexFormat)); + if (D.VertexCount == 0) + return llvm::createStringError(std::errc::invalid_argument, + "TriangleGeometryDesc: VertexCount is 0."); + if (D.IndexBuffer && D.IndexCount == 0) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: IndexBuffer is set but IndexCount is 0."); + if (!D.IndexBuffer && D.IndexCount != 0) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: IndexCount is set but IndexBuffer is null."); + if (D.IndexBuffer && D.IndexCount % 3 != 0) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: IndexCount (%u) must be a multiple of 3.", + D.IndexCount); + if (!D.IndexBuffer && D.VertexCount % 3 != 0) + return llvm::createStringError( + std::errc::invalid_argument, + "TriangleGeometryDesc: VertexCount (%u) must be a multiple of 3 when " + "no index buffer is provided.", + D.VertexCount); + return llvm::Error::success(); +} + +inline llvm::Error validateGeometryDesc(const AABBGeometryDesc &D) { + if (!D.AABBBuffer) + return llvm::createStringError(std::errc::invalid_argument, + "AABBGeometryDesc: AABBBuffer is null."); + if (D.AABBCount == 0) + return llvm::createStringError(std::errc::invalid_argument, + "AABBGeometryDesc: AABBCount is 0."); + if (D.AABBStride < 24) + return llvm::createStringError( + std::errc::invalid_argument, + "AABBGeometryDesc: AABBStride (%u) must be >= 24.", D.AABBStride); + return llvm::Error::success(); +} + +template +inline llvm::Error validateBLASGeometry(llvm::ArrayRef Geoms) { + if (Geoms.empty()) + return llvm::createStringError( + std::errc::invalid_argument, + "BLASBuildRequest must have at least one geometry descriptor."); + for (const auto &G : Geoms) + if (auto Err = validateGeometryDesc(G)) + return Err; + return llvm::Error::success(); +} + +inline llvm::Error validateTLASBuildRequest(const TLASBuildRequest &Req) { + if (Req.Instances.empty()) + return llvm::createStringError( + std::errc::invalid_argument, + "TLASBuildRequest: Must have at least one instance."); + for (size_t I = 0; I < Req.Instances.size(); ++I) + if (!Req.Instances[I].BLAS) + return llvm::createStringError( + std::errc::invalid_argument, + "TLASBuildRequest: Instance %zu has a null BLAS pointer.", I); + return llvm::Error::success(); +} + +class AccelerationStructure { + GPUAPI API; + +public: + virtual ~AccelerationStructure(); + AccelerationStructure(const AccelerationStructure &) = delete; + AccelerationStructure &operator=(const AccelerationStructure &) = delete; + + GPUAPI getAPI() const { return API; } + +protected: + explicit AccelerationStructure(GPUAPI API) : API(API) {} +}; + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_ACCELERATIONSTRUCTURE_H diff --git a/include/API/Device.h b/include/API/Device.h index f0b85829b..dd2a42e61 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -15,6 +15,7 @@ #include "Config.h" #include "API/API.h" +#include "API/AccelerationStructure.h" #include "API/Buffer.h" #include "API/Capabilities.h" #include "API/CommandBuffer.h" @@ -226,6 +227,21 @@ class Device { virtual llvm::Expected> createCommandBuffer() = 0; + virtual llvm::Expected createTriangleBLASBuildRequest( + llvm::ArrayRef Triangles) = 0; + + virtual llvm::Expected + createAABBBLASBuildRequest(llvm::ArrayRef AABBs) = 0; + + virtual llvm::Expected createTLASBuildRequest( + llvm::ArrayRef Instances) = 0; + + virtual llvm::Expected> + createAccelerationStructure(const BLASBuildRequest &Request) = 0; + + virtual llvm::Expected> + createAccelerationStructure(const TLASBuildRequest &Request) = 0; + virtual ~Device() = 0; llvm::StringRef getDescription() const { return Description; } diff --git a/include/API/Texture.h b/include/API/Texture.h index 26b9b030f..0922df156 100644 --- a/include/API/Texture.h +++ b/include/API/Texture.h @@ -26,6 +26,7 @@ #include namespace offloadtest { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); enum TextureUsage : uint32_t { Sampled = 1 << 0, diff --git a/lib/API/DX/DXResources.h b/lib/API/DX/DXResources.h index 3aa6f35e1..0636a9407 100644 --- a/lib/API/DX/DXResources.h +++ b/lib/API/DX/DXResources.h @@ -84,6 +84,16 @@ inline D3D12_RESOURCE_FLAGS getDXResourceFlags(TextureUsage Usage) { return Flags; } +inline DXGI_FORMAT getDXGIIndexFormat(IndexFormat Fmt) { + switch (Fmt) { + case IndexFormat::Uint16: + return DXGI_FORMAT_R16_UINT; + case IndexFormat::Uint32: + return DXGI_FORMAT_R32_UINT; + } + llvm_unreachable("All IndexFormat cases handled"); +} + } // namespace offloadtest #endif // OFFLOADTEST_API_DXRESOURCES_H diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index edfaccb7b..52fc3ec86 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -58,7 +58,7 @@ using namespace offloadtest; using Microsoft::WRL::ComPtr; -using ID3D12DeviceX = ID3D12Device2; +using ID3D12DeviceX = ID3D12Device5; using ID3D12GraphicsCommandListX = ID3D12GraphicsCommandList6; template <> char CapabilityValueEnum::ID = 0; @@ -434,6 +434,23 @@ class DXPipelineState : public offloadtest::PipelineState { } }; +class DXAccelerationStructure : public offloadtest::AccelerationStructure { +public: + ComPtr Resource; + + DXAccelerationStructure(ComPtr Resource) + : offloadtest::AccelerationStructure(GPUAPI::DirectX), + Resource(Resource) {} + + D3D12_GPU_VIRTUAL_ADDRESS getGPUVirtualAddress() const { + return Resource->GetGPUVirtualAddress(); + } + + static bool classof(const offloadtest::AccelerationStructure *AS) { + return AS->getAPI() == GPUAPI::DirectX; + } +}; + class DXFence : public offloadtest::Fence { public: #ifdef _WIN32 @@ -1559,6 +1576,160 @@ class DXDevice : public offloadtest::Device { return std::make_unique(Desc); } + llvm::Expected createTriangleBLASBuildRequest( + llvm::ArrayRef Triangles) override { + if (auto Err = validateBLASGeometry(Triangles)) + return Err; + + BLASBuildRequest Req; + Req.Geometry = llvm::SmallVector(Triangles.begin(), + Triangles.end()); + + llvm::SmallVector GeomDescs; + GeomDescs.reserve(Triangles.size()); + + for (const auto &T : Triangles) { + D3D12_RAYTRACING_GEOMETRY_DESC GD = {}; + GD.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES; + if (T.Opaque) + GD.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE; + + auto &Tri = GD.Triangles; + // GPU addresses are not needed for the prebuild size query; they will + // be populated at build time. + Tri.VertexBuffer.StrideInBytes = T.VertexStride; + Tri.VertexCount = T.VertexCount; + Tri.VertexFormat = getDXGIFormat(T.VertexFormat); + + if (T.IndexBuffer) { + Tri.IndexCount = T.IndexCount; + Tri.IndexFormat = getDXGIIndexFormat(T.IdxFormat); + } + + GeomDescs.push_back(GD); + } + + if (auto Err = queryBLASPrebuildSize(GeomDescs, Req.Sizes)) + return Err; + return Req; + } + + llvm::Expected + createAABBBLASBuildRequest(llvm::ArrayRef AABBs) override { + if (auto Err = validateBLASGeometry(AABBs)) + return Err; + + BLASBuildRequest Req; + Req.Geometry = + llvm::SmallVector(AABBs.begin(), AABBs.end()); + + llvm::SmallVector GeomDescs; + GeomDescs.reserve(AABBs.size()); + + for (const auto &A : AABBs) { + D3D12_RAYTRACING_GEOMETRY_DESC GD = {}; + GD.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS; + if (A.Opaque) + GD.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE; + + GD.AABBs.AABBs.StrideInBytes = A.AABBStride; + GD.AABBs.AABBCount = A.AABBCount; + + GeomDescs.push_back(GD); + } + + if (auto Err = queryBLASPrebuildSize(GeomDescs, Req.Sizes)) + return Err; + return Req; + } + +private: + llvm::Error queryBLASPrebuildSize( + llvm::ArrayRef GeomDescs, + AccelerationStructureSizes &OutSizes) { + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS Inputs = {}; + Inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + Inputs.NumDescs = GeomDescs.size(); + Inputs.pGeometryDescs = GeomDescs.data(); + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO Info = {}; + Device->GetRaytracingAccelerationStructurePrebuildInfo(&Inputs, &Info); + + OutSizes.ResultDataMaxSizeInBytes = Info.ResultDataMaxSizeInBytes; + OutSizes.ScratchDataSizeInBytes = Info.ScratchDataSizeInBytes; + OutSizes.UpdateScratchDataSizeInBytes = Info.UpdateScratchDataSizeInBytes; + return llvm::Error::success(); + } + +public: + llvm::Expected createTLASBuildRequest( + llvm::ArrayRef Instances) override { + TLASBuildRequest Req; + Req.Instances.assign(Instances.begin(), Instances.end()); + + if (auto Err = validateTLASBuildRequest(Req)) + return Err; + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS Inputs = {}; + Inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + Inputs.NumDescs = Instances.size(); + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO Info = {}; + Device->GetRaytracingAccelerationStructurePrebuildInfo(&Inputs, &Info); + + Req.Sizes.ResultDataMaxSizeInBytes = Info.ResultDataMaxSizeInBytes; + Req.Sizes.ScratchDataSizeInBytes = Info.ScratchDataSizeInBytes; + Req.Sizes.UpdateScratchDataSizeInBytes = Info.UpdateScratchDataSizeInBytes; + + return Req; + } + + llvm::Expected> + createAccelerationStructure(const BLASBuildRequest &Request) override { + const uint64_t AlignedSize = + llvm::alignTo(Request.Sizes.ResultDataMaxSizeInBytes, + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT); + const D3D12_HEAP_PROPERTIES HeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + const D3D12_RESOURCE_DESC BufferDesc = CD3DX12_RESOURCE_DESC::Buffer( + AlignedSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + + ComPtr ASBuffer; + if (auto Err = HR::toError( + Device->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &BufferDesc, + D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, nullptr, + IID_PPV_ARGS(&ASBuffer)), + "Failed to create BLAS resource.")) + return Err; + + return std::make_unique(ASBuffer); + } + + llvm::Expected> + createAccelerationStructure(const TLASBuildRequest &Request) override { + const uint64_t AlignedSize = + llvm::alignTo(Request.Sizes.ResultDataMaxSizeInBytes, + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT); + const D3D12_HEAP_PROPERTIES HeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + const D3D12_RESOURCE_DESC BufferDesc = CD3DX12_RESOURCE_DESC::Buffer( + AlignedSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + + ComPtr ASBuffer; + if (auto Err = HR::toError( + Device->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &BufferDesc, + D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, nullptr, + IID_PPV_ARGS(&ASBuffer)), + "Failed to create TLAS resource.")) + return Err; + + return std::make_unique(ASBuffer); + } + void addResourceUploadCommands(Resource &R, InvocationState &IS, ComPtr Destination, ComPtr Source) { diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 86875096e..41c5dd7ba 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -36,6 +36,8 @@ Texture::~Texture() {} RenderPass::~RenderPass() {} +AccelerationStructure::~AccelerationStructure() {} + Device::~Device() {} llvm::Expected>> diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 7959daa9b..66763063c 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -403,6 +403,24 @@ class MTLCommandBuffer : public offloadtest::CommandBuffer { MTLCommandBuffer() : CommandBuffer(GPUAPI::Metal) {} }; +class MetalAccelerationStructure : public offloadtest::AccelerationStructure { +public: + MTL::AccelerationStructure *AccelStruct; + + MetalAccelerationStructure(MTL::AccelerationStructure *AccelStruct) + : offloadtest::AccelerationStructure(GPUAPI::Metal), + AccelStruct(AccelStruct) {} + + ~MetalAccelerationStructure() override { + if (AccelStruct) + AccelStruct->release(); + } + + static bool classof(const offloadtest::AccelerationStructure *AS) { + return AS->getAPI() == GPUAPI::Metal; + } +}; + llvm::Expected MTLQueue::submit( llvm::SmallVector> CBs) { // Non-blocking: query how far the GPU has progressed and release @@ -1936,6 +1954,164 @@ class MTLDevice : public offloadtest::Device { MTL::CullModeNone, MeshTGSize, ObjectTGSize); } + llvm::Expected createTriangleBLASBuildRequest( + llvm::ArrayRef Triangles) override { + if (!Device->supportsRaytracing()) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + if (auto Err = validateBLASGeometry(Triangles)) + return Err; + + BLASBuildRequest Req; + Req.Geometry = llvm::SmallVector(Triangles.begin(), + Triangles.end()); + + llvm::SmallVector Descs; + Descs.reserve(Triangles.size()); + for (const auto &T : Triangles) { + auto *TD = + MTL::AccelerationStructureTriangleGeometryDescriptor::alloc()->init(); + auto *VB = llvm::cast(T.VertexBuffer); + TD->setVertexBuffer(VB->Buf); + TD->setVertexBufferOffset(T.VertexBufferOffset); + TD->setVertexStride(T.VertexStride); + TD->setVertexFormat(getMetalPositionFormat(T.VertexFormat)); + TD->setTriangleCount(T.IndexBuffer ? T.IndexCount / 3 + : T.VertexCount / 3); + if (T.IndexBuffer) { + auto *IB = llvm::cast(T.IndexBuffer); + TD->setIndexBuffer(IB->Buf); + TD->setIndexBufferOffset(T.IndexBufferOffset); + TD->setIndexType(getMetalIndexType(T.IdxFormat)); + } + TD->setOpaque(T.Opaque); + Descs.push_back(TD); + } + + queryBLASPrebuildSize(Descs, Req.Sizes); + for (auto *D : Descs) + D->release(); + return Req; + } + + llvm::Expected + createAABBBLASBuildRequest(llvm::ArrayRef AABBs) override { + if (!Device->supportsRaytracing()) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + if (auto Err = validateBLASGeometry(AABBs)) + return Err; + + BLASBuildRequest Req; + Req.Geometry = + llvm::SmallVector(AABBs.begin(), AABBs.end()); + + llvm::SmallVector Descs; + Descs.reserve(AABBs.size()); + for (const auto &A : AABBs) { + auto *AD = + MTL::AccelerationStructureBoundingBoxGeometryDescriptor::alloc() + ->init(); + auto *BB = llvm::cast(A.AABBBuffer); + AD->setBoundingBoxBuffer(BB->Buf); + AD->setBoundingBoxBufferOffset(A.AABBBufferOffset); + AD->setBoundingBoxStride(A.AABBStride); + AD->setBoundingBoxCount(A.AABBCount); + AD->setOpaque(A.Opaque); + Descs.push_back(AD); + } + + queryBLASPrebuildSize(Descs, Req.Sizes); + for (auto *D : Descs) + D->release(); + return Req; + } + +private: + void queryBLASPrebuildSize( + llvm::ArrayRef Descs, + AccelerationStructureSizes &OutSizes) { + NS::Array *GeomDescs = NS::Array::array( + reinterpret_cast(Descs.data()), Descs.size()); + + auto *Descriptor = + MTL::PrimitiveAccelerationStructureDescriptor::alloc()->init(); + Descriptor->setGeometryDescriptors(GeomDescs); + + MTL::AccelerationStructureSizes Sizes = + Device->accelerationStructureSizes(Descriptor); + + OutSizes.ResultDataMaxSizeInBytes = Sizes.accelerationStructureSize; + OutSizes.ScratchDataSizeInBytes = Sizes.buildScratchBufferSize; + OutSizes.UpdateScratchDataSizeInBytes = Sizes.refitScratchBufferSize; + + Descriptor->release(); + } + +public: + llvm::Expected createTLASBuildRequest( + llvm::ArrayRef Instances) override { + if (!Device->supportsRaytracing()) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + TLASBuildRequest Req; + Req.Instances.assign(Instances.begin(), Instances.end()); + + if (auto Err = validateTLASBuildRequest(Req)) + return Err; + + auto *Descriptor = + MTL::InstanceAccelerationStructureDescriptor::alloc()->init(); + Descriptor->setInstanceCount(Instances.size()); + + MTL::AccelerationStructureSizes Sizes = + Device->accelerationStructureSizes(Descriptor); + + Req.Sizes.ResultDataMaxSizeInBytes = Sizes.accelerationStructureSize; + Req.Sizes.ScratchDataSizeInBytes = Sizes.buildScratchBufferSize; + Req.Sizes.UpdateScratchDataSizeInBytes = Sizes.refitScratchBufferSize; + + Descriptor->release(); + + return Req; + } + + llvm::Expected> + createAccelerationStructure(const BLASBuildRequest &Request) override { + if (!Device->supportsRaytracing()) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + MTL::AccelerationStructure *AS = Device->newAccelerationStructure( + Request.Sizes.ResultDataMaxSizeInBytes); + if (!AS) + return llvm::createStringError(std::errc::not_enough_memory, + "Failed to create Metal BLAS."); + return std::make_unique(AS); + } + + llvm::Expected> + createAccelerationStructure(const TLASBuildRequest &Request) override { + if (!Device->supportsRaytracing()) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + MTL::AccelerationStructure *AS = Device->newAccelerationStructure( + Request.Sizes.ResultDataMaxSizeInBytes); + if (!AS) + return llvm::createStringError(std::errc::not_enough_memory, + "Failed to create Metal TLAS."); + return std::make_unique(AS); + } + llvm::Error executeProgram(Pipeline &P) override { InvocationState IS; diff --git a/lib/API/MTL/MTLResources.h b/lib/API/MTL/MTLResources.h index 57a56a9d3..7cb35b6dc 100644 --- a/lib/API/MTL/MTLResources.h +++ b/lib/API/MTL/MTLResources.h @@ -148,6 +148,29 @@ inline MTL::VertexFormat getMetalVertexFormat(Format Fmt) { llvm_unreachable("All Format cases handled"); } +inline MTL::AttributeFormat getMetalPositionFormat(Format Fmt) { + switch (Fmt) { + case Format::RG32Float: + return MTL::AttributeFormatFloat2; + case Format::RGB32Float: + return MTL::AttributeFormatFloat3; + case Format::RGBA32Float: + return MTL::AttributeFormatFloat4; + default: + llvm_unreachable("Format is not position-compatible"); + } +} + +inline MTL::IndexType getMetalIndexType(IndexFormat Fmt) { + switch (Fmt) { + case IndexFormat::Uint16: + return MTL::IndexTypeUInt16; + case IndexFormat::Uint32: + return MTL::IndexTypeUInt32; + } + llvm_unreachable("All IndexFormat cases handled"); +} + } // namespace offloadtest #endif // OFFLOADTEST_API_MTLRESOURCES_H diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 282b2b9c2..96a3899ff 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -505,6 +505,40 @@ class VulkanTexture : public offloadtest::Texture { } }; +class VulkanAccelerationStructure : public offloadtest::AccelerationStructure { +public: + VkDevice Dev; + VkAccelerationStructureKHR AccelStruct; + VkBuffer Buffer; + VkDeviceMemory Memory; + VkDeviceAddress DeviceAddress; + PFN_vkDestroyAccelerationStructureKHR FnDestroyAS; + + VulkanAccelerationStructure(VkDevice Dev, + VkAccelerationStructureKHR AccelStruct, + VkBuffer Buffer, VkDeviceMemory Memory, + VkDeviceAddress DeviceAddress, + PFN_vkDestroyAccelerationStructureKHR FnDestroyAS) + : offloadtest::AccelerationStructure(GPUAPI::Vulkan), Dev(Dev), + AccelStruct(AccelStruct), Buffer(Buffer), Memory(Memory), + DeviceAddress(DeviceAddress), FnDestroyAS(FnDestroyAS) {} + + ~VulkanAccelerationStructure() override { + if (AccelStruct != VK_NULL_HANDLE) + FnDestroyAS(Dev, AccelStruct, nullptr); + if (Buffer != VK_NULL_HANDLE) + vkDestroyBuffer(Dev, Buffer, nullptr); + if (Memory != VK_NULL_HANDLE) + vkFreeMemory(Dev, Memory, nullptr); + } + + VkDeviceAddress getDeviceAddress() const { return DeviceAddress; } + + static bool classof(const offloadtest::AccelerationStructure *AS) { + return AS->getAPI() == GPUAPI::Vulkan; + } +}; + class VulkanFence : public offloadtest::Fence { public: VulkanFence(VkDevice Device, VkSemaphore Semaphore, llvm::StringRef Name) @@ -1116,6 +1150,15 @@ class VulkanDevice : public offloadtest::Device { PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabel = nullptr; MeshShaderFunctions MeshShaderFns; + bool HasRayTracingSupport = false; + struct RaytracingFunctions { + PFN_vkCreateAccelerationStructureKHR CreateAS = nullptr; + PFN_vkDestroyAccelerationStructureKHR DestroyAS = nullptr; + PFN_vkGetAccelerationStructureBuildSizesKHR GetBuildSizes = nullptr; + PFN_vkGetAccelerationStructureDeviceAddressKHR GetDeviceAddress = nullptr; + }; + RaytracingFunctions RT; + struct BufferRef { VkBuffer Buffer; VkDeviceMemory Memory; @@ -1245,6 +1288,17 @@ class VulkanDevice : public offloadtest::Device { DeviceInfo.queueCreateInfoCount = 1; DeviceInfo.pQueueCreateInfos = &QueueInfo; + const VulkanDevice::ExtensionVector AvailableDeviceExtensions = + queryDeviceExtensions(PhysicalDevice); + const bool HasVulkan12 = + Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0); + const bool HasVulkan13 = + Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0); +#ifdef VK_VERSION_1_4 + const bool HasVulkan14 = + Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0); +#endif + VkPhysicalDeviceFeatures2 Features{}; Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; VkPhysicalDeviceVulkan11Features Features11{}; @@ -1257,76 +1311,143 @@ class VulkanDevice : public offloadtest::Device { VkPhysicalDeviceVulkan14Features Features14{}; Features14.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_4_FEATURES; #endif -#ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME - // Opt-in extension features: query and enable - // VK_EXT_shader_image_atomic_int64 when the device advertises it so that - // tests using 64-bit atomics on RWBuffer / RWTexture (SPIR-V image storage - // class) can run. - const auto AvailableExts = queryDeviceExtensions(PhysicalDevice); - const bool HasShaderImageAtomicInt64Ext = isExtensionSupported( - AvailableExts, VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME); - VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT - FeaturesImageAtomicInt64{}; - FeaturesImageAtomicInt64.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT; -#endif Features.pNext = &Features11; - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) + if (HasVulkan12) Features11.pNext = &Features12; - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) + if (HasVulkan13) Features12.pNext = &Features13; #ifdef VK_VERSION_1_4 - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0)) + if (HasVulkan14) Features13.pNext = &Features14; #endif + + llvm::SmallVector EnabledDeviceExtensions; + #ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME - // Attach the extension features struct to the tail of the version-gated - // chain so vkGetPhysicalDeviceFeatures2 populates it and vkCreateDevice - // sees it enabled. + const bool HasShaderImageAtomicInt64Ext = + isExtensionSupported(AvailableDeviceExtensions, + VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME); + VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT + FeaturesImageAtomicInt64{}; if (HasShaderImageAtomicInt64Ext) { -#ifdef VK_VERSION_1_4 - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0)) - Features14.pNext = &FeaturesImageAtomicInt64; - else -#endif - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) - Features13.pNext = &FeaturesImageAtomicInt64; - else if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) - Features12.pNext = &FeaturesImageAtomicInt64; - else - Features11.pNext = &FeaturesImageAtomicInt64; + FeaturesImageAtomicInt64.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT; + FeaturesImageAtomicInt64.pNext = Features.pNext; + Features.pNext = &FeaturesImageAtomicInt64; } #endif - vkGetPhysicalDeviceFeatures2(PhysicalDevice, &Features); - const VulkanDevice::ExtensionVector AvailableDeviceExtensions = - queryDeviceExtensions(PhysicalDevice); - - llvm::SmallVector EnabledDeviceExtensions; - const llvm::StringRef ExtensionName = "VK_EXT_mesh_shader"; + const bool HasMeshShader = isExtensionSupported( + AvailableDeviceExtensions, VK_EXT_MESH_SHADER_EXTENSION_NAME); VkPhysicalDeviceMeshShaderFeaturesEXT MeshFeatures{}; - if (isExtensionSupported(AvailableDeviceExtensions, ExtensionName)) { - EnabledDeviceExtensions.push_back(ExtensionName.data()); + if (HasMeshShader) { MeshFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT; - MeshFeatures.taskShader = 1; - MeshFeatures.meshShader = 1; + MeshFeatures.pNext = Features.pNext; + Features.pNext = &MeshFeatures; + } + + const bool HasASExts = + isExtensionSupported(AvailableDeviceExtensions, + VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME) && + isExtensionSupported(AvailableDeviceExtensions, + VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME) && + // bufferDeviceAddress is core in Vulkan 1.2; on 1.1 we need the ext. + (HasVulkan12 || + isExtensionSupported(AvailableDeviceExtensions, + VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)); + + VkPhysicalDeviceAccelerationStructureFeaturesKHR ASFeatures{}; + // On Vulkan 1.1 we need a separate BDA features struct; on 1.2+ + // bufferDeviceAddress lives in VkPhysicalDeviceVulkan12Features which is + // already in the chain, and adding a duplicate is a validation error. + VkPhysicalDeviceBufferDeviceAddressFeatures BDAFeatures{}; + if (HasASExts) { + ASFeatures.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; + ASFeatures.pNext = Features.pNext; + Features.pNext = &ASFeatures; + if (!HasVulkan12) { + BDAFeatures.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES; + BDAFeatures.pNext = Features.pNext; + Features.pNext = &BDAFeatures; + } + } + + vkGetPhysicalDeviceFeatures2(PhysicalDevice, &Features); + + // For every extension chained above we verify that its gating feature bool + // came back true; if it didn't, return a descriptive error rather than + // letting vkCreateDevice raise the generic VK_ERROR_FEATURE_NOT_PRESENT. + // If this ever fires on a real driver, make the check infallible: either + // splice the struct back out of pNext or treat the feature as unsupported + // down below (skip the matching extension push). + if (HasMeshShader) { + if (!MeshFeatures.taskShader) + return llvm::createStringError( + std::errc::not_supported, + "Device advertises %s but reports taskShader=0", + VK_EXT_MESH_SHADER_EXTENSION_NAME); + if (!MeshFeatures.meshShader) + return llvm::createStringError( + std::errc::not_supported, + "Device advertises %s but reports meshShader=0", + VK_EXT_MESH_SHADER_EXTENSION_NAME); + // primitiveFragmentShadingRateMeshShader depends on + // primitiveFragmentShadingRate (VUID-...-07033), which we don't enable. MeshFeatures.multiviewMeshShader = 0; MeshFeatures.primitiveFragmentShadingRateMeshShader = 0; MeshFeatures.meshShaderQueries = 0; - - MeshFeatures.pNext = Features.pNext; - Features.pNext = &MeshFeatures; + EnabledDeviceExtensions.push_back(VK_EXT_MESH_SHADER_EXTENSION_NAME); } #ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME - if (HasShaderImageAtomicInt64Ext && - FeaturesImageAtomicInt64.shaderImageInt64Atomics) + if (HasShaderImageAtomicInt64Ext) { + if (!FeaturesImageAtomicInt64.shaderImageInt64Atomics) + return llvm::createStringError( + std::errc::not_supported, + "Device advertises %s but reports shaderImageInt64Atomics=0", + VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME); + FeaturesImageAtomicInt64.sparseImageInt64Atomics = 0; EnabledDeviceExtensions.push_back( VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME); + } #endif + if (HasASExts) { + if (!ASFeatures.accelerationStructure) + return llvm::createStringError( + std::errc::not_supported, + "Device advertises %s but reports accelerationStructure=0", + VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + const bool HasBDA = HasVulkan12 ? Features12.bufferDeviceAddress + : BDAFeatures.bufferDeviceAddress; + if (!HasBDA) + return llvm::createStringError( + std::errc::not_supported, + "Device advertises %s but reports bufferDeviceAddress=0", + VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + // Enable only the base feature; capture-replay / indirect-build / + // host-commands / updateAfterBind aren't used by these tests. + ASFeatures.accelerationStructureCaptureReplay = 0; + ASFeatures.accelerationStructureIndirectBuild = 0; + ASFeatures.accelerationStructureHostCommands = 0; + ASFeatures.descriptorBindingAccelerationStructureUpdateAfterBind = 0; + if (!HasVulkan12) { + BDAFeatures.bufferDeviceAddressCaptureReplay = 0; + BDAFeatures.bufferDeviceAddressMultiDevice = 0; + } + EnabledDeviceExtensions.push_back( + VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + EnabledDeviceExtensions.push_back( + VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME); + if (!HasVulkan12) + EnabledDeviceExtensions.push_back( + VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); + } + DeviceInfo.enabledExtensionCount = static_cast(EnabledDeviceExtensions.size()); DeviceInfo.ppEnabledExtensionNames = EnabledDeviceExtensions.data(); @@ -1347,9 +1468,29 @@ class VulkanDevice : public offloadtest::Device { VulkanQueue GraphicsQueue(DeviceQueue, QueueFamilyIdx, Device, std::move(*SubmitFenceOrErr)); - return std::make_unique( + auto Dev = std::make_unique( Instance, PhysicalDevice, Props, Device, std::move(GraphicsQueue), std::move(InstanceLayers), std::move(AvailableDeviceExtensions)); + + // Load ray tracing function pointers after device creation. + if (HasASExts) { + Dev->HasRayTracingSupport = true; + Dev->RT.CreateAS = reinterpret_cast( + vkGetDeviceProcAddr(Device, "vkCreateAccelerationStructureKHR")); + Dev->RT.DestroyAS = + reinterpret_cast( + vkGetDeviceProcAddr(Device, "vkDestroyAccelerationStructureKHR")); + Dev->RT.GetBuildSizes = + reinterpret_cast( + vkGetDeviceProcAddr(Device, + "vkGetAccelerationStructureBuildSizesKHR")); + Dev->RT.GetDeviceAddress = + reinterpret_cast( + vkGetDeviceProcAddr( + Device, "vkGetAccelerationStructureDeviceAddressKHR")); + } + + return Dev; } VulkanDevice(std::shared_ptr I, VkPhysicalDevice P, @@ -2227,6 +2368,14 @@ class VulkanDevice : public offloadtest::Device { private: void queryCapabilities() { + const bool HasVulkan12 = + Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0); + const bool HasVulkan13 = + Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0); +#ifdef VK_VERSION_1_4 + const bool HasVulkan14 = + Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0); +#endif VkPhysicalDeviceFeatures2 Features{}; Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -2250,12 +2399,12 @@ class VulkanDevice : public offloadtest::Device { #endif Features.pNext = &Features11; - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) + if (HasVulkan12) Features11.pNext = &Features12; - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) + if (HasVulkan13) Features12.pNext = &Features13; #ifdef VK_VERSION_1_4 - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0)) + if (HasVulkan14) Features13.pNext = &Features14; #endif #ifdef VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME @@ -2267,13 +2416,13 @@ class VulkanDevice : public offloadtest::Device { // tail for this device's apiVersion. if (HasShaderImageAtomicInt64Ext) { #ifdef VK_VERSION_1_4 - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 4, 0)) + if (HasVulkan14) Features14.pNext = &FeaturesImageAtomicInt64; else #endif - if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) + if (HasVulkan13) Features13.pNext = &FeaturesImageAtomicInt64; - else if (Props.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) + else if (HasVulkan12) Features12.pNext = &FeaturesImageAtomicInt64; else Features11.pNext = &FeaturesImageAtomicInt64; @@ -2404,9 +2553,236 @@ class VulkanDevice : public offloadtest::Device { return std::make_unique(Device, Handle, Desc); } + // Helper: create a buffer with device address support. + llvm::Expected + createBufferWithDeviceAddress(VkDeviceSize Size, + VkBufferUsageFlags ExtraUsage) { + return createBuffer(ExtraUsage | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, Size, nullptr, + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT); + } + + llvm::Expected createTriangleBLASBuildRequest( + llvm::ArrayRef Triangles) override { + if (auto Err = validateBLASGeometry(Triangles)) + return Err; + + BLASBuildRequest Req; + Req.Geometry = llvm::SmallVector(Triangles.begin(), + Triangles.end()); + + llvm::SmallVector Geoms; + Geoms.reserve(Triangles.size()); + + llvm::SmallVector MaxPrimCounts; + MaxPrimCounts.reserve(Triangles.size()); + + for (const auto &T : Triangles) { + VkAccelerationStructureGeometryKHR Geom = {}; + Geom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + Geom.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR; + if (T.Opaque) + Geom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; + + auto &Tri = Geom.geometry.triangles; + Tri.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; + // Device addresses are not needed for the size query; they will be + // populated at build time. + Tri.vertexStride = T.VertexStride; + Tri.maxVertex = T.VertexCount - 1; + Tri.vertexFormat = getVulkanFormat(T.VertexFormat); + Tri.indexType = T.IndexBuffer ? getVulkanIndexType(T.IdxFormat) + : VK_INDEX_TYPE_NONE_KHR; + + Geoms.push_back(Geom); + MaxPrimCounts.push_back(T.IndexBuffer ? T.IndexCount / 3 + : T.VertexCount / 3); + } + + queryBLASPrebuildSize(Geoms, MaxPrimCounts, Req.Sizes); + return Req; + } + + llvm::Expected + createAABBBLASBuildRequest(llvm::ArrayRef AABBs) override { + if (auto Err = validateBLASGeometry(AABBs)) + return Err; + + BLASBuildRequest Req; + Req.Geometry = + llvm::SmallVector(AABBs.begin(), AABBs.end()); + + llvm::SmallVector Geoms; + Geoms.reserve(AABBs.size()); + + llvm::SmallVector MaxPrimCounts; + MaxPrimCounts.reserve(AABBs.size()); + + for (const auto &A : AABBs) { + VkAccelerationStructureGeometryKHR Geom = {}; + Geom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + Geom.geometryType = VK_GEOMETRY_TYPE_AABBS_KHR; + if (A.Opaque) + Geom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; + + auto &Aabbs = Geom.geometry.aabbs; + Aabbs.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR; + Aabbs.stride = A.AABBStride; + + Geoms.push_back(Geom); + MaxPrimCounts.push_back(A.AABBCount); + } + + queryBLASPrebuildSize(Geoms, MaxPrimCounts, Req.Sizes); + return Req; + } + +private: + void queryBLASPrebuildSize( + llvm::ArrayRef Geoms, + llvm::ArrayRef MaxPrimCounts, + AccelerationStructureSizes &OutSizes) { + VkAccelerationStructureBuildGeometryInfoKHR BuildInfo = {}; + BuildInfo.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + BuildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + BuildInfo.geometryCount = Geoms.size(); + BuildInfo.pGeometries = Geoms.data(); + + VkAccelerationStructureBuildSizesInfoKHR SizesInfo = {}; + SizesInfo.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + RT.GetBuildSizes(Device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &BuildInfo, MaxPrimCounts.data(), &SizesInfo); + + OutSizes.ResultDataMaxSizeInBytes = SizesInfo.accelerationStructureSize; + OutSizes.ScratchDataSizeInBytes = SizesInfo.buildScratchSize; + OutSizes.UpdateScratchDataSizeInBytes = SizesInfo.updateScratchSize; + } + +public: + llvm::Expected createTLASBuildRequest( + llvm::ArrayRef Instances) override { + TLASBuildRequest Req; + Req.Instances.assign(Instances.begin(), Instances.end()); + + if (auto Err = validateTLASBuildRequest(Req)) + return Err; + + VkAccelerationStructureGeometryKHR Geom = {}; + Geom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + Geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR; + Geom.geometry.instances.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; + + VkAccelerationStructureBuildGeometryInfoKHR BuildInfo = {}; + BuildInfo.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + BuildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + BuildInfo.geometryCount = 1; + BuildInfo.pGeometries = &Geom; + + const uint32_t InstanceCount = static_cast(Instances.size()); + + VkAccelerationStructureBuildSizesInfoKHR SizesInfo = {}; + SizesInfo.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + RT.GetBuildSizes(Device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &BuildInfo, &InstanceCount, &SizesInfo); + + Req.Sizes.ResultDataMaxSizeInBytes = SizesInfo.accelerationStructureSize; + Req.Sizes.ScratchDataSizeInBytes = SizesInfo.buildScratchSize; + Req.Sizes.UpdateScratchDataSizeInBytes = SizesInfo.updateScratchSize; + + return Req; + } + + llvm::Expected> + createAccelerationStructure(const BLASBuildRequest &Request) override { + if (!HasRayTracingSupport) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + auto BufOrErr = createBufferWithDeviceAddress( + Request.Sizes.ResultDataMaxSizeInBytes, + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR); + if (!BufOrErr) + return BufOrErr.takeError(); + + VkAccelerationStructureCreateInfoKHR CreateInfo = {}; + CreateInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + CreateInfo.buffer = BufOrErr->Buffer; + CreateInfo.size = Request.Sizes.ResultDataMaxSizeInBytes; + CreateInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + + VkAccelerationStructureKHR AccelStruct = VK_NULL_HANDLE; + if (auto Err = + VK::toError(RT.CreateAS(Device, &CreateInfo, nullptr, &AccelStruct), + "Failed to create BLAS.")) { + vkDestroyBuffer(Device, BufOrErr->Buffer, nullptr); + vkFreeMemory(Device, BufOrErr->Memory, nullptr); + return Err; + } + + VkAccelerationStructureDeviceAddressInfoKHR AddrInfo = {}; + AddrInfo.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR; + AddrInfo.accelerationStructure = AccelStruct; + const VkDeviceAddress DevAddr = RT.GetDeviceAddress(Device, &AddrInfo); + + return std::make_unique( + Device, AccelStruct, BufOrErr->Buffer, BufOrErr->Memory, DevAddr, + RT.DestroyAS); + } + + llvm::Expected> + createAccelerationStructure(const TLASBuildRequest &Request) override { + if (!HasRayTracingSupport) + return llvm::createStringError( + std::errc::not_supported, + "Ray tracing is not supported on this device."); + + auto BufOrErr = createBufferWithDeviceAddress( + Request.Sizes.ResultDataMaxSizeInBytes, + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR); + if (!BufOrErr) + return BufOrErr.takeError(); + + VkAccelerationStructureCreateInfoKHR CreateInfo = {}; + CreateInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + CreateInfo.buffer = BufOrErr->Buffer; + CreateInfo.size = Request.Sizes.ResultDataMaxSizeInBytes; + CreateInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + + VkAccelerationStructureKHR AccelStruct = VK_NULL_HANDLE; + if (auto Err = + VK::toError(RT.CreateAS(Device, &CreateInfo, nullptr, &AccelStruct), + "Failed to create TLAS.")) { + vkDestroyBuffer(Device, BufOrErr->Buffer, nullptr); + vkFreeMemory(Device, BufOrErr->Memory, nullptr); + return Err; + } + + VkAccelerationStructureDeviceAddressInfoKHR AddrInfo = {}; + AddrInfo.sType = + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR; + AddrInfo.accelerationStructure = AccelStruct; + const VkDeviceAddress DevAddr = RT.GetDeviceAddress(Device, &AddrInfo); + + return std::make_unique( + Device, AccelStruct, BufOrErr->Buffer, BufOrErr->Memory, DevAddr, + RT.DestroyAS); + } + llvm::Expected createBuffer(VkBufferUsageFlags Usage, VkMemoryPropertyFlags MemoryFlags, - size_t Size, void *Data = nullptr) { + size_t Size, void *Data = nullptr, + VkMemoryAllocateFlags AllocFlags = 0) { VkBuffer Buffer; VkDeviceMemory Memory; VkBufferCreateInfo BufferInfo = {}; @@ -2422,9 +2798,16 @@ class VulkanDevice : public offloadtest::Device { VkMemoryRequirements MemReqs; vkGetBufferMemoryRequirements(Device, Buffer, &MemReqs); + + VkMemoryAllocateFlagsInfo FlagsInfo = {}; VkMemoryAllocateInfo AllocInfo = {}; AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; AllocInfo.allocationSize = MemReqs.size; + if (AllocFlags) { + FlagsInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; + FlagsInfo.flags = AllocFlags; + AllocInfo.pNext = &FlagsInfo; + } llvm::Expected MemIdx = getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, MemoryFlags); diff --git a/lib/API/VK/VKResources.h b/lib/API/VK/VKResources.h index 9f39ab24c..3df6dcf6f 100644 --- a/lib/API/VK/VKResources.h +++ b/lib/API/VK/VKResources.h @@ -88,6 +88,16 @@ inline VkImageUsageFlags getVulkanImageUsage(TextureUsage Usage) { return Flags; } +inline VkIndexType getVulkanIndexType(IndexFormat Fmt) { + switch (Fmt) { + case IndexFormat::Uint16: + return VK_INDEX_TYPE_UINT16; + case IndexFormat::Uint32: + return VK_INDEX_TYPE_UINT32; + } + llvm_unreachable("All IndexFormat cases handled"); +} + } // namespace offloadtest #endif // OFFLOADTEST_API_VKRESOURCES_H