diff --git a/include/API/Buffer.h b/include/API/Buffer.h index 1523dfe63..f1fe0e456 100644 --- a/include/API/Buffer.h +++ b/include/API/Buffer.h @@ -20,21 +20,49 @@ namespace offloadtest { -enum class BufferUsage { +enum class BufferShaderAccessType : uint32_t { + Raw, + Typed, + Structured, +}; + +union BufferShaderAccessTypeParams { + Format Fmt; // Typed Only + uint32_t StructureStride; // Structured Only +}; + +enum class BufferUsage : uint32_t { Storage, + ConstantBuffer, + IndexBuffer, VertexBuffer, + IndirectArgs, }; struct BufferCreateDesc { MemoryLocation Location; + MemoryBacking Backing; BufferUsage Usage; + BufferShaderAccessType AccessType; + BufferShaderAccessTypeParams AccessTypeParams; + bool HasCounter; static BufferCreateDesc uploadBuffer() { - return BufferCreateDesc{MemoryLocation::CpuToGpu, BufferUsage::Storage}; + return BufferCreateDesc{MemoryLocation::CpuToGpu, + MemoryBacking::Automatic, + BufferUsage::Storage, + BufferShaderAccessType::Raw, + {}, + false}; } static BufferCreateDesc readbackBuffer() { - return BufferCreateDesc{MemoryLocation::GpuToCpu, BufferUsage::Storage}; + return BufferCreateDesc{MemoryLocation::GpuToCpu, + MemoryBacking::Automatic, + BufferUsage::Storage, + BufferShaderAccessType::Raw, + {}, + false}; } }; @@ -55,6 +83,7 @@ class Buffer { Buffer &operator=(const Buffer &) = delete; GPUAPI getAPI() const { return API; } + virtual const BufferCreateDesc &getDesc() const = 0; protected: explicit Buffer(GPUAPI API) : API(API) {} diff --git a/include/API/Device.h b/include/API/Device.h index 08be29599..8a578ca89 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -256,6 +256,11 @@ class Device { virtual llvm::Expected> createTexture(std::string Name, const TextureCreateDesc &Desc) = 0; + // The row stride required when uploading data to (or reading back from) a + // texture created with the given description, via an upload buffer. 
+ virtual uint32_t + getTextureUploadRowStrideInBytes(const TextureCreateDesc &Desc) const = 0; + virtual llvm::Expected> createRenderPass(const RenderPassDesc &Desc) = 0; @@ -316,6 +321,18 @@ createBufferWithData(Device &Dev, std::string Name, size_t SizeInBytes, ComputeEncoder *Encoder, std::unique_ptr *OutUploadBuffer); +llvm::Expected> +createBufferWithData(Device &Dev, std::string Name, + const BufferCreateDesc &Desc, const void *Data, + size_t SizeInBytes, ComputeEncoder *Encoder, + std::unique_ptr *OutUploadBuffer); + +llvm::Expected> +createTextureWithData(Device &Dev, std::string Name, + const TextureCreateDesc &Desc, const void *Data, + size_t SizeInBytes, ComputeEncoder *Encoder, + std::unique_ptr *OutUploadBuffer); + } // namespace offloadtest #endif // OFFLOADTEST_API_DEVICE_H diff --git a/include/API/Encoder.h b/include/API/Encoder.h index 9d2ed1cf4..07ceb8682 100644 --- a/include/API/Encoder.h +++ b/include/API/Encoder.h @@ -21,6 +21,7 @@ namespace offloadtest { class Buffer; class PipelineState; +class Texture; /// Base class for all command encoders. An encoder records commands into a /// command buffer. Call endEncoding() when done recording. 
Barriers are @@ -82,6 +83,11 @@ class ComputeEncoder : public CommandEncoder { virtual llvm::Error copyBufferToBuffer(Buffer &Src, size_t SrcOffset, Buffer &Dst, size_t DstOffset, size_t Size) = 0; + virtual llvm::Error copyCounterToBuffer(Buffer &Src, Buffer &Dst) = 0; + + virtual llvm::Error copyBufferToTexture(Buffer &Src, Texture &Dst) = 0; + + virtual llvm::Error copyTextureToBuffer(Texture &Src, Buffer &Dst) = 0; }; struct Viewport { diff --git a/include/API/FormatConversion.h b/include/API/FormatConversion.h index 816705389..779b02744 100644 --- a/include/API/FormatConversion.h +++ b/include/API/FormatConversion.h @@ -87,13 +87,27 @@ inline llvm::Expected toFormat(DataFormat Format, int Channels) { if (Channels == 1) return Format::D32Float; break; + case DataFormat::UInt64: + switch (Channels) { + case 1: + return Format::R64Uint; + case 2: + return Format::RG64Uint; + } + break; + case DataFormat::Int64: + switch (Channels) { + case 1: + return Format::R64Sint; + case 2: + return Format::RG64Sint; + } + break; // No Format mapping for these DataFormats. case DataFormat::Hex8: case DataFormat::Hex16: case DataFormat::Hex32: case DataFormat::Hex64: - case DataFormat::UInt64: - case DataFormat::Int64: case DataFormat::Float16: case DataFormat::Float64: case DataFormat::Bool: diff --git a/include/API/Resources.h b/include/API/Resources.h index f3fe6f22c..0b901765e 100644 --- a/include/API/Resources.h +++ b/include/API/Resources.h @@ -25,6 +25,18 @@ enum class MemoryLocation { GpuToCpu, }; +enum class MemoryBacking { + // Allocates all memory for this resource. + Automatic, + + // No memory allocated; physical pages mapped manually on demand. + // DX: CreateReservedResource + UpdateTileMappings. + // VK: VK_IMAGE_CREATE_SPARSE_BINDING_BIT + vkQueueBindSparse. + // Metal: MTLTextureDescriptor.sparseLevel + heap tile mapping + // (requires Apple Silicon). + Sparse, +}; + enum class IndexFormat { Uint16, Uint32 }; // TODO: Add Unorm types (e.g. 
R8Unorm, RGBA8Unorm) which can be sampled as @@ -47,6 +59,10 @@ enum class Format { RGBA32Sint, RGBA32Uint, RGBA32Float, + R64Uint, + R64Sint, + RG64Uint, + RG64Sint, D32Float, D32FloatS8Uint, }; @@ -85,6 +101,14 @@ inline llvm::StringRef getFormatName(Format Format) { return "RGBA32Uint"; case Format::RGBA32Float: return "RGBA32Float"; + case Format::R64Uint: + return "R64Uint"; + case Format::R64Sint: + return "R64Sint"; + case Format::RG64Uint: + return "RG64Uint"; + case Format::RG64Sint: + return "RG64Sint"; case Format::D32Float: return "D32Float"; case Format::D32FloatS8Uint: @@ -112,12 +136,16 @@ inline uint32_t getFormatSizeInBytes(Format Format) { case Format::RG32Uint: case Format::RG32Float: case Format::D32FloatS8Uint: + case Format::R64Uint: + case Format::R64Sint: return 8; case Format::RGB32Float: return 12; case Format::RGBA32Sint: case Format::RGBA32Uint: case Format::RGBA32Float: + case Format::RG64Uint: + case Format::RG64Sint: return 16; } llvm_unreachable("All Format cases handled"); @@ -141,6 +169,10 @@ inline bool isDepthFormat(Format Format) { case Format::RGBA32Sint: case Format::RGBA32Uint: case Format::RGBA32Float: + case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: return false; case Format::D32Float: case Format::D32FloatS8Uint: @@ -167,6 +199,10 @@ inline bool isStencilFormat(Format Format) { case Format::RGBA32Sint: case Format::RGBA32Uint: case Format::RGBA32Float: + case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: case Format::D32Float: return false; case Format::D32FloatS8Uint: @@ -199,6 +235,11 @@ inline bool isTextureCompatible(Format Format) { case Format::RGBA32Float: case Format::D32Float: case Format::D32FloatS8Uint: + // Only for RWTextures + case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: return true; } llvm_unreachable("All Format cases handled"); @@ -224,6 +265,10 @@ inline bool 
isVertexCompatible(Format Format) { case Format::RGBA32Uint: case Format::RGBA32Float: return true; + case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: case Format::D32Float: case Format::D32FloatS8Uint: return false; @@ -253,6 +298,10 @@ inline bool isPositionCompatible(Format Format) { case Format::RG32Uint: case Format::RGBA32Sint: case Format::RGBA32Uint: + case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: case Format::D32Float: case Format::D32FloatS8Uint: return false; diff --git a/include/API/Texture.h b/include/API/Texture.h index 0922df156..e37a1b610 100644 --- a/include/API/Texture.h +++ b/include/API/Texture.h @@ -28,6 +28,8 @@ namespace offloadtest { LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +class Device; + enum TextureUsage : uint32_t { Sampled = 1 << 0, Storage = 1 << 1, @@ -150,9 +152,25 @@ class Texture { Texture(const Texture &) = delete; Texture &operator=(const Texture &) = delete; + // Calculate the size in bytes of the texture data given a linear layout + // Useful for calculating the size for an upload or readback buffer. + size_t calculateLinearSizeInBytes(Device &Dev) const; + + // Maps the texture's memory for host access. Only valid for CpuToGpu and + // GpuToCpu textures; returns an error for GpuOnly. Each successful map() must + // be paired with a call to unmap() before the texture is used on the GPU. + virtual llvm::Expected map() = 0; + virtual void unmap() = 0; + GPUAPI getAPI() const { return API; } virtual const TextureCreateDesc &getDesc() const = 0; + // The byte stride between consecutive rows when the texture is mapped for + // direct host access. Errors if the texture is not host-visible, or if its + // memory layout is not linear (the mapped bytes have no well-defined row + // stride otherwise). 
+ virtual llvm::Expected getMappedRowPitchInBytes() const = 0; + protected: explicit Texture(GPUAPI API) : API(API) {} }; diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 9cf0e5f77..191668e63 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -232,7 +232,7 @@ struct Resource { std::optional VKBinding; CPUBuffer *BufferPtr = nullptr; Sampler *SamplerPtr = nullptr; - bool HasCounter; + bool HasCounter = false; std::optional TilesMapped; bool IsReserved = false; TLASDesc *TLASPtr = nullptr; @@ -281,6 +281,26 @@ struct Resource { llvm_unreachable("All cases handled"); } + bool isBuffer() const { + switch (Kind) { + case ResourceKind::Buffer: + case ResourceKind::RWBuffer: + case ResourceKind::StructuredBuffer: + case ResourceKind::RWStructuredBuffer: + case ResourceKind::ByteAddressBuffer: + case ResourceKind::RWByteAddressBuffer: + case ResourceKind::ConstantBuffer: + return true; + case ResourceKind::Sampler: + case ResourceKind::Texture2D: + case ResourceKind::RWTexture2D: + case ResourceKind::SampledTexture2D: + case ResourceKind::AccelerationStructure: + return false; + } + llvm_unreachable("All cases handled"); + } + bool isTexture() const { switch (Kind) { case ResourceKind::Buffer: diff --git a/lib/API/CMakeLists.txt b/lib/API/CMakeLists.txt index 21e917fe4..9f3836e83 100644 --- a/lib/API/CMakeLists.txt +++ b/lib/API/CMakeLists.txt @@ -34,6 +34,7 @@ endif() add_offloadtest_library(API Capabilities.cpp Device.cpp + Texture.cpp Util.cpp ${api_sources}) diff --git a/lib/API/DX/DXResources.h b/lib/API/DX/DXResources.h index 0636a9407..f081e2ba7 100644 --- a/lib/API/DX/DXResources.h +++ b/lib/API/DX/DXResources.h @@ -31,8 +31,8 @@ inline D3D12_HEAP_TYPE getDXHeapType(MemoryLocation Location) { llvm_unreachable("All MemoryLocation cases handled"); } -inline DXGI_FORMAT getDXGIFormat(Format Format) { - switch (Format) { +inline DXGI_FORMAT getDXGIFormat(Format Fmt) { + switch (Fmt) { case Format::R16Sint: return 
DXGI_FORMAT_R16_SINT; case Format::R16Uint: @@ -65,6 +65,14 @@ inline DXGI_FORMAT getDXGIFormat(Format Format) { return DXGI_FORMAT_R32G32B32A32_UINT; case Format::RGBA32Float: return DXGI_FORMAT_R32G32B32A32_FLOAT; + case Format::R64Uint: + return DXGI_FORMAT_R32G32_UINT; + case Format::R64Sint: + return DXGI_FORMAT_R32G32_SINT; + case Format::RG64Uint: + return DXGI_FORMAT_R32G32B32A32_UINT; + case Format::RG64Sint: + return DXGI_FORMAT_R32G32B32A32_SINT; case Format::D32Float: return DXGI_FORMAT_D32_FLOAT; case Format::D32FloatS8Uint: @@ -73,6 +81,17 @@ inline DXGI_FORMAT getDXGIFormat(Format Format) { llvm_unreachable("All Format cases handled"); } +inline DXGI_FORMAT getDXGIFormatSRV(Format Fmt) { + switch (Fmt) { + case Format::D32Float: + return DXGI_FORMAT_R32_FLOAT; + case Format::D32FloatS8Uint: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + default: + return getDXGIFormat(Fmt); + } +} + inline D3D12_RESOURCE_FLAGS getDXResourceFlags(TextureUsage Usage) { D3D12_RESOURCE_FLAGS Flags = D3D12_RESOURCE_FLAG_NONE; if ((Usage & TextureUsage::Storage) != 0) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 1bd3aa2a6..56da803d0 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -94,64 +94,6 @@ static void dumpD3DInfoQueues(void *) { } } -#define DXFormats(FMT) \ - if (Channels == 1) \ - return DXGI_FORMAT_R32_##FMT; \ - if (Channels == 2) \ - return DXGI_FORMAT_R32G32_##FMT; \ - if (Channels == 3) \ - return DXGI_FORMAT_R32G32B32_##FMT; \ - if (Channels == 4) \ - return DXGI_FORMAT_R32G32B32A32_##FMT; - -static DXGI_FORMAT getDXFormat(DataFormat Format, int Channels) { - switch (Format) { - case DataFormat::Int32: - DXFormats(SINT) break; - case DataFormat::UInt32: - DXFormats(UINT) break; - case DataFormat::Float32: - DXFormats(FLOAT) break; - case DataFormat::UInt64: - case DataFormat::Int64: - if (Channels == 1) - return DXGI_FORMAT_R32G32_UINT; - if (Channels == 2) - return DXGI_FORMAT_R32G32B32A32_UINT; - 
llvm_unreachable("Unsupported channel count for 64-bit format"); - case DataFormat::Depth32: - llvm_unreachable( - "Depth32 format is not yet supported in the DirectX backend."); - default: - llvm_unreachable("Unsupported Resource format specified"); - } - return DXGI_FORMAT_UNKNOWN; -} - -static DXGI_FORMAT getRawDXFormat(const Resource &R) { - if (!R.isByteAddressBuffer()) - return DXGI_FORMAT_UNKNOWN; - - switch (R.BufferPtr->Format) { - case DataFormat::Hex16: - case DataFormat::UInt16: - case DataFormat::Int16: - case DataFormat::Float16: - case DataFormat::Hex32: - case DataFormat::UInt32: - case DataFormat::Int32: - case DataFormat::Float32: - case DataFormat::Hex64: - case DataFormat::UInt64: - case DataFormat::Int64: - case DataFormat::Float64: - return DXGI_FORMAT_R32_TYPELESS; - default: - llvm_unreachable("Unsupported Resource format specified"); - } - return DXGI_FORMAT_UNKNOWN; -} - // D3D12 requires the RowPitch in a placed subresource footprint (used for // texture <-> buffer copies via CopyTextureRegion) to be a multiple of // D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256 bytes). For textures whose natural @@ -201,155 +143,53 @@ static uint64_t getAlignedTextureBufferSize(const CPUBuffer &B) { return uint64_t(B.OutputProps.Height - 1) * AlignedPitch + LastRowSize; } -static uint32_t getUAVBufferSize(const Resource &R) { - return R.HasCounter - ? llvm::alignTo(R.size(), D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT) + - sizeof(uint32_t) - : R.size(); -} - -static uint32_t getUAVBufferCounterOffset(const Resource &R) { - return R.HasCounter - ? 
llvm::alignTo(R.size(), D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT) - : 0; -} - -static D3D12_RESOURCE_DIMENSION getDXDimension(ResourceKind RK) { - switch (RK) { +static BufferUsage BufferUsageFromResourceKind(ResourceKind Kind) { + // Determine Buffer Usage + switch (Kind) { case ResourceKind::Buffer: case ResourceKind::StructuredBuffer: case ResourceKind::ByteAddressBuffer: - case ResourceKind::RWStructuredBuffer: case ResourceKind::RWBuffer: + case ResourceKind::RWStructuredBuffer: case ResourceKind::RWByteAddressBuffer: + return BufferUsage::Storage; + break; case ResourceKind::ConstantBuffer: - case ResourceKind::AccelerationStructure: - return D3D12_RESOURCE_DIMENSION_BUFFER; - case ResourceKind::Texture2D: - case ResourceKind::RWTexture2D: - return D3D12_RESOURCE_DIMENSION_TEXTURE2D; - case ResourceKind::Sampler: - return D3D12_RESOURCE_DIMENSION_UNKNOWN; - case ResourceKind::SampledTexture2D: - llvm_unreachable("SampledTextures aren't supported in DirectX!"); - } - llvm_unreachable("All cases handled"); -} - -static llvm::Expected -getResourceDescription(const Resource &R) { - const D3D12_RESOURCE_DIMENSION Dimension = getDXDimension(R.Kind); - const offloadtest::CPUBuffer &B = *R.BufferPtr; - - if (B.OutputProps.MipLevels != 1) - return llvm::createStringError(std::errc::not_supported, - "Multiple mip levels are not yet supported " - "for DirectX textures."); - - const DXGI_FORMAT Format = - R.isTexture() ? getDXFormat(B.Format, B.Channels) : DXGI_FORMAT_UNKNOWN; - const uint32_t Width = - R.isTexture() ? B.OutputProps.Width : getUAVBufferSize(R); - const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; - D3D12_TEXTURE_LAYOUT Layout; - - if (R.isTexture()) - Layout = - R.IsReserved && (getDescriptorKind(R.Kind) == DescriptorKind::SRV || - getDescriptorKind(R.Kind) == DescriptorKind::UAV) - ? 
D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE - : D3D12_TEXTURE_LAYOUT_UNKNOWN; - else - Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - const D3D12_RESOURCE_FLAGS Flags = - R.isReadWrite() ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS - : D3D12_RESOURCE_FLAG_NONE; - const D3D12_RESOURCE_DESC ResDesc = {Dimension, 0, Width, Height, 1, 1, - Format, {1, 0}, Layout, Flags}; - return ResDesc; + return BufferUsage::ConstantBuffer; + break; + default: + llvm_unreachable("Invalid case, ResourceKind is not a buffer."); + } } -static D3D12_SHADER_RESOURCE_VIEW_DESC getSRVDescription(const Resource &R) { - const uint32_t EltSize = R.getElementSize(); - const uint32_t NumElts = R.size() / EltSize; - - llvm::outs() << " EltSize = " << EltSize << " NumElts = " << NumElts - << "\n"; - D3D12_SHADER_RESOURCE_VIEW_DESC Desc = {}; - Desc.Format = R.isRaw() - ? getRawDXFormat(R) - : getDXFormat(R.BufferPtr->Format, R.BufferPtr->Channels); - Desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - switch (R.Kind) { +static BufferShaderAccessType BufferShaderAccessTypeFromResourceKind( + const Resource &Resource, BufferShaderAccessTypeParams &OutParams) { + // Determine the buffer access type; typed buffers also report their format. + switch (Resource.Kind) { case ResourceKind::Buffer: + case ResourceKind::RWBuffer: { + auto FmtOrErr = + toFormat(Resource.BufferPtr->Format, Resource.BufferPtr->Channels); + if (!FmtOrErr) { + // Abort in every build mode: assert() compiles out under NDEBUG, and + // dereferencing an Expected that holds an error is not allowed. + llvm::report_fatal_error(FmtOrErr.takeError()); + } + OutParams.Fmt = *FmtOrErr; + return BufferShaderAccessType::Typed; + } case ResourceKind::StructuredBuffer: - case ResourceKind::ByteAddressBuffer: - - Desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - Desc.Buffer = - D3D12_BUFFER_SRV{0, NumElts, R.isStructuredBuffer() ? EltSize : 0, - R.isByteAddressBuffer() ? 
D3D12_BUFFER_SRV_FLAG_RAW - : D3D12_BUFFER_SRV_FLAG_NONE}; - break; - case ResourceKind::Texture2D: - Desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - Desc.Texture2D = D3D12_TEX2D_SRV{0, 1, 0, 0}; - break; - case ResourceKind::RWStructuredBuffer: - case ResourceKind::RWBuffer: - case ResourceKind::RWByteAddressBuffer: - case ResourceKind::RWTexture2D: - case ResourceKind::ConstantBuffer: - case ResourceKind::Sampler: - llvm_unreachable("Not an SRV type!"); - case ResourceKind::SampledTexture2D: - llvm_unreachable("Sampled textures aren't supported in DirectX!"); - case ResourceKind::AccelerationStructure: - llvm_unreachable("Acceleration structures use a separate descriptor path!"); - } - return Desc; -} - -static D3D12_UNORDERED_ACCESS_VIEW_DESC getUAVDescription(const Resource &R) { - const uint32_t EltSize = R.getElementSize(); - const uint32_t NumElts = R.size() / EltSize; - const uint32_t CounterOffset = getUAVBufferCounterOffset(R); - - llvm::outs() << " EltSize = " << EltSize << " NumElts = " << NumElts - << "\n"; - D3D12_UNORDERED_ACCESS_VIEW_DESC Desc = {}; - Desc.Format = R.isRaw() - ? getRawDXFormat(R) - : getDXFormat(R.BufferPtr->Format, R.BufferPtr->Channels); - switch (R.Kind) { - case ResourceKind::RWBuffer: case ResourceKind::RWStructuredBuffer: - case ResourceKind::RWByteAddressBuffer: - - Desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - Desc.Buffer = D3D12_BUFFER_UAV{ - 0, NumElts, R.isStructuredBuffer() ? EltSize : 0, CounterOffset, - R.isByteAddressBuffer() ? 
D3D12_BUFFER_UAV_FLAG_RAW - : D3D12_BUFFER_UAV_FLAG_NONE}; - break; - case ResourceKind::RWTexture2D: - Desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - Desc.Texture2D = D3D12_TEX2D_UAV{0, 0}; - break; - case ResourceKind::StructuredBuffer: - case ResourceKind::Buffer: + OutParams.StructureStride = Resource.BufferPtr->getElementSize(); + return BufferShaderAccessType::Structured; case ResourceKind::ByteAddressBuffer: - case ResourceKind::Texture2D: + case ResourceKind::RWByteAddressBuffer: case ResourceKind::ConstantBuffer: - case ResourceKind::Sampler: - llvm_unreachable("Not a UAV type!"); - case ResourceKind::SampledTexture2D: - llvm_unreachable("Sampled textures aren't supported in DirectX!"); - case ResourceKind::AccelerationStructure: - llvm_unreachable("Acceleration structures use a separate descriptor path!"); - } - return Desc; + return BufferShaderAccessType::Raw; + default: + llvm_unreachable( + "Invalid case, non-buffers should have been filtered out."); + } } namespace { @@ -360,11 +200,28 @@ class DXBuffer : public offloadtest::Buffer { std::string Name; BufferCreateDesc Desc; size_t SizeInBytes; + uint64_t CounterOffsetInBytes; + D3D12_RESOURCE_STATES PreferredState; + + D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle; + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle; + D3D12_CPU_DESCRIPTOR_HANDLE CBVHandle; DXBuffer(ComPtr Buffer, llvm::StringRef Name, - BufferCreateDesc Desc, size_t SizeInBytes) + BufferCreateDesc Desc, size_t SizeInBytes, + uint64_t CounterOffsetInBytes, D3D12_RESOURCE_STATES PreferredState, + D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle, + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle, + D3D12_CPU_DESCRIPTOR_HANDLE CBVHandle) : offloadtest::Buffer(GPUAPI::DirectX), Buffer(Buffer), Name(Name), - Desc(Desc), SizeInBytes(SizeInBytes) {} + Desc(Desc), SizeInBytes(SizeInBytes), + CounterOffsetInBytes(CounterOffsetInBytes), + PreferredState(PreferredState), SRVHandle(SRVHandle), + UAVHandle(UAVHandle), CBVHandle(CBVHandle) {} + DXBuffer(const DXBuffer &) = delete; 
+ DXBuffer(DXBuffer &&) = delete; + DXBuffer &operator=(const DXBuffer &) = delete; + DXBuffer &operator=(DXBuffer &&) = delete; size_t getSizeInBytes() const override { return SizeInBytes; } @@ -381,6 +238,8 @@ class DXBuffer : public offloadtest::Buffer { void unmap() override { Buffer->Unmap(0, nullptr); } + const BufferCreateDesc &getDesc() const override { return Desc; } + static bool classof(const offloadtest::Buffer *B) { return B->getAPI() == GPUAPI::DirectX; } @@ -389,27 +248,65 @@ class DXBuffer : public offloadtest::Buffer { class DXTexture : public offloadtest::Texture { public: ComPtr Resource; + D3D12_RESOURCE_STATES PreferredState; - // TODO: - // Ideally SRVs/UAVs would also live here, but they currently require a - // shared CBV_SRV_UAV heap whose indices are determined at pipeline bind time. - // Moving them here would require a descriptor heap allocator, which is not - // yet implemented. - // Either an RTV or DSV descriptor, depending on Desc.Usage. + // Optional descriptors, depending on Desc.Usage. // A zero ptr means no descriptor was created for that view type. 
D3D12_CPU_DESCRIPTOR_HANDLE RTVHandle = {}; D3D12_CPU_DESCRIPTOR_HANDLE DSVHandle = {}; + D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle = {}; + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = {}; std::string Name; TextureCreateDesc Desc; DXTexture(ComPtr Resource, llvm::StringRef Name, - TextureCreateDesc Desc) - : offloadtest::Texture(GPUAPI::DirectX), Resource(Resource), Name(Name), - Desc(Desc) {} + TextureCreateDesc Desc, D3D12_RESOURCE_STATES PreferredState, + D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle, + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle) + : offloadtest::Texture(GPUAPI::DirectX), Resource(Resource), + PreferredState(PreferredState), SRVHandle(SRVHandle), + UAVHandle(UAVHandle), Name(Name), Desc(Desc) {} + + llvm::Expected map() override { + if (Desc.Location == MemoryLocation::GpuOnly) + return llvm::createStringError(std::errc::invalid_argument, + "Cannot map a GpuOnly texture."); + void *Ptr = nullptr; + if (auto Err = HR::toError(Resource->Map(0, nullptr, &Ptr), + "Failed to map texture.")) + return std::move(Err); + return Ptr; + } + + void unmap() override { Resource->Unmap(0, nullptr); } const TextureCreateDesc &getDesc() const override { return Desc; } + llvm::Expected getMappedRowPitchInBytes() const override { + if (Desc.Location == MemoryLocation::GpuOnly) + return llvm::createStringError( + std::errc::invalid_argument, + "Cannot query mapped row pitch of a GpuOnly texture."); + + const D3D12_RESOURCE_DESC ResourceDesc = Resource->GetDesc(); + if (ResourceDesc.Layout != D3D12_TEXTURE_LAYOUT_ROW_MAJOR) + return llvm::createStringError( + std::errc::invalid_argument, + "Mapped row pitch is only defined for row-major textures."); + + ComPtr Device; + if (auto Err = HR::toError(Resource->GetDevice(IID_PPV_ARGS(&Device)), + "Failed to get device from texture resource.")) + return std::move(Err); + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint = {}; + Device->GetCopyableFootprints(&ResourceDesc, /*FirstSubresource*/ 0, + /*NumSubresources*/ 1, /*BaseOffset*/ 0, + 
&Footprint, nullptr, nullptr, nullptr); + return Footprint.Footprint.RowPitch; + } + static bool classof(const offloadtest::Texture *T) { return T->getAPI() == GPUAPI::DirectX; } @@ -577,6 +474,7 @@ class DXCommandBuffer : public offloadtest::CommandBuffer { ComPtr CmdList; /// Whether a UAV barrier is pending from a prior compute command. bool PendingUAVBarrier = false; + llvm::SmallVector PendingTransitions; static llvm::Expected> create(ComPtr Device) { @@ -602,14 +500,34 @@ class DXCommandBuffer : public offloadtest::CommandBuffer { } void addPendingUAVBarrier() { PendingUAVBarrier = true; } + void addResourceTransition(ID3D12Resource *pResource, + D3D12_RESOURCE_STATES StateBefore, + D3D12_RESOURCE_STATES StateAfter) { + for (auto &Trans : PendingTransitions) { // fold into a queued transition + if (Trans.Transition.pResource == pResource) { + assert(StateBefore == Trans.Transition.StateAfter); + Trans.Transition.StateAfter = StateAfter; + if (Trans.Transition.StateBefore == StateAfter) + PendingTransitions.erase(&Trans); // D3D12 rejects before == after + return; + } + } + PendingTransitions.push_back(CD3DX12_RESOURCE_BARRIER::Transition( + pResource, StateBefore, StateAfter)); + } void flushBarrier() { - if (!PendingUAVBarrier) - return; - const D3D12_RESOURCE_BARRIER Barrier = - CD3DX12_RESOURCE_BARRIER::UAV(nullptr); - CmdList->ResourceBarrier(1, &Barrier); - PendingUAVBarrier = false; + + if (PendingUAVBarrier) { + PendingTransitions.push_back(CD3DX12_RESOURCE_BARRIER::UAV(nullptr)); + PendingUAVBarrier = false; + } + + if (!PendingTransitions.empty()) { + CmdList->ResourceBarrier(PendingTransitions.size(), + PendingTransitions.data()); + PendingTransitions.clear(); + } } llvm::Expected> @@ -708,12 +626,137 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder { llvm::Error copyBufferToBuffer(offloadtest::Buffer &Src, size_t SrcOffset, offloadtest::Buffer &Dst, size_t DstOffset, size_t Size) override { - auto &DXSrc = static_cast(Src); - auto &DXDst = static_cast(Dst); - addUAVBarrier(); + auto &DXSrc = llvm::cast(Src); + auto &DXDst = llvm::cast(Dst); + + if (DXSrc.PreferredState != 
D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Buffer.Get(), DXSrc.PreferredState, + D3D12_RESOURCE_STATE_COPY_SOURCE); + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Buffer.Get(), DXDst.PreferredState, + D3D12_RESOURCE_STATE_COPY_DEST); + CB.flushBarrier(); + insertDebugSignpost(llvm::formatv("CopyBuffer {0}B", Size).str()); CB.CmdList->CopyBufferRegion(DXDst.Buffer.Get(), DstOffset, DXSrc.Buffer.Get(), SrcOffset, Size); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Buffer.Get(), + D3D12_RESOURCE_STATE_COPY_SOURCE, + DXSrc.PreferredState); + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Buffer.Get(), + D3D12_RESOURCE_STATE_COPY_DEST, + DXDst.PreferredState); + CB.flushBarrier(); + + return llvm::Error::success(); + } + + llvm::Error copyCounterToBuffer(offloadtest::Buffer &Src, + offloadtest::Buffer &Dst) override { + auto &DXSrc = llvm::cast(Src); + auto &DXDst = llvm::cast(Dst); + // The counter is copied out of Src, so Src must have been created with one. + if (!DXSrc.Desc.HasCounter) + return llvm::createStringError( + "Counter resource passed does not have a counter."); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Buffer.Get(), DXSrc.PreferredState, + D3D12_RESOURCE_STATE_COPY_SOURCE); + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Buffer.Get(), DXDst.PreferredState, + D3D12_RESOURCE_STATE_COPY_DEST); + CB.flushBarrier(); + + insertDebugSignpost("copyCounterToBuffer 4B"); + CB.CmdList->CopyBufferRegion(DXDst.Buffer.Get(), 0, DXSrc.Buffer.Get(), + DXSrc.CounterOffsetInBytes, sizeof(uint32_t)); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Buffer.Get(), + D3D12_RESOURCE_STATE_COPY_SOURCE, + DXSrc.PreferredState); + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Buffer.Get(), 
+ D3D12_RESOURCE_STATE_COPY_DEST, + DXDst.PreferredState); + CB.flushBarrier(); + return llvm::Error::success(); + } + + llvm::Error copyBufferToTexture(Buffer &Src, Texture &Dst) override { + auto &DXSrc = llvm::cast(Src); + auto &DXDst = llvm::cast(Dst); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Buffer.Get(), DXSrc.PreferredState, + D3D12_RESOURCE_STATE_COPY_SOURCE); + + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Resource.Get(), DXDst.PreferredState, + D3D12_RESOURCE_STATE_COPY_DEST); + CB.flushBarrier(); + + const uint32_t ElementSize = getFormatSizeInBytes(DXDst.Desc.Fmt); + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ + 0, + CD3DX12_SUBRESOURCE_FOOTPRINT( + getDXGIFormat(DXDst.Desc.Fmt), DXDst.Desc.Width, DXDst.Desc.Height, + 1, getAlignedTexturePitch(DXDst.Desc.Width, ElementSize))}; + const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(DXDst.Resource.Get(), 0); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(DXSrc.Buffer.Get(), Footprint); + CB.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Buffer.Get(), + D3D12_RESOURCE_STATE_COPY_SOURCE, + DXSrc.PreferredState); + + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Resource.Get(), + D3D12_RESOURCE_STATE_COPY_DEST, + DXDst.PreferredState); + + return llvm::Error::success(); + } + + llvm::Error copyTextureToBuffer(Texture &Src, Buffer &Dst) override { + auto &DXSrc = llvm::cast(Src); + auto &DXDst = llvm::cast(Dst); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Resource.Get(), DXSrc.PreferredState, + D3D12_RESOURCE_STATE_COPY_SOURCE); + + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Buffer.Get(), DXDst.PreferredState, + D3D12_RESOURCE_STATE_COPY_DEST); 
+ + CB.flushBarrier(); + + const uint32_t ElementSize = getFormatSizeInBytes(DXSrc.Desc.Fmt); + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ + 0, + CD3DX12_SUBRESOURCE_FOOTPRINT( + getDXGIFormat(DXSrc.Desc.Fmt), DXSrc.Desc.Width, DXSrc.Desc.Height, + 1, getAlignedTexturePitch(DXSrc.Desc.Width, ElementSize))}; + const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(DXDst.Buffer.Get(), Footprint); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(DXSrc.Resource.Get(), 0); + CB.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + + if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE) + CB.addResourceTransition(DXSrc.Resource.Get(), + D3D12_RESOURCE_STATE_COPY_SOURCE, + DXSrc.PreferredState); + + if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST) + CB.addResourceTransition(DXDst.Buffer.Get(), + D3D12_RESOURCE_STATE_COPY_DEST, + DXDst.PreferredState); + return llvm::Error::success(); } @@ -741,6 +784,7 @@ class DXRenderPass final : public offloadtest::RenderPass { class DXRenderEncoder : public offloadtest::RenderEncoder { DXCommandBuffer &CB; + offloadtest::RenderPassBeginDesc Desc; // Encoder contract: viewport and scissor must both be set before draw(). 
bool ViewportSet = false; @@ -765,8 +809,9 @@ class DXRenderEncoder : public offloadtest::RenderEncoder { } public: - DXRenderEncoder(DXCommandBuffer &CB) - : RenderEncoder(GPUAPI::DirectX), CB(CB) {} + DXRenderEncoder(DXCommandBuffer &CB, + const offloadtest::RenderPassBeginDesc &Desc) + : RenderEncoder(GPUAPI::DirectX), CB(CB), Desc(Desc) {} DXRenderEncoder(const DXRenderEncoder &CB) = delete; DXRenderEncoder(DXRenderEncoder &&CB) = delete; DXRenderEncoder &operator=(DXRenderEncoder &CB) = delete; @@ -840,7 +885,25 @@ class DXRenderEncoder : public offloadtest::RenderEncoder { return llvm::Error::success(); } - void endEncodingImpl() override { popDebugGroup(); } + void endEncodingImpl() override { + // State transitions + for (offloadtest::Texture *Tex : Desc.ColorAttachments) { + auto &DXTex = llvm::cast(*Tex); + if (DXTex.PreferredState != D3D12_RESOURCE_STATE_RENDER_TARGET) + CB.addResourceTransition(DXTex.Resource.Get(), + D3D12_RESOURCE_STATE_RENDER_TARGET, + DXTex.PreferredState); + } + if (Desc.DepthStencil) { + auto &DXTex = llvm::cast(*Desc.DepthStencil); + if (DXTex.PreferredState != D3D12_RESOURCE_STATE_DEPTH_WRITE) + CB.addResourceTransition(DXTex.Resource.Get(), + D3D12_RESOURCE_STATE_DEPTH_WRITE, + DXTex.PreferredState); + } + + popDebugGroup(); + } }; llvm::Expected> @@ -901,6 +964,22 @@ DXCommandBuffer::createRenderEncoder( DSVHandle = DXDS.DSVHandle; } + // State transitions + for (offloadtest::Texture *Tex : Desc.ColorAttachments) { + auto &DXTex = llvm::cast(*Tex); + if (DXTex.PreferredState != D3D12_RESOURCE_STATE_RENDER_TARGET) + this->addResourceTransition(DXTex.Resource.Get(), DXTex.PreferredState, + D3D12_RESOURCE_STATE_RENDER_TARGET); + } + if (Desc.DepthStencil) { + auto &DXTex = llvm::cast(*Desc.DepthStencil); + if (DXTex.PreferredState != D3D12_RESOURCE_STATE_DEPTH_WRITE) + this->addResourceTransition(DXTex.Resource.Get(), DXTex.PreferredState, + D3D12_RESOURCE_STATE_DEPTH_WRITE); + } + + this->flushBarrier(); + 
CmdList->OMSetRenderTargets(static_cast(RTVHandles.size()), RTVHandles.data(), /*RTsSingleHandleToDescriptorRange=*/false, @@ -941,7 +1020,7 @@ DXCommandBuffer::createRenderEncoder( } } - auto Enc = std::make_unique(*this); + auto Enc = std::make_unique(*this, Desc); Enc->pushDebugGroup("RenderEncoder"); return Enc; } @@ -954,27 +1033,43 @@ class DXDevice : public offloadtest::Device { Capabilities Caps; DescriptorAllocator RTVAllocator; DescriptorAllocator DSVAllocator; + DescriptorAllocator CSUAllocator; struct ResourceSet { - ComPtr Upload; - ComPtr Buffer; + std::unique_ptr UploadBuffer; // Keep-alive + + // TODO(manon): use std::variant instead? + std::unique_ptr Buffer; + std::unique_ptr Texture; std::unique_ptr Readback; - ComPtr Heap; - ResourceSet(ComPtr Upload, ComPtr Buffer, + std::unique_ptr CounterReadback; + + ResourceSet(std::unique_ptr UploadBuffer, + std::unique_ptr Buffer, std::unique_ptr Readback, - ComPtr Heap = nullptr) - : Upload(Upload), Buffer(Buffer), Readback(std::move(Readback)), - Heap(Heap) {} + std::unique_ptr CounterReadback) + : UploadBuffer(std::move(UploadBuffer)), Buffer(std::move(Buffer)), + Readback(std::move(Readback)), + CounterReadback(std::move(CounterReadback)) {} + ResourceSet(std::unique_ptr UploadBuffer, + std::unique_ptr Texture, + std::unique_ptr Readback) + : UploadBuffer(std::move(UploadBuffer)), Texture(std::move(Texture)), + Readback(std::move(Readback)) {} + ResourceSet(const ResourceSet &) = delete; - ResourceSet(ResourceSet &&A) - : Upload(A.Upload), Buffer(A.Buffer), Readback(std::move(A.Readback)), - Heap(A.Heap) {} ResourceSet &operator=(const ResourceSet &) = delete; + + ResourceSet(ResourceSet &&A) + : UploadBuffer(std::move(A.UploadBuffer)), Buffer(std::move(A.Buffer)), + Texture(std::move(A.Texture)), Readback(std::move(A.Readback)), + CounterReadback(std::move(A.CounterReadback)) {} ResourceSet &operator=(ResourceSet &&A) { - Upload = A.Upload; - Buffer = A.Buffer; + UploadBuffer = 
std::move(A.UploadBuffer); + Buffer = std::move(A.Buffer); + Texture = std::move(A.Texture); Readback = std::move(A.Readback); - Heap = A.Heap; + CounterReadback = std::move(A.CounterReadback); return *this; } }; @@ -1007,10 +1102,12 @@ class DXDevice : public offloadtest::Device { public: DXDevice(ComPtr A, ComPtr D, DXQueue Q, DescriptorAllocator RTVAllocator, DescriptorAllocator DSVAllocator, - std::string Desc, std::string DriverVer) + DescriptorAllocator CSUAllocator, std::string Desc, + std::string DriverVer) : Adapter(A), Device(D), GraphicsQueue(std::move(Q)), RTVAllocator(std::move(RTVAllocator)), - DSVAllocator(std::move(DSVAllocator)) { + DSVAllocator(std::move(DSVAllocator)), + CSUAllocator(std::move(CSUAllocator)) { Description = std::move(Desc); DriverVersion = std::move(DriverVer); DriverName = "DirectX"; @@ -1356,34 +1453,159 @@ class DXDevice : public offloadtest::Device { createBuffer(std::string Name, const BufferCreateDesc &Desc, size_t SizeInBytes) override { const D3D12_HEAP_TYPE HeapType = getDXHeapType(Desc.Location); - + // This flag is only allowed on GpuOnly memory. const D3D12_RESOURCE_FLAGS Flags = - HeapType == D3D12_HEAP_TYPE_DEFAULT + Desc.Location == MemoryLocation::GpuOnly ? 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; + // Modify the size if needed + UINT64 CounterOffsetInBytes = 0; + UINT64 BufferSizeInBytes = SizeInBytes; + if (Desc.Usage == BufferUsage::ConstantBuffer) { + if (Desc.HasCounter) + return llvm::createStringError( + "Constant Buffers are not allowed to have a counter."); + + BufferSizeInBytes = getCBVSize(BufferSizeInBytes); + } else if (Desc.HasCounter) { + if (Desc.AccessType == BufferShaderAccessType::Raw) + return llvm::createStringError( + "Raw Resources are not allowed to have a counter."); + + CounterOffsetInBytes = llvm::alignTo( + BufferSizeInBytes, D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT); + BufferSizeInBytes = CounterOffsetInBytes + sizeof(uint32_t); + } + const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(HeapType); const D3D12_RESOURCE_DESC BufferDesc = - CD3DX12_RESOURCE_DESC::Buffer(SizeInBytes, Flags); - - D3D12_RESOURCE_STATES InitialState = D3D12_RESOURCE_STATE_COMMON; - if (HeapType == D3D12_HEAP_TYPE_UPLOAD) - InitialState = D3D12_RESOURCE_STATE_GENERIC_READ; - else if (HeapType == D3D12_HEAP_TYPE_READBACK) - // As per the readback heap docs - // > Resources in this heap must be created with - // > D3D12_RESOURCE_STATE_COPY_DEST, and cannot be changed away from this. 
- InitialState = D3D12_RESOURCE_STATE_COPY_DEST; - - ComPtr DeviceBuffer; - if (auto Err = - HR::toError(Device->CreateCommittedResource( - &HeapProps, D3D12_HEAP_FLAG_NONE, &BufferDesc, - InitialState, nullptr, IID_PPV_ARGS(&DeviceBuffer)), - "Failed to create buffer.")) - return Err; + CD3DX12_RESOURCE_DESC::Buffer(BufferSizeInBytes, Flags); + + D3D12_RESOURCE_STATES PreferredState = D3D12_RESOURCE_STATE_COMMON; + ComPtr BufferObject; + if (Desc.Backing == MemoryBacking::Sparse) { + if (auto Err = HR::toError(Device->CreateReservedResource( + &BufferDesc, D3D12_RESOURCE_STATE_COMMON, + nullptr, IID_PPV_ARGS(&BufferObject)), + "Failed to create reserved buffer.")) + return Err; + } else { + D3D12_RESOURCE_STATES InitialState = D3D12_RESOURCE_STATE_COMMON; + if (HeapType == D3D12_HEAP_TYPE_UPLOAD) + InitialState = D3D12_RESOURCE_STATE_GENERIC_READ; + else if (HeapType == D3D12_HEAP_TYPE_READBACK) + // As per the readback heap docs + // > Resources in this heap must be created with + // > D3D12_RESOURCE_STATE_COPY_DEST, and cannot be changed away from + // this. 
+ InitialState = D3D12_RESOURCE_STATE_COPY_DEST; + PreferredState = InitialState; + if (auto Err = HR::toError(Device->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, + &BufferDesc, InitialState, nullptr, + IID_PPV_ARGS(&BufferObject)), + "Failed to create buffer.")) + return Err; + } + + const std::wstring WStr(Name.begin(), Name.end()); + BufferObject->SetName(WStr.c_str()); + + D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle = {}; + { + auto SRVHandleOrErr = CSUAllocator.allocate(); + if (!SRVHandleOrErr) + return SRVHandleOrErr.takeError(); + SRVHandle = *SRVHandleOrErr; + + D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + SRVDesc.Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + switch (Desc.AccessType) { + case BufferShaderAccessType::Raw: + SRVDesc.Format = DXGI_FORMAT_R32_TYPELESS; + SRVDesc.Buffer.NumElements = static_cast(SizeInBytes / 4); + SRVDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + break; + case BufferShaderAccessType::Typed: + SRVDesc.Format = getDXGIFormat(Desc.AccessTypeParams.Fmt); + SRVDesc.Buffer.NumElements = static_cast( + SizeInBytes / getFormatSizeInBytes(Desc.AccessTypeParams.Fmt)); + break; + case BufferShaderAccessType::Structured: + assert(Desc.AccessTypeParams.StructureStride > 0 && + "Structured buffers must have a Structure Stride."); + SRVDesc.Format = DXGI_FORMAT_UNKNOWN; + SRVDesc.Buffer.NumElements = static_cast( + SizeInBytes / Desc.AccessTypeParams.StructureStride); + SRVDesc.Buffer.StructureByteStride = + Desc.AccessTypeParams.StructureStride; + break; + } + + Device->CreateShaderResourceView(BufferObject.Get(), &SRVDesc, SRVHandle); + } + + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = {}; + if ((Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) != 0) { + auto UAVHandleOrErr = CSUAllocator.allocate(); + if (!UAVHandleOrErr) + return UAVHandleOrErr.takeError(); + UAVHandle = *UAVHandleOrErr; + + D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {}; + 
UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + switch (Desc.AccessType) { + case BufferShaderAccessType::Raw: + UAVDesc.Format = DXGI_FORMAT_R32_TYPELESS; + UAVDesc.Buffer.NumElements = static_cast(SizeInBytes / 4); + UAVDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + break; + case BufferShaderAccessType::Typed: + UAVDesc.Format = getDXGIFormat(Desc.AccessTypeParams.Fmt); + UAVDesc.Buffer.NumElements = static_cast( + SizeInBytes / getFormatSizeInBytes(Desc.AccessTypeParams.Fmt)); + break; + case BufferShaderAccessType::Structured: + assert(Desc.AccessTypeParams.StructureStride > 0 && + "Structured buffers must have a Structure Stride."); + UAVDesc.Format = DXGI_FORMAT_UNKNOWN; + UAVDesc.Buffer.NumElements = static_cast( + SizeInBytes / Desc.AccessTypeParams.StructureStride); + UAVDesc.Buffer.StructureByteStride = + Desc.AccessTypeParams.StructureStride; + break; + } + + ID3D12Resource *CounterObject = nullptr; + if (Desc.HasCounter) { + UAVDesc.Buffer.CounterOffsetInBytes = CounterOffsetInBytes; + CounterObject = BufferObject.Get(); + } + + Device->CreateUnorderedAccessView(BufferObject.Get(), CounterObject, + &UAVDesc, UAVHandle); + } + + D3D12_CPU_DESCRIPTOR_HANDLE CBVHandle = {}; + if (Desc.Usage == BufferUsage::ConstantBuffer) { + auto CBVHandleOrErr = CSUAllocator.allocate(); + if (!CBVHandleOrErr) + return CBVHandleOrErr.takeError(); + CBVHandle = *CBVHandleOrErr; + + D3D12_CONSTANT_BUFFER_VIEW_DESC CBVDesc = {}; + CBVDesc.BufferLocation = BufferObject->GetGPUVirtualAddress(); + CBVDesc.SizeInBytes = BufferSizeInBytes; + + Device->CreateConstantBufferView(&CBVDesc, CBVHandle); + } - return std::make_unique(DeviceBuffer, Name, Desc, SizeInBytes); + return std::make_unique(BufferObject, Name, Desc, SizeInBytes, + CounterOffsetInBytes, PreferredState, + SRVHandle, UAVHandle, CBVHandle); } llvm::Expected> @@ -1402,7 +1624,9 @@ class DXDevice : public offloadtest::Device { TexDesc.MipLevels = static_cast(Desc.MipLevels); TexDesc.Format = 
getDXGIFormat(Desc.Fmt); TexDesc.SampleDesc.Count = 1; - TexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + TexDesc.Layout = Desc.Location == MemoryLocation::GpuOnly + ? D3D12_TEXTURE_LAYOUT_UNKNOWN + : D3D12_TEXTURE_LAYOUT_ROW_MAJOR; TexDesc.Flags = getDXResourceFlags(Desc.Usage); const D3D12_CLEAR_VALUE *ClearValuePtr = nullptr; @@ -1426,21 +1650,60 @@ class DXDevice : public offloadtest::Device { ClearValuePtr = &ClearValue; } - D3D12_RESOURCE_STATES InitialState = D3D12_RESOURCE_STATE_COMMON; - if ((Desc.Usage & TextureUsage::RenderTarget) != 0) - InitialState = D3D12_RESOURCE_STATE_RENDER_TARGET; - else if ((Desc.Usage & TextureUsage::DepthStencil) != 0) - InitialState = D3D12_RESOURCE_STATE_DEPTH_WRITE; + D3D12_RESOURCE_STATES InitialState = + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + if ((Desc.Usage & TextureUsage::Storage)) + InitialState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - ComPtr DeviceTexture; + ComPtr TextureObject; if (auto Err = HR::toError(Device->CreateCommittedResource( &HeapProps, D3D12_HEAP_FLAG_NONE, &TexDesc, InitialState, ClearValuePtr, - IID_PPV_ARGS(&DeviceTexture)), + IID_PPV_ARGS(&TextureObject)), "Failed to create texture.")) return Err; - auto Tex = std::make_unique(DeviceTexture, Name, Desc); + D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle = {}; + { + auto SRVHandleOrErr = CSUAllocator.allocate(); + if (!SRVHandleOrErr) + return SRVHandleOrErr.takeError(); + SRVHandle = *SRVHandleOrErr; + + D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.ViewDimension = + D3D12_SRV_DIMENSION_TEXTURE2D; // assume this is correct for now. 
+ SRVDesc.Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + SRVDesc.Format = getDXGIFormatSRV(Desc.Fmt); + SRVDesc.Texture2D.MostDetailedMip = 0; + SRVDesc.Texture2D.MipLevels = Desc.MipLevels; + SRVDesc.Texture2D.PlaneSlice = 0; + SRVDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + Device->CreateShaderResourceView(TextureObject.Get(), &SRVDesc, + SRVHandle); + } + + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = {}; + if ((TexDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) != 0) { + auto UAVHandleOrErr = CSUAllocator.allocate(); + if (!UAVHandleOrErr) + return UAVHandleOrErr.takeError(); + UAVHandle = *UAVHandleOrErr; + + D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {}; + UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + UAVDesc.Texture2D.MipSlice = 0; + UAVDesc.Texture2D.PlaneSlice = 0; + + Device->CreateUnorderedAccessView(TextureObject.Get(), nullptr, &UAVDesc, + UAVHandle); + } + + auto Tex = std::make_unique(TextureObject, Name, Desc, + InitialState, SRVHandle, UAVHandle); const bool IsRT = (Desc.Usage & TextureUsage::RenderTarget) != 0; const bool IsDS = (Desc.Usage & TextureUsage::DepthStencil) != 0; @@ -1449,7 +1712,7 @@ class DXDevice : public offloadtest::Device { if (!HandleOrErr) return HandleOrErr.takeError(); Tex->RTVHandle = *HandleOrErr; - Device->CreateRenderTargetView(DeviceTexture.Get(), nullptr, + Device->CreateRenderTargetView(TextureObject.Get(), nullptr, Tex->RTVHandle); } if (IsDS) { @@ -1457,11 +1720,16 @@ class DXDevice : public offloadtest::Device { if (!HandleOrErr) return HandleOrErr.takeError(); Tex->DSVHandle = *HandleOrErr; - Device->CreateDepthStencilView(DeviceTexture.Get(), nullptr, + Device->CreateDepthStencilView(TextureObject.Get(), nullptr, Tex->DSVHandle); } - return Tex; + return std::move(Tex); + } + + uint32_t getTextureUploadRowStrideInBytes( + const TextureCreateDesc &Desc) const override { + return getAlignedTexturePitch(Desc.Width, getFormatSizeInBytes(Desc.Fmt)); } static llvm::Expected> @@ 
-1524,10 +1792,16 @@ class DXDevice : public offloadtest::Device { if (!DSVHeapOrErr) return DSVHeapOrErr.takeError(); + auto CSUHeapOrErr = DescriptorAllocator::create( + Device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4096); + if (!CSUHeapOrErr) + return CSUHeapOrErr.takeError(); + return std::make_unique( Adapter, Device, std::move(*GraphicsQueueOrErr), std::move(*RTVHeapOrErr), std::move(*DSVHeapOrErr), - std::string(DescVec.data()), std::move(DriverVer)); + std::move(*CSUHeapOrErr), std::string(DescVec.data()), + std::move(DriverVer)); } const Capabilities &getCapabilities() override { @@ -1570,8 +1844,10 @@ class DXDevice : public offloadtest::Device { } llvm::Error createDescriptorHeap(Pipeline &P, InvocationState &State) { - if (P.getDescriptorCount() == 0) + if (P.getDescriptorCount() == 0) { + printf("P.getDescriptorCount() == 0\n"); return llvm::Error::success(); + } const D3D12_DESCRIPTOR_HEAP_DESC HeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, P.getDescriptorCountWithFlattenedArrays(), @@ -1747,27 +2023,6 @@ class DXDevice : public offloadtest::Device { return std::make_unique(ASBuffer); } - void addResourceUploadCommands(Resource &R, InvocationState &IS, - ComPtr Destination, - ComPtr Source) { - addUploadBeginBarrier(IS, Destination); - if (R.isTexture()) { - const offloadtest::CPUBuffer &B = *R.BufferPtr; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ - 0, CD3DX12_SUBRESOURCE_FOOTPRINT( - getDXFormat(B.Format, B.Channels), B.OutputProps.Width, - B.OutputProps.Height, 1, - B.OutputProps.Width * B.getElementSize())}; - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(Destination.Get(), 0); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(Source.Get(), Footprint); - - IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); - } else - IS.CB->CmdList->CopyBufferRegion(Destination.Get(), 0, Source.Get(), 0, - R.size()); - addUploadEndBarrier(IS, Destination, R.isReadWrite()); - } - static UINT getNumTiles(std::optional NumTiles, 
uint32_t Width) { UINT Ret; if (NumTiles.has_value()) @@ -1834,339 +2089,118 @@ class DXDevice : public offloadtest::Device { return GraphicsQueue.SubmitFence->waitForCompletion(CurrentCounter); } - llvm::Expected createSRV(Resource &R, InvocationState &IS) { - ResourceBundle Bundle; - - auto ResDescOrErr = getResourceDescription(R); - if (!ResDescOrErr) - return ResDescOrErr.takeError(); - const D3D12_RESOURCE_DESC ResDesc = *ResDescOrErr; - const D3D12_HEAP_PROPERTIES UploadHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(R.size()); - - uint32_t RegOffset = 0; - - for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating SRV: { Size = " << R.size() << ", Register = t" - << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space; - - if (R.TilesMapped) - llvm::outs() << ", TilesMapped = " << *R.TilesMapped; - llvm::outs() << " }\n"; - - ComPtr Buffer; - if (R.IsReserved) { - if (auto Err = - HR::toError(Device->CreateReservedResource( - &ResDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, - IID_PPV_ARGS(&Buffer)), - "Failed to create reserved resource (buffer).")) - return Err; - } else { - // for committed resources - const D3D12_HEAP_PROPERTIES CommittedResourceHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - - if (auto Err = HR::toError( - Device->CreateCommittedResource(&CommittedResourceHeapProp, - D3D12_HEAP_FLAG_NONE, &ResDesc, - D3D12_RESOURCE_STATE_COMMON, - nullptr, IID_PPV_ARGS(&Buffer)), - "Failed to create committed resource (buffer).")) - return Err; - } - - ComPtr UploadBuffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&UploadBuffer)), - "Failed to create committed resource (upload buffer).")) - return Err; - - ComPtr Heap; // optional, only created if NumTiles > 0 - if 
(R.IsReserved) - if (auto Err = setupReservedResource(R, ResDesc, Heap, Buffer)) - return Err; - - // Upload data initialization - void *ResDataPtr = nullptr; - if (SUCCEEDED(UploadBuffer->Map(0, NULL, &ResDataPtr))) { - memcpy(ResDataPtr, ResData.get(), R.size()); - UploadBuffer->Unmap(0, nullptr); - } else { - return llvm::createStringError(std::errc::io_error, - "Failed to map SRV upload buffer."); - } - - addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - - Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); - RegOffset++; - } - return Bundle; - } - - // returns the next available HeapIdx - uint32_t bindSRV(Resource &R, InvocationState &IS, uint32_t HeapIdx, - const ResourceBundle &ResBundle) { - const uint32_t EltSize = R.getElementSize(); - const uint32_t NumElts = R.size() / EltSize; - const D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = getSRVDescription(R); - const uint32_t DescHandleIncSize = Device->GetDescriptorHandleIncrementSize( - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - const D3D12_CPU_DESCRIPTOR_HANDLE SRVHandleHeapStart = - IS.DescHeap->GetCPUDescriptorHandleForHeapStart(); - - for (const ResourceSet &RS : ResBundle) { - llvm::outs() << "SRV: HeapIdx = " << HeapIdx << " EltSize = " << EltSize - << " NumElts = " << NumElts << "\n"; - D3D12_CPU_DESCRIPTOR_HANDLE SRVHandle = SRVHandleHeapStart; - SRVHandle.ptr += HeapIdx * DescHandleIncSize; - Device->CreateShaderResourceView(RS.Buffer.Get(), &SRVDesc, SRVHandle); - HeapIdx++; - } - return HeapIdx; - } - - llvm::Expected createUAV(Resource &R, InvocationState &IS) { - ResourceBundle Bundle; - const uint32_t BufferSize = getUAVBufferSize(R); - - auto ResDescOrErr = getResourceDescription(R); - if (!ResDescOrErr) - return ResDescOrErr.takeError(); - const D3D12_RESOURCE_DESC ResDesc = *ResDescOrErr; - - const D3D12_HEAP_PROPERTIES UploadHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(BufferSize); - - 
uint32_t RegOffset = 0; - - for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating UAV: { Size = " << BufferSize - << ", Register = u" << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space - << ", HasCounter = " << R.HasCounter; - - if (R.TilesMapped) - llvm::outs() << ", TilesMapped = " << *R.TilesMapped; - llvm::outs() << " }\n"; - - ComPtr Buffer; - if (R.IsReserved) { - if (auto Err = - HR::toError(Device->CreateReservedResource( - &ResDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, - IID_PPV_ARGS(&Buffer)), - "Failed to create reserved resource (buffer).")) - return Err; - } else { - // for committed resources - const D3D12_HEAP_PROPERTIES CommittedResourceHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - - if (auto Err = HR::toError( - Device->CreateCommittedResource(&CommittedResourceHeapProp, - D3D12_HEAP_FLAG_NONE, &ResDesc, - D3D12_RESOURCE_STATE_COMMON, - nullptr, IID_PPV_ARGS(&Buffer)), - "Failed to create committed resource (buffer).")) - return Err; - } - - ComPtr UploadBuffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&UploadBuffer)), - "Failed to create committed resource (upload buffer).")) - return Err; - - const BufferCreateDesc ReadbackDesc = BufferCreateDesc::readbackBuffer(); - auto ReadbackOrErr = createBuffer("Readback", ReadbackDesc, BufferSize); - if (!ReadbackOrErr) - return ReadbackOrErr.takeError(); - - ComPtr Heap; // optional, only created if NumTiles > 0 - if (R.IsReserved) - if (auto Err = setupReservedResource(R, ResDesc, Heap, Buffer)) - return Err; - - // Upload data initialization - void *ResDataPtr = nullptr; - if (SUCCEEDED(UploadBuffer->Map(0, NULL, &ResDataPtr))) { - memcpy(ResDataPtr, ResData.get(), R.size()); - UploadBuffer->Unmap(0, nullptr); - } else { - return llvm::createStringError(std::errc::io_error, - "Failed to map UAV 
upload buffer."); - } - - addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - - Bundle.emplace_back(UploadBuffer, Buffer, std::move(*ReadbackOrErr), - Heap); - RegOffset++; - } - return Bundle; - } - - // returns the next available HeapIdx - uint32_t bindUAV(Resource &R, InvocationState &IS, uint32_t HeapIdx, - const ResourceBundle &ResBundle) { - const uint32_t EltSize = R.getElementSize(); - const uint32_t NumElts = R.size() / EltSize; - const D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = getUAVDescription(R); - const uint32_t DescHandleIncSize = Device->GetDescriptorHandleIncrementSize( - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - const D3D12_CPU_DESCRIPTOR_HANDLE UAVHandleHeapStart = - IS.DescHeap->GetCPUDescriptorHandleForHeapStart(); - - for (const ResourceSet &RS : ResBundle) { - llvm::outs() << "UAV: HeapIdx = " << HeapIdx << " EltSize = " << EltSize - << " NumElts = " << NumElts - << " HasCounter = " << R.HasCounter << "\n"; - - D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = UAVHandleHeapStart; - UAVHandle.ptr += HeapIdx * DescHandleIncSize; - ID3D12Resource *CounterBuffer = R.HasCounter ? 
RS.Buffer.Get() : nullptr; - Device->CreateUnorderedAccessView(RS.Buffer.Get(), CounterBuffer, - &UAVDesc, UAVHandle); - HeapIdx++; - } - return HeapIdx; - } - static size_t getCBVSize(size_t Sz) { return (Sz + 255u) & 0xFFFFFFFFFFFFFF00; } - llvm::Expected createCBV(Resource &R, InvocationState &IS) { - ResourceBundle Bundle; - - const size_t CBVSize = getCBVSize(R.size()); - const D3D12_HEAP_PROPERTIES HeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - const D3D12_RESOURCE_DESC ResDesc = { - D3D12_RESOURCE_DIMENSION_BUFFER, - 0, - CBVSize, - 1, - 1, - 1, - DXGI_FORMAT_UNKNOWN, - {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; - - const D3D12_HEAP_PROPERTIES UploadHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(CBVSize); - - uint32_t RegOffset = 0; - for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating CBV: { Size = " << CBVSize << ", Register = b" - << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space << " }\n"; - - ComPtr Buffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &HeapProp, D3D12_HEAP_FLAG_NONE, &ResDesc, - D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&Buffer)), - "Failed to create committed resource (buffer).")) - return Err; - - ComPtr UploadBuffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&UploadBuffer)), - "Failed to create committed resource (upload buffer).")) - return Err; - - // Initialize the CBV data - void *ResDataPtr = nullptr; - if (auto Err = HR::toError(UploadBuffer->Map(0, nullptr, &ResDataPtr), - "Failed to acquire UAV data pointer.")) - return Err; - memset(ResDataPtr, 0, CBVSize); - memcpy(ResDataPtr, ResData.get(), R.size()); + llvm::Error createBuffers(Pipeline &P, 
InvocationState &IS) { + auto EncOrErr = IS.CB->createComputeEncoder(); + if (!EncOrErr) + return EncOrErr.takeError(); + auto Enc = std::move(*EncOrErr); - UploadBuffer->Unmap(0, nullptr); + auto CreateBuffer = + [&Enc, + this](Resource &R, + llvm::SmallVectorImpl &Resources) -> llvm::Error { + ResourceBundle ResBundle; + if (R.isBuffer()) { + BufferCreateDesc CreateDesc = {}; + CreateDesc.Location = MemoryLocation::GpuOnly; + CreateDesc.Backing = MemoryBacking::Automatic; + CreateDesc.Usage = BufferUsageFromResourceKind(R.Kind); + CreateDesc.AccessType = BufferShaderAccessTypeFromResourceKind( + R, CreateDesc.AccessTypeParams); + CreateDesc.HasCounter = R.HasCounter; + + for (auto &Data : R.BufferPtr->Data) { + std::unique_ptr UploadBuffer; + auto BufferOrErr = + createBufferWithData(*this, "Buffer", CreateDesc, Data.get(), + R.size(), Enc.get(), &UploadBuffer); + if (!BufferOrErr) + return BufferOrErr.takeError(); + auto Buffer = std::move(*BufferOrErr); + + std::unique_ptr ReadbackBuffer; + std::unique_ptr CounterReadbackBuffer; + if (getDescriptorKind(R.Kind) == DescriptorKind::UAV) { + const BufferCreateDesc ReadbackDesc = + BufferCreateDesc::readbackBuffer(); + auto ReadbackOrErr = createBuffer("Readback", ReadbackDesc, + Buffer->getSizeInBytes()); + if (!ReadbackOrErr) + return ReadbackOrErr.takeError(); + ReadbackBuffer = std::move(*ReadbackOrErr); + + if (R.HasCounter) { + auto CounterReadbackOrErr = + createBuffer("Readback", ReadbackDesc, sizeof(uint32_t)); + if (!CounterReadbackOrErr) + return CounterReadbackOrErr.takeError(); + CounterReadbackBuffer = std::move(*CounterReadbackOrErr); + } + } - addResourceUploadCommands(R, IS, Buffer, UploadBuffer); + ResourceSet RSet(std::move(UploadBuffer), std::move(Buffer), + std::move(ReadbackBuffer), + std::move(CounterReadbackBuffer)); + ResBundle.push_back(std::move(RSet)); + } + } else if (R.isTexture()) { + if (R.BufferPtr->OutputProps.MipLevels != 1) + return 
llvm::createStringError(std::errc::not_supported, + "Multiple mip levels are not yet " + "supported for DirectX textures."); - Bundle.emplace_back(UploadBuffer, Buffer, nullptr); - RegOffset++; - } - return Bundle; - } + auto FormatOrErr = toFormat(R.BufferPtr->Format, R.BufferPtr->Channels); + if (!FormatOrErr) + return FormatOrErr.takeError(); - // returns the next available HeapIdx - uint32_t bindCBV(Resource &R, InvocationState &IS, uint32_t HeapIdx, - const ResourceBundle &ResBundle) { - const size_t CBVSize = getCBVSize(R.size()); - const uint32_t DescHandleIncSize = Device->GetDescriptorHandleIncrementSize( - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - const D3D12_CPU_DESCRIPTOR_HANDLE CVBHandleHeapStart = - IS.DescHeap->GetCPUDescriptorHandleForHeapStart(); - - for (const ResourceSet &RS : ResBundle) { - llvm::outs() << "CBV: HeapIdx = " << HeapIdx << " Size = " << CBVSize - << "\n"; - const D3D12_CONSTANT_BUFFER_VIEW_DESC CBVDesc = { - RS.Buffer->GetGPUVirtualAddress(), static_cast(CBVSize)}; - D3D12_CPU_DESCRIPTOR_HANDLE CBVHandle = CVBHandleHeapStart; - CBVHandle.ptr += HeapIdx * DescHandleIncSize; - Device->CreateConstantBufferView(&CBVDesc, CBVHandle); - HeapIdx++; - } - return HeapIdx; - } + LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + + TextureCreateDesc CreateDesc = {}; + CreateDesc.Location = MemoryLocation::GpuOnly; + CreateDesc.Usage = TextureUsage::Sampled; + if (R.Kind == ResourceKind::RWTexture2D) + CreateDesc.Usage |= TextureUsage::Storage; + CreateDesc.Fmt = *FormatOrErr; + CreateDesc.Width = R.BufferPtr->OutputProps.Width; + CreateDesc.Height = R.BufferPtr->OutputProps.Height; + CreateDesc.MipLevels = 1; + + for (auto &Data : R.BufferPtr->Data) { + std::unique_ptr UploadBuffer; + auto TextureOrErr = + createTextureWithData(*this, "Texture", CreateDesc, Data.get(), + R.size(), Enc.get(), &UploadBuffer); + if (!TextureOrErr) + return TextureOrErr.takeError(); + auto Texture = std::move(*TextureOrErr); + + std::unique_ptr ReadbackBuffer; + if 
(getDescriptorKind(R.Kind) == DescriptorKind::UAV) { + const BufferCreateDesc ReadbackDesc = + BufferCreateDesc::readbackBuffer(); + auto ReadbackOrErr = + createBuffer("Readback", ReadbackDesc, + Texture->calculateLinearSizeInBytes(*this)); + if (!ReadbackOrErr) + return ReadbackOrErr.takeError(); + ReadbackBuffer = std::move(*ReadbackOrErr); + } - llvm::Error createBuffers(Pipeline &P, InvocationState &IS) { - auto CreateBuffer = - [&IS, - this](Resource &R, - llvm::SmallVectorImpl &Resources) -> llvm::Error { - switch (getDescriptorKind(R.Kind)) { - case DescriptorKind::SRV: { - auto ExRes = createSRV(R, IS); - if (!ExRes) - return ExRes.takeError(); - Resources.push_back(std::make_pair(&R, std::move(*ExRes))); - break; - } - case DescriptorKind::UAV: { - auto ExRes = createUAV(R, IS); - if (!ExRes) - return ExRes.takeError(); - Resources.push_back(std::make_pair(&R, std::move(*ExRes))); - break; - } - case DescriptorKind::CBV: { - auto ExRes = createCBV(R, IS); - if (!ExRes) - return ExRes.takeError(); - Resources.push_back(std::make_pair(&R, std::move(*ExRes))); - break; - } - case DescriptorKind::SAMPLER: + ResourceSet RSet(std::move(UploadBuffer), std::move(Texture), + std::move(ReadbackBuffer)); + ResBundle.push_back(std::move(RSet)); + } + } else { return llvm::createStringError( std::errc::not_supported, "Samplers are not yet implemented for DirectX."); } + + Resources.push_back(std::make_pair(&R, std::move(ResBundle))); return llvm::Error::success(); }; @@ -2178,22 +2212,77 @@ class DXDevice : public offloadtest::Device { return Err; } + Enc->endEncoding(); + // Bind descriptors in descriptor tables. 
- uint32_t HeapIndex = 0; - for (auto &T : IS.DescTables) { - for (auto &R : T.Resources) { - switch (getDescriptorKind(R.first->Kind)) { - case DescriptorKind::SRV: - HeapIndex = bindSRV(*(R.first), IS, HeapIndex, R.second); - break; - case DescriptorKind::UAV: - HeapIndex = bindUAV(*(R.first), IS, HeapIndex, R.second); - break; - case DescriptorKind::CBV: - HeapIndex = bindCBV(*(R.first), IS, HeapIndex, R.second); - break; - case DescriptorKind::SAMPLER: - llvm_unreachable("Not implemented yet."); + if (IS.DescHeap) { + uint32_t HeapIndex = 0; + const D3D12_CPU_DESCRIPTOR_HANDLE HeapStart = + IS.DescHeap->GetCPUDescriptorHandleForHeapStart(); + const uint32_t DescHandleIncSize = + Device->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + for (auto &T : IS.DescTables) { + for (auto &R : T.Resources) { + for (const auto &Set : R.second) { + D3D12_CPU_DESCRIPTOR_HANDLE DescriptorHandle = {}; + if (Set.Buffer != nullptr) { + const DXBuffer &BufferDX = + llvm::cast(*Set.Buffer.get()); + switch (getDescriptorKind(R.first->Kind)) { + case DescriptorKind::SRV: + assert(BufferDX.SRVHandle.ptr != 0 && + "Missing SRV Descriptor. Is BufferUsage correct?"); + DescriptorHandle = BufferDX.SRVHandle; + break; + case DescriptorKind::UAV: + assert(BufferDX.UAVHandle.ptr != 0 && + "Missing UAV Descriptor. Is BufferUsage correct?"); + DescriptorHandle = BufferDX.UAVHandle; + break; + case DescriptorKind::CBV: + assert(BufferDX.CBVHandle.ptr != 0 && + "Missing CBV Descriptor. Is BufferUsage correct?"); + DescriptorHandle = BufferDX.CBVHandle; + break; + default: + assert(false && "Invalid DescriptorKind for a Buffer."); + llvm_unreachable("Invalid DescriptorKind for a Buffer."); + break; + } + } else if (Set.Texture != nullptr) { + const DXTexture &TextureDX = + llvm::cast(*Set.Texture.get()); + switch (getDescriptorKind(R.first->Kind)) { + case DescriptorKind::SRV: + assert(TextureDX.SRVHandle.ptr != 0 && + "Missing SRV Descriptor. 
Is TextureUsage correct?"); + DescriptorHandle = TextureDX.SRVHandle; + break; + case DescriptorKind::UAV: + assert(TextureDX.UAVHandle.ptr != 0 && + "Missing UAV Descriptor. Is TextureUsage correct?"); + DescriptorHandle = TextureDX.UAVHandle; + break; + default: + assert(false && "Invalid DescriptorKind for a Texture."); + llvm_unreachable("Invalid DescriptorKind for a Texture."); + break; + } + } else { + assert(false && "Resource was a texture nor buffer. Samplers are " + "unsupported"); + llvm_unreachable("Resource was a texture nor buffer. Samplers " + "are unsupported"); + } + + assert(DescriptorHandle.ptr != 0 && + "Somehow got a null descriptor :("); + Device->CopyDescriptorsSimple( + 1, {HeapStart.ptr + HeapIndex * DescHandleIncSize}, + DescriptorHandle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + HeapIndex += 1; + } } } } @@ -2231,35 +2320,6 @@ class DXDevice : public offloadtest::Device { return llvm::Error::success(); } - void addUploadBeginBarrier(InvocationState &IS, ComPtr R) { - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - R.Get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); - } - - void addUploadEndBarrier(InvocationState &IS, ComPtr R, - bool IsUAV) { - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - R.Get(), D3D12_RESOURCE_STATE_COPY_DEST, - IsUAV ? 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS - : D3D12_RESOURCE_STATE_GENERIC_READ); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); - } - - void addReadbackBeginBarrier(InvocationState &IS, ComPtr R) { - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - R.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, - D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); - } - - void addReadbackEndBarrier(InvocationState &IS, ComPtr R) { - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - R.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, - D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); - } - llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) { CD3DX12_GPU_DESCRIPTOR_HANDLE Handle; if (IS.DescHeap) { @@ -2306,21 +2366,33 @@ class DXDevice : public offloadtest::Device { return llvm::createStringError( std::errc::value_too_large, "Root descriptor cannot refer to resource arrays."); + + D3D12_GPU_VIRTUAL_ADDRESS VirtualAddress = {}; + if (RootDescIt->second.back().Buffer) { + const auto &BufferDX = + llvm::cast(*RootDescIt->second.back().Buffer); + VirtualAddress = BufferDX.Buffer->GetGPUVirtualAddress(); + } else if (RootDescIt->second.back().Texture) { + const auto &TextureDX = + llvm::cast(*RootDescIt->second.back().Texture); + VirtualAddress = TextureDX.Resource->GetGPUVirtualAddress(); + } else { + assert(false && + "Resource is a buffer nor texture. 
Must be one of the two."); + } + switch (getDescriptorKind(RootDescIt->first->Kind)) { case DescriptorKind::SRV: - IS.CB->CmdList->SetComputeRootShaderResourceView( - RootParamIndex++, - RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); + IS.CB->CmdList->SetComputeRootShaderResourceView(RootParamIndex++, + VirtualAddress); break; case DescriptorKind::UAV: - IS.CB->CmdList->SetComputeRootUnorderedAccessView( - RootParamIndex++, - RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); + IS.CB->CmdList->SetComputeRootUnorderedAccessView(RootParamIndex++, + VirtualAddress); break; case DescriptorKind::CBV: - IS.CB->CmdList->SetComputeRootConstantBufferView( - RootParamIndex++, - RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); + IS.CB->CmdList->SetComputeRootConstantBufferView(RootParamIndex++, + VirtualAddress); break; case DescriptorKind::SAMPLER: llvm_unreachable("Not implemented yet."); @@ -2352,49 +2424,55 @@ class DXDevice : public offloadtest::Device { Encoder.endEncoding(); } - auto CopyBackResource = [&IS, this](ResourcePair &R) { + auto EncoderOrErr = IS.CB->createComputeEncoder(); + if (!EncoderOrErr) + return EncoderOrErr.takeError(); + auto ReadbackEncoder = std::move(*EncoderOrErr); + + auto CopyBackResource = [&ReadbackEncoder](ResourcePair &R) -> llvm::Error { if (R.first->isTexture()) { - const offloadtest::CPUBuffer &B = *R.first->BufferPtr; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ - 0, CD3DX12_SUBRESOURCE_FOOTPRINT( - getDXFormat(B.Format, B.Channels), B.OutputProps.Width, - B.OutputProps.Height, 1, - B.OutputProps.Width * B.getElementSize())}; for (const ResourceSet &RS : R.second) { if (RS.Readback == nullptr) continue; - const DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); - addReadbackBeginBarrier(IS, RS.Buffer); - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(ReadbackDX.Buffer.Get(), - Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RS.Buffer.Get(), 0); - IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 
0, 0, &SrcLoc, nullptr); - addReadbackEndBarrier(IS, RS.Buffer); + if (auto Err = ReadbackEncoder->copyTextureToBuffer(*RS.Texture, + *RS.Readback)) + return Err; + } + } else { + for (const ResourceSet &RS : R.second) { + if (RS.Readback == nullptr) + continue; + + if (auto Err = ReadbackEncoder->copyBufferToBuffer( + *RS.Buffer, 0, *RS.Readback, 0, + RS.Readback->getSizeInBytes())) + return Err; + + if (RS.Buffer->getDesc().HasCounter) + if (auto Err = ReadbackEncoder->copyCounterToBuffer( + *RS.Buffer, *RS.CounterReadback)) + return Err; } - return; - } - for (const ResourceSet &RS : R.second) { - if (RS.Readback == nullptr) - continue; - const DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); - addReadbackBeginBarrier(IS, RS.Buffer); - IS.CB->CmdList->CopyResource(ReadbackDX.Buffer.Get(), RS.Buffer.Get()); - addReadbackEndBarrier(IS, RS.Buffer); } + return llvm::Error::success(); }; for (auto &Table : IS.DescTables) for (auto &R : Table.Resources) - CopyBackResource(R); + if (auto Err = CopyBackResource(R)) + return Err; for (auto &R : IS.RootResources) - CopyBackResource(R); + if (auto Err = CopyBackResource(R)) + return Err; + + ReadbackEncoder->endEncoding(); return llvm::Error::success(); } llvm::Error readBack(Pipeline &P, InvocationState &IS) { - auto MemCpyBack = [](ResourcePair &R) -> llvm::Error { + auto MemCpyBack = [this](ResourcePair &R) -> llvm::Error { if (!R.first->isReadWrite()) return llvm::Error::success(); @@ -2402,23 +2480,44 @@ class DXDevice : public offloadtest::Device { auto *DataIt = R.first->BufferPtr->Data.begin(); for (; RSIt != R.second.end() && DataIt != R.first->BufferPtr->Data.end(); ++RSIt, ++DataIt) { - DXBuffer &ReadbackDX = llvm::cast(*RSIt->Readback); - auto DataPtrOrErr = ReadbackDX.map(); + offloadtest::Buffer &Readback = *RSIt->Readback; + auto DataPtrOrErr = Readback.map(); if (!DataPtrOrErr) return DataPtrOrErr.takeError(); - void *DataPtr = *DataPtrOrErr; + const void *DataPtr = *DataPtrOrErr; + + if 
(R.first->isTexture()) { + const TextureCreateDesc &Desc = RSIt->Texture->getDesc(); + const uint32_t SrcStrideInBytes = + getTextureUploadRowStrideInBytes(Desc); + const uint32_t DstStrideInBytes = + Desc.Width * getFormatSizeInBytes(Desc.Fmt); + assert(DstStrideInBytes <= SrcStrideInBytes && + "Destination should not have padding and thus should be <= " + "than SrcStride where we do expect potential padding."); + uint8_t *Dst = (uint8_t *)DataIt->get(); + const uint8_t *Src = (const uint8_t *)DataPtr; + + for (uint32_t Y = 0; Y < Desc.Height; ++Y) { + memcpy(Dst, Src, DstStrideInBytes); + Dst += DstStrideInBytes; + Src += SrcStrideInBytes; + } + } else { + memcpy(DataIt->get(), DataPtr, R.first->size()); + } - memcpy(DataIt->get(), DataPtr, R.first->size()); + Readback.unmap(); if (R.first->HasCounter) { - uint32_t Counter; - memcpy(&Counter, - static_cast(DataPtr) + - getUAVBufferCounterOffset(*R.first), - sizeof(uint32_t)); - R.first->BufferPtr->Counters.push_back(Counter); + offloadtest::Buffer &CounterReadback = *RSIt->CounterReadback; + auto CounterPtrOrErr = CounterReadback.map(); + if (!CounterPtrOrErr) + return CounterPtrOrErr.takeError(); + const uint32_t *CounterPtr = (const uint32_t *)*CounterPtrOrErr; + R.first->BufferPtr->Counters.push_back(*CounterPtr); + CounterReadback.unmap(); } - ReadbackDX.unmap(); } return llvm::Error::success(); @@ -2437,26 +2536,16 @@ class DXDevice : public offloadtest::Device { if (!IS.RTReadback) return llvm::Error::success(); - void *Mapped = nullptr; - auto &Readback = llvm::cast(*IS.RTReadback); - if (auto Err = HR::toError(Readback.Buffer->Map(0, nullptr, &Mapped), - "Failed to map render target readback")) - return Err; + auto DataPtrOrErr = IS.RTReadback->map(); + if (!DataPtrOrErr) + return DataPtrOrErr.takeError(); + const void *Mapped = *DataPtrOrErr; - // Query the copy footprint to get the actual padded row pitch used by - // the copy operation (D3D12 requires 256-byte aligned rows). 
- auto &RT = llvm::cast(*IS.RenderTarget); - const D3D12_RESOURCE_DESC RTDesc = RT.Resource->GetDesc(); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT Placed = {}; - uint32_t NumRows = 0; - uint64_t RowSizeInBytes = 0; - uint64_t TotalBytes = 0; - Device->GetCopyableFootprints(&RTDesc, 0u, 1u, 0u, &Placed, &NumRows, - &RowSizeInBytes, &TotalBytes); - - P.Bindings.RTargetBufferPtr->copyFromTexture(Mapped, - Placed.Footprint.RowPitch); - Readback.Buffer->Unmap(0, nullptr); + const uint32_t SrcStrideInBytes = + getTextureUploadRowStrideInBytes(IS.RenderTarget->getDesc()); + + P.Bindings.RTargetBufferPtr->copyFromTexture(Mapped, SrcStrideInBytes); + IS.RTReadback->unmap(); return llvm::Error::success(); } @@ -2500,10 +2589,6 @@ class DXDevice : public offloadtest::Device { } llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) { - auto &RT = llvm::cast(*IS.RenderTarget); - auto &DS = llvm::cast(*IS.DepthStencil); - auto &RTReadback = llvm::cast(*IS.RTReadback); - const DXPipelineState &DXPipeline = llvm::cast(*IS.Pipeline.get()); IS.CB->CmdList->SetGraphicsRootSignature(DXPipeline.RootSig.Get()); @@ -2516,8 +2601,8 @@ class DXDevice : public offloadtest::Device { RenderPassBeginDesc BeginDesc = {}; BeginDesc.Pass = IS.RenderPass.get(); - BeginDesc.ColorAttachments.push_back(&RT); - BeginDesc.DepthStencil = &DS; + BeginDesc.ColorAttachments.push_back(IS.RenderTarget.get()); + BeginDesc.DepthStencil = IS.DepthStencil.get(); auto EncOrErr = IS.CB->createRenderEncoder(BeginDesc); if (!EncOrErr) @@ -2555,63 +2640,45 @@ class DXDevice : public offloadtest::Device { Encoder.endEncoding(); - // Transition the render target to copy source and copy to the readback - // buffer. 
- const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, - D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); + auto EncoderOrErr = IS.CB->createComputeEncoder(); + if (!EncoderOrErr) + return EncoderOrErr.takeError(); + auto ReadbackEncoder = std::move(*EncoderOrErr); - const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ - 0, - CD3DX12_SUBRESOURCE_FOOTPRINT( - getDXFormat(B.Format, B.Channels), B.OutputProps.Width, - B.OutputProps.Height, 1, - getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize()))}; - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(), - Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), 0); - - IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + if (auto Err = ReadbackEncoder->copyTextureToBuffer(*IS.RenderTarget, + *IS.RTReadback)) + return Err; - auto CopyBackResource = [&IS, this](ResourcePair &R) { + auto CopyBackResource = [&ReadbackEncoder](ResourcePair &R) -> llvm::Error { if (R.first->isTexture()) { - const offloadtest::CPUBuffer &B = *R.first->BufferPtr; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ - 0, CD3DX12_SUBRESOURCE_FOOTPRINT( - getDXFormat(B.Format, B.Channels), B.OutputProps.Width, - B.OutputProps.Height, 1, - B.OutputProps.Width * B.getElementSize())}; for (const ResourceSet &RS : R.second) { if (RS.Readback == nullptr) continue; - const DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); - addReadbackBeginBarrier(IS, RS.Buffer); - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(ReadbackDX.Buffer.Get(), - Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RS.Buffer.Get(), 0); - IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); - addReadbackEndBarrier(IS, RS.Buffer); + + if (auto Err = ReadbackEncoder->copyTextureToBuffer(*RS.Texture, + *RS.Readback)) + return Err; + } 
+ } else { + for (const ResourceSet &RS : R.second) { + if (RS.Readback != nullptr) + if (auto Err = ReadbackEncoder->copyBufferToBuffer( + *RS.Buffer, 0, *RS.Readback, 0, + RS.Buffer->getSizeInBytes())) + return Err; } - return; - } - for (const ResourceSet &RS : R.second) { - if (RS.Readback == nullptr) - continue; - const DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); - addReadbackBeginBarrier(IS, RS.Buffer); - IS.CB->CmdList->CopyResource(ReadbackDX.Buffer.Get(), RS.Buffer.Get()); - addReadbackEndBarrier(IS, RS.Buffer); } + return llvm::Error::success(); }; for (auto &Table : IS.DescTables) for (auto &R : Table.Resources) - CopyBackResource(R); + if (auto Err = CopyBackResource(R)) + return Err; for (auto &R : IS.RootResources) - CopyBackResource(R); + if (auto Err = CopyBackResource(R)) + return Err; return llvm::Error::success(); } @@ -2629,8 +2696,10 @@ class DXDevice : public offloadtest::Device { State.CB = std::move(*CBOrErr); llvm::outs() << "Command buffer created.\n"; - if (auto Err = createBuffers(P, State)) + if (auto Err = createBuffers(P, State)) { + llvm::outs() << Err; return Err; + } llvm::outs() << "Buffers created.\n"; BindingsDesc BndDesc = {}; diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 41c5dd7ba..1d9a4389c 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -175,3 +175,83 @@ offloadtest::createBufferWithData( return Buffer; } + +llvm::Expected> +offloadtest::createTextureWithData( + Device &Dev, std::string Name, const TextureCreateDesc &Desc, + const void *Data, size_t SizeInBytes, ComputeEncoder *Encoder, + std::unique_ptr *OutUploadBuffer) { + + const uint64_t PackedRowStrideInBytes = + Desc.Width * getFormatSizeInBytes(Desc.Fmt); + if (SizeInBytes < PackedRowStrideInBytes * Desc.Height) + return llvm::createStringError( + "Data upload is not enough for texture size."); + + auto TextureOrErr = Dev.createTexture(Name, Desc); + if (!TextureOrErr) + return TextureOrErr.takeError(); + auto Texture = 
std::move(*TextureOrErr); + + if (Desc.Location == MemoryLocation::GpuOnly) { + if (OutUploadBuffer == nullptr) + return llvm::createStringError("An upload buffer is required to create a " + "GpuOnly texture with data."); + + const uint64_t TexRowStrideInBytes = + Dev.getTextureUploadRowStrideInBytes(Desc); + const uint64_t UploadBufferSizeInBytes = + (Desc.Height - 1) * TexRowStrideInBytes + PackedRowStrideInBytes; + + // Create Upload buffer + const BufferCreateDesc UploadDesc = BufferCreateDesc::uploadBuffer(); + std::string UploadBufferName = Name + " (Upload Buffer)"; + auto UploadBufferOrErr = + Dev.createBuffer(UploadBufferName, UploadDesc, UploadBufferSizeInBytes); + if (!UploadBufferOrErr) + return UploadBufferOrErr.takeError(); + *OutUploadBuffer = std::move(*UploadBufferOrErr); + + auto MappedPtrOrErr = (*OutUploadBuffer)->map(); + if (!MappedPtrOrErr) + return MappedPtrOrErr.takeError(); + + uint8_t *DstPtr = (uint8_t *)*MappedPtrOrErr; + const uint8_t *SrcPtr = (const uint8_t *)Data; + + for (uint32_t Y = 0; Y < Desc.Height; ++Y) { + memcpy(DstPtr, SrcPtr, PackedRowStrideInBytes); + DstPtr += TexRowStrideInBytes; + SrcPtr += PackedRowStrideInBytes; + } + (*OutUploadBuffer)->unmap(); + + // Copy Buffer to Buffer + if (auto Err = Encoder->copyBufferToTexture(**OutUploadBuffer, *Texture)) + return Err; + + } else { + auto MappedStrideInBytesOrErr = Texture->getMappedRowPitchInBytes(); + if (!MappedStrideInBytesOrErr) + return MappedStrideInBytesOrErr.takeError(); + const uint32_t MappedStrideInBytes = *MappedStrideInBytesOrErr; + + // Copy data over + auto MappedPtrOrErr = Texture->map(); + if (!MappedPtrOrErr) + return MappedPtrOrErr.takeError(); + + uint8_t *DstPtr = (uint8_t *)*MappedPtrOrErr; + const uint8_t *SrcPtr = (const uint8_t *)Data; + + for (uint32_t Y = 0; Y < Desc.Height; ++Y) { + memcpy(DstPtr, SrcPtr, PackedRowStrideInBytes); + DstPtr += MappedStrideInBytes; + SrcPtr += PackedRowStrideInBytes; + } + + Texture->unmap(); + } + + return 
Texture; +} diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index cc3f3920d..adefb59cf 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -308,6 +308,10 @@ class MTLBuffer : public offloadtest::Buffer { size_t SizeInBytes) : offloadtest::Buffer(GPUAPI::Metal), Buf(Buf), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {} + MTLBuffer(const MTLBuffer &) = delete; + MTLBuffer(MTLBuffer &&) = delete; + MTLBuffer &operator=(const MTLBuffer &) = delete; + MTLBuffer &operator=(MTLBuffer &&) = delete; size_t getSizeInBytes() const override { return SizeInBytes; } @@ -331,6 +335,8 @@ class MTLBuffer : public offloadtest::Buffer { Buf->release(); } + const BufferCreateDesc &getDesc() const override { return Desc; } + static bool classof(const offloadtest::Buffer *B) { return B->getAPI() == GPUAPI::Metal; } @@ -352,6 +358,16 @@ class MTLTexture : public offloadtest::Texture { const TextureCreateDesc &getDesc() const override { return Desc; } + llvm::Expected getMappedRowPitchInBytes() const override { + if (Desc.Location == MemoryLocation::GpuOnly) + return llvm::createStringError( + std::errc::invalid_argument, + "Cannot query mapped row pitch of a GpuOnly texture."); + // Metal host-visible textures are accessed via getBytes/replaceRegion with + // a caller-supplied bytesPerRow, so a tightly packed stride is valid. 
+ return Desc.Width * getFormatSizeInBytes(Desc.Fmt); + } + static bool classof(const offloadtest::Texture *T) { return T->getAPI() == GPUAPI::Metal; } @@ -1547,6 +1563,9 @@ class MTLDevice : public offloadtest::Device { llvm::Expected> createBuffer(std::string Name, const BufferCreateDesc &Desc, size_t SizeInBytes) override { + assert(!Desc.HasCounter && + "Metal Backend does not support buffers with a counter."); + MTL::Buffer *Buf = Device->newBuffer( SizeInBytes, getMetalBufferResourceOptions(Desc.Location)); if (!Buf) @@ -1574,6 +1593,11 @@ class MTLDevice : public offloadtest::Device { return std::make_unique(Tex, Name, Desc); } + uint32_t getTextureUploadRowStrideInBytes( + const TextureCreateDesc &Desc) const override { + return Desc.Width * getFormatSizeInBytes(Desc.Fmt); + } + llvm::Expected> createCommandBuffer() override { return MTLCommandBuffer::create(GraphicsQueue.Queue); diff --git a/lib/API/MTL/MTLResources.h b/lib/API/MTL/MTLResources.h index 7cb35b6dc..02a1d3625 100644 --- a/lib/API/MTL/MTLResources.h +++ b/lib/API/MTL/MTLResources.h @@ -85,6 +85,12 @@ inline MTL::PixelFormat getMetalPixelFormat(Format Format) { return MTL::PixelFormatRGBA32Uint; case Format::RGBA32Float: return MTL::PixelFormatRGBA32Float; + // Metal has no 64-bit-per-channel pixel formats. + case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: + llvm_unreachable("64-bit formats have no Metal pixel format equivalent"); case Format::D32Float: return MTL::PixelFormatDepth32Float; case Format::D32FloatS8Uint: @@ -140,6 +146,12 @@ inline MTL::VertexFormat getMetalVertexFormat(Format Fmt) { return MTL::VertexFormatUInt4; case Format::RGBA32Float: return MTL::VertexFormatFloat4; + // Metal has no 64-bit-per-channel vertex formats. 
+ case Format::R64Uint: + case Format::R64Sint: + case Format::RG64Uint: + case Format::RG64Sint: + llvm_unreachable("64-bit formats have no Metal vertex format equivalent"); // Depth formats cannot be used as vertex attributes. case Format::D32Float: case Format::D32FloatS8Uint: diff --git a/lib/API/Texture.cpp b/lib/API/Texture.cpp new file mode 100644 index 000000000..8e080dc9d --- /dev/null +++ b/lib/API/Texture.cpp @@ -0,0 +1,11 @@ +#include "API/Texture.h" +#include "API/Device.h" + +// Calculate the size in bytes of the texture data given a linear layout +// Useful for calculating the size for an upload or readback buffer. +size_t offloadtest::Texture::calculateLinearSizeInBytes(Device &Dev) const { + const auto &Desc = getDesc(); + const uint32_t Stride = Dev.getTextureUploadRowStrideInBytes(Desc); + return (Desc.Height - 1) * Stride + + Desc.Width * getFormatSizeInBytes(Desc.Fmt); +} diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index b2c0873b8..f9b5ab01e 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MathExtras.h" #include "../Util.h" @@ -427,15 +428,23 @@ class VulkanBuffer : public offloadtest::Buffer { public: VkDevice Dev; // Needed for clean-up VkBuffer Buffer; + VkBuffer CounterBuffer; VkDeviceMemory Memory; std::string Name; BufferCreateDesc Desc; size_t SizeInBytes; - VulkanBuffer(VkDevice Dev, VkBuffer Buffer, VkDeviceMemory Memory, - llvm::StringRef Name, BufferCreateDesc Desc, size_t SizeInBytes) + VulkanBuffer(VkDevice Dev, VkBuffer Buffer, VkBuffer CounterBuffer, + VkDeviceMemory Memory, llvm::StringRef Name, + BufferCreateDesc Desc, size_t SizeInBytes) : offloadtest::Buffer(GPUAPI::Vulkan), Dev(Dev), Buffer(Buffer), - Memory(Memory), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {} + CounterBuffer(CounterBuffer), Memory(Memory), Name(Name), Desc(Desc), + 
SizeInBytes(SizeInBytes) {} + + VulkanBuffer(const VulkanBuffer &) = delete; + VulkanBuffer(VulkanBuffer &&) = delete; + VulkanBuffer &operator=(const VulkanBuffer &) = delete; + VulkanBuffer &operator=(VulkanBuffer &&) = delete; size_t getSizeInBytes() const override { return SizeInBytes; } @@ -463,10 +472,14 @@ class VulkanBuffer : public offloadtest::Buffer { void unmap() override { vkUnmapMemory(Dev, Memory); } ~VulkanBuffer() override { + if (CounterBuffer != nullptr) + vkDestroyBuffer(Dev, CounterBuffer, nullptr); vkDestroyBuffer(Dev, Buffer, nullptr); vkFreeMemory(Dev, Memory, nullptr); } + const BufferCreateDesc &getDesc() const override { return Desc; } + static bool classof(const offloadtest::Buffer *B) { return B->getAPI() == GPUAPI::Vulkan; } @@ -483,13 +496,28 @@ class VulkanTexture : public offloadtest::Texture { // currently created during descriptor set setup, which determines their // binding layout. VkImageView View = VK_NULL_HANDLE; + VkImageSubresourceRange FullRange; std::string Name; TextureCreateDesc Desc; + VkImageTiling Tiling = VK_IMAGE_TILING_OPTIMAL; + + VkImageLayout PreferredLayout = VK_IMAGE_LAYOUT_GENERAL; + bool IsInUndefinedLayout = true; + uint64_t SizeInBytes; + + VkImageLayout preferredLayoutOrUndefined() { + return IsInUndefinedLayout ? 
VK_IMAGE_LAYOUT_UNDEFINED : PreferredLayout; + } VulkanTexture(VkDevice Dev, VkImage Image, VkDeviceMemory Memory, - llvm::StringRef Name, TextureCreateDesc Desc) + llvm::StringRef Name, TextureCreateDesc Desc, + VkImageLayout PreferredLayout, + VkImageSubresourceRange FullRange, VkImageTiling Tiling, + uint64_t SizeInBytes) : offloadtest::Texture(GPUAPI::Vulkan), Dev(Dev), Image(Image), - Memory(Memory), Name(Name), Desc(Desc) {} + Memory(Memory), FullRange(FullRange), Name(Name), Desc(Desc), + Tiling(Tiling), PreferredLayout(PreferredLayout), + SizeInBytes(SizeInBytes) {} ~VulkanTexture() override { if (View) @@ -498,8 +526,50 @@ class VulkanTexture : public offloadtest::Texture { vkFreeMemory(Dev, Memory, nullptr); } + llvm::Expected map() override { + if (Desc.Location == MemoryLocation::GpuOnly) + return llvm::createStringError(std::errc::invalid_argument, + "Cannot map a GpuOnly texture."); + void *Ptr = nullptr; + if (vkMapMemory(Dev, Memory, 0, SizeInBytes, 0, &Ptr) != VK_SUCCESS) + return llvm::createStringError(std::errc::io_error, + "Failed to map texture."); + // HOST_CACHED memory that is *not* HOST_COHERENT (GpuToCpu) needs explicit + // invalidation so the CPU sees the GPU-side writes. 
+ if (Desc.Location == MemoryLocation::GpuToCpu) { + VkMappedMemoryRange Range = {}; + Range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + Range.memory = Memory; + Range.offset = 0; + Range.size = VK_WHOLE_SIZE; + vkInvalidateMappedMemoryRanges(Dev, 1, &Range); + } + return Ptr; + } + + void unmap() override { vkUnmapMemory(Dev, Memory); } + const TextureCreateDesc &getDesc() const override { return Desc; } + llvm::Expected getMappedRowPitchInBytes() const override { + if (Desc.Location == MemoryLocation::GpuOnly) + return llvm::createStringError( + std::errc::invalid_argument, + "Cannot query mapped row pitch of a GpuOnly texture."); + if (Tiling != VK_IMAGE_TILING_LINEAR) + return llvm::createStringError( + std::errc::invalid_argument, + "Mapped row pitch is only defined for linear-tiled textures."); + + VkImageSubresource Sub = {}; + Sub.aspectMask = FullRange.aspectMask; + Sub.mipLevel = 0; + Sub.arrayLayer = 0; + VkSubresourceLayout Layout = {}; + vkGetImageSubresourceLayout(Dev, Image, &Sub, &Layout); + return static_cast(Layout.rowPitch); + } + static bool classof(const offloadtest::Texture *T) { return T->getAPI() == GPUAPI::Vulkan; } @@ -690,6 +760,25 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer { VkAccessFlags PendingSrcAccess = VK_ACCESS_HOST_WRITE_BIT; VkPipelineStageFlags PendingDstStage = 0; VkAccessFlags PendingDstAccess = 0; + llvm::SmallVector PendingImageTransitions; + + void addImageTransition(VkAccessFlags SrcAccessMask, + VkAccessFlags DstAccessMask, VkImageLayout OldLayout, + VkImageLayout NewLayout, VkImage Image, + VkImageSubresourceRange SubresourceRange) { + PendingImageTransitions.push_back(VkImageMemoryBarrier{ + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, /*sType*/ + nullptr, /*pNext*/ + SrcAccessMask, + DstAccessMask, + OldLayout, + NewLayout, + 0, /*srcQueueFamilyIndex*/ + 0, /*dstQueueFamilyIndex*/ + Image, + SubresourceRange, + }); + } void addPendingBarrier(VkPipelineStageFlags Stage, VkAccessFlags Access) { 
PendingDstStage |= Stage; @@ -697,22 +786,18 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer { } void flushBarrier() { - if (PendingSrcStage == 0) { - // Nothing produced yet — no barrier needed, but carry dst forward as - // the next src (the command we're about to run will produce at these - // stages). - PendingSrcStage = PendingDstStage; - PendingSrcAccess = PendingDstAccess; - PendingDstStage = 0; - PendingDstAccess = 0; - return; + if (PendingSrcStage != 0 || !PendingImageTransitions.empty()) { + VkMemoryBarrier Barrier = {}; + Barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + Barrier.srcAccessMask = PendingSrcAccess; + Barrier.dstAccessMask = PendingDstAccess; + vkCmdPipelineBarrier(CmdBuffer, PendingSrcStage, PendingDstStage, 0, 1, + &Barrier, 0, nullptr, PendingImageTransitions.size(), + PendingImageTransitions.data()); + + PendingImageTransitions.clear(); } - VkMemoryBarrier Barrier = {}; - Barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - Barrier.srcAccessMask = PendingSrcAccess; - Barrier.dstAccessMask = PendingDstAccess; - vkCmdPipelineBarrier(CmdBuffer, PendingSrcStage, PendingDstStage, 0, 1, - &Barrier, 0, nullptr, 0, nullptr); + PendingSrcStage = PendingDstStage; PendingSrcAccess = PendingDstAccess; PendingDstStage = 0; @@ -833,8 +918,8 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder { llvm::Error copyBufferToBuffer(offloadtest::Buffer &Src, size_t SrcOffset, offloadtest::Buffer &Dst, size_t DstOffset, size_t Size) override { - auto &VKSrc = static_cast(Src); - auto &VKDst = static_cast(Dst); + auto &VKSrc = llvm::cast(Src); + auto &VKDst = llvm::cast(Dst); VkBufferCopy Region = {}; Region.srcOffset = SrcOffset; Region.dstOffset = DstOffset; @@ -845,6 +930,92 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder { return llvm::Error::success(); } + llvm::Error copyCounterToBuffer(offloadtest::Buffer &Src, + offloadtest::Buffer &Dst) override { + auto &VKSrc = llvm::cast(Src); + auto &VKDst = 
llvm::cast(Dst); + + if (!VKSrc.Desc.HasCounter) + return llvm::createStringError( + "Counter resource passed does not hvae a counter."); + + const VkBufferCopy Region{ + 0, /*srcOffset*/ + 0, /*dstOffset*/ + sizeof(uint32_t) /*size*/ + }; + addDstBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + insertDebugSignpost("copyCounterToBuffer 4B"); + vkCmdCopyBuffer(CB.CmdBuffer, VKSrc.CounterBuffer, VKDst.Buffer, 1, + &Region); + return llvm::Error::success(); + } + + llvm::Error copyBufferToTexture(offloadtest::Buffer &Src, + offloadtest::Texture &Dst) override { + auto &VKSrc = llvm::cast(Src); + auto &VKDst = llvm::cast(Dst); + + CB.addImageTransition(CB.PendingSrcAccess, /*SrcAccessMask*/ + VK_ACCESS_TRANSFER_WRITE_BIT, /*DstAccessMask*/ + VKDst.preferredLayoutOrUndefined(), /*OldLayout*/ + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, /*NewLayout*/ + VKDst.Image, VKDst.FullRange); + VKDst.IsInUndefinedLayout = false; + + CB.addPendingBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_READ_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT); + CB.flushBarrier(); + + insertDebugSignpost( + llvm::formatv("copyTextureToBuffer {0} -> {1}", VKSrc.Name, VKDst.Name) + .str()); + vkCmdCopyBufferToImage(CB.CmdBuffer, VKSrc.Buffer, VKDst.Image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, nullptr); + + CB.addImageTransition(VK_ACCESS_TRANSFER_WRITE_BIT, /*SrcAccessMask*/ + VK_ACCESS_NONE, /*DstAccessMask*/ + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, /*OldLayout*/ + VKDst.preferredLayoutOrUndefined(), /*NewLayout*/ + VKDst.Image, VKDst.FullRange); + + return llvm::Error::success(); + } + + llvm::Error copyTextureToBuffer(offloadtest::Texture &Src, + offloadtest::Buffer &Dst) override { + auto &VKSrc = llvm::cast(Src); + auto &VKDst = llvm::cast(Dst); + + CB.addImageTransition(CB.PendingSrcAccess, /*SrcAccessMask*/ + VK_ACCESS_TRANSFER_READ_BIT, /*DstAccessMask*/ + VKSrc.preferredLayoutOrUndefined(), /*OldLayout*/ + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, /*NewLayout*/ + 
VKSrc.Image, VKSrc.FullRange); + VKSrc.IsInUndefinedLayout = false; + + CB.addPendingBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_READ_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT); + CB.flushBarrier(); + + insertDebugSignpost( + llvm::formatv("copyTextureToBuffer {0} -> {1}", VKSrc.Name, VKDst.Name) + .str()); + vkCmdCopyImageToBuffer(CB.CmdBuffer, VKSrc.Image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VKDst.Buffer, + 0, nullptr); + + CB.addImageTransition(VK_ACCESS_TRANSFER_READ_BIT, /*SrcAccessMask*/ + VK_ACCESS_NONE, /*DstAccessMask*/ + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, /*OldLayout*/ + VKSrc.preferredLayoutOrUndefined(), /*NewLayout*/ + VKSrc.Image, VKSrc.FullRange); + + return llvm::Error::success(); + } + void endEncodingImpl() override { popDebugGroup(); } }; @@ -900,14 +1071,16 @@ class VulkanRenderPass final : public offloadtest::RenderPass { class VulkanRenderEncoder : public offloadtest::RenderEncoder { VulkanCommandBuffer &CB; + offloadtest::RenderPassBeginDesc Desc; // Encoder contract: viewport and scissor must both be set before draw(). 
bool ViewportSet = false; bool ScissorSet = false; public: - VulkanRenderEncoder(VulkanCommandBuffer &CB) - : RenderEncoder(GPUAPI::Vulkan), CB(CB) { + VulkanRenderEncoder(VulkanCommandBuffer &CB, + const offloadtest::RenderPassBeginDesc &Desc) + : RenderEncoder(GPUAPI::Vulkan), CB(CB), Desc(Desc) { pushDebugGroup("RenderEncoder"); } VulkanRenderEncoder(const VulkanRenderEncoder &CB) = delete; @@ -1008,6 +1181,29 @@ class VulkanRenderEncoder : public offloadtest::RenderEncoder { void endEncodingImpl() override { vkCmdEndRenderPass(CB.CmdBuffer); + + for (size_t I = 0; I < Desc.ColorAttachments.size(); ++I) { + auto &Tex = llvm::cast(*Desc.ColorAttachments[I]); + CB.addImageTransition( + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, /*SrcAccessMask*/ + CB.PendingSrcAccess, /*DstAccessMask*/ + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, /*OldLayout*/ + Tex.PreferredLayout, /*NewLayout*/ + Tex.Image, Tex.FullRange); + } + + if (Desc.DepthStencil) { + auto &Tex = llvm::cast(*Desc.DepthStencil); + CB.addImageTransition( + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, /*SrcAccessMask*/ + CB.PendingSrcAccess, /*DstAccessMask*/ + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, /*OldLayout*/ + Tex.PreferredLayout, /*NewLayout*/ + Tex.Image, Tex.FullRange); + } + popDebugGroup(); } }; @@ -1094,6 +1290,31 @@ VulkanCommandBuffer::createRenderEncoder( ClearValues.push_back(CV); } + for (size_t I = 0; I < Desc.ColorAttachments.size(); ++I) { + auto &Tex = llvm::cast(*Desc.ColorAttachments[I]); + this->addImageTransition( + this->PendingSrcAccess, /*SrcAccessMask*/ + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, /*DstAccessMask*/ + Tex.preferredLayoutOrUndefined(), /*OldLayout*/ + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, /*NewLayout*/ + Tex.Image, Tex.FullRange); + + Tex.IsInUndefinedLayout = false; + } + + if (Desc.DepthStencil) { + auto &Tex = 
llvm::cast(*Desc.DepthStencil); + this->addImageTransition( + this->PendingSrcAccess, /*SrcAccessMask*/ + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, /*DstAccessMask*/ + Tex.preferredLayoutOrUndefined(), /*OldLayout*/ + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, /*NewLayout*/ + Tex.Image, Tex.FullRange); + + Tex.IsInUndefinedLayout = false; + } VkFramebufferCreateInfo FBCI = {}; FBCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; FBCI.renderPass = VKPass.Handle; @@ -1123,9 +1344,11 @@ VulkanCommandBuffer::createRenderEncoder( BeginInfo.clearValueCount = static_cast(ClearValues.size()); BeginInfo.pClearValues = ClearValues.data(); + this->flushBarrier(); + vkCmdBeginRenderPass(CmdBuffer, &BeginInfo, VK_SUBPASS_CONTENTS_INLINE); - return std::make_unique(*this); + return std::make_unique(*this, Desc); } class VulkanDevice : public offloadtest::Device { @@ -2224,47 +2447,114 @@ class VulkanDevice : public offloadtest::Device { BufInfo.size = SizeInBytes; BufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + BufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + switch (Desc.Usage) { case BufferUsage::Storage: BufInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break; + case BufferUsage::ConstantBuffer: + BufInfo.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + break; + case BufferUsage::IndexBuffer: + BufInfo.usage |= + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + break; case BufferUsage::VertexBuffer: BufInfo.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break; + case BufferUsage::IndirectArgs: + BufInfo.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + break; } - BufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - VkBuffer DeviceBuffer; + VkBuffer BufferObject; if (auto Err = VK::toError( - vkCreateBuffer(Device, &BufInfo, nullptr, 
&DeviceBuffer), + vkCreateBuffer(Device, &BufInfo, nullptr, &BufferObject), "Failed to create device buffer.")) return Err; VkMemoryRequirements MemReqs; - vkGetBufferMemoryRequirements(Device, DeviceBuffer, &MemReqs); + vkGetBufferMemoryRequirements(Device, BufferObject, &MemReqs); VkMemoryAllocateInfo AllocInfo = {}; AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; AllocInfo.allocationSize = MemReqs.size; auto MemIdx = getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, getVulkanMemoryFlags(Desc.Location)); - if (!MemIdx) + if (!MemIdx) { + vkDestroyBuffer(Device, BufferObject, nullptr); return MemIdx.takeError(); + } AllocInfo.memoryTypeIndex = *MemIdx; + VkBuffer CounterBuffer = nullptr; + VkMemoryRequirements CounterMemReqs = {}; + VkDeviceSize CounterOffsetInBytes = 0; + if (Desc.HasCounter) { + // Fill the outer CounterBuffer; a local redeclaration would shadow it. + VkBufferCreateInfo CounterBufferInfo = {}; + CounterBufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + CounterBufferInfo.size = sizeof(uint32_t); + CounterBufferInfo.usage = BufInfo.usage; + CounterBufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + if (auto Err = VK::toError(vkCreateBuffer(Device, &CounterBufferInfo, + nullptr, &CounterBuffer), + "Could not create counter buffer.")) { + vkDestroyBuffer(Device, BufferObject, nullptr); + return Err; + } + + vkGetBufferMemoryRequirements(Device, CounterBuffer, &CounterMemReqs); + + CounterOffsetInBytes = + llvm::alignTo(AllocInfo.allocationSize, CounterMemReqs.alignment); + AllocInfo.allocationSize = CounterOffsetInBytes + CounterMemReqs.size; + + assert(MemReqs.memoryTypeBits == CounterMemReqs.memoryTypeBits && + "We are expecting the main resource and counter resource to have " + "the same memory type."); + } + VkDeviceMemory DeviceMemory; if (auto Err = VK::toError( vkAllocateMemory(Device, &AllocInfo, nullptr, &DeviceMemory), - "Failed to allocate device memory.")) + "Failed to allocate device memory.")) { + if (CounterBuffer) + vkDestroyBuffer(Device, CounterBuffer, 
nullptr); + vkDestroyBuffer(Device, BufferObject, nullptr); return Err; + } + if (auto Err = VK::toError( - vkBindBufferMemory(Device, DeviceBuffer, DeviceMemory, 0), - "Failed to bind device buffer memory.")) + vkBindBufferMemory(Device, BufferObject, DeviceMemory, 0), + "Failed to bind device buffer memory.")) { + if (CounterBuffer) + vkDestroyBuffer(Device, CounterBuffer, nullptr); + vkDestroyBuffer(Device, BufferObject, nullptr); + vkFreeMemory(Device, DeviceMemory, nullptr); return Err; + } + + if (CounterBuffer != nullptr) { + if (auto Err = VK::toError(vkBindBufferMemory(Device, CounterBuffer, + DeviceMemory, + CounterOffsetInBytes), + "Failed to bind counter buffer memory.")) { + if (CounterBuffer) + vkDestroyBuffer(Device, CounterBuffer, nullptr); + vkDestroyBuffer(Device, BufferObject, nullptr); + vkFreeMemory(Device, DeviceMemory, nullptr); + return Err; + } + } - return std::make_unique(Device, DeviceBuffer, DeviceMemory, - Name, Desc, SizeInBytes); + return std::make_unique(Device, BufferObject, CounterBuffer, + DeviceMemory, Name, Desc, + SizeInBytes); } llvm::Expected> @@ -2280,7 +2570,9 @@ class VulkanDevice : public offloadtest::Device { ImageInfo.mipLevels = Desc.MipLevels; ImageInfo.arrayLayers = 1; ImageInfo.samples = VK_SAMPLE_COUNT_1_BIT; - ImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + ImageInfo.tiling = Desc.Location == MemoryLocation::GpuOnly + ? 
VK_IMAGE_TILING_OPTIMAL + : VK_IMAGE_TILING_LINEAR; ImageInfo.usage = getVulkanImageUsage(Desc.Usage); ImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -2320,8 +2612,27 @@ class VulkanDevice : public offloadtest::Device { return Err; } - auto Tex = std::make_unique(Device, Image, DeviceMemory, - Name, Desc); + VkImageAspectFlags FullAspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + if (isDepthFormat(Desc.Fmt)) { + FullAspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + if (isStencilFormat(Desc.Fmt)) + FullAspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + const VkImageSubresourceRange FullRange{ + FullAspectMask, + 0, /*baseMipLevel*/ + Desc.MipLevels, + 0, /*baseArrayLayer*/ + 1, /*layerCount*/ + }; + + VkImageLayout PreferredLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + if ((Desc.Usage & TextureUsage::Storage)) + PreferredLayout = VK_IMAGE_LAYOUT_GENERAL; + + auto Tex = std::make_unique( + Device, Image, DeviceMemory, Name, Desc, PreferredLayout, FullRange, + ImageInfo.tiling, MemReqs.size); const bool IsRT = (Desc.Usage & TextureUsage::RenderTarget) != 0; const bool IsDS = (Desc.Usage & TextureUsage::DepthStencil) != 0; @@ -2353,6 +2664,14 @@ class VulkanDevice : public offloadtest::Device { return Tex; } + uint32_t getTextureUploadRowStrideInBytes( + const TextureCreateDesc &Desc) const override { + const uint64_t TightRow = + uint64_t(Desc.Width) * getFormatSizeInBytes(Desc.Fmt); + return static_cast(llvm::alignTo( + TightRow, Props.limits.optimalBufferCopyRowPitchAlignment)); + } + const Capabilities &getCapabilities() override { if (Caps.empty()) queryCapabilities(); diff --git a/lib/API/VK/VKResources.h b/lib/API/VK/VKResources.h index 3df6dcf6f..3b41f5c34 100644 --- a/lib/API/VK/VKResources.h +++ b/lib/API/VK/VKResources.h @@ -66,6 +66,14 @@ inline VkFormat getVulkanFormat(Format Format) { return VK_FORMAT_R32G32B32A32_UINT; case Format::RGBA32Float: return VK_FORMAT_R32G32B32A32_SFLOAT; + case Format::R64Uint: 
+ return VK_FORMAT_R64_UINT; + case Format::R64Sint: + return VK_FORMAT_R64_SINT; + case Format::RG64Uint: + return VK_FORMAT_R64G64_UINT; + case Format::RG64Sint: + return VK_FORMAT_R64G64_SINT; case Format::D32Float: return VK_FORMAT_D32_SFLOAT; case Format::D32FloatS8Uint: diff --git a/test/Feature/CBuffer/arrays.test b/test/Feature/CBuffer/arrays.test index 90e433431..50340c809 100644 --- a/test/Feature/CBuffer/arrays.test +++ b/test/Feature/CBuffer/arrays.test @@ -49,11 +49,11 @@ Buffers: ] - Name: Out Format: Hex32 - Stride: 144 - FillSize: 144 + Stride: 128 + FillSize: 128 - Name: ExpectedOut Format: Hex32 - Stride: 144 + Stride: 128 Data: [ 0x3f800000, 0x40800000, @@ -66,7 +66,7 @@ Buffers: 0x00000010, 0x00000011, 0x00000012, 0x00000013, 0x00000014, 0x00000015, 0x00000016, 0x00000017, 0x00000018, - 0x00000019, 0x0000001A, 0x0000001B, 0x0, 0x0, 0x0, 0x0 + 0x00000019, 0x0000001A, 0x0000001B, ] Results: - Result: Test1 diff --git a/test/Feature/CBuffer/vectors-16bit.test b/test/Feature/CBuffer/vectors-16bit.test index e5cbbc63a..5ba4bb0eb 100644 --- a/test/Feature/CBuffer/vectors-16bit.test +++ b/test/Feature/CBuffer/vectors-16bit.test @@ -34,9 +34,8 @@ Buffers: ] - Name: Out Format: Hex16 - # Warp doesn't seem to be able to handle a stride of 10 so we use 12 here - Stride: 12 - FillSize: 12 + Stride: 10 + FillSize: 10 DescriptorSets: - Resources: - Name: CBVectors diff --git a/test/Feature/StructuredBuffer/inc_counter_array.test b/test/Feature/StructuredBuffer/inc_counter_array.test index f909b1a7e..f0251f624 100644 --- a/test/Feature/StructuredBuffer/inc_counter_array.test +++ b/test/Feature/StructuredBuffer/inc_counter_array.test @@ -56,10 +56,7 @@ DescriptorSets: # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s %if DirectX %{ --check-prefixes=CHECK,DX-CHECK %} - -# DX-CHECK: Creating UAV: { Size = 4100, Register = u0, Space = 0, HasCounter = 1 } -# DX-CHECK: UAV: HeapIdx = 
0 EltSize = 4 NumElts = 1 HasCounter = 1 +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s # CHECK: Name: Out # CHECK: Counters: [ 1, 2, 3, 4 ] diff --git a/test/Feature/StructuredBuffer/inc_counter_array_imm_idx.test b/test/Feature/StructuredBuffer/inc_counter_array_imm_idx.test index eac569f57..e99be1800 100644 --- a/test/Feature/StructuredBuffer/inc_counter_array_imm_idx.test +++ b/test/Feature/StructuredBuffer/inc_counter_array_imm_idx.test @@ -55,10 +55,7 @@ DescriptorSets: # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s %if DirectX %{ --check-prefixes=CHECK,DX-CHECK %} - -# DX-CHECK: Creating UAV: { Size = 4100, Register = u0, Space = 0, HasCounter = 1 } -# DX-CHECK: UAV: HeapIdx = 0 EltSize = 4 NumElts = 1 HasCounter = 1 +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s # CHECK: Name: Out # CHECK: Counters: [ 4, 8, 12, 16 ]