diff --git a/include/API/Device.h b/include/API/Device.h
index 259923a7f..eaa7e48ca 100644
--- a/include/API/Device.h
+++ b/include/API/Device.h
@@ -257,6 +257,11 @@ class Device {
   virtual llvm::Expected>
   createTexture(std::string Name, const TextureCreateDesc &Desc) = 0;
 
+  // Row stride, in bytes, that buffer data must use when it is copied to (or
+  // read back from) a texture created with Desc; backends may pad each row.
+  virtual uint32_t
+  getTextureUploadRowStrideInBytes(const TextureCreateDesc &Desc) const = 0;
+
   virtual llvm::Expected>
   createRenderPass(const RenderPassDesc &Desc) = 0;
 
@@ -324,6 +329,12 @@ createBufferWithData(Device &Dev, std::string Name, size_t SizeInBytes,
                      ComputeEncoder *Encoder,
                      std::unique_ptr *OutUploadBuffer);
 
+llvm::Expected>
+createTextureWithData(Device &Dev, std::string Name,
+                      const TextureCreateDesc &Desc, const void *Data,
+                      size_t SizeInBytes, ComputeEncoder *Encoder,
+                      std::unique_ptr *OutUploadBuffer);
+
 // TLAS handles come in pre-allocated because the caller's binding loop
 // stamps the AS pointer into descriptor bundles before this helper runs;
 // BLAS handles are allocated inline since BLASes aren't user-bindable.
diff --git a/include/API/Encoder.h b/include/API/Encoder.h
index 526044b79..b7eec9eec 100644
--- a/include/API/Encoder.h
+++ b/include/API/Encoder.h
@@ -22,6 +22,7 @@
 namespace offloadtest {
 
 class Buffer;
+class Texture;
 class PipelineState;
 class AccelerationStructure;
 struct BLASBuildRequest;
@@ -97,6 +98,11 @@ class ComputeEncoder : public CommandEncoder {
                                          Buffer &Dst, size_t DstOffset,
                                          size_t Size) = 0;
 
+  /// Copy the contents of Src into Dst. Rows in Src must be spaced by the
+  /// stride that `Device::getTextureUploadRowStrideInBytes` returns for
+  /// the description Dst was created with.
+  virtual llvm::Error copyBufferToTexture(Buffer &Src, Texture &Dst) = 0;
+
   /// Build a batch of acceleration structures in a single barrier slot. All
   /// items in `Items` must be independent — no item may depend on another's
   /// build output.
Backends may issue this as one native batch call (Vulkan)
diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp
index 0050b91f0..18491bede 100644
--- a/lib/API/DX/Device.cpp
+++ b/lib/API/DX/Device.cpp
@@ -791,6 +791,52 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
     return llvm::Error::success();
   }
 
+  // Records a copy from the start of Src into subresource 0 of Dst. A resource
+  // whose preferred state is not already the copy state is transitioned into
+  // it beforehand and back to its preferred state afterwards.
+  llvm::Error copyBufferToTexture(Buffer &Src, Texture &Dst) override {
+    auto &SrcBuf = llvm::cast(Src);
+    auto &DstTex = llvm::cast(Dst);
+
+    const bool MoveSrc =
+        SrcBuf.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE;
+    const bool MoveDst =
+        DstTex.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST;
+
+    // Enter the copy states; the flush is issued whether or not anything moved.
+    if (MoveSrc)
+      CB.addResourceTransition(SrcBuf.Buffer.Get(), SrcBuf.PreferredState,
+                               D3D12_RESOURCE_STATE_COPY_SOURCE);
+    if (MoveDst)
+      CB.addResourceTransition(DstTex.Resource.Get(), DstTex.PreferredState,
+                               D3D12_RESOURCE_STATE_COPY_DEST);
+    CB.flushBarrier();
+
+    // Describe the buffer as one Width x Height x 1 image at offset 0 whose
+    // row pitch is getAlignedTexturePitch(Width, texel size).
+    const uint32_t TexelBytes = getFormatSizeInBytes(DstTex.Desc.Fmt);
+    const auto RowPitch = getAlignedTexturePitch(DstTex.Desc.Width, TexelBytes);
+    const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Layout{
+        0, CD3DX12_SUBRESOURCE_FOOTPRINT(getDXGIFormat(DstTex.Desc.Fmt),
+                                         DstTex.Desc.Width, DstTex.Desc.Height,
+                                         1, RowPitch)};
+    const CD3DX12_TEXTURE_COPY_LOCATION CopyTo(DstTex.Resource.Get(), 0);
+    const CD3DX12_TEXTURE_COPY_LOCATION CopyFrom(SrcBuf.Buffer.Get(), Layout);
+    CB.CmdList->CopyTextureRegion(&CopyTo, 0, 0, 0, &CopyFrom, nullptr);
+
+    // Queue the reverse transitions; no flush is issued for them here.
+    if (MoveSrc)
+      CB.addResourceTransition(SrcBuf.Buffer.Get(),
+                               D3D12_RESOURCE_STATE_COPY_SOURCE,
+                               SrcBuf.PreferredState);
+    if (MoveDst)
+      CB.addResourceTransition(DstTex.Resource.Get(),
+                               D3D12_RESOURCE_STATE_COPY_DEST,
+                               DstTex.PreferredState);
+
+    return llvm::Error::success();
+  }
+
   // Defined out-of-line below — needs DXDevice's full type for access to the
   // ID3D12Device5 entry point and helper allocators.
llvm::Error batchBuildAS(llvm::ArrayRef Items) override;
@@ -1599,6 +1635,11 @@ class DXDevice : public offloadtest::Device {
     return Tex;
   }
 
+  uint32_t getTextureUploadRowStrideInBytes(
+      const TextureCreateDesc &Desc) const override {
+    return getAlignedTexturePitch(Desc.Width, getFormatSizeInBytes(Desc.Fmt));
+  }
+
   static llvm::Expected>
   create(ComPtr Adapter, const DeviceConfig &Config) {
     ComPtr Device;
diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp
index a66f7b49e..56c80eb15 100644
--- a/lib/API/Device.cpp
+++ b/lib/API/Device.cpp
@@ -300,3 +300,84 @@ offloadtest::createBufferWithData(
 
   return Buffer;
 }
+
+// Creates the texture described by Desc and records, on Encoder, the copy
+// that fills it from the tightly packed pixel rows in Data.
+//
+// The rows are staged in a new upload buffer, one
+// Dev.getTextureUploadRowStrideInBytes(Desc) stride apart. That buffer is
+// returned through OutUploadBuffer; presumably the caller has to keep it
+// alive until the recorded copy has executed.
+//
+// Fails without creating any resource when OutUploadBuffer, Encoder or Data
+// is null, when Desc has a zero width or height, or when SizeInBytes is
+// smaller than Width * Height * texel size.
+llvm::Expected>
+offloadtest::createTextureWithData(
+    Device &Dev, std::string Name, const TextureCreateDesc &Desc,
+    const void *Data, size_t SizeInBytes, ComputeEncoder *Encoder,
+    std::unique_ptr *OutUploadBuffer) {
+
+  // Validate the arguments before any GPU resource is created.
+  if (OutUploadBuffer == nullptr)
+    return llvm::createStringError("An upload buffer is required to create a "
+                                   "GpuOnly texture with data.");
+  if (Encoder == nullptr)
+    return llvm::createStringError(
+        "An encoder is required to create a texture with data.");
+  if (Data == nullptr)
+    return llvm::createStringError(
+        "Source data is required to create a texture with data.");
+  // A zero extent would make (Height - 1) below wrap around.
+  if (Desc.Width == 0 || Desc.Height == 0)
+    return llvm::createStringError(
+        "Cannot upload data to a texture with a zero extent.");
+
+  // Widen first so that the size arithmetic is done in 64 bits.
+  const uint64_t Height = Desc.Height;
+  const uint64_t PackedRowStrideInBytes =
+      uint64_t(Desc.Width) * getFormatSizeInBytes(Desc.Fmt);
+  if (SizeInBytes < PackedRowStrideInBytes * Height)
+    return llvm::createStringError(
+        "Data upload is not enough for texture size.");
+
+  auto TextureOrErr = Dev.createTexture(Name, Desc);
+  if (!TextureOrErr)
+    return TextureOrErr.takeError();
+  auto Texture = std::move(*TextureOrErr);
+
+  // Every row but the last takes a full stride in the upload buffer; the
+  // last one only needs its packed bytes.
+  const uint64_t TexRowStrideInBytes =
+      Dev.getTextureUploadRowStrideInBytes(Desc);
+  const uint64_t UploadBufferSizeInBytes =
+      (Height - 1) * TexRowStrideInBytes + PackedRowStrideInBytes;
+
+  // Create Upload buffer
+  const BufferCreateDesc UploadDesc = BufferCreateDesc::uploadBuffer();
+  const std::string UploadBufferName = Name + " (Upload Buffer)";
+  auto UploadBufferOrErr =
+      Dev.createBuffer(UploadBufferName, UploadDesc, UploadBufferSizeInBytes);
+  if (!UploadBufferOrErr)
+    return UploadBufferOrErr.takeError();
+  *OutUploadBuffer = std::move(*UploadBufferOrErr);
+
+  auto MappedPtrOrErr = (*OutUploadBuffer)->map();
+  if (!MappedPtrOrErr)
+    return MappedPtrOrErr.takeError();
+
+  // Repack row by row. Each address is derived from the base pointer so that
+  // no pointer is ever advanced past the end of either allocation.
+  uint8_t *const DstBase = (uint8_t *)*MappedPtrOrErr;
+  const uint8_t *const SrcBase = (const uint8_t *)Data;
+  for (uint64_t Y = 0; Y < Height; ++Y)
+    memcpy(DstBase + Y * TexRowStrideInBytes,
+           SrcBase + Y * PackedRowStrideInBytes, PackedRowStrideInBytes);
+  (*OutUploadBuffer)->unmap();
+
+  // Copy Buffer to Texture
+  if (auto Err = Encoder->copyBufferToTexture(**OutUploadBuffer, *Texture))
+    return Err;
+
+  return Texture;
+}
diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp
index 9eb449f34..9e9679822 100644
--- a/lib/API/MTL/MTLDevice.cpp
+++ b/lib/API/MTL/MTLDevice.cpp
@@ -587,6 +587,33 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
     return llvm::Error::success();
   }
 
+  llvm::Error copyBufferToTexture(offloadtest::Buffer &Src,
+                                  offloadtest::Texture &Dst) override {
+    if (auto Err = ensureBlitEncoder())
+      return Err;
+    auto &MTLSrc = static_cast(Src);
+    auto &MTLDst = static_cast(Dst);
+
+    // The upload buffer is laid out with a tightly packed row stride matching
+    // getTextureUploadRowStrideInBytes(), so the source bytes-per-row is the
+    // texture width times the element size.
+    const size_t ElemSize = getFormatSizeInBytes(MTLDst.Desc.Fmt);
+    const size_t RowBytes = MTLDst.Desc.Width * ElemSize;
+    const size_t ImageBytes = RowBytes * MTLDst.Desc.Height;
+    const MTL::Size CopySize(MTLDst.Desc.Width, MTLDst.Desc.Height, 1);
+
+    insertDebugSignpost(llvm::formatv("copyBufferToTexture {0} -> {1}",
+                                      MTLSrc.Name, MTLDst.Name)
+                            .str());
+    BlitEnc->copyFromBuffer(MTLSrc.Buf, /*sourceOffset=*/0, RowBytes,
+                            ImageBytes, CopySize, MTLDst.Tex,
+                            /*destinationSlice=*/0, /*destinationLevel=*/0,
+                            MTL::Origin(0, 0, 0));
+    addBarrierScope(MTL::BarrierScopeTextures);
+
+    return llvm::Error::success();
+  }
+
   // Defined out-of-line below — needs MTLDevice's full type for access to the
   // MTL::Device handle (used to allocate scratch and instance buffers).
   llvm::Error batchBuildAS(llvm::ArrayRef Items) override;
@@ -1706,6 +1733,11 @@ class MTLDevice : public offloadtest::Device {
     return std::make_unique(Tex, Name, Desc);
   }
 
+  uint32_t getTextureUploadRowStrideInBytes(
+      const TextureCreateDesc &Desc) const override {
+    return Desc.Width * getFormatSizeInBytes(Desc.Fmt);
+  }
+
   llvm::Expected>
   createCommandBuffer() override {
     auto CBOrErr = MTLCommandBuffer::create(GraphicsQueue.Queue);
diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp
index 17f72705a..e103d8d30 100644
--- a/lib/API/VK/Device.cpp
+++ b/lib/API/VK/Device.cpp
@@ -901,6 +901,54 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
     return llvm::Error::success();
   }
 
+  // Records a copy of the tightly packed rows in Src into mip 0, layer 0 of
+  // Dst, moving the image into TRANSFER_DST_OPTIMAL for the copy and back to
+  // its preferred layout afterwards.
+  llvm::Error copyBufferToTexture(offloadtest::Buffer &Src,
+                                  offloadtest::Texture &Dst) override {
+    auto &VKSrc = llvm::cast(Src);
+    auto &VKDst = llvm::cast(Dst);
+
+    CB.addImageTransition(CB.PendingSrcAccess,                  /*SrcAccessMask*/
+                          VK_ACCESS_TRANSFER_WRITE_BIT,         /*DstAccessMask*/
+                          VKDst.preferredLayoutOrUndefined(),   /*OldLayout*/
+                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, /*NewLayout*/
+                          VKDst);
+    VKDst.IsInUndefinedLayout = false;
+
+    CB.addPendingBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                         VK_ACCESS_TRANSFER_READ_BIT |
+                             VK_ACCESS_TRANSFER_WRITE_BIT);
+    CB.flushBarrier();
+
+    // One region covering the whole image. bufferRowLength and
+    // bufferImageHeight are left at 0, which tells Vulkan the buffer rows are
+    // tightly packed according to imageExtent; that matches the stride reported
+    // by VulkanDevice::getTextureUploadRowStrideInBytes.
+    // NOTE(review): assumes the Vulkan texture exposes its TextureCreateDesc as
+    // `Desc` (as the DX and Metal textures do) and that it is a color image.
+    VkBufferImageCopy Region = {};
+    Region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    Region.imageSubresource.layerCount = 1;
+    Region.imageExtent.width = VKDst.Desc.Width;
+    Region.imageExtent.height = VKDst.Desc.Height;
+    Region.imageExtent.depth = 1;
+
+    insertDebugSignpost(
+        llvm::formatv("copyBufferToTexture {0} -> {1}", VKSrc.Name, VKDst.Name)
+            .str());
+    vkCmdCopyBufferToImage(CB.CmdBuffer, VKSrc.Buffer, VKDst.Image,
+                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &Region);
+
+    CB.addImageTransition(VK_ACCESS_TRANSFER_WRITE_BIT,         /*SrcAccessMask*/
+                          VK_ACCESS_NONE,                       /*DstAccessMask*/
+                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, /*OldLayout*/
+                          VKDst.preferredLayoutOrUndefined(),   /*NewLayout*/
+                          VKDst);
+
+    return llvm::Error::success();
+  }
+
   // Defined out-of-line below — needs VulkanDevice's full type for access to
   // the device-loaded ray-tracing entry points and helpers.
   llvm::Error batchBuildAS(llvm::ArrayRef Items) override;
@@ -2547,6 +2595,16 @@ class VulkanDevice : public offloadtest::Device {
     return Tex;
   }
 
+  // Upload rows are tightly packed: copyBufferToTexture leaves
+  // VkBufferImageCopy::bufferRowLength at 0, and that field counts texels, so
+  // a byte-aligned pitch could not always be expressed there anyway.
+  uint32_t getTextureUploadRowStrideInBytes(
+      const TextureCreateDesc &Desc) const override {
+    const uint64_t TightRow =
+        uint64_t(Desc.Width) * getFormatSizeInBytes(Desc.Fmt);
+    return static_cast<uint32_t>(TightRow);
+  }
+
   const Capabilities &getCapabilities() override {
     if (Caps.empty())
       queryCapabilities();