Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions include/API/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,11 @@ class Device {
virtual llvm::Expected<std::unique_ptr<Texture>>
createTexture(std::string Name, const TextureCreateDesc &Desc) = 0;

// Returns the row stride, in bytes, required when uploading data to (or
// reading back from) a texture created with the given description, via an
// upload buffer.
//
// The value is backend-defined and may be larger than the tightly packed row
// size (width * bytes-per-texel), so code that lays out an upload buffer must
// advance by this stride per row instead of assuming tight packing.
virtual uint32_t
getTextureUploadRowStrideInBytes(const TextureCreateDesc &Desc) const = 0;

virtual llvm::Expected<std::unique_ptr<RenderPass>>
createRenderPass(const RenderPassDesc &Desc) = 0;

Expand Down Expand Up @@ -324,6 +329,12 @@ createBufferWithData(Device &Dev, std::string Name,
size_t SizeInBytes, ComputeEncoder *Encoder,
std::unique_ptr<offloadtest::Buffer> *OutUploadBuffer);

// Creates a texture from `Desc` on `Dev` and records, on `Encoder`, a copy
// that fills it from `Data`.
//
// `Data` is read as tightly packed rows (width * bytes-per-texel each) and
// must hold at least `height` such rows; a smaller `SizeInBytes` is reported
// as an error. The rows are re-laid out into a newly created upload buffer
// using the stride from `Device::getTextureUploadRowStrideInBytes`.
//
// `OutUploadBuffer` must be non-null; it receives ownership of the upload
// buffer. `Encoder` is dereferenced to record the copy, so it must be
// non-null as well.
// NOTE(review): the upload buffer presumably has to stay alive until the
// recorded copy has executed on the GPU — confirm against the callers.
llvm::Expected<std::unique_ptr<offloadtest::Texture>>
createTextureWithData(Device &Dev, std::string Name,
const TextureCreateDesc &Desc, const void *Data,
size_t SizeInBytes, ComputeEncoder *Encoder,
std::unique_ptr<offloadtest::Buffer> *OutUploadBuffer);

// TLAS handles come in pre-allocated because the caller's binding loop
// stamps the AS pointer into descriptor bundles before this helper runs;
// BLAS handles are allocated inline since BLASes aren't user-bindable.
Expand Down
6 changes: 6 additions & 0 deletions include/API/Encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
namespace offloadtest {

class Buffer;
class Texture;
class PipelineState;
class AccelerationStructure;
struct BLASBuildRequest;
Expand Down Expand Up @@ -97,6 +98,11 @@ class ComputeEncoder : public CommandEncoder {
Buffer &Dst, size_t DstOffset,
size_t Size) = 0;

/// Copy a buffer into a texture. The caller is expected to set up correct
/// striding using the stride acquired from
/// `Device::getTextureUploadRowStrideInBytes`.
///
/// The signature carries no offsets or sub-region: only the source buffer
/// and the destination texture are supplied, so the row layout of `Src` is
/// implied by the description of `Dst`.
///
/// \param Src buffer holding the texel rows to copy.
/// \param Dst texture that receives the data.
/// \returns an `llvm::Error` describing a failure to record the copy, or
/// success.
virtual llvm::Error copyBufferToTexture(Buffer &Src, Texture &Dst) = 0;

/// Build a batch of acceleration structures in a single barrier slot. All
/// items in `Items` must be independent — no item may depend on another's
/// build output. Backends may issue this as one native batch call (Vulkan)
Expand Down
41 changes: 41 additions & 0 deletions lib/API/DX/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,42 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
return llvm::Error::success();
}

// Records a copy of the whole of buffer `Src` into subresource 0 of texture
// `Dst`. Each resource is transitioned into the matching copy state for the
// copy (unless its preferred state already is that state) and transitioned
// back to its preferred state afterwards.
llvm::Error copyBufferToTexture(Buffer &Src, Texture &Dst) override {
auto &DXSrc = llvm::cast<DXBuffer>(Src);
auto &DXDst = llvm::cast<DXTexture>(Dst);

if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE)
CB.addResourceTransition(DXSrc.Buffer.Get(), DXSrc.PreferredState,
D3D12_RESOURCE_STATE_COPY_SOURCE);

if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST)
CB.addResourceTransition(DXDst.Resource.Get(), DXDst.PreferredState,
D3D12_RESOURCE_STATE_COPY_DEST);
CB.flushBarrier();

// Describe how the texel rows are laid out inside the buffer: the data
// starts at offset 0 and rows are spaced by getAlignedTexturePitch(), i.e.
// the same expression DXDevice::getTextureUploadRowStrideInBytes() returns.
const uint32_t ElementSize = getFormatSizeInBytes(DXDst.Desc.Fmt);
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{
0,
CD3DX12_SUBRESOURCE_FOOTPRINT(
getDXGIFormat(DXDst.Desc.Fmt), DXDst.Desc.Width, DXDst.Desc.Height,
1, getAlignedTexturePitch(DXDst.Desc.Width, ElementSize))};
const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(DXDst.Resource.Get(), 0);
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(DXSrc.Buffer.Get(), Footprint);
Comment on lines +808 to +814

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the buffer was not created with the necessary padding, the Footprint will be bigger than the actual buffer size.
Let's say I want to define a 3x2 render target. I can't be expected to add the padding in the yaml file (as it's only a DX concern). Do we have something in the backend code that is adding that padding?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the calling code is protecting from the above case with

  if (SizeInBytes < PackedRowStrideInBytes * Desc.Height)
    return llvm::createStringError(
        "Data upload is not enough for texture size.");

but I think as soon as you try to create a tiny 2d texture this might trigger 🤔

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR also introduces the getTextureUploadRowStrideInBytes function to acquire the required row stride to add the necessary padding. The caller of copyBufferToTexture is expected to set up their upload buffer correctly.

In the case of the textures we create from the yaml file, we will be using createTextureWithData, which memcpys the data with the required padding into the upload buffer.

// Copy the full footprint to the texture origin (0, 0, 0); a null source box
// selects the entire source region.
CB.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr);

// Queue the transitions back to each resource's preferred state.
// NOTE(review): no flushBarrier() follows in this function; presumably a
// later operation flushes these pending transitions — confirm.
if (DXSrc.PreferredState != D3D12_RESOURCE_STATE_COPY_SOURCE)
CB.addResourceTransition(DXSrc.Buffer.Get(),
D3D12_RESOURCE_STATE_COPY_SOURCE,
DXSrc.PreferredState);

if (DXDst.PreferredState != D3D12_RESOURCE_STATE_COPY_DEST)
CB.addResourceTransition(DXDst.Resource.Get(),
D3D12_RESOURCE_STATE_COPY_DEST,
DXDst.PreferredState);

return llvm::Error::success();
}

// Defined out-of-line below — needs DXDevice's full type for access to the
// ID3D12Device5 entry point and helper allocators.
llvm::Error batchBuildAS(llvm::ArrayRef<ASBuildItem> Items) override;
Expand Down Expand Up @@ -1599,6 +1635,11 @@ class DXDevice : public offloadtest::Device {
return Tex;
}

// Row stride, in bytes, that an upload buffer must use for a texture created
// from `Desc`: the row pitch produced by getAlignedTexturePitch() for the
// texture's width and texel size.
uint32_t getTextureUploadRowStrideInBytes(
    const TextureCreateDesc &Desc) const override {
  const auto TexelSizeInBytes = getFormatSizeInBytes(Desc.Fmt);
  return getAlignedTexturePitch(Desc.Width, TexelSizeInBytes);
}

static llvm::Expected<std::unique_ptr<offloadtest::Device>>
create(ComPtr<IDXCoreAdapter> Adapter, const DeviceConfig &Config) {
ComPtr<ID3D12DeviceX> Device;
Expand Down
56 changes: 56 additions & 0 deletions lib/API/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,59 @@ offloadtest::createBufferWithData(

return Buffer;
}

/// Creates a texture described by `Desc` and records, on `Encoder`, an upload
/// of `Data` into it through a newly created upload buffer.
///
/// \param Dev device used to create the texture and the upload buffer.
/// \param Name debug name of the texture; the upload buffer is named
///        "<Name> (Upload Buffer)".
/// \param Desc description of the texture to create. Width and height must
///        both be non-zero.
/// \param Data tightly packed source rows (width * bytes-per-texel each).
/// \param SizeInBytes number of readable bytes at `Data`; must cover at least
///        `height` packed rows.
/// \param Encoder encoder the buffer-to-texture copy is recorded on; must be
///        non-null.
/// \param OutUploadBuffer receives ownership of the upload buffer; must be
///        non-null.
/// \returns the created texture, or an error if an argument is invalid or any
///          creation / mapping / copy step fails.
llvm::Expected<std::unique_ptr<offloadtest::Texture>>
offloadtest::createTextureWithData(
    Device &Dev, std::string Name, const TextureCreateDesc &Desc,
    const void *Data, size_t SizeInBytes, ComputeEncoder *Encoder,
    std::unique_ptr<offloadtest::Buffer> *OutUploadBuffer) {

  // Validate every argument up front so that no GPU resource is created for a
  // request that cannot succeed.
  if (OutUploadBuffer == nullptr)
    return llvm::createStringError("An upload buffer is required to create a "
                                   "GpuOnly texture with data.");
  if (Encoder == nullptr)
    return llvm::createStringError(
        "An encoder is required to create a texture with data.");
  if (Data == nullptr)
    return llvm::createStringError(
        "Source data is required to create a texture with data.");
  // A zero height would make `Desc.Height - 1` below wrap around to a huge
  // value; a zero width leaves nothing to copy.
  if (Desc.Width == 0 || Desc.Height == 0)
    return llvm::createStringError(
        "Cannot upload data to a texture with a zero-sized dimension.");

  // Widen before multiplying so the row size is computed in 64 bits.
  const uint64_t PackedRowStrideInBytes =
      static_cast<uint64_t>(Desc.Width) * getFormatSizeInBytes(Desc.Fmt);
  if (SizeInBytes < PackedRowStrideInBytes * Desc.Height)
    return llvm::createStringError(
        "Data upload is not enough for texture size.");

  auto TextureOrErr = Dev.createTexture(Name, Desc);
  if (!TextureOrErr)
    return TextureOrErr.takeError();
  auto Texture = std::move(*TextureOrErr);

  // Every row but the last occupies a full backend stride; the last row only
  // needs its packed bytes.
  const uint64_t TexRowStrideInBytes =
      Dev.getTextureUploadRowStrideInBytes(Desc);
  const uint64_t UploadBufferSizeInBytes =
      (static_cast<uint64_t>(Desc.Height) - 1) * TexRowStrideInBytes +
      PackedRowStrideInBytes;

  // Create Upload buffer
  const BufferCreateDesc UploadDesc = BufferCreateDesc::uploadBuffer();
  const std::string UploadBufferName = Name + " (Upload Buffer)";
  auto UploadBufferOrErr =
      Dev.createBuffer(UploadBufferName, UploadDesc, UploadBufferSizeInBytes);
  if (!UploadBufferOrErr)
    return UploadBufferOrErr.takeError();
  *OutUploadBuffer = std::move(*UploadBufferOrErr);

  auto MappedPtrOrErr = (*OutUploadBuffer)->map();
  if (!MappedPtrOrErr)
    return MappedPtrOrErr.takeError();

  // Re-lay out the packed source rows at the backend's row stride.
  uint8_t *DstPtr = static_cast<uint8_t *>(*MappedPtrOrErr);
  const uint8_t *SrcPtr = static_cast<const uint8_t *>(Data);
  for (uint32_t Y = 0; Y < Desc.Height; ++Y) {
    memcpy(DstPtr, SrcPtr, PackedRowStrideInBytes);
    DstPtr += TexRowStrideInBytes;
    SrcPtr += PackedRowStrideInBytes;
  }
  (*OutUploadBuffer)->unmap();

  // Copy Buffer to Texture
  if (auto Err = Encoder->copyBufferToTexture(**OutUploadBuffer, *Texture))
    return Err;

  return Texture;
}
32 changes: 32 additions & 0 deletions lib/API/MTL/MTLDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,33 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
return llvm::Error::success();
}

// Records a blit that copies buffer `Src` into slice 0 / mip level 0 of
// texture `Dst`, covering the texture's full width and height.
llvm::Error copyBufferToTexture(offloadtest::Buffer &Src,
                                offloadtest::Texture &Dst) override {
  // The copy is issued on the blit encoder, so make sure one is available.
  if (auto Err = ensureBlitEncoder())
    return Err;

  auto &Source = static_cast<MTLBuffer &>(Src);
  auto &Target = static_cast<MTLTexture &>(Dst);
  const auto &TexDesc = Target.Desc;

  // Source rows are tightly packed (width * element size), which is the
  // stride getTextureUploadRowStrideInBytes() reports for this backend.
  const size_t BytesPerTexel = getFormatSizeInBytes(TexDesc.Fmt);
  const size_t BytesPerRow = TexDesc.Width * BytesPerTexel;
  const size_t BytesPerImage = BytesPerRow * TexDesc.Height;
  const MTL::Size Extent(TexDesc.Width, TexDesc.Height, 1);

  insertDebugSignpost(llvm::formatv("copyBufferToTexture {0} -> {1}",
                                    Source.Name, Target.Name)
                          .str());
  BlitEnc->copyFromBuffer(Source.Buf, /*sourceOffset=*/0, BytesPerRow,
                          BytesPerImage, Extent, Target.Tex,
                          /*destinationSlice=*/0, /*destinationLevel=*/0,
                          MTL::Origin(0, 0, 0));
  addBarrierScope(MTL::BarrierScopeTextures);

  return llvm::Error::success();
}

// Defined out-of-line below — needs MTLDevice's full type for access to the
// MTL::Device handle (used to allocate scratch and instance buffers).
llvm::Error batchBuildAS(llvm::ArrayRef<ASBuildItem> Items) override;
Expand Down Expand Up @@ -1706,6 +1733,11 @@ class MTLDevice : public offloadtest::Device {
return std::make_unique<MTLTexture>(Tex, Name, Desc);
}

// Row stride, in bytes, that an upload buffer must use for a texture created
// from `Desc`. This backend uses tightly packed rows: width * texel size.
uint32_t getTextureUploadRowStrideInBytes(
    const TextureCreateDesc &Desc) const override {
  const auto TexelSizeInBytes = getFormatSizeInBytes(Desc.Fmt);
  return Desc.Width * TexelSizeInBytes;
}

llvm::Expected<std::unique_ptr<offloadtest::CommandBuffer>>
createCommandBuffer() override {
auto CBOrErr = MTLCommandBuffer::create(GraphicsQueue.Queue);
Expand Down
40 changes: 40 additions & 0 deletions lib/API/VK/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,38 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
return llvm::Error::success();
}

// Records a copy of buffer `Src` into image `Dst`. The image is moved into
// TRANSFER_DST_OPTIMAL for the copy and moved back to its preferred layout
// afterwards.
llvm::Error copyBufferToTexture(offloadtest::Buffer &Src,
                                offloadtest::Texture &Dst) override {
  auto &VKSrc = llvm::cast<VulkanBuffer>(Src);
  auto &VKDst = llvm::cast<VulkanTexture>(Dst);

  // Move the image from its current (preferred or undefined) layout into the
  // transfer-destination layout.
  CB.addImageTransition(CB.PendingSrcAccess,                  /*SrcAccessMask*/
                        VK_ACCESS_TRANSFER_WRITE_BIT,         /*DstAccessMask*/
                        VKDst.preferredLayoutOrUndefined(),   /*OldLayout*/
                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, /*NewLayout*/
                        VKDst);
  // Must be cleared before the second preferredLayoutOrUndefined() call
  // below, which names the layout the image is returned to.
  VKDst.IsInUndefinedLayout = false;

  CB.addPendingBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                       VK_ACCESS_TRANSFER_READ_BIT |
                           VK_ACCESS_TRANSFER_WRITE_BIT);
  CB.flushBarrier();

  // The signpost names this operation (buffer -> texture); it previously
  // carried the label of the opposite copy direction.
  insertDebugSignpost(
      llvm::formatv("copyBufferToTexture {0} -> {1}", VKSrc.Name, VKDst.Name)
          .str());
  // NOTE(review): this call passes regionCount = 0 and pRegions = nullptr, so
  // no texel data is transferred, and Vulkan requires regionCount > 0. A
  // VkBufferImageCopy covering the whole image is needed here, with
  // bufferRowLength (in texels) matching the stride returned by
  // Device::getTextureUploadRowStrideInBytes. The members needed to build it
  // (texture extent/format and the device's row-pitch alignment) are not
  // visible from this block — TODO confirm and supply the region.
  vkCmdCopyBufferToImage(CB.CmdBuffer, VKSrc.Buffer, VKDst.Image,
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, nullptr);

  // Return the image to its preferred layout.
  CB.addImageTransition(VK_ACCESS_TRANSFER_WRITE_BIT,         /*SrcAccessMask*/
                        VK_ACCESS_NONE,                       /*DstAccessMask*/
                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, /*OldLayout*/
                        VKDst.preferredLayoutOrUndefined(),   /*NewLayout*/
                        VKDst);

  return llvm::Error::success();
}

// Defined out-of-line below — needs VulkanDevice's full type for access to
// the device-loaded ray-tracing entry points and helpers.
llvm::Error batchBuildAS(llvm::ArrayRef<ASBuildItem> Items) override;
Expand Down Expand Up @@ -2547,6 +2579,14 @@ class VulkanDevice : public offloadtest::Device {
return Tex;
}

// Row stride, in bytes, that an upload buffer must use for a texture created
// from `Desc`: the tightly packed row size rounded up to the device's
// optimalBufferCopyRowPitchAlignment limit.
uint32_t getTextureUploadRowStrideInBytes(
    const TextureCreateDesc &Desc) const override {
  const auto RowPitchAlignment =
      Props.limits.optimalBufferCopyRowPitchAlignment;
  // Compute the packed size in 64 bits before aligning.
  const uint64_t PackedRowBytes =
      uint64_t(Desc.Width) * getFormatSizeInBytes(Desc.Fmt);
  const auto PaddedRowBytes = llvm::alignTo(PackedRowBytes, RowPitchAlignment);
  return static_cast<uint32_t>(PaddedRowBytes);
}

const Capabilities &getCapabilities() override {
if (Caps.empty())
queryCapabilities();
Expand Down
Loading