Skip to content

Commit 6bb2b65

Browse files
committed
Use recently added Buffer API for render target readback buffer. Cleanup code to use the new helper functions
1 parent c667ac7 commit 6bb2b65

3 files changed

Lines changed: 125 additions & 87 deletions

File tree

lib/API/DX/Device.cpp

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -448,8 +448,8 @@ class DXDevice : public offloadtest::Device {
448448
std::unique_ptr<offloadtest::Fence> Fence;
449449

450450
// Resources for graphics pipelines.
451-
ComPtr<ID3D12Resource> RTReadback;
452451
std::shared_ptr<DXTexture> RT;
452+
std::shared_ptr<DXBuffer> RTReadback;
453453
ComPtr<ID3D12Resource> VB;
454454

455455
llvm::SmallVector<DescriptorTable> DescTables;
@@ -479,33 +479,32 @@ class DXDevice : public offloadtest::Device {
479479
llvm::Expected<std::shared_ptr<offloadtest::Buffer>>
480480
createBuffer(std::string Name, BufferCreateDesc &Desc,
481481
size_t SizeInBytes) override {
482+
const D3D12_HEAP_TYPE HeapType = getDXHeapType(Desc.Location);
482483

483-
D3D12_HEAP_TYPE HeapType = D3D12_HEAP_TYPE_DEFAULT;
484-
switch (Desc.Location) {
485-
case MemoryLocation::GpuOnly:
486-
HeapType = D3D12_HEAP_TYPE_DEFAULT;
487-
break;
488-
case MemoryLocation::CpuToGpu:
489-
HeapType = D3D12_HEAP_TYPE_UPLOAD;
490-
break;
491-
case MemoryLocation::GpuToCpu:
492-
HeapType = D3D12_HEAP_TYPE_READBACK;
493-
break;
494-
}
495-
484+
// As per the readback heap docs
485+
// > Resources in this heap must be created with
486+
// > D3D12_RESOURCE_STATE_COPY_DEST, and cannot be changed away from this.
496487
const D3D12_RESOURCE_FLAGS Flags =
497-
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
488+
HeapType == D3D12_HEAP_TYPE_READBACK
489+
? D3D12_RESOURCE_FLAG_NONE
490+
: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
498491

499492
const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(HeapType);
500493
const D3D12_RESOURCE_DESC BufferDesc =
501494
CD3DX12_RESOURCE_DESC::Buffer(SizeInBytes, Flags);
502495

496+
D3D12_RESOURCE_STATES InitialState = D3D12_RESOURCE_STATE_COMMON;
497+
if (HeapType == D3D12_HEAP_TYPE_UPLOAD)
498+
InitialState = D3D12_RESOURCE_STATE_GENERIC_READ;
499+
else if (HeapType == D3D12_HEAP_TYPE_READBACK)
500+
InitialState = D3D12_RESOURCE_STATE_COPY_DEST;
501+
503502
ComPtr<ID3D12Resource> DeviceBuffer;
504-
if (auto Err = HR::toError(Device->CreateCommittedResource(
505-
&HeapProps, D3D12_HEAP_FLAG_NONE,
506-
&BufferDesc, D3D12_RESOURCE_STATE_COMMON,
507-
nullptr, IID_PPV_ARGS(&DeviceBuffer)),
508-
"Failed to create buffer."))
503+
if (auto Err =
504+
HR::toError(Device->CreateCommittedResource(
505+
&HeapProps, D3D12_HEAP_FLAG_NONE, &BufferDesc,
506+
InitialState, nullptr, IID_PPV_ARGS(&DeviceBuffer)),
507+
"Failed to create buffer."))
509508
return Err;
510509

511510
return std::make_shared<DXBuffer>(DeviceBuffer, Name, Desc, SizeInBytes);
@@ -1504,15 +1503,15 @@ class DXDevice : public offloadtest::Device {
15041503
return Err;
15051504

15061505
// If there is no render target, return early.
1507-
if (IS.RTReadback == nullptr)
1506+
if (!IS.RTReadback)
15081507
return llvm::Error::success();
15091508

15101509
// Map readback and copy into host buffer, accounting for row pitch and
15111510
// flipping vertical orientation. DirectX render target origin is top-left,
15121511
// while our image writer expects bottom-left.
15131512
const CPUBuffer &B = *P.Bindings.RTargetBufferPtr;
15141513
void *Mapped = nullptr;
1515-
if (auto Err = HR::toError(IS.RTReadback->Map(0, nullptr, &Mapped),
1514+
if (auto Err = HR::toError(IS.RTReadback->Buffer->Map(0, nullptr, &Mapped),
15161515
"Failed to map render target readback"))
15171516
return Err;
15181517

@@ -1543,7 +1542,7 @@ class DXDevice : public offloadtest::Device {
15431542
memcpy(DstRow, SrcRow, RowBytes);
15441543
}
15451544

1546-
IS.RTReadback->Unmap(0, nullptr);
1545+
IS.RTReadback->Buffer->Unmap(0, nullptr);
15471546
return llvm::Error::success();
15481547
}
15491548

@@ -1561,17 +1560,12 @@ class DXDevice : public offloadtest::Device {
15611560
IS.RT = std::static_pointer_cast<DXTexture>(*TexOrErr);
15621561

15631562
// Create readback buffer sized for the pixel data (raw bytes).
1564-
const uint64_t RBSize = static_cast<uint64_t>(OutBuf.size());
1565-
D3D12_RESOURCE_DESC const RbDesc = CD3DX12_RESOURCE_DESC::Buffer(RBSize);
1566-
CD3DX12_HEAP_PROPERTIES RbHeap =
1567-
CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK);
1568-
if (auto Err =
1569-
HR::toError(Device->CreateCommittedResource(
1570-
&RbHeap, D3D12_HEAP_FLAG_NONE, &RbDesc,
1571-
D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
1572-
IID_PPV_ARGS(&IS.RTReadback)),
1573-
"Failed to create render target readback buffer"))
1574-
return Err;
1563+
BufferCreateDesc BufDesc = {};
1564+
BufDesc.Location = MemoryLocation::GpuToCpu;
1565+
auto BufOrErr = createBuffer("RTReadback", BufDesc, OutBuf.size());
1566+
if (!BufOrErr)
1567+
return BufOrErr.takeError();
1568+
IS.RTReadback = std::static_pointer_cast<DXBuffer>(*BufOrErr);
15751569

15761570
return llvm::Error::success();
15771571
}
@@ -1710,8 +1704,9 @@ class DXDevice : public offloadtest::Device {
17101704
CD3DX12_SUBRESOURCE_FOOTPRINT(
17111705
getDXFormat(B.Format, B.Channels), B.OutputProps.Width,
17121706
B.OutputProps.Height, 1, B.OutputProps.Width * B.getElementSize())};
1713-
const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback.Get(), Footprint);
1714-
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT.Get(), 0);
1707+
const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback->Buffer.Get(),
1708+
Footprint);
1709+
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT->Resource.Get(), 0);
17151710

17161711
IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr);
17171712

lib/API/MTL/MTLDevice.cpp

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,14 @@ class MTLDevice : public offloadtest::Device {
497497
return TexOrErr.takeError();
498498

499499
IS.FrameBufferTexture = std::static_pointer_cast<MTLTexture>(*TexOrErr);
500+
501+
// Create a readback buffer for copying render target data to the CPU.
502+
BufferCreateDesc BufDesc = {};
503+
BufDesc.Location = MemoryLocation::GpuToCpu;
504+
auto BufOrErr = createBuffer("RTReadback", BufDesc, OutBuf.size());
505+
if (!BufOrErr)
506+
return BufOrErr.takeError();
507+
IS.FrameBufferReadback = std::static_pointer_cast<MTLBuffer>(*BufOrErr);
500508
return llvm::Error::success();
501509
}
502510

@@ -545,6 +553,15 @@ class MTLDevice : public offloadtest::Device {
545553

546554
CmdEncoder->endEncoding();
547555

556+
// Blit the render target into the readback buffer for CPU access.
557+
MTL::BlitCommandEncoder *Blit = IS.CmdBuffer->blitCommandEncoder();
558+
const size_t ElemSize = getFormatSize(IS.FrameBufferTexture->Desc.Format);
559+
const size_t RowBytes = Width * ElemSize;
560+
Blit->copyFromTexture(IS.FrameBufferTexture->Tex, 0, 0,
561+
MTL::Origin(0, 0, 0), MTL::Size(Width, Height, 1),
562+
IS.FrameBufferReadback->Buf, 0, RowBytes, 0);
563+
Blit->endEncoding();
564+
548565
return llvm::Error::success();
549566
}
550567

@@ -604,16 +621,15 @@ class MTLDevice : public offloadtest::Device {
604621
const size_t ElemSize = RTarget->getElementSize();
605622
const size_t RowBytes = Width * ElemSize;
606623

607-
// Read the framebuffer one row at a time into the output buffer.
608-
// Read rows from the texture bottom-to-top into the buffer top-to-bottom
609-
// so the final image is upright.
624+
// Read from the readback buffer. The blit copied the texture data in
625+
// GPU layout order, so we flip rows here to produce an upright image.
626+
const unsigned char *Src = reinterpret_cast<const unsigned char *>(
627+
IS.FrameBufferReadback->Buf->contents());
610628
unsigned char *Buf =
611629
reinterpret_cast<unsigned char *>(RTarget->Data[0].get());
612630
for (uint64_t R = 0; R < Height; ++R) {
613-
const uint32_t SrcRow = (uint32_t)((Height - 1) - R);
614-
unsigned char *Dst = Buf + R * RowBytes;
615-
IS.FrameBufferTexture->getBytes(
616-
Dst, RowBytes, MTL::Region(0, SrcRow, (uint32_t)Width, 1), 0);
631+
const uint64_t SrcRow = (Height - 1) - R;
632+
memcpy(Buf + R * RowBytes, Src + SrcRow * RowBytes, RowBytes);
617633
}
618634
}
619635
return llvm::Error::success();
@@ -643,18 +659,8 @@ class MTLDevice : public offloadtest::Device {
643659
llvm::Expected<std::shared_ptr<offloadtest::Buffer>>
644660
createBuffer(std::string Name, BufferCreateDesc &Desc,
645661
size_t SizeInBytes) override {
646-
MTL::ResourceOptions StorageMode;
647-
switch (Desc.Location) {
648-
case MemoryLocation::GpuOnly:
649-
StorageMode = MTL::ResourceStorageModePrivate;
650-
break;
651-
case MemoryLocation::CpuToGpu:
652-
case MemoryLocation::GpuToCpu:
653-
StorageMode = MTL::ResourceStorageModeManaged;
654-
break;
655-
}
656-
657-
MTL::Buffer *Buf = Device->newBuffer(SizeInBytes, StorageMode);
662+
MTL::Buffer *Buf = Device->newBuffer(
663+
SizeInBytes, getMetalBufferResourceOptions(Desc.Location));
658664
if (!Buf)
659665
return llvm::createStringError(std::errc::not_enough_memory,
660666
"Failed to create Metal buffer.");

lib/API/VK/Device.cpp

Lines changed: 69 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,7 @@ class VulkanDevice : public offloadtest::Device {
572572
VkFramebuffer FrameBuffer = VK_NULL_HANDLE;
573573
ImageRef DepthStencil = {0, 0, 0};
574574
std::shared_ptr<VulkanTexture> RenderTarget;
575+
std::shared_ptr<VulkanBuffer> RTReadback;
575576
std::optional<ResourceRef> VertexBuffer = std::nullopt;
576577

577578
VkRenderPass RenderPass = VK_NULL_HANDLE;
@@ -737,21 +738,6 @@ class VulkanDevice : public offloadtest::Device {
737738
llvm::Expected<std::shared_ptr<offloadtest::Buffer>>
738739
createBuffer(std::string Name, BufferCreateDesc &Desc,
739740
size_t SizeInBytes) override {
740-
VkMemoryPropertyFlags MemFlags = 0;
741-
switch (Desc.Location) {
742-
case MemoryLocation::GpuOnly:
743-
MemFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
744-
break;
745-
case MemoryLocation::CpuToGpu:
746-
MemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
747-
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
748-
break;
749-
case MemoryLocation::GpuToCpu:
750-
MemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
751-
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
752-
break;
753-
}
754-
755741
VkBufferCreateInfo BufInfo = {};
756742
BufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
757743
BufInfo.size = SizeInBytes;
@@ -771,8 +757,8 @@ class VulkanDevice : public offloadtest::Device {
771757
VkMemoryAllocateInfo AllocInfo = {};
772758
AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
773759
AllocInfo.allocationSize = MemReqs.size;
774-
auto MemIdx =
775-
getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, MemFlags);
760+
auto MemIdx = getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits,
761+
getVulkanMemoryFlags(Desc.Location));
776762
if (!MemIdx)
777763
return MemIdx.takeError();
778764
AllocInfo.memoryTypeIndex = *MemIdx;
@@ -2153,6 +2139,63 @@ class VulkanDevice : public offloadtest::Device {
21532139
}
21542140
}
21552141

2142+
// Record commands to copy a texture into a readback buffer.
2143+
void copyTextureToReadback(VkCommandBuffer CmdBuffer,
2144+
const VulkanTexture &Tex,
2145+
const VulkanBuffer &Readback,
2146+
VkImageLayout OldLayout,
2147+
VkAccessFlags SrcAccessMask,
2148+
VkPipelineStageFlags SrcStageMask) {
2149+
const VkImageAspectFlags AspectMask = isDepthFormat(Tex.Desc.Format)
2150+
? VK_IMAGE_ASPECT_DEPTH_BIT
2151+
: VK_IMAGE_ASPECT_COLOR_BIT;
2152+
2153+
// Transition texture to transfer source.
2154+
VkImageSubresourceRange SubRange = {};
2155+
SubRange.aspectMask = AspectMask;
2156+
SubRange.baseMipLevel = 0;
2157+
SubRange.levelCount = 1;
2158+
SubRange.layerCount = 1;
2159+
2160+
VkImageMemoryBarrier ImageBarrier = {};
2161+
ImageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
2162+
ImageBarrier.subresourceRange = SubRange;
2163+
ImageBarrier.srcAccessMask = SrcAccessMask;
2164+
ImageBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
2165+
ImageBarrier.oldLayout = OldLayout;
2166+
ImageBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
2167+
ImageBarrier.image = Tex.Image;
2168+
vkCmdPipelineBarrier(CmdBuffer, SrcStageMask,
2169+
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
2170+
nullptr, 1, &ImageBarrier);
2171+
2172+
// Copy image to readback buffer.
2173+
VkBufferImageCopy Region = {};
2174+
Region.imageSubresource.aspectMask = AspectMask;
2175+
Region.imageSubresource.mipLevel = 0;
2176+
Region.imageSubresource.baseArrayLayer = 0;
2177+
Region.imageSubresource.layerCount = 1;
2178+
Region.imageExtent.width = Tex.Desc.Width;
2179+
Region.imageExtent.height = Tex.Desc.Height;
2180+
Region.imageExtent.depth = 1;
2181+
vkCmdCopyImageToBuffer(CmdBuffer, Tex.Image,
2182+
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2183+
Readback.Buffer, 1, &Region);
2184+
2185+
// Barrier to make the readback buffer visible to the host.
2186+
VkBufferMemoryBarrier BufBarrier = {};
2187+
BufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
2188+
BufBarrier.size = VK_WHOLE_SIZE;
2189+
BufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
2190+
BufBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
2191+
BufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2192+
BufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2193+
BufBarrier.buffer = Readback.Buffer;
2194+
vkCmdPipelineBarrier(CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
2195+
VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1,
2196+
&BufBarrier, 0, nullptr);
2197+
}
2198+
21562199
void copyResourceDataToHost(InvocationState &IS, ResourceBundle &R) {
21572200
if (!R.isReadWrite())
21582201
return;
@@ -2345,7 +2388,10 @@ class VulkanDevice : public offloadtest::Device {
23452388
vkCmdDraw(IS.CmdBuffer, P.Bindings.getVertexCount(), 1, 0, 0);
23462389
llvm::outs() << "Drew " << P.Bindings.getVertexCount() << " vertices.\n";
23472390
vkCmdEndRenderPass(IS.CmdBuffer);
2348-
copyResourceDataToHost(IS, IS.FrameBufferResource);
2391+
copyTextureToReadback(IS.CmdBuffer, *IS.RenderTarget, *IS.RTReadback,
2392+
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2393+
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2394+
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
23492395
}
23502396

23512397
for (auto &R : IS.Resources)
@@ -2400,17 +2446,15 @@ class VulkanDevice : public offloadtest::Device {
24002446
Range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
24012447
Range.offset = 0;
24022448
Range.size = VK_WHOLE_SIZE;
2403-
const ResourceRef &ResRef = IS.FrameBufferResource.ResourceRefs[0];
2449+
Range.memory = IS.RTReadback->Memory;
24042450

24052451
void *Mapped = nullptr; // NOLINT(misc-const-correctness)
2406-
vkMapMemory(Device, ResRef.Host.Memory, 0, VK_WHOLE_SIZE, 0, &Mapped);
2407-
2408-
Range.memory = ResRef.Host.Memory;
2452+
vkMapMemory(Device, IS.RTReadback->Memory, 0, VK_WHOLE_SIZE, 0, &Mapped);
24092453
vkInvalidateMappedMemoryRanges(Device, 1, &Range);
24102454

24112455
const CPUBuffer &B = *P.Bindings.RTargetBufferPtr;
24122456
memcpy(B.Data[0].get(), Mapped, B.size());
2413-
vkUnmapMemory(Device, ResRef.Host.Memory);
2457+
vkUnmapMemory(Device, IS.RTReadback->Memory);
24142458
}
24152459
return llvm::Error::success();
24162460
}
@@ -2458,13 +2502,6 @@ class VulkanDevice : public offloadtest::Device {
24582502
vkDestroyBuffer(Device, IS.VertexBuffer->Host.Buffer, nullptr);
24592503
vkFreeMemory(Device, IS.VertexBuffer->Host.Memory, nullptr);
24602504
}
2461-
for (auto &ResRef : IS.FrameBufferResource.ResourceRefs) {
2462-
// We know the device resource is an image, so no need to check it.
2463-
vkDestroyImage(Device, ResRef.Image.Image, nullptr);
2464-
vkFreeMemory(Device, ResRef.Image.Memory, nullptr);
2465-
vkDestroyBuffer(Device, ResRef.Host.Buffer, nullptr);
2466-
vkFreeMemory(Device, ResRef.Host.Memory, nullptr);
2467-
}
24682505
vkDestroyImage(Device, IS.DepthStencil.Image, nullptr);
24692506
vkFreeMemory(Device, IS.DepthStencil.Memory, nullptr);
24702507
vkDestroyFramebuffer(Device, IS.FrameBuffer, nullptr);
@@ -2518,10 +2555,10 @@ class VulkanDevice : public offloadtest::Device {
25182555
if (auto Err = createResources(P, State))
25192556
return Err;
25202557
if (P.isGraphics()) {
2521-
if (auto Err = createRenderPass(P, State))
2558+
if (auto Err = createRenderPass(State))
25222559
return Err;
25232560
llvm::outs() << "Render pass created.\n";
2524-
if (auto Err = createFrameBuffer(P, State))
2561+
if (auto Err = createFrameBuffer(State))
25252562
return Err;
25262563
llvm::outs() << "Frame buffer created.\n";
25272564
}

0 commit comments

Comments
 (0)