From 0ddaa5b30a2b834771adfe1c460fb46c9e809e39 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Thu, 4 Jun 2026 16:45:48 +0200 Subject: [PATCH 1/4] Add per-geometry Transform on TriangleGeometryDesc Adds an optional 3x4 row-major affine transform on triangle BLAS geometries; vertices are baked through it at AS-build time, so Object* shader queries report the transformed positions. Plumbed via DX's Transform3x4 GPU VA, VK's transformData device address, and Metal's transformationMatrixBuffer + MatrixLayoutRowMajor (matches the DX/VK row-major byte layout, so the same upload buffer is reused). The covering test bakes a translate-x-by-5 into a single-triangle BLAS and verifies the same world-space rays hit/miss accordingly with an identity TLAS instance. Part of the inline-RT test coverage epic (https://github.com/llvm/offload-test-suite/issues/1258). Co-Authored-By: Claude Opus 4.7 (1M context) --- include/API/AccelerationStructure.h | 6 ++ include/Support/Pipeline.h | 2 + lib/API/DX/Device.cpp | 20 +++++ lib/API/Device.cpp | 1 + lib/API/MTL/MTLDevice.cpp | 12 +++ lib/API/VK/Device.cpp | 12 +++ lib/Support/Pipeline.cpp | 13 +++ test/Feature/InlineRT/geometry-transform.test | 84 +++++++++++++++++++ 8 files changed, 150 insertions(+) create mode 100644 test/Feature/InlineRT/geometry-transform.test diff --git a/include/API/AccelerationStructure.h b/include/API/AccelerationStructure.h index df357771a..90dae2eda 100644 --- a/include/API/AccelerationStructure.h +++ b/include/API/AccelerationStructure.h @@ -18,7 +18,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" +#include #include +#include #include namespace offloadtest { @@ -52,6 +54,10 @@ struct TriangleGeometryDesc { uint32_t IndexCount = 0; IndexFormat IdxFormat = IndexFormat::Uint32; bool Opaque = true; + // Optional BLAS-side bake transform, 3x4 row-major. Vertices are + // multiplied by this before AS build, so the resulting BLAS reports + // transformed positions via Object* shader queries. + std::optional> Transform; }; struct AABBGeometryDesc { diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index f8dc1785d..1e58f0048 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -21,6 +21,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLTraits.h" +#include #include #include #include @@ -552,6 +553,7 @@ struct TriangleGeometry { IndexFormat IdxFormat = IndexFormat::Uint32; uint32_t IndexCount = 0; bool Opaque = true; + std::optional> Transform; }; struct AABBGeometry { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index afce98aa6..bb9735a63 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -2356,6 +2356,13 @@ class DXDevice : public offloadtest::Device { Tri.IndexFormat = getDXGIIndexFormat(T.IdxFormat); } + // Scratch sizing depends on whether Transform3x4 will be present at + // build time; signal that with any non-NULL sentinel here — the DXR + // spec lets the value be NULL or non-NULL for the prebuild query and + // does not dereference it. + if (T.Transform) + Tri.Transform3x4 = 1; + GeomDescs.push_back(GD); } return queryBLASPrebuildSize(GeomDescs); @@ -3406,6 +3413,19 @@ llvm::Error DXComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { GD.Triangles.IndexCount = T.IndexCount; GD.Triangles.IndexFormat = getDXGIIndexFormat(T.IdxFormat); } + if (T.Transform) { + const BufferCreateDesc XformDesc{MemoryLocation::CpuToGpu, + BufferUsage::Storage}; + auto XformOrErr = offloadtest::createBufferWithData( + *Dev, "AS-Geom-Transform", XformDesc, T.Transform->data(), + T.Transform->size() * sizeof(float), nullptr, nullptr); + if (!XformOrErr) + return XformOrErr.takeError(); + auto *XformBuf = llvm::cast(XformOrErr->get()); + GD.Triangles.Transform3x4 = + XformBuf->Buffer->GetGPUVirtualAddress(); + CB.KeepAliveOwned.push_back(std::move(*XformOrErr)); + } GeomDescs.push_back(GD); } } else { diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 33260a6cf..20ed77ad7 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -182,6 +182,7 @@ llvm::Error offloadtest::buildPipelineAccelerationStructures( TGD.VertexStride = T.VertexStride; TGD.VertexFormat = T.VertexFormat; TGD.Opaque = T.Opaque; + TGD.Transform = T.Transform; OutInputBuffers.push_back(std::move(*VBOrErr)); diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index bf3554bb2..809cd7050 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -3184,6 +3184,18 @@ llvm::Error MTLComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { TD->setIndexType(getMetalIndexType(T.IdxFormat)); } TD->setOpaque(T.Opaque); + if (T.Transform) { + MTL::Buffer *XformBuf = MTLDev->newBuffer( + T.Transform->data(), T.Transform->size() * sizeof(float), + MTL::ResourceStorageModeShared); + if (!XformBuf) + return llvm::createStringError( + std::errc::not_enough_memory, + "Failed to allocate BLAS transform buffer."); + TD->setTransformationMatrixBuffer(XformBuf); + TD->setTransformationMatrixLayout(MTL::MatrixLayoutRowMajor); + CB->KeepAliveMTLBuffers.push_back(XformBuf); + } Geoms.push_back(TD); } } else { diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 5e5f5c9f7..902830eaa 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -4848,6 +4848,18 @@ llvm::Error VKComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { } else { Tri.indexType = VK_INDEX_TYPE_NONE_KHR; } + if (T.Transform) { + const BufferCreateDesc XformDesc{MemoryLocation::CpuToGpu, + BufferUsage::Storage}; + auto XformOrErr = offloadtest::createBufferWithData( + *Dev, "AS-Geom-Transform", XformDesc, T.Transform->data(), + T.Transform->size() * sizeof(float), nullptr, nullptr); + if (!XformOrErr) + return XformOrErr.takeError(); + auto *XformBuf = llvm::cast(XformOrErr->get()); + Tri.transformData.deviceAddress = XformBuf->getDeviceAddress(); + CB.KeepAliveOwned.push_back(std::move(*XformOrErr)); + } Geoms[I].push_back(G); VkAccelerationStructureBuildRangeInfoKHR R = {}; diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 41fa3f7d6..01846e690 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -638,6 +638,19 @@ void MappingTraits::mapping( I.mapOptional("IndexFormat", G.IdxFormat, IndexFormat::Uint32); I.mapOptional("IndexCount", G.IndexCount, 0u); I.mapOptional("Opaque", G.Opaque, true); + llvm::SmallVector Transform; + I.mapOptional("Transform", Transform); + if (!Transform.empty()) { + if (Transform.size() != 12) { + I.setError(llvm::Twine("TriangleGeometry.Transform must have exactly 12 " + "floats (3x4 row-major), got ") + + llvm::Twine(Transform.size())); + return; + } + std::array T; + std::copy(Transform.begin(), Transform.end(), T.begin()); + G.Transform = T; + } } void MappingTraits::mapping( diff --git a/test/Feature/InlineRT/geometry-transform.test b/test/Feature/InlineRT/geometry-transform.test new file mode 100644 index 000000000..1db616755 --- /dev/null +++ b/test/Feature/InlineRT/geometry-transform.test @@ -0,0 +1,84 @@ +#--- source.hlsl + +[[vk::binding(0, 0)]] RaytracingAccelerationStructure Scene : register(t0); +[[vk::binding(1, 0)]] RWStructuredBuffer Output : register(u0); + +[numthreads(2,1,1)] +void main(uint3 tid : SV_DispatchThreadID) { + // The triangle vertices are centered around x=0, but a per-geometry + // BLAS-bake transform translates them to x=5 at AS-build time. With an + // identity-transform TLAS instance, only the ray at x=5 hits. + RayDesc Ray; + Ray.Origin = float3(tid.x == 0 ? 5.0 : 0.0, 0, 1); + Ray.Direction = float3(0, 0, -1); + Ray.TMin = 0.0; + Ray.TMax = 100.0; + RayQuery Q; + Q.TraceRayInline(Scene, RAY_FLAG_NONE, 0xFF, Ray); + Q.Proceed(); + Output[tid.x] = (uint)Q.CommittedStatus(); +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main +Buffers: + - Name: Vertices + Format: Float32 + Stride: 12 + Data: [ 0.0, 1.0, 0.0, -1.0, -1.0, 0.0, 1.0, -1.0, 0.0 ] + - Name: Output + Format: UInt32 + Stride: 4 + FillSize: 8 + - Name: Expected + Format: UInt32 + Stride: 4 + # Lane 0: ray hits the baked-translated triangle → COMMITTED_TRIANGLE_HIT (1) + # Lane 1: ray misses (triangle no longer at origin) → COMMITTED_NOTHING (0) + Data: [ 1, 0 ] +AccelerationStructures: + BLAS: + - Name: TriangleBLAS + Triangles: + - VertexBuffer: Vertices + VertexFormat: RGB32Float + VertexStride: 12 + VertexCount: 3 + # 3x4 row-major affine — translate x by +5. + Transform: [1, 0, 0, 5, 0, 1, 0, 0, 0, 0, 1, 0] + TLAS: + - Name: Scene + Instances: + - BLAS: TriangleBLAS +DescriptorSets: + - Resources: + - Name: Scene + Kind: AccelerationStructure + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Output + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 1 +Results: + - Result: GeometryTransform + Rule: BufferExact + Actual: Output + Expected: Expected +... +#--- end + +# REQUIRES: acceleration-structure +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From a6d32af542af0cf407126025ff1600017e06ab03 Mon Sep 17 00:00:00 2001 From: EmilioLaiso Date: Mon, 15 Jun 2026 11:48:41 +0200 Subject: [PATCH 2/4] Fix Metal BLAS transform buffer allocation The per-geometry transform path referenced a nonexistent `KeepAliveMTLBuffers` member and allocated the buffer via a raw `newBuffer` with `ResourceStorageModeShared`, breaking the Metal build. Route the transform upload through `createBufferWithData` with `MemoryLocation::CpuToGpu` (matching the adjacent TLAS-Instances path): this yields the correct `Managed` storage mode for a CPU-written, GPU-read buffer, performs the proper `didModifyRange` on unmap, and returns an `offloadtest::Buffer` that lives in the existing `KeepAliveOwned` keep-alive vector. --- lib/API/MTL/MTLDevice.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 809cd7050..956de5239 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -3185,16 +3185,17 @@ llvm::Error MTLComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { } TD->setOpaque(T.Opaque); if (T.Transform) { - MTL::Buffer *XformBuf = MTLDev->newBuffer( - T.Transform->data(), T.Transform->size() * sizeof(float), - MTL::ResourceStorageModeShared); - if (!XformBuf) - return llvm::createStringError( - std::errc::not_enough_memory, - "Failed to allocate BLAS transform buffer."); - TD->setTransformationMatrixBuffer(XformBuf); + const BufferCreateDesc XformDesc{MemoryLocation::CpuToGpu, + BufferUsage::Storage}; + auto XformBufOrErr = offloadtest::createBufferWithData( + *CB->Dev, "BLAS-Transform", XformDesc, T.Transform->data(), + T.Transform->size() * sizeof(float), nullptr, nullptr); + if (!XformBufOrErr) + return XformBufOrErr.takeError(); + auto *MTLXform = llvm::cast(XformBufOrErr->get()); + TD->setTransformationMatrixBuffer(MTLXform->Buf); TD->setTransformationMatrixLayout(MTL::MatrixLayoutRowMajor); - CB->KeepAliveMTLBuffers.push_back(XformBuf); + CB->KeepAliveOwned.push_back(std::move(*XformBufOrErr)); } Geoms.push_back(TD); } From d735f6bb5dfafd7bbb239fec18df4b46264d82f0 Mon Sep 17 00:00:00 2001 From: EmilioLaiso Date: Wed, 24 Jun 2026 10:48:41 +0200 Subject: [PATCH 3/4] use buffer desc helper --- lib/API/MTL/MTLDevice.cpp | 3 +-- lib/API/VK/Device.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 956de5239..1820d13a6 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -3185,8 +3185,7 @@ llvm::Error MTLComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { } TD->setOpaque(T.Opaque); if (T.Transform) { - const BufferCreateDesc XformDesc{MemoryLocation::CpuToGpu, - BufferUsage::Storage}; + const BufferCreateDesc XformDesc = BufferCreateDesc::uploadBuffer(); auto XformBufOrErr = offloadtest::createBufferWithData( *CB->Dev, "BLAS-Transform", XformDesc, T.Transform->data(), T.Transform->size() * sizeof(float), nullptr, nullptr); diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 902830eaa..f13dc8eac 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -4849,8 +4849,7 @@ llvm::Error VKComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { Tri.indexType = VK_INDEX_TYPE_NONE_KHR; } if (T.Transform) { - const BufferCreateDesc XformDesc{MemoryLocation::CpuToGpu, - BufferUsage::Storage}; + const BufferCreateDesc XformDesc = BufferCreateDesc::uploadBuffer(); auto XformOrErr = offloadtest::createBufferWithData( *Dev, "AS-Geom-Transform", XformDesc, T.Transform->data(), T.Transform->size() * sizeof(float), nullptr, nullptr); From b750dfb18e5d48564ac6fa8596eadd51b5748e3a Mon Sep 17 00:00:00 2001 From: EmilioLaiso Date: Wed, 24 Jun 2026 12:22:48 +0200 Subject: [PATCH 4/4] compile error --- lib/API/DX/Device.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index bb9735a63..f0b499b6f 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -3414,8 +3414,7 @@ llvm::Error DXComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { GD.Triangles.IndexFormat = getDXGIIndexFormat(T.IdxFormat); } if (T.Transform) { - const BufferCreateDesc XformDesc{MemoryLocation::CpuToGpu, - BufferUsage::Storage}; + const BufferCreateDesc XformDesc = BufferCreateDesc::uploadBuffer(); auto XformOrErr = offloadtest::createBufferWithData( *Dev, "AS-Geom-Transform", XformDesc, T.Transform->data(), T.Transform->size() * sizeof(float), nullptr, nullptr);