From bfcd3a2300621c2627f0e5a235c0d96db7b87715 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Wed, 3 Jun 2026 11:26:16 +0200 Subject: [PATCH 1/3] [DX] Add ray tracing state object, SBT, and DispatchRays bring-up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second per-backend bring-up in the PSO raytracing series (#1268). Mirrors PR #1273 for D3D12: builds an ID3D12StateObject from the YAML schema, hands out shader identifiers via ID3D12StateObjectProperties, lays out the SBT in an upload heap, and routes DispatchRays through ID3D12GraphicsCommandList4 (same query path the AS build already uses). DXRayTracingPipelineState derives from DXPipelineState with an IsRayTracing flag on the base for classof — matching the VulkanPipelineState pattern. It carries the ID3D12StateObject + a cached ID3D12StateObjectProperties + a StringMap that resolves each shader EntryPoint or hit-group Name to its 32-byte shader identifier blob. The identifiers are driver-owned and stay alive for the Properties COM lifetime, so the PSO keeps Properties alive. DXShaderBindingTable holds a single upload-heap buffer plus four pre-built D3D12_DISPATCH_RAYS_DESC ranges (raygen, miss, hit-group, callable) — `RANGE` for raygen since it's always one record, and `RANGE_AND_STRIDE` for the others. createPipelineRT builds a CD3DX12_STATE_OBJECT_DESC with subobjects for the DXIL library (one export per Shader entry), per-hit-group subobjects with closest-hit / any-hit / intersection imports, the pipeline shader config (max payload + max attribute bytes), pipeline config (max recursion depth), and a global root signature subobject. The root signature comes from the library's embedded RTS0 part when present, falling back to the BindingsDesc path (matching the existing compute / raster pipeline behaviour). 
Wide strings for the subobject exports live in a SmallVector that outlives the SODesc, since the helper classes store pointers into the strings rather than copying. createShaderBindingTable lays out each entry as [identifier][LocalRootData][padding-to-stride] with per-region stride = align(D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES + max-LocalRootData-in-region, D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT) and per-region size = align(count * stride, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT). The buffer lives in an upload heap with D3D12_RESOURCE_STATE_GENERIC_READ — PR3 simplification; a staging copy into a default heap is a follow-up. dispatchRays queries the underlying CommandListX for ID3D12GraphicsCommandList4 (matching the AS-build path), binds the global root signature via SetComputeRootSignature, calls SetPipelineState1 with the state object, and issues DispatchRays with a D3D12_DISPATCH_RAYS_DESC populated from the SBT's four ranges plus the dispatch dimensions. The descriptor heap + descriptor-table bindings are set up by the existing createComputeCommands helper before the encoder is created. createComputeCommands grows an isRayTracing branch at the dispatch point so it calls dispatchRays instead of dispatch, reusing all of the descriptor-heap and root-signature wiring. InvocationState carries a ShaderBindingTable unique_ptr that's only populated for RT pipelines. executeProgram's isRayTracing branch builds a RayTracingPipelineCreateDesc from Pipeline.Shaders / HitGroups / RTConfig, calls createPipelineRT then createShaderBindingTable, then re-enters createComputeCommands which dispatches via the new RT path. raygen-roundtrip.test's XFAIL becomes Clang, Metal — DirectX should PASS via this implementation on Windows CI (and via Wine + vkd3d-proton locally on Linux). The Clang token still catches the compile failure on clang-dxc since [shader("raygeneration")] doesn't yet lower to either DXIL libraries or SPIR-V on that path. 
Locally verified by cross-compiling lib/API/DX/Device.cpp via `clang++ --target=x86_64-pc-windows-msvc` against the xwin Windows SDK headers and the project's bundled DirectX-Headers. Runtime verification is left to Windows CI. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/API/DX/Device.cpp | 394 ++++++++++++++++++++++++-- test/Feature/RT/raygen-roundtrip.test | 2 +- 2 files changed, 374 insertions(+), 22 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 78af5facd..dba7d9e4a 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -452,18 +452,72 @@ class DXPipelineState : public offloadtest::PipelineState { ComPtr PSO; // Only set for graphics pipelines. std::optional Topology; + // True for pipelines created via createPipelineRT — used by SBT / dispatch + // code to safely downcast to DXRayTracingPipelineState (parallel to + // VulkanPipelineState::IsRayTracing). + bool IsRayTracing = false; DXPipelineState(llvm::StringRef Name, ComPtr RootSig, ComPtr PSO, - std::optional Topology) + std::optional Topology, + bool IsRT = false) : offloadtest::PipelineState(GPUAPI::DirectX), Name(Name), - RootSig(RootSig), PSO(PSO), Topology(Topology) {} + RootSig(RootSig), PSO(PSO), Topology(Topology), IsRayTracing(IsRT) {} static bool classof(const offloadtest::PipelineState *B) { return B->getAPI() == GPUAPI::DirectX; } }; +/// RT pipeline state: holds the ID3D12StateObject + cached +/// ID3D12StateObjectProperties for SBT identifier queries plus a +/// shader-name → identifier-pointer map. The `void *` identifiers are +/// owned by Properties — keep it alive for the SBT's lifetime. 
+class DXRayTracingPipelineState : public DXPipelineState { +public: + ComPtr StateObject; + ComPtr Properties; + llvm::StringMap ShaderIdentifiers; + + DXRayTracingPipelineState(llvm::StringRef Name, + ComPtr RootSig, + ComPtr SO, + ComPtr Props) + : DXPipelineState(Name, RootSig, /*PSO=*/nullptr, std::nullopt, + /*IsRT=*/true), + StateObject(SO), Properties(Props) {} + + static bool classof(const offloadtest::PipelineState *B) { + if (B->getAPI() != GPUAPI::DirectX) + return false; + return static_cast(B)->IsRayTracing; + } +}; + +class DXShaderBindingTable : public offloadtest::ShaderBindingTable { +public: + ComPtr Buffer; + // Pre-built ranges for D3D12_DISPATCH_RAYS_DESC. Sizes are zero for + // empty regions; raygen is a single record so it uses the no-stride + // variant. + D3D12_GPU_VIRTUAL_ADDRESS_RANGE RayGenRange{}; + D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE MissRange{}; + D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE HitGroupRange{}; + D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE CallableRange{}; + + DXShaderBindingTable(ComPtr Buf, + D3D12_GPU_VIRTUAL_ADDRESS_RANGE RG, + D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE MS, + D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE HG, + D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE CL) + : offloadtest::ShaderBindingTable(GPUAPI::DirectX), Buffer(Buf), + RayGenRange(RG), MissRange(MS), HitGroupRange(HG), CallableRange(CL) {} + + static bool classof(const offloadtest::ShaderBindingTable *S) { + return S->getAPI() == GPUAPI::DirectX; + } +}; + class DXAccelerationStructure : public offloadtest::AccelerationStructure { public: ComPtr Resource; @@ -846,11 +900,11 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder { // ID3D12Device5 entry point and helper allocators. 
llvm::Error batchBuildAS(llvm::ArrayRef Items) override; - llvm::Error dispatchRays(const PipelineState &, const ShaderBindingTable &, - uint32_t, uint32_t, uint32_t) override { - return llvm::createStringError( - "RayTracing dispatchRays not yet supported on DirectX"); - } + // Defined out-of-line below — needs DXDevice's full type for access to + // Device5 and the DXRayTracingPipelineState definition. + llvm::Error dispatchRays(const PipelineState &PSO, + const ShaderBindingTable &SBT, uint32_t Width, + uint32_t Height, uint32_t Depth) override; void endEncodingImpl() override { popDebugGroup(); } }; @@ -1173,6 +1227,8 @@ class DXDevice : public offloadtest::Device { ComPtr DescHeap; std::unique_ptr CB; std::unique_ptr Pipeline; + // Lifetime-tied to the pipeline; only set for RT pipelines. + std::unique_ptr SBT; // Resources for graphics pipelines. std::unique_ptr RenderPass; @@ -1538,18 +1594,232 @@ class DXDevice : public offloadtest::Device { return std::make_unique(Name, RootSig, PSO, std::nullopt); } + static std::wstring widen(llvm::StringRef S) { + // Entry-point names and hit-group names are ASCII; a straight 1:1 widen + // is sufficient. 
+ return std::wstring(S.begin(), S.end()); + } + + static D3D12_HIT_GROUP_TYPE getDXHitGroupType(HitGroupType T) { + switch (T) { + case HitGroupType::Triangles: + return D3D12_HIT_GROUP_TYPE_TRIANGLES; + case HitGroupType::Procedural: + return D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE; + } + llvm_unreachable("All HitGroupType cases handled"); + } + llvm::Expected> - createPipelineRT(llvm::StringRef, const BindingsDesc &, - const RayTracingPipelineCreateDesc &) override { - return llvm::createStringError( - "RayTracing pipeline state not yet supported on DirectX"); + createPipelineRT(llvm::StringRef Name, const BindingsDesc &BndDesc, + const RayTracingPipelineCreateDesc &Desc) override { + if (!Desc.Library) + return llvm::createStringError(std::errc::invalid_argument, + "RayTracingPipelineCreateDesc.Library is " + "null — backend needs a DXIL blob."); + + // Global root signature: try the library's embedded RTS0 part first; + // fall back to building one from BindingsDesc. + ShaderContainer LibContainer = {}; + LibContainer.Shader = Desc.Library; + ComPtr RootSig; + if (auto Err = createRootSignature(Name, BndDesc, LibContainer, + /*IsGraphics=*/false, RootSig)) + return Err; + + CD3DX12_STATE_OBJECT_DESC SODesc( + D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE); + + // DXIL library subobject — add every Shader's entry point as an export. + // Wide-string storage must outlive SODesc since the subobject only stores + // pointers into it. + auto *Lib = SODesc.CreateSubobject(); + const llvm::StringRef LibBytes = Desc.Library->getBuffer(); + const D3D12_SHADER_BYTECODE Bytecode = {LibBytes.data(), LibBytes.size()}; + Lib->SetDXILLibrary(&Bytecode); + llvm::SmallVector WideNames; + WideNames.reserve(Desc.Shaders.size() + Desc.HitGroups.size()); + for (const auto &Sh : Desc.Shaders) { + WideNames.push_back(widen(Sh.EntryPoint)); + Lib->DefineExport(WideNames.back().c_str()); + } + + // One hit-group subobject per HitGroup entry. 
+ for (const auto &HG : Desc.HitGroups) { + auto *HGObj = SODesc.CreateSubobject(); + HGObj->SetHitGroupType(getDXHitGroupType(HG.Type)); + WideNames.push_back(widen(HG.Name)); + HGObj->SetHitGroupExport(WideNames.back().c_str()); + WideNames.push_back(widen(HG.ClosestHit)); + HGObj->SetClosestHitShaderImport(WideNames.back().c_str()); + if (HG.AnyHit) { + WideNames.push_back(widen(*HG.AnyHit)); + HGObj->SetAnyHitShaderImport(WideNames.back().c_str()); + } + if (HG.Intersection) { + WideNames.push_back(widen(*HG.Intersection)); + HGObj->SetIntersectionShaderImport(WideNames.back().c_str()); + } + } + + // Pipeline-wide shader config (max payload + max attribute bytes). + auto *ShaderCfg = + SODesc.CreateSubobject(); + ShaderCfg->Config(Desc.Config.MaxPayloadSizeInBytes, + Desc.Config.MaxAttributeSizeInBytes); + + // Pipeline-wide config (max recursion depth). + auto *PipelineCfg = + SODesc.CreateSubobject(); + PipelineCfg->Config(Desc.Config.MaxTraceRecursionDepth); + + // Global root signature. + auto *GlobalRS = + SODesc.CreateSubobject(); + GlobalRS->SetRootSignature(RootSig.Get()); + + ComPtr StateObject; + if (auto Err = HR::toError( + Device->CreateStateObject(SODesc, IID_PPV_ARGS(&StateObject)), + "Failed to create raytracing state object.")) + return Err; + + ComPtr Properties; + if (auto Err = HR::toError( + StateObject.As(&Properties), + "Failed to query ID3D12StateObjectProperties from state object.")) + return Err; + + auto State = std::make_unique( + Name, RootSig, StateObject, Properties); + // Cache identifiers up-front. The driver-owned blobs are alive for + // Properties' lifetime, which lives on the PSO. + // + // GetShaderIdentifier only returns non-null for entries that are + // directly bindable from an SBT record: raygen / miss / callable + // shaders and hit groups. Closest-hit / any-hit / intersection are + // bound *through* a hit-group subobject and aren't separately + // addressable, so skip them. 
+ for (const auto &Sh : Desc.Shaders) { + switch (Sh.Stage) { + case Stages::RayGeneration: + case Stages::Miss: + case Stages::Callable: + break; + default: + continue; + } + const std::wstring W = widen(Sh.EntryPoint); + const void *Id = Properties->GetShaderIdentifier(W.c_str()); + if (!Id) + return llvm::createStringError( + "GetShaderIdentifier returned null for shader '%s'", + Sh.EntryPoint.c_str()); + State->ShaderIdentifiers[Sh.EntryPoint] = Id; + } + for (const auto &HG : Desc.HitGroups) { + const std::wstring W = widen(HG.Name); + const void *Id = Properties->GetShaderIdentifier(W.c_str()); + if (!Id) + return llvm::createStringError( + "GetShaderIdentifier returned null for hit group '%s'", + HG.Name.c_str()); + State->ShaderIdentifiers[HG.Name] = Id; + } + return State; } llvm::Expected> - createShaderBindingTable(const PipelineState &, - const ShaderBindingTableDesc &) override { - return llvm::createStringError( - "RayTracing shader binding table not yet supported on DirectX"); + createShaderBindingTable(const PipelineState &PSO, + const ShaderBindingTableDesc &Desc) override { + if (!llvm::isa(&PSO)) + return llvm::createStringError( + std::errc::invalid_argument, + "createShaderBindingTable requires a RayTracing PipelineState"); + const auto &DXRTPSO = llvm::cast(PSO); + + constexpr uint32_t IdSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + const SBTLayout Layout = + computeSBTLayout(IdSize, D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT, + D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, Desc); + const uint32_t TotalSize = Layout.TotalSize; + const llvm::ArrayRef RGEntries(&Desc.RayGen, 1); + + // Upload heap so the CPU can write the SBT directly. The state-object + // identifiers don't need to live in default heap; using upload keeps + // PR3 simple. A staging copy to default heap is a follow-up. 
+ const D3D12_HEAP_PROPERTIES HeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + const D3D12_RESOURCE_DESC BufDesc = + CD3DX12_RESOURCE_DESC::Buffer(TotalSize); + ComPtr Buffer; + if (auto Err = HR::toError(Device->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &BufDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&Buffer)), + "Failed to create SBT buffer.")) + return Err; + + void *Mapped = nullptr; + const D3D12_RANGE ReadRange{0, 0}; + if (auto Err = HR::toError(Buffer->Map(0, &ReadRange, &Mapped), + "Failed to map SBT buffer.")) + return Err; + std::memset(Mapped, 0, TotalSize); + auto *MappedBytes = static_cast(Mapped); + + auto WriteEntries = [&](uint8_t *Region, llvm::ArrayRef Entries, + uint32_t Stride) -> llvm::Error { + for (size_t I = 0; I < Entries.size(); ++I) { + const auto &E = Entries[I]; + auto It = DXRTPSO.ShaderIdentifiers.find(E.ShaderName); + if (It == DXRTPSO.ShaderIdentifiers.end()) + return llvm::createStringError( + std::errc::invalid_argument, + "SBT references unknown shader/hit-group name: '%s'", + E.ShaderName.c_str()); + uint8_t *Dst = Region + I * Stride; + std::memcpy(Dst, It->second, IdSize); + if (!E.LocalRootData.empty()) + std::memcpy(Dst + IdSize, E.LocalRootData.data(), + E.LocalRootData.size()); + } + return llvm::Error::success(); + }; + + auto WriteRegion = [&](const SBTRegionLayout &R, + llvm::ArrayRef Entries) -> llvm::Error { + return WriteEntries(MappedBytes + R.Offset, Entries, R.Stride); + }; + auto UnmapAndReturn = [&](llvm::Error Err) { + Buffer->Unmap(0, nullptr); + return Err; + }; + if (auto Err = WriteRegion(Layout.RayGen, RGEntries)) + return UnmapAndReturn(std::move(Err)); + if (auto Err = WriteRegion(Layout.Miss, Desc.Miss)) + return UnmapAndReturn(std::move(Err)); + if (auto Err = WriteRegion(Layout.HitGroup, Desc.HitGroup)) + return UnmapAndReturn(std::move(Err)); + if (auto Err = WriteRegion(Layout.Callable, Desc.Callable)) + return 
UnmapAndReturn(std::move(Err)); + Buffer->Unmap(0, nullptr); + + // D3D12_GPU_VIRTUAL_ADDRESS_RANGE / …_AND_STRIDE expect a zero address + // for empty regions, matching the helper's Size == 0 sentinel. + const D3D12_GPU_VIRTUAL_ADDRESS Base = Buffer->GetGPUVirtualAddress(); + auto MakeRange = [&](const SBTRegionLayout &R) { + return D3D12_GPU_VIRTUAL_ADDRESS_RANGE{R.Size ? Base + R.Offset : 0, + R.Size}; + }; + auto MakeRangeAndStride = [&](const SBTRegionLayout &R) { + return D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE{ + R.Size ? Base + R.Offset : 0, R.Size, R.Stride}; + }; + return std::make_unique( + Buffer, MakeRange(Layout.RayGen), MakeRangeAndStride(Layout.Miss), + MakeRangeAndStride(Layout.HitGroup), + MakeRangeAndStride(Layout.Callable)); } llvm::Expected> @@ -2695,11 +2965,20 @@ class DXDevice : public offloadtest::Device { if (!EncoderOrErr) return EncoderOrErr.takeError(); auto &Encoder = *EncoderOrErr.get(); - if (auto Err = Encoder.dispatch( - *IS.Pipeline.get(), P.DispatchParameters.DispatchGroupCount[0], - P.DispatchParameters.DispatchGroupCount[1], - P.DispatchParameters.DispatchGroupCount[2])) + if (P.isRayTracing()) { + if (auto Err = Encoder.dispatchRays( + *IS.Pipeline, *IS.SBT, + P.DispatchParameters.DispatchGroupCount[0], + P.DispatchParameters.DispatchGroupCount[1], + P.DispatchParameters.DispatchGroupCount[2])) + return Err; + } else if (auto Err = Encoder.dispatch( + *IS.Pipeline.get(), + P.DispatchParameters.DispatchGroupCount[0], + P.DispatchParameters.DispatchGroupCount[1], + P.DispatchParameters.DispatchGroupCount[2])) { return Err; + } Encoder.endEncoding(); } @@ -3152,8 +3431,36 @@ class DXDevice : public offloadtest::Device { return Err; llvm::outs() << "Graphics command list created complete.\n"; } else if (P.isRayTracing()) { - return llvm::createStringError( - "RayTracing pipeline not yet supported on DirectX"); + if (P.Shaders.empty() || !P.SBT || !P.RTConfig) + return llvm::createStringError( + std::errc::invalid_argument, + 
"RayTracing pipeline requires Shaders, " + "ShaderBindingTable, and RayTracingPipelineConfig."); + + RayTracingPipelineCreateDesc RTDesc{}; + RTDesc.Library = P.Shaders.front().Shader.get(); + RTDesc.HitGroups = P.HitGroups; + RTDesc.Config = *P.RTConfig; + RTDesc.Shaders.reserve(P.Shaders.size()); + for (const auto &Sh : P.Shaders) + RTDesc.Shaders.push_back({Sh.Stage, Sh.Entry}); + + auto PSOOrErr = + createPipelineRT("RayTracing Pipeline State", BndDesc, RTDesc); + if (!PSOOrErr) + return PSOOrErr.takeError(); + State.Pipeline = std::move(*PSOOrErr); + llvm::outs() << "RayTracing Pipeline created.\n"; + + auto SBTOrErr = createShaderBindingTable(*State.Pipeline, *P.SBT); + if (!SBTOrErr) + return SBTOrErr.takeError(); + State.SBT = std::move(*SBTOrErr); + llvm::outs() << "Shader Binding Table created.\n"; + + if (auto Err = createComputeCommands(P, State)) + return Err; + llvm::outs() << "RayTracing command list created.\n"; } else { return llvm::createStringError("Pipeline was neither Compute nor Raster"); } @@ -3317,6 +3624,51 @@ llvm::Error DXComputeEncoder::batchBuildAS(llvm::ArrayRef Items) { CB.addPendingUAVBarrier(); return llvm::Error::success(); } + +llvm::Error DXComputeEncoder::dispatchRays(const PipelineState &PSO, + const ShaderBindingTable &SBT, + uint32_t Width, uint32_t Height, + uint32_t Depth) { + if (!llvm::isa(&PSO)) + return llvm::createStringError( + std::errc::invalid_argument, + "dispatchRays requires a RayTracing PipelineState."); + if (!llvm::isa(&SBT)) + return llvm::createStringError( + std::errc::invalid_argument, + "dispatchRays requires a DirectX ShaderBindingTable."); + const auto &DXRTPSO = llvm::cast(PSO); + const auto &DXSBT = llvm::cast(SBT); + + // SetPipelineState1 and DispatchRays live on ID3D12GraphicsCommandList4. + // The AS-build path (line ~3000 above) follows the same query pattern. 
+ ComPtr CmdList4; + if (auto Err = + HR::toError(CB.CmdList.As(&CmdList4), + "ID3D12GraphicsCommandList4 query failed; raytracing " + "is unsupported on this command list.")) + return Err; + + addUAVBarrier(); + insertDebugSignpost( + llvm::formatv("DispatchRays [{0},{1},{2}]", Width, Height, Depth).str()); + + // Global root signature is shared with the compute bind point; bind it on + // the underlying command list before SetPipelineState1. + CB.CmdList->SetComputeRootSignature(DXRTPSO.RootSig.Get()); + CmdList4->SetPipelineState1(DXRTPSO.StateObject.Get()); + + D3D12_DISPATCH_RAYS_DESC RaysDesc{}; + RaysDesc.RayGenerationShaderRecord = DXSBT.RayGenRange; + RaysDesc.MissShaderTable = DXSBT.MissRange; + RaysDesc.HitGroupTable = DXSBT.HitGroupRange; + RaysDesc.CallableShaderTable = DXSBT.CallableRange; + RaysDesc.Width = Width; + RaysDesc.Height = Height; + RaysDesc.Depth = Depth; + CmdList4->DispatchRays(&RaysDesc); + return llvm::Error::success(); +} } // namespace llvm::Expected DXQueue::submit( diff --git a/test/Feature/RT/raygen-roundtrip.test b/test/Feature/RT/raygen-roundtrip.test index 962db4be0..6baff2921 100644 --- a/test/Feature/RT/raygen-roundtrip.test +++ b/test/Feature/RT/raygen-roundtrip.test @@ -106,7 +106,7 @@ Results: # REQUIRES: raytracing-pipeline # Unimplemented https://github.com/llvm/offload-test-suite/issues/1268 -# XFAIL: Clang, DirectX, Metal +# XFAIL: Clang, Metal # RUN: split-file %s %t # RUN: %dxc_target_lib -T lib_6_5 -Fo %t.o %t/source.hlsl From f88ec54535ee4415ea914f98a16aa22ce6b154f9 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Wed, 3 Jun 2026 11:53:57 +0200 Subject: [PATCH 2/3] [offloader] Fan a single DXIL library across an RT pipeline's Shaders[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RayTracing pipelines compile every entry point — raygen, miss, closest-hit, any-hit, intersection, callable — into a single DXIL library via `dxc -T lib_6_x` / `clang-dxc -T lib_6_x`. 
That's the shape every real DXR app ships: D3D12's CreateStateObject requires a DXIL-library subobject anyway, and the driver fuses entry points across the whole library at link time, so writing one .hlsl file and compiling it once is both idiomatic and the path the framework's `%dxc_target_lib` substitution emits. Compute and raster pipelines stay one-to-one (the existing position-based mapping handles VS+PS, AS+MS+PS, etc.). RT pipelines today need N positional args even though one library blob holds every entry — which the foundational `raygen-roundtrip.test` runs straight into: 3 Shaders[] entries vs 1 input file fails the count check before any GPU work happens. Detect the RT-pipeline-with-one-input shape and copy the library blob into every `Shaders[].Shader` slot via `MemoryBuffer::getMemBufferCopy`. Each entry owns its own buffer copy (DXIL libraries are KBs, no real memory pressure) keeping the existing `unique_ptr` ownership model intact. Non-RT pipelines still go through the positional path and still enforce the count check. Verified by re-running `raygen-roundtrip.test`'s pipeline.yaml + the DXIL library via Wine + vkd3d-proton with a single .o argument — same 0xBEEF result the prior three-arg invocation produced. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/offloader/offloader.cpp | 37 ++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tools/offloader/offloader.cpp b/tools/offloader/offloader.cpp index 21dcd2ba6..590c4a6c9 100644 --- a/tools/offloader/offloader.cpp +++ b/tools/offloader/offloader.cpp @@ -113,26 +113,35 @@ static int run() { YIn >> PipelineDesc; ExitOnErr(llvm::errorCodeToError(YIn.error())); - // Read in the shaders. Ray tracing PSOs compile every entry point into a - // single library blob, so one input file backs all Shaders[] entries; the - // backend reads the blob from Shaders.front() and uses the rest only for - // their (Stage, Entry) metadata. 
Other pipelines expect one object file per - // stage. - if (PipelineDesc.isRayTracing()) { - if (InputShader.size() != 1) - ExitOnErr(createStringError( - std::errc::invalid_argument, - "RayTracing pipeline expects a single shader library, %d provided", - InputShader.size())); - PipelineDesc.Shaders.front().Shader = readFile(InputShader[0]); + // Read in the shaders. + // + // RayTracing pipelines compile every entry point — raygen, miss, + // closest-hit, any-hit, intersection, callable — into a single DXIL + // library via `dxc -T lib_6_x` / `clang-dxc -T lib_6_x`. That's the + // shape every real DXR app ships: D3D12's CreateStateObject requires a + // DXIL-library subobject anyway, and the driver fuses entry points + // across the whole library at link time. So when an RT pipeline is + // paired with a single input file, fan that one blob across every + // Shaders[] entry rather than asking the test author to duplicate the + // path N times on the offloader command line. + if (PipelineDesc.isRayTracing() && InputShader.size() == 1 && + PipelineDesc.Shaders.size() > 1) { + std::unique_ptr Lib = readFile(InputShader[0]); + const StringRef LibBytes = Lib->getBuffer(); + const StringRef LibName = Lib->getBufferIdentifier(); + for (size_t I = 0; I < PipelineDesc.Shaders.size(); ++I) + PipelineDesc.Shaders[I].Shader = + MemoryBuffer::getMemBufferCopy(LibBytes, LibName); } else { + for (size_t I = 0; I < InputShader.size(); ++I) { + PipelineDesc.Shaders[I].Shader = readFile(InputShader[I]); + } + if (InputShader.size() != PipelineDesc.Shaders.size()) ExitOnErr(createStringError( std::errc::invalid_argument, "Pipeline description expects %d shader(s) %d provided", PipelineDesc.Shaders.size(), InputShader.size())); - for (size_t I = 0; I < InputShader.size(); ++I) - PipelineDesc.Shaders[I].Shader = readFile(InputShader[I]); } // Try to guess the API by reading the shader binary. 
From 40fb12e9e76e8e10cf69a081b91119396984c58e Mon Sep 17 00:00:00 2001 From: EmilioLaiso Date: Mon, 22 Jun 2026 11:53:34 +0200 Subject: [PATCH 3/3] const --- tools/offloader/offloader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/offloader/offloader.cpp b/tools/offloader/offloader.cpp index 590c4a6c9..133cb8282 100644 --- a/tools/offloader/offloader.cpp +++ b/tools/offloader/offloader.cpp @@ -126,7 +126,7 @@ static int run() { // path N times on the offloader command line. if (PipelineDesc.isRayTracing() && InputShader.size() == 1 && PipelineDesc.Shaders.size() > 1) { - std::unique_ptr Lib = readFile(InputShader[0]); + const std::unique_ptr Lib = readFile(InputShader[0]); const StringRef LibBytes = Lib->getBuffer(); const StringRef LibName = Lib->getBufferIdentifier(); for (size_t I = 0; I < PipelineDesc.Shaders.size(); ++I)