diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp
index 78af5facd..dba7d9e4a 100644
--- a/lib/API/DX/Device.cpp
+++ b/lib/API/DX/Device.cpp
@@ -452,18 +452,72 @@ class DXPipelineState : public offloadtest::PipelineState {
   ComPtr PSO;
   // Only set for graphics pipelines.
   std::optional Topology;
+  // True for pipelines created via createPipelineRT — used by SBT / dispatch
+  // code to safely downcast to DXRayTracingPipelineState (parallel to
+  // VulkanPipelineState::IsRayTracing).
+  bool IsRayTracing = false;
 
   DXPipelineState(llvm::StringRef Name, ComPtr RootSig,
                   ComPtr PSO,
-                  std::optional Topology)
+                  std::optional Topology,
+                  bool IsRT = false)
       : offloadtest::PipelineState(GPUAPI::DirectX), Name(Name),
-        RootSig(RootSig), PSO(PSO), Topology(Topology) {}
+        RootSig(RootSig), PSO(PSO), Topology(Topology), IsRayTracing(IsRT) {}
 
   static bool classof(const offloadtest::PipelineState *B) {
     return B->getAPI() == GPUAPI::DirectX;
   }
 };
 
+/// RT pipeline state: holds the ID3D12StateObject + cached
+/// ID3D12StateObjectProperties for SBT identifier queries plus a
+/// shader-name → identifier-pointer map. The `void *` identifiers are owned
+/// by Properties — keep it alive for as long as the map is read.
+class DXRayTracingPipelineState : public DXPipelineState {
+public:
+  ComPtr StateObject;
+  ComPtr Properties;
+  llvm::StringMap ShaderIdentifiers;
+
+  DXRayTracingPipelineState(llvm::StringRef Name,
+                            ComPtr RootSig,
+                            ComPtr SO,
+                            ComPtr Props)
+      : DXPipelineState(Name, RootSig, /*PSO=*/nullptr, std::nullopt,
+                        /*IsRT=*/true),
+        StateObject(SO), Properties(Props) {}
+
+  static bool classof(const offloadtest::PipelineState *B) {
+    if (B->getAPI() != GPUAPI::DirectX)
+      return false;
+    return static_cast(B)->IsRayTracing;
+  }
+};
+
+class DXShaderBindingTable : public offloadtest::ShaderBindingTable {
+public:
+  ComPtr Buffer;
+  // Pre-built ranges for D3D12_DISPATCH_RAYS_DESC. Sizes are zero for
+  // empty regions; raygen is a single record so it uses the no-stride
+  // variant.
+  D3D12_GPU_VIRTUAL_ADDRESS_RANGE RayGenRange{};
+  D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE MissRange{};
+  D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE HitGroupRange{};
+  D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE CallableRange{};
+
+  DXShaderBindingTable(ComPtr Buf,
+                       D3D12_GPU_VIRTUAL_ADDRESS_RANGE RG,
+                       D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE MS,
+                       D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE HG,
+                       D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE CL)
+      : offloadtest::ShaderBindingTable(GPUAPI::DirectX), Buffer(Buf),
+        RayGenRange(RG), MissRange(MS), HitGroupRange(HG), CallableRange(CL) {}
+
+  static bool classof(const offloadtest::ShaderBindingTable *S) {
+    return S->getAPI() == GPUAPI::DirectX;
+  }
+};
+
 class DXAccelerationStructure : public offloadtest::AccelerationStructure {
 public:
   ComPtr Resource;
@@ -846,11 +900,11 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
   // ID3D12Device5 entry point and helper allocators.
   llvm::Error batchBuildAS(llvm::ArrayRef Items) override;
 
-  llvm::Error dispatchRays(const PipelineState &, const ShaderBindingTable &,
-                           uint32_t, uint32_t, uint32_t) override {
-    return llvm::createStringError(
-        "RayTracing dispatchRays not yet supported on DirectX");
-  }
+  // Defined out-of-line below (after batchBuildAS); it casts to
+  // DXRayTracingPipelineState / DXShaderBindingTable and records the dispatch.
+  llvm::Error dispatchRays(const PipelineState &PSO,
+                           const ShaderBindingTable &SBT, uint32_t Width,
+                           uint32_t Height, uint32_t Depth) override;
 
   void endEncodingImpl() override { popDebugGroup(); }
 };
@@ -1173,6 +1227,8 @@ class DXDevice : public offloadtest::Device {
     ComPtr DescHeap;
     std::unique_ptr CB;
     std::unique_ptr Pipeline;
+    // Lifetime-tied to the pipeline; only set for RT pipelines.
+    std::unique_ptr SBT;
 
     // Resources for graphics pipelines.
     std::unique_ptr RenderPass;
@@ -1538,18 +1594,232 @@ class DXDevice : public offloadtest::Device {
     return std::make_unique(Name, RootSig, PSO, std::nullopt);
   }
 
+  static std::wstring widen(llvm::StringRef S) {
+    // Entry-point names and hit-group names are ASCII; a straight 1:1 widen
+    // is sufficient.
+    return std::wstring(S.begin(), S.end());
+  }
+
+  static D3D12_HIT_GROUP_TYPE getDXHitGroupType(HitGroupType T) {
+    switch (T) {
+    case HitGroupType::Triangles:
+      return D3D12_HIT_GROUP_TYPE_TRIANGLES;
+    case HitGroupType::Procedural:
+      return D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE;
+    }
+    llvm_unreachable("All HitGroupType cases handled");
+  }
+
   llvm::Expected>
-  createPipelineRT(llvm::StringRef, const BindingsDesc &,
-                   const RayTracingPipelineCreateDesc &) override {
-    return llvm::createStringError(
-        "RayTracing pipeline state not yet supported on DirectX");
+  createPipelineRT(llvm::StringRef Name, const BindingsDesc &BndDesc,
+                   const RayTracingPipelineCreateDesc &Desc) override {
+    if (!Desc.Library)
+      return llvm::createStringError(std::errc::invalid_argument,
+                                     "RayTracingPipelineCreateDesc.Library is "
+                                     "null — backend needs a DXIL blob.");
+
+    // Global root signature: try the library's embedded RTS0 part first;
+    // fall back to building one from BindingsDesc.
+    ShaderContainer LibContainer = {};
+    LibContainer.Shader = Desc.Library;
+    ComPtr RootSig;
+    if (auto Err = createRootSignature(Name, BndDesc, LibContainer,
+                                       /*IsGraphics=*/false, RootSig))
+      return Err;
+
+    CD3DX12_STATE_OBJECT_DESC SODesc(
+        D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE);
+
+    // DXIL library subobject — add every Shader's entry point as an export.
+    // WideNames must outlive SODesc and must never reallocate (hence the
+    // worst-case reserve below): subobjects may keep pointers into it.
+    auto *Lib = SODesc.CreateSubobject();
+    const llvm::StringRef LibBytes = Desc.Library->getBuffer();
+    const D3D12_SHADER_BYTECODE Bytecode = {LibBytes.data(), LibBytes.size()};
+    Lib->SetDXILLibrary(&Bytecode);
+    llvm::SmallVector WideNames;
+    WideNames.reserve(Desc.Shaders.size() + 4 * Desc.HitGroups.size());
+    for (const auto &Sh : Desc.Shaders) {
+      WideNames.push_back(widen(Sh.EntryPoint));
+      Lib->DefineExport(WideNames.back().c_str());
+    }
+
+    // One hit-group subobject per HitGroup entry.
+    for (const auto &HG : Desc.HitGroups) {
+      auto *HGObj = SODesc.CreateSubobject();
+      HGObj->SetHitGroupType(getDXHitGroupType(HG.Type));
+      WideNames.push_back(widen(HG.Name));
+      HGObj->SetHitGroupExport(WideNames.back().c_str());
+      WideNames.push_back(widen(HG.ClosestHit));
+      HGObj->SetClosestHitShaderImport(WideNames.back().c_str());
+      if (HG.AnyHit) {
+        WideNames.push_back(widen(*HG.AnyHit));
+        HGObj->SetAnyHitShaderImport(WideNames.back().c_str());
+      }
+      if (HG.Intersection) {
+        WideNames.push_back(widen(*HG.Intersection));
+        HGObj->SetIntersectionShaderImport(WideNames.back().c_str());
+      }
+    }
+
+    // Pipeline-wide shader config (max payload + max attribute bytes).
+    auto *ShaderCfg =
+        SODesc.CreateSubobject();
+    ShaderCfg->Config(Desc.Config.MaxPayloadSizeInBytes,
+                      Desc.Config.MaxAttributeSizeInBytes);
+
+    // Pipeline-wide config (max recursion depth).
+    auto *PipelineCfg =
+        SODesc.CreateSubobject();
+    PipelineCfg->Config(Desc.Config.MaxTraceRecursionDepth);
+
+    // Global root signature.
+    auto *GlobalRS =
+        SODesc.CreateSubobject();
+    GlobalRS->SetRootSignature(RootSig.Get());
+
+    ComPtr StateObject;
+    if (auto Err = HR::toError(
+            Device->CreateStateObject(SODesc, IID_PPV_ARGS(&StateObject)),
+            "Failed to create raytracing state object."))
+      return Err;
+
+    ComPtr Properties;
+    if (auto Err = HR::toError(
+            StateObject.As(&Properties),
+            "Failed to query ID3D12StateObjectProperties from state object."))
+      return Err;
+
+    auto State = std::make_unique(
+        Name, RootSig, StateObject, Properties);
+    // Cache identifiers up-front. The driver-owned blobs are alive for
+    // Properties' lifetime, which lives on the PSO.
+    //
+    // GetShaderIdentifier only returns non-null for entries that are
+    // directly bindable from an SBT record: raygen / miss / callable
+    // shaders and hit groups. Closest-hit / any-hit / intersection are
+    // bound *through* a hit-group subobject and aren't separately
+    // addressable, so skip them.
+    for (const auto &Sh : Desc.Shaders) {
+      switch (Sh.Stage) {
+      case Stages::RayGeneration:
+      case Stages::Miss:
+      case Stages::Callable:
+        break;
+      default:
+        continue;
+      }
+      const std::wstring W = widen(Sh.EntryPoint);
+      const void *Id = Properties->GetShaderIdentifier(W.c_str());
+      if (!Id)
+        return llvm::createStringError(
+            "GetShaderIdentifier returned null for shader '%s'",
+            Sh.EntryPoint.c_str());
+      State->ShaderIdentifiers[Sh.EntryPoint] = Id;
+    }
+    for (const auto &HG : Desc.HitGroups) {
+      const std::wstring W = widen(HG.Name);
+      const void *Id = Properties->GetShaderIdentifier(W.c_str());
+      if (!Id)
+        return llvm::createStringError(
+            "GetShaderIdentifier returned null for hit group '%s'",
+            HG.Name.c_str());
+      State->ShaderIdentifiers[HG.Name] = Id;
+    }
+    return State;
   }
 
   llvm::Expected>
-  createShaderBindingTable(const PipelineState &,
-                           const ShaderBindingTableDesc &) override {
-    return llvm::createStringError(
-        "RayTracing shader binding table not yet supported on DirectX");
+  createShaderBindingTable(const PipelineState &PSO,
+                           const ShaderBindingTableDesc &Desc) override {
+    if (!llvm::isa(&PSO))
+      return llvm::createStringError(
+          std::errc::invalid_argument,
+          "createShaderBindingTable requires a RayTracing PipelineState");
+    const auto &DXRTPSO = llvm::cast(PSO);
+
+    constexpr uint32_t IdSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES;
+    const SBTLayout Layout =
+        computeSBTLayout(IdSize, D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT,
+                         D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, Desc);
+    const uint32_t TotalSize = Layout.TotalSize;
+    const llvm::ArrayRef RGEntries(&Desc.RayGen, 1);
+
+    // Upload heap so the CPU can write the SBT directly. The state-object
+    // identifiers don't need to live in default heap; using upload keeps
+    // this simple. A staging copy to default heap is a follow-up.
+    const D3D12_HEAP_PROPERTIES HeapProps =
+        CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
+    const D3D12_RESOURCE_DESC BufDesc =
+        CD3DX12_RESOURCE_DESC::Buffer(TotalSize);
+    ComPtr Buffer;
+    if (auto Err = HR::toError(Device->CreateCommittedResource(
+                                   &HeapProps, D3D12_HEAP_FLAG_NONE, &BufDesc,
+                                   D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
+                                   IID_PPV_ARGS(&Buffer)),
+                               "Failed to create SBT buffer."))
+      return Err;
+
+    void *Mapped = nullptr;
+    const D3D12_RANGE ReadRange{0, 0};
+    if (auto Err = HR::toError(Buffer->Map(0, &ReadRange, &Mapped),
+                               "Failed to map SBT buffer."))
+      return Err;
+    std::memset(Mapped, 0, TotalSize);
+    auto *MappedBytes = static_cast(Mapped);
+
+    auto WriteEntries = [&](uint8_t *Region, llvm::ArrayRef Entries,
+                            uint32_t Stride) -> llvm::Error {
+      for (size_t I = 0; I < Entries.size(); ++I) {
+        const auto &E = Entries[I];
+        auto It = DXRTPSO.ShaderIdentifiers.find(E.ShaderName);
+        if (It == DXRTPSO.ShaderIdentifiers.end())
+          return llvm::createStringError(
+              std::errc::invalid_argument,
+              "SBT references unknown shader/hit-group name: '%s'",
+              E.ShaderName.c_str());
+        uint8_t *Dst = Region + I * Stride;
+        std::memcpy(Dst, It->second, IdSize);
+        if (!E.LocalRootData.empty())
+          std::memcpy(Dst + IdSize, E.LocalRootData.data(),
+                      E.LocalRootData.size());
+      }
+      return llvm::Error::success();
+    };
+
+    auto WriteRegion = [&](const SBTRegionLayout &R,
+                           llvm::ArrayRef Entries) -> llvm::Error {
+      return WriteEntries(MappedBytes + R.Offset, Entries, R.Stride);
+    };
+    auto UnmapAndReturn = [&](llvm::Error Err) {
+      Buffer->Unmap(0, nullptr);
+      return Err;
+    };
+    if (auto Err = WriteRegion(Layout.RayGen, RGEntries))
+      return UnmapAndReturn(std::move(Err));
+    if (auto Err = WriteRegion(Layout.Miss, Desc.Miss))
+      return UnmapAndReturn(std::move(Err));
+    if (auto Err = WriteRegion(Layout.HitGroup, Desc.HitGroup))
+      return UnmapAndReturn(std::move(Err));
+    if (auto Err = WriteRegion(Layout.Callable, Desc.Callable))
+      return UnmapAndReturn(std::move(Err));
+    Buffer->Unmap(0, nullptr);
+
+    // D3D12_GPU_VIRTUAL_ADDRESS_RANGE / …_AND_STRIDE expect a zero address
+    // for empty regions, matching the helper's Size == 0 sentinel.
+    const D3D12_GPU_VIRTUAL_ADDRESS Base = Buffer->GetGPUVirtualAddress();
+    auto MakeRange = [&](const SBTRegionLayout &R) {
+      return D3D12_GPU_VIRTUAL_ADDRESS_RANGE{R.Size ? Base + R.Offset : 0,
+                                             R.Size};
+    };
+    auto MakeRangeAndStride = [&](const SBTRegionLayout &R) {
+      return D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE{
+          R.Size ? Base + R.Offset : 0, R.Size, R.Stride};
+    };
+    return std::make_unique(
+        Buffer, MakeRange(Layout.RayGen), MakeRangeAndStride(Layout.Miss),
+        MakeRangeAndStride(Layout.HitGroup),
+        MakeRangeAndStride(Layout.Callable));
   }
 
   llvm::Expected>
@@ -2695,11 +2965,20 @@ class DXDevice : public offloadtest::Device {
       if (!EncoderOrErr)
         return EncoderOrErr.takeError();
       auto &Encoder = *EncoderOrErr.get();
-      if (auto Err = Encoder.dispatch(
-              *IS.Pipeline.get(), P.DispatchParameters.DispatchGroupCount[0],
-              P.DispatchParameters.DispatchGroupCount[1],
-              P.DispatchParameters.DispatchGroupCount[2]))
+      if (P.isRayTracing()) {
+        if (auto Err = Encoder.dispatchRays(
+                *IS.Pipeline, *IS.SBT,
+                P.DispatchParameters.DispatchGroupCount[0],
+                P.DispatchParameters.DispatchGroupCount[1],
+                P.DispatchParameters.DispatchGroupCount[2]))
+          return Err;
+      } else if (auto Err = Encoder.dispatch(
+                     *IS.Pipeline.get(),
+                     P.DispatchParameters.DispatchGroupCount[0],
+                     P.DispatchParameters.DispatchGroupCount[1],
+                     P.DispatchParameters.DispatchGroupCount[2])) {
         return Err;
+      }
       Encoder.endEncoding();
     }
 
@@ -3152,8 +3431,36 @@ class DXDevice : public offloadtest::Device {
         return Err;
       llvm::outs() << "Graphics command list created complete.\n";
     } else if (P.isRayTracing()) {
-      return llvm::createStringError(
-          "RayTracing pipeline not yet supported on DirectX");
+      if (P.Shaders.empty() || !P.SBT || !P.RTConfig)
+        return llvm::createStringError(
+            std::errc::invalid_argument,
+            "RayTracing pipeline requires Shaders, "
+            "ShaderBindingTable, and RayTracingPipelineConfig.");
+
+      RayTracingPipelineCreateDesc RTDesc{};
+      RTDesc.Library = P.Shaders.front().Shader.get();
+      RTDesc.HitGroups = P.HitGroups;
+      RTDesc.Config = *P.RTConfig;
+      RTDesc.Shaders.reserve(P.Shaders.size());
+      for (const auto &Sh : P.Shaders)
+        RTDesc.Shaders.push_back({Sh.Stage, Sh.Entry});
+
+      auto PSOOrErr =
+          createPipelineRT("RayTracing Pipeline State", BndDesc, RTDesc);
+      if (!PSOOrErr)
+        return PSOOrErr.takeError();
+      State.Pipeline = std::move(*PSOOrErr);
+      llvm::outs() << "RayTracing Pipeline created.\n";
+
+      auto SBTOrErr = createShaderBindingTable(*State.Pipeline, *P.SBT);
+      if (!SBTOrErr)
+        return SBTOrErr.takeError();
+      State.SBT = std::move(*SBTOrErr);
+      llvm::outs() << "Shader Binding Table created.\n";
+
+      if (auto Err = createComputeCommands(P, State))
+        return Err;
+      llvm::outs() << "RayTracing command list created.\n";
     } else {
       return llvm::createStringError("Pipeline was neither Compute nor Raster");
     }
@@ -3317,6 +3624,51 @@ llvm::Error DXComputeEncoder::batchBuildAS(llvm::ArrayRef Items) {
   CB.addPendingUAVBarrier();
   return llvm::Error::success();
 }
+
+llvm::Error DXComputeEncoder::dispatchRays(const PipelineState &PSO,
+                                           const ShaderBindingTable &SBT,
+                                           uint32_t Width, uint32_t Height,
+                                           uint32_t Depth) {
+  if (!llvm::isa(&PSO))
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "dispatchRays requires a RayTracing PipelineState.");
+  if (!llvm::isa(&SBT))
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "dispatchRays requires a DirectX ShaderBindingTable.");
+  const auto &DXRTPSO = llvm::cast(PSO);
+  const auto &DXSBT = llvm::cast(SBT);
+
+  // SetPipelineState1 and DispatchRays live on ID3D12GraphicsCommandList4.
+  // The AS-build path above follows the same query pattern.
+  ComPtr CmdList4;
+  if (auto Err =
+          HR::toError(CB.CmdList.As(&CmdList4),
+                      "ID3D12GraphicsCommandList4 query failed; raytracing "
+                      "is unsupported on this command list."))
+    return Err;
+
+  addUAVBarrier();
+  insertDebugSignpost(
+      llvm::formatv("DispatchRays [{0},{1},{2}]", Width, Height, Depth).str());
+
+  // Global root signature is shared with the compute bind point; bind it on
+  // the underlying command list before SetPipelineState1.
+  CB.CmdList->SetComputeRootSignature(DXRTPSO.RootSig.Get());
+  CmdList4->SetPipelineState1(DXRTPSO.StateObject.Get());
+
+  D3D12_DISPATCH_RAYS_DESC RaysDesc{};
+  RaysDesc.RayGenerationShaderRecord = DXSBT.RayGenRange;
+  RaysDesc.MissShaderTable = DXSBT.MissRange;
+  RaysDesc.HitGroupTable = DXSBT.HitGroupRange;
+  RaysDesc.CallableShaderTable = DXSBT.CallableRange;
+  RaysDesc.Width = Width;
+  RaysDesc.Height = Height;
+  RaysDesc.Depth = Depth;
+  CmdList4->DispatchRays(&RaysDesc);
+  return llvm::Error::success();
+}
 } // namespace
 
 llvm::Expected DXQueue::submit(
diff --git a/test/Feature/RT/raygen-roundtrip.test b/test/Feature/RT/raygen-roundtrip.test
index 962db4be0..6baff2921 100644
--- a/test/Feature/RT/raygen-roundtrip.test
+++ b/test/Feature/RT/raygen-roundtrip.test
@@ -106,7 +106,7 @@ Results:
 # REQUIRES: raytracing-pipeline
 
 # Unimplemented https://github.com/llvm/offload-test-suite/issues/1268
-# XFAIL: Clang, DirectX, Metal
+# XFAIL: Clang, Metal
 
 # RUN: split-file %s %t
 # RUN: %dxc_target_lib -T lib_6_5 -Fo %t.o %t/source.hlsl
diff --git a/tools/offloader/offloader.cpp b/tools/offloader/offloader.cpp
index 21dcd2ba6..133cb8282 100644
--- a/tools/offloader/offloader.cpp
+++ b/tools/offloader/offloader.cpp
@@ -113,26 +113,35 @@ static int run() {
   YIn >> PipelineDesc;
   ExitOnErr(llvm::errorCodeToError(YIn.error()));
 
-  // Read in the shaders. Ray tracing PSOs compile every entry point into a
-  // single library blob, so one input file backs all Shaders[] entries; the
-  // backend reads the blob from Shaders.front() and uses the rest only for
-  // their (Stage, Entry) metadata. Other pipelines expect one object file per
-  // stage.
-  if (PipelineDesc.isRayTracing()) {
-    if (InputShader.size() != 1)
-      ExitOnErr(createStringError(
-          std::errc::invalid_argument,
-          "RayTracing pipeline expects a single shader library, %d provided",
-          InputShader.size()));
-    PipelineDesc.Shaders.front().Shader = readFile(InputShader[0]);
+  // Read in the shaders.
+  //
+  // RayTracing pipelines compile every entry point — raygen, miss,
+  // closest-hit, any-hit, intersection, callable — into a single DXIL
+  // library via `dxc -T lib_6_x` / `clang-dxc -T lib_6_x`. That's the
+  // shape every real DXR app ships: D3D12's CreateStateObject requires a
+  // DXIL-library subobject anyway, and the driver fuses entry points
+  // across the whole library at link time. So when an RT pipeline is
+  // paired with a single input file, fan that one blob across every
+  // Shaders[] entry rather than asking the test author to duplicate the
+  // path N times on the offloader command line.
+  if (PipelineDesc.isRayTracing() && InputShader.size() == 1 &&
+      PipelineDesc.Shaders.size() > 1) {
+    const std::unique_ptr Lib = readFile(InputShader[0]);
+    const StringRef LibBytes = Lib->getBuffer();
+    const StringRef LibName = Lib->getBufferIdentifier();
+    for (size_t I = 0; I < PipelineDesc.Shaders.size(); ++I)
+      PipelineDesc.Shaders[I].Shader =
+          MemoryBuffer::getMemBufferCopy(LibBytes, LibName);
   } else {
+    // Check the count before indexing Shaders[]: with more input files than
+    // Shaders[] entries the loop below would write out of bounds.
     if (InputShader.size() != PipelineDesc.Shaders.size())
       ExitOnErr(createStringError(
           std::errc::invalid_argument,
-          "Pipeline description expects %d shader(s) %d provided",
+          "Pipeline description expects %zu shader(s) %zu provided",
           PipelineDesc.Shaders.size(), InputShader.size()));
     for (size_t I = 0; I < InputShader.size(); ++I)
       PipelineDesc.Shaders[I].Shader = readFile(InputShader[I]);
   }
 
   // Try to guess the API by reading the shader binary.