From 8a7aa04e24cd0b620dd08598d613d7f4a73c48ad Mon Sep 17 00:00:00 2001 From: Martin Vit Date: Tue, 31 Mar 2026 14:05:10 +0000 Subject: [PATCH] Enable P2P transport for AMD systems with >2 GPUs at PHB level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On AMD multi-socket systems, GPUs on the same NUMA node connect through separate PCIe root complexes under the same PCIe Host Bridge (PATH_PHB). The default P2P level (PATH_PXB) disables P2P for these paths, forcing shared memory transport with 24-42% bandwidth loss. Extend the existing AMD P2P exception to allow PHB-level P2P for configurations with more than 2 GPUs. The original SYS-level P2P for ≤2 GPU configurations is preserved. Benchmarked on dual-socket AMD EPYC 9575F (Turin) with 4x RTX PRO 6000 on the same socket (NCCL 2.29.7+cuda13.2): Transport change: SHM/direct/direct -> P2P/direct pointer Throughput: +24-42% across 256K-128M message sizes Latency: up to 19% lower at 128K Signed-off-by: Martin Vit --- src/graph/paths.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/graph/paths.cc b/src/graph/paths.cc index cb54c4bb21..f06b9e7b5f 100644 --- a/src/graph/paths.cc +++ b/src/graph/paths.cc @@ -339,8 +339,13 @@ ncclResult_t ncclTopoCheckP2p(struct ncclComm* comm, struct ncclTopoSystem* syst int arch, vendor, model; NCCLCHECK(ncclTopoCpuType(system, &arch, &vendor, &model)); - // Allow P2P between pairs of GPU devices on AMD systems - if ((arch == NCCL_TOPO_CPU_ARCH_X86 && vendor == NCCL_TOPO_CPU_VENDOR_AMD) && system->nodes[DEV].count <= 2) p2pLevel = PATH_SYS; + // Allow P2P on AMD systems: SYS level for ≤2 GPU devices (original behavior), + // PHB level for >2 to enable same-socket P2P through the PCIe Host Bridge. + // Without this, GPUs under separate root complexes on the same NUMA node + // (PATH_PHB) fall back to shared memory transport, losing 24-42% bandwidth. 
+ if (arch == NCCL_TOPO_CPU_ARCH_X86 && vendor == NCCL_TOPO_CPU_VENDOR_AMD) { + p2pLevel = (system->nodes[DEV].count <= 2) ? PATH_SYS : PATH_PHB; + } // User override NCCLCHECK(ncclGetUserP2pLevel(&p2pLevel));