Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
cmake_minimum_required(VERSION 3.25) # ipp6 is using 3.28

# Version information
# makefiles/version.mk declares each version field as `NAME := value`; it is
# read here so the CMake build reports the same version as the Makefile build.
# The path is relative to this CMakeLists.txt so it also resolves when this
# directory is pulled into a larger build with add_subdirectory().
set(NCCL_VERSION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/makefiles/version.mk")
file(READ "${NCCL_VERSION_FILE}" VERSION_CONTENT)

# file(READ) alone does not make the build system depend on the file; register
# it so that editing version.mk triggers a re-configure.
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${NCCL_VERSION_FILE}")

# Each REGEX REPLACE swallows the whole file and leaves only the captured
# value. When the pattern does not match, the output is the untouched file
# content, which is why every result is validated below.
string(REGEX REPLACE ".*NCCL_MAJOR[ \t]*:=[ \t]*([0-9]+).*" "\\1" NCCL_MAJOR "${VERSION_CONTENT}")
string(REGEX REPLACE ".*NCCL_MINOR[ \t]*:=[ \t]*([0-9]+).*" "\\1" NCCL_MINOR "${VERSION_CONTENT}")
string(REGEX REPLACE ".*NCCL_PATCH[ \t]*:=[ \t]*([0-9]+).*" "\\1" NCCL_PATCH "${VERSION_CONTENT}")
string(REGEX REPLACE ".*NCCL_SUFFIX[ \t]*:=[ \t]*([a-zA-Z0-9]*).*" "\\1" NCCL_SUFFIX "${VERSION_CONTENT}")
string(REGEX REPLACE ".*PKG_REVISION[ \t]*:=[ \t]*([0-9]+).*" "\\1" PKG_REVISION "${VERSION_CONTENT}")

# The three numeric components feed math() and project(VERSION ...); fail with
# a readable message instead of a cryptic expression error if one is missing.
foreach(nccl_version_field IN ITEMS NCCL_MAJOR NCCL_MINOR NCCL_PATCH)
  if(NOT "${${nccl_version_field}}" MATCHES "^[0-9]+$")
    message(FATAL_ERROR
      "Could not parse ${nccl_version_field} from ${NCCL_VERSION_FILE}")
  endif()
endforeach()

# The suffix and package revision are not needed by this file; if they cannot
# be parsed, fall back to an empty value rather than the raw file content.
foreach(nccl_version_field IN ITEMS NCCL_SUFFIX PKG_REVISION)
  if(NOT "${${nccl_version_field}}" MATCHES "^[a-zA-Z0-9]*$")
    set(${nccl_version_field} "")
  endif()
endforeach()

# Single integer encoding of the version: major * 10000 + minor * 100 + patch.
math(EXPR NCCL_VERSION_CODE "(${NCCL_MAJOR} * 10000) + (${NCCL_MINOR} * 100) + ${NCCL_PATCH}")

# Publish the parsed version to the compilers as preprocessor macros, together
# with NCCL_USE_CMAKE so sources can tell that this build is driven by CMake.
# These are directory-level definitions, so they apply to targets created in
# this directory and in the subdirectories added further down.
add_compile_definitions(NCCL_USE_CMAKE)
add_compile_definitions(
  "NCCL_MAJOR=${NCCL_MAJOR}"
  "NCCL_MINOR=${NCCL_MINOR}"
  "NCCL_PATCH=${NCCL_PATCH}"
)
add_compile_definitions("NCCL_VERSION_CODE=${NCCL_VERSION_CODE}")

# Also flag the CMake build in the environment of the running cmake process
# (this does not persist into the later build step).
set(ENV{NCCL_USE_CMAKE} "1")

# Declare the project with the version read from version.mk and enable the
# CUDA, C++ and C toolchains.
project(
  NCCL
  VERSION ${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}
  LANGUAGES CUDA CXX C
)

# Default to an optimized (Release) build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
endif()

# User-facing build switches. Override any of them with -D<NAME>=ON|OFF on the
# cmake command line. VERBOSE, KEEP and DEBUG are not consumed in this part of
# the file; presumably the subdirectories read them -- TODO confirm.
option(VERBOSE "Enable verbose output" OFF)
option(KEEP "Keep intermediate files" OFF)
option(DEBUG "Enable debug build" OFF)
option(ASAN "Enable Address Sanitizer" OFF)
option(UBSAN "Enable Undefined Behavior Sanitizer" OFF)
option(TRACE "Enable tracing" OFF)
option(WERROR "Treat warnings as errors" OFF)
option(PROFAPI "Enable profiling API" ON)
option(NVTX "Enable NVTX" ON)
option(RDMA_CORE "Enable RDMA core" OFF)
option(NET_PROFILER "Enable network profiler" OFF)
option(MLX5DV "Enable MLX5DV" OFF)

# This setting is a count, not a switch. option() only models booleans (it
# would store the default as OFF and show a checkbox in the GUIs), so declare
# it as a string cache entry. 0 keeps the limit compiled into the sources.
set(MAX_EXT_NET_PLUGINS 0 CACHE STRING "Maximum external network plugins")

# Locate the CUDA toolkit; configuration stops here if none is found.
find_package(CUDAToolkit REQUIRED)

# CUDA version detection
# CUDA_VERSION keeps only the leading "<major>.<minor>" of the toolkit version.
string(REGEX MATCH "([0-9]+\\.[0-9]+)" CUDA_VERSION "${CUDAToolkit_VERSION}")

# FindCUDAToolkit already publishes the individual version components, so use
# them directly instead of re-parsing the version string.
set(CUDA_MAJOR "${CUDAToolkit_VERSION_MAJOR}")
set(CUDA_MINOR "${CUDAToolkit_VERSION_MINOR}")

# Expose the toolkit version to the sources as preprocessor macros. This must
# come after find_package() because that is what provides the values.
add_compile_definitions(
  CUDA_MAJOR=${CUDA_MAJOR}
  CUDA_MINOR=${CUDA_MINOR}
)

# CUDA architecture flags
# When the user did not choose target architectures, pick a default list from
# the toolkit's major/minor version.
#
# NOTE(review): with policy CMP0104 NEW (implied by cmake_minimum_required
# 3.25), enabling the CUDA language appears to initialize
# CMAKE_CUDA_ARCHITECTURES to the compiler's default. If project() has already
# enabled CUDA by the time this runs, this fallback may never be taken --
# confirm, and if so record whether the user set the variable before
# project() is called.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR CMAKE_CUDA_ARCHITECTURES STREQUAL "")
  message(STATUS "CMAKE_CUDA_ARCHITECTURES not defined or empty, setting default values based on CUDA version")

  # The variable names are passed unexpanded so that an empty CUDA_MAJOR or
  # CUDA_MINOR makes the comparison false instead of producing a malformed
  # if() expression.
  if(CUDA_MAJOR LESS 9)
    set(CMAKE_CUDA_ARCHITECTURES "35;50;60;61")
  elseif(CUDA_MAJOR EQUAL 9 OR CUDA_MAJOR EQUAL 10)
    set(CMAKE_CUDA_ARCHITECTURES "35;50;60;61;70")
  elseif(CUDA_MAJOR EQUAL 11)
    if(CUDA_MINOR LESS 8)
      set(CMAKE_CUDA_ARCHITECTURES "35;50;60;61;70;80")
    else()
      set(CMAKE_CUDA_ARCHITECTURES "35;50;60;61;70;80;90")
    endif()
  elseif(CUDA_MAJOR EQUAL 12)
    if(CUDA_MINOR LESS 8)
      set(CMAKE_CUDA_ARCHITECTURES "50;60;61;70;80;90")
    else()
      set(CMAKE_CUDA_ARCHITECTURES "50;60;61;70;80;90;100;120")
    endif()
  else()
    # CUDA 13 and any later toolkit. CUDA 13 removed compilation support for
    # architectures older than sm_75, so the pre-Turing entries are replaced by
    # 75; listing them would make nvcc reject the build.
    set(CMAKE_CUDA_ARCHITECTURES "75;80;90;100;110;120")
  endif()
endif()
message(STATUS "Using CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")

# Flags applied to every configuration, appended to whatever the user or the
# toolchain already supplied.
# C++: position-independent code, hidden symbol visibility by default, the
# project's warning set, and debug info.
string(APPEND CMAKE_CXX_FLAGS
  " -fPIC -fvisibility=hidden"
  " -Wall -Wno-unused-function -Wno-sign-compare -Wvla"
  " -g")
# CUDA: extended lambdas, a 96-register cap passed to ptxas, compressed fat
# binaries, and position-independent code.
string(APPEND CMAKE_CUDA_FLAGS
  " --expt-extended-lambda"
  " -Xptxas -maxrregcount=96"
  " -Xfatbin -compress-all"
  " -fPIC")

# Sanitizer options
# Each sanitizer instruments the C++ compile and links its runtime statically
# into executables.
# NOTE(review): only CMAKE_EXE_LINKER_FLAGS is extended; links of shared
# libraries are governed by CMAKE_SHARED_LINKER_FLAGS, which is left alone
# here -- confirm that is intended.
if(ASAN)
  string(APPEND CMAKE_CXX_FLAGS " -fsanitize=address")
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -fsanitize=address -static-libasan")
endif()

if(UBSAN)
  string(APPEND CMAKE_CXX_FLAGS " -fsanitize=undefined")
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -fsanitize=undefined -static-libubsan")
endif()

# Additional options
# Translate the feature switches into preprocessor macros / compiler flags.

# Tracing is opt-in.
if(TRACE)
  add_compile_definitions(ENABLE_TRACE)
endif()

# NVTX is on by default; switching the option off defines the disable macro.
if(NOT NVTX)
  add_compile_definitions(NVTX_DISABLE)
endif()

# Promote warnings to errors (C++ flags only).
if(WERROR)
  string(APPEND CMAKE_CXX_FLAGS " -Werror")
endif()

# The profiling API is on by default.
if(PROFAPI)
  add_compile_definitions(PROFAPI)
endif()

# Additional libraries discovered below are accumulated in EXTRA_LIBS; the
# list starts out empty. (It is not consumed in this part of the file;
# presumably the subdirectories link against it -- TODO confirm.)
set(EXTRA_LIBS)

# RDMA and MLX5DV are Linux-specific features
# Each switch defines its feature macro and adds the matching library to
# EXTRA_LIBS. A library that cannot be found is reported, because the macro is
# still defined and the build would otherwise only fail later, at link time,
# with a far less obvious error.
if(RDMA_CORE)
  add_definitions(-DNCCL_BUILD_RDMA_CORE=1)
  find_library(VERBS_LIBRARY NAMES verbs)
  if(VERBS_LIBRARY)
    list(APPEND EXTRA_LIBS ${VERBS_LIBRARY})
  else()
    message(WARNING
      "RDMA_CORE is enabled but the 'verbs' library was not found; "
      "it will not be added to EXTRA_LIBS")
  endif()
endif()

if(MLX5DV)
  add_definitions(-DNCCL_BUILD_MLX5DV=1)
  find_library(MLX5_LIBRARY NAMES mlx5)
  if(MLX5_LIBRARY)
    list(APPEND EXTRA_LIBS ${MLX5_LIBRARY})
  else()
    message(WARNING
      "MLX5DV is enabled but the 'mlx5' library was not found; "
      "it will not be added to EXTRA_LIBS")
  endif()
endif()

# Network profiling is opt-in.
if(NET_PROFILER)
  add_compile_definitions(NCCL_ENABLE_NET_PROFILING=1)
endif()

# Forward the plugin limit to the sources only when a positive value was
# requested; otherwise no macro is defined here.
if(MAX_EXT_NET_PLUGINS GREATER 0)
  add_compile_definitions(NCCL_NET_MAX_PLUGINS=${MAX_EXT_NET_PLUGINS})
endif()

# Definitions that are always on, independent of any option.
add_compile_definitions(
  DOCA_VERBS_USE_CUDA_WRAPPER
  DOCA_VERBS_USE_NET_WRAPPER
  NCCL_GIN_PROXY_ENABLE=1
)

# Library dependencies
# librt is added to EXTRA_LIBS when it exists as a separate library; nothing
# happens when it is not found.
find_library(RT_LIBRARY NAMES rt)
if(RT_LIBRARY)
  list(APPEND EXTRA_LIBS "${RT_LIBRARY}")
endif()

# Debug/Release specific flags
# CMake passes CMAKE_<LANG>_FLAGS followed by CMAKE_<LANG>_FLAGS_<CONFIG> to
# the compiler, so the per-configuration variables must hold only what differs
# between configurations. Embedding CMAKE_<LANG>_FLAGS in them would put every
# common flag on the command line twice.
# Debug: no optimization; CUDA also gets device-side debug info (-G -g).
set(CMAKE_CXX_FLAGS_DEBUG "-O0")
set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -G -g")
# Release: full optimization.
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")

# Descend into the component directories. The order is kept as written:
# the plugin examples first, the main sources last.
foreach(nccl_component_dir IN ITEMS
    ext-net
    ext-profiler/example
    ext-tuner/example
    src)
  add_subdirectory(${nccl_component_dir})
endforeach()
84 changes: 84 additions & 0 deletions examples/06_device_api/02_gin_alltoall_pure/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# See LICENSE.txt for license information
#

# Shared build settings and rules for the examples
include ../../../makefiles/common.mk
include ../../../makefiles/examples.mk

# Name of the executable produced by this example
TARGET = gin_alltoall_pure_device_api

# Helper code shared between the examples
COMMON_INC = ../../common/include
COMMON_SRC = ../../common/src

# Make the shared headers visible to the compilers
INCLUDES += -I$(COMMON_INC)

# Inputs of this example, and the object file derived from each one
# (.cu and .cc sources both map to a .o next to the source file)
SOURCES = main.cu $(COMMON_SRC)/utils.cc
OBJECTS := $(patsubst %.cc,%.o,$(patsubst %.cu,%.o,$(SOURCES)))

# Default goal: build the example executable
all: $(TARGET)

# Link the objects into the executable. With MPI=1 the MPI compiler wrapper
# does the link; otherwise the plain C++ compiler is used and pthread is
# linked explicitly.
$(TARGET): $(OBJECTS)
ifneq ($(MPI),1)
	$(CXX) $(CXXFLAGS) $^ $(LIBRARIES) $(LDFLAGS) -lpthread -o $@
else
	$(MPICXX) $(CXXFLAGS) $^ $(LIBRARIES) $(LDFLAGS) -o $@
endif
	@echo "Built target $@"

# Pattern rules turning one source file into one object file.
# CUDA sources always go through nvcc.
%.o: %.cu
	$(NVCC) $(NVCUFLAGS) $(INCLUDES) -c $< -o $@

# C++ sources use the MPI compiler wrapper when MPI=1, the plain C++ compiler
# otherwise.
%.o: %.cc
ifneq ($(MPI),1)
	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
else
	$(MPICXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
endif

# Build (if needed) and run the example: two ranks through the MPI launcher
# when MPI=1, a direct single-process run otherwise.
test: $(TARGET)
	@echo "Testing $(TARGET)..."
ifneq ($(MPI),1)
	@echo "Running with all available GPUs"
	./$(TARGET)
else
	@echo "Running with 2 processes"
	$(MPIRUN) -np 2 ./$(TARGET)
endif

# Remove everything this Makefile builds: the executable and its objects
clean:
	rm -f $(TARGET) $(OBJECTS)

# Copy the built executable into $(PREFIX)/bin, creating the directory first
install: $(TARGET)
	@mkdir -p $(PREFIX)/bin
	cp $< $(PREFIX)/bin/

# None of these goals names a file that gets created
.PHONY: all test clean install help

# Print a short description of the example and of the available goals
help:
	@echo "NCCL Example: Pure GIN AlltoAll Device API"
	@echo "=============================================="
	@echo ""
	@echo "This example demonstrates pure GPU-Initiated Networking (GIN)"
	@echo "for AlltoAll operations without LSA optimizations."
	@echo ""
	@echo "Targets:"
	@echo " all - Build the example (default)"
	@echo " test - Build and run test with all GPUs"
	@echo " clean - Remove build artifacts"
	@echo " install - Install to PREFIX/bin (default: /usr/local/bin)"
	@echo " help - Show this help"
Loading
Loading