Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion Builds/CMake/CompileOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
endif()

if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
# Scope to the C/C++ host compiler: hipcc/clang rejects -Wno-class-memaccess
# as an unknown warning option under -Werror when it sees HIP source files.
set(DEFAULT_COMPILE_OPTIONS ${DEFAULT_COMPILE_OPTIONS}
-Wno-class-memaccess # -> disable warning: error: 'void* memcpy(void*, const void*, size_t)' ... [-Werror=class-memaccess] (caused by imgui)
$<$<COMPILE_LANGUAGE:C,CXX>:-Wno-class-memaccess> # -> disable warning: error: 'void* memcpy(void*, const void*, size_t)' ... [-Werror=class-memaccess] (caused by imgui)
)
endif ()

Expand All @@ -138,6 +140,27 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
)
endif ()

# Clang on Windows fires -Wnontrivial-memcall on memset/memcpy calls in the
# bundled Flatbuffers-generated headers (pre-existing upstream code, not the
# port). This warning is not emitted by GCC or MSVC, so suppress it on
# Windows+Clang only.
#
# Clang releases that predate the warning report -Wno-nontrivial-memcall as an
# unknown warning option, which turns into a hard error when the build uses
# -Werror. Probe the compiler first and add the suppression only when the
# warning is actually recognized.
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND WIN32)
    include(CheckCXXCompilerFlag)
    # Test the positive spelling of the warning; the result is stored in the
    # CMake cache, so the probe runs only on the first configure.
    check_cxx_compiler_flag(-Wnontrivial-memcall CUBBYFLOW_HAS_WNONTRIVIAL_MEMCALL)
    if (CUBBYFLOW_HAS_WNONTRIVIAL_MEMCALL)
        set(DEFAULT_COMPILE_OPTIONS ${DEFAULT_COMPILE_OPTIONS}
            -Wno-nontrivial-memcall
        )
    endif ()
endif ()

# Warning suppressions for the HIP build.
#
# hipcc/clang reports warnings on the existing CUDA sources that nvcc does not.
# The suppressions are appended here, at the end of the option list, because
# they must follow -Werror (clang honors a later -Wno-* over an earlier
# -Werror). Each one is wrapped in a COMPILE_LANGUAGE generator expression so
# that it reaches HIP translation units only.
if (USE_HIP)
    list(APPEND DEFAULT_COMPILE_OPTIONS
        $<$<COMPILE_LANGUAGE:HIP>:-Wno-reorder-ctor>
        $<$<COMPILE_LANGUAGE:HIP>:-Wno-unused-private-field>
        $<$<COMPILE_LANGUAGE:HIP>:-Wno-unused-variable>
    )
endif ()

#
# Linker options
#
Expand Down
33 changes: 28 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,32 @@ elseif (CMAKE_BUILD_TYPE MATCHES "Debug")
endif()
message(STATUS "CMake build type: ${CMAKE_BUILD_TYPE}")

# CUDA
# GPU backend selection
# USE_CUDA -> NVIDIA CUDA (default)
# USE_HIP -> AMD ROCm/HIP
# The two are mutually exclusive; enabling HIP turns CUDA off.
option(USE_CUDA "Use CUDA features" ON)
set(CUDA_CRT_LINKAGE "dynamic" CACHE STRING
option(USE_HIP "Use AMD ROCm/HIP features" OFF)
if (USE_HIP)
    # HIP and CUDA are mutually exclusive: selecting HIP switches CUDA off.
    set(USE_CUDA OFF)

    # enable_language(HIP) honors -DCMAKE_HIP_ARCHITECTURES; without it the
    # host GPU is auto-detected, and configuration fails on a host with no GPU.
    enable_language(HIP)

    # Compatibility shim that maps the CUDA spelling onto HIP. It is
    # force-included into every HIP translation unit so that the mapping does
    # not depend on each file's own include order, and so that the
    # device-vs-host __CUDA_ARCH__ idiom is defined before any header uses it.
    set(CUBBYFLOW_CUDA_TO_HIP_HEADER
        ${CMAKE_CURRENT_SOURCE_DIR}/Includes/Core/CUDA/cuda_to_hip.h)
    string(APPEND CMAKE_HIP_FLAGS
        " -include \"${CUBBYFLOW_CUDA_TO_HIP_HEADER}\"")

    # CUBBYFLOW_USE_CUDA is defined for the HIP build as well.
    # The shim defines __CUDACC__ to keep the project's kernel/device guards
    # visible, but that flips rocThrust's auto-detect to its CUDA backend
    # (which then includes a CUDA-only CUB header), so Thrust is pinned to its
    # HIP backend explicitly.
    add_compile_definitions(
        CUBBYFLOW_USE_CUDA
        THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)

    message(STATUS "Using HIP: arch ${CMAKE_HIP_ARCHITECTURES}")
endif()
set(CUDA_CRT_LINKAGE "dynamic" CACHE STRING
"CUDA targets' CRT options. For 'static', use /MT or /MTd. For 'dynamic', use /MD or /MDd")
if (USE_CUDA)
set(CUDA_LINK_LIBRARIES_KEYWORD PUBLIC)
Expand Down Expand Up @@ -156,7 +179,7 @@ option(BUILD_TESTS "Build the CubbyFlow test programs" ON)
if (BUILD_TESTS)
add_subdirectory(Tests/ManualTests)
add_subdirectory(Tests/UnitTests)
if (USE_CUDA)
if (USE_CUDA OR USE_HIP)
add_subdirectory(Tests/CUDATests)
endif()
if (NOT BUILD_SONARCLOUD)
Expand All @@ -167,7 +190,7 @@ endif()

option(BUILD_EXAMPLES "Build the CubbyFlow example programs" ON)
if (BUILD_EXAMPLES)
if (USE_CUDA)
if (USE_CUDA OR USE_HIP)
add_subdirectory(Examples/CUDASPHSim)
endif()
if (NOT BUILD_SONARCLOUD)
Expand All @@ -182,7 +205,7 @@ if (BUILD_EXAMPLES)
endif()
endif()

if (NOT USE_CUDA AND NOT BUILD_SONARCLOUD)
if (NOT (USE_CUDA OR USE_HIP) AND NOT BUILD_SONARCLOUD)
add_subdirectory(Libraries/pybind11)
if (BUILD_FROM_PIP)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${DEFAULT_CMAKE_LIBRARY_OUTPUT_DIRECTORY})
Expand Down
15 changes: 15 additions & 0 deletions Documents/Install.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,21 @@ bin/UnitTests

It should show all the tests are passing.

### Building with GPU Acceleration

CubbyFlow's CUDA SPH solvers can be built for either NVIDIA or AMD GPUs. The two backends are mutually exclusive and are selected at configure time. NVIDIA CUDA is enabled by default (`USE_CUDA=ON`), so the build instructions above already produce the CUDA solvers when a CUDA toolkit is found.

To build the same solvers for AMD GPUs with ROCm/HIP instead, configure with `USE_HIP=ON` (which turns `USE_CUDA` off) and, if desired, set the target GPU architecture:

```bash
mkdir build
cd build
cmake .. -DUSE_HIP=ON -DCMAKE_HIP_ARCHITECTURES=gfx90a
make
```

`CMAKE_HIP_ARCHITECTURES` accepts a semicolon-separated list of AMD GPU targets (for example `gfx90a` or `gfx1100`); when omitted, CMake tries to auto-detect the GPU of the build machine and fails to configure if no GPU is found. A working ROCm installation with HIP is required, and `hipcc` should be on the path so CMake can enable the HIP language.

### Running Tests

There are two kinds of tests in the codebase: unit tests and manual tests. For detailed instructions on how to run them, please check out the documentation page on [the project website](https://utilforever.github.io/CubbyFlow/Documentation/).
Expand Down
6 changes: 5 additions & 1 deletion Examples/CUDASPHSim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set(target CUDASPHSim)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Sources
if (USE_CUDA)
if (USE_CUDA OR USE_HIP)
file(GLOB_RECURSE sources
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/*.cu)
Expand All @@ -19,6 +19,10 @@ if (USE_CUDA)
cuda_add_executable(${target} ${sources}
OPTIONS ${CUDA_TARGET_OPTIONS}
)
elseif (USE_HIP)
file(GLOB_RECURSE hip_sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cu)
set_source_files_properties(${hip_sources} PROPERTIES LANGUAGE HIP)
add_executable(${target} ${sources})
else()
add_executable(${target} ${sources})
endif()
Expand Down
4 changes: 4 additions & 0 deletions Examples/CUDASPHSim/CUDAPCISPHSolver3Example.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
#include <Core/PointGenerator/GridPointGenerator3.hpp>
#include <Core/Utils/Serialization.hpp>

#if defined(__HIP__)
#include <Core/CUDA/cuda_to_hip.h>
#else
#include <cuda_runtime.h>
#endif
#include <pystring/pystring.h>

#include <fstream>
Expand Down
2 changes: 1 addition & 1 deletion Includes/Core/CUDA/CUDAAlgorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

namespace CubbyFlow
{
#ifdef __CUDACC__
#if defined(__CUDACC__) || defined(__HIPCC__)

template <typename T>
__global__ void CUDAFillKernel(T* dst, size_t n, T val)
Expand Down
6 changes: 3 additions & 3 deletions Includes/Core/CUDA/CUDAArray-Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

namespace CubbyFlow
{
#ifdef __CUDACC__
#if defined(__CUDACC__) || defined(__HIPCC__)
namespace Internal
{
template <typename T, size_t N, size_t I>
Expand Down Expand Up @@ -140,7 +140,7 @@ CUDAArray<T, N>::CUDAArray(const CUDAStdArray<size_t, N>& size,
Base::SetPtrAndSize(m_data.data(), size);
}

#ifdef __CUDACC__
#if defined(__CUDACC__) || defined(__HIPCC__)
template <typename T, size_t N>
template <typename... Args>
CUDAArray<T, N>::CUDAArray(size_t nx, Args... args) : CUDAArray{}
Expand Down Expand Up @@ -354,7 +354,7 @@ void CUDAArray<T, N>::Fill(const T& val)
m_data.Fill(val);
}

#ifdef __CUDACC__
#if defined(__CUDACC__) || defined(__HIPCC__)
template <typename T, size_t N>
void CUDAArray<T, N>::Resize(CUDAStdArray<size_t, N> newSize, const T& initVal)
{
Expand Down
Loading