Skip to content

Commit 6d5a3bf

Browse files
authored
Linux (#32)
* rename * fix linux compilation * fix invalid write * extend hessian buffer * use unique_ptr in context.h * unique_ptr in solver * fix interop free * free pointers * remove fps cap * update readme fix barrier hessian construction
1 parent bee3e1d commit 6d5a3bf

60 files changed

Lines changed: 622 additions & 277 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
name: CUDA CMake Build (Linux)
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- linux
7+
- main
8+
9+
jobs:
10+
build:
11+
runs-on: ubuntu-22.04
12+
strategy:
13+
matrix:
14+
build_type: [Debug, Release]
15+
16+
steps:
17+
- uses: actions/checkout@v4
18+
19+
- name: Install OpenGL deps
20+
run: |
21+
sudo apt-get update
22+
sudo apt-get install -y \
23+
libgl1-mesa-dev \
24+
libglu1-mesa-dev \
25+
libx11-dev \
26+
libxrandr-dev \
27+
libxinerama-dev \
28+
libxcursor-dev \
29+
libxi-dev
30+
31+
# Installs the CUDA 12.4 toolkit from NVIDIA's apt repository for Ubuntu 22.04:
# downloads and installs the cuda-keyring package, refreshes the package index
# so the newly registered repository is visible, then installs cuda-toolkit-12-4.
- name: Install CUDA Toolkit
32+
run: |
33+
sudo apt-get update
34+
sudo apt-get install -y wget gnupg
35+
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
36+
sudo dpkg -i cuda-keyring_1.1-1_all.deb
37+
sudo apt-get update
38+
sudo apt-get install -y cuda-toolkit-12-4
39+
40+
# Installs the GLFW, GLEW and GLM development packages plus the X11 headers.
# NOTE(review): libx11-dev, libxrandr-dev, libxinerama-dev, libxcursor-dev and
# libxi-dev are already installed by the earlier "Install OpenGL deps" step, so
# repeating them here is redundant; the two steps could be merged.
- name: Install OpenGL & GLFW deps
41+
run: |
42+
sudo apt-get update
43+
sudo apt-get install -y \
44+
libglfw3-dev \
45+
libglew-dev \
46+
libglm-dev \
47+
libx11-dev \
48+
libxrandr-dev \
49+
libxinerama-dev \
50+
libxcursor-dev \
51+
libxi-dev
52+
53+
# Exposes the CUDA install to the following steps: appends /usr/local/cuda/bin
# to the file named by $GITHUB_PATH (extends PATH for later steps) and writes
# CUDA_HOME=/usr/local/cuda to the file named by $GITHUB_ENV (exports it to later steps).
# NOTE(review): assumes the toolkit package provides the /usr/local/cuda symlink — confirm.
- name: Set CUDA env
54+
run: |
55+
echo "/usr/local/cuda/bin" >> $GITHUB_PATH
56+
echo "CUDA_HOME=/usr/local/cuda" >> $GITHUB_ENV
57+
58+
- name: Configure CMake
59+
run: |
60+
cmake -S . -B build-${{ matrix.build_type }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
61+
62+
- name: Build
63+
run: |
64+
cmake --build build-${{ matrix.build_type }} --config ${{ matrix.build_type }}
65+
66+
- name: Run tests
67+
run: |
68+
cd build-${{ matrix.build_type }}
69+
ctest --output-on-failure

CMakeLists.txt

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,15 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
1313
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/recipes/")
1414
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/find/")
1515

16-
set(CMAKE_CUDA_ARCHITECTURES native)
16+
# Choose the CUDA architecture list, but only when CMAKE_CUDA_COMPILER is set.
# If the CI environment variable is defined at configure time, a fixed
# architecture (75) is used; otherwise the architecture is set to `native`.
# NOTE(review): presumably CI runners have no GPU for `native` detection — confirm.
# NOTE(review): both branches use a plain set(), which shadows any
# CMAKE_CUDA_ARCHITECTURES value supplied on the command line via -D.
if(CMAKE_CUDA_COMPILER)
17+
if(DEFINED ENV{CI})
18+
message(STATUS "CI environment detected. Setting CUDA architecture to 75.")
19+
set(CMAKE_CUDA_ARCHITECTURES 75)
20+
else()
21+
message(STATUS "Local environment detected. Setting CUDA architecture to native.")
22+
set(CMAKE_CUDA_ARCHITECTURES native)
23+
endif()
24+
endif()
1725

1826
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
1927
set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -g")
@@ -36,14 +44,20 @@ find_package(CUDAToolkit REQUIRED)
3644

3745
find_package(OpenGL REQUIRED)
3846

47+
set(EXTERNAL "${PROJECT_SOURCE_DIR}/external")
48+
3949
if(UNIX)
50+
find_package(OpenGL REQUIRED)
4051
find_package(glfw3 REQUIRED)
4152
find_package(GLEW REQUIRED)
42-
set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY})
53+
54+
set(LIBRARIES
55+
glfw
56+
${GLEW_LIBRARIES}
57+
OpenGL::GL
58+
)
4359
else()
4460
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
45-
set(EXTERNAL "external")
46-
4761
set(GLFW_ROOT_DIR ${EXTERNAL})
4862
set(GLFW_USE_STATIC_LIBS ON)
4963
find_package(GLFW REQUIRED)
@@ -57,7 +71,8 @@ else()
5771
set(LIBRARIES ${GLEW_LIBRARY} ${GLFW_LIBRARY} ${OPENGL_LIBRARY})
5872
endif()
5973

60-
set(GLM_ROOT_DIR "external")
74+
set(GLM_ROOT_DIR "${PROJECT_SOURCE_DIR}/external")
75+
set(GLM_INCLUDE_DIR "${EXTERNAL}/include" CACHE PATH "GLM include dir" FORCE)
6176
find_package(GLM REQUIRED)
6277
include_directories(${GLM_INCLUDE_DIRS})
6378

@@ -106,6 +121,11 @@ include_directories(
106121
${GLEW_INCLUDE_DIR}
107122
${GLFW_INCLUDE_DIR})
108123

124+
# On UNIX, register /usr/include as an implicit C++ include directory and
# de-duplicate the resulting list.
# NOTE(review): presumably this keeps CMake from emitting an explicit include
# flag for /usr/include (which can disturb system header ordering) — confirm.
if(UNIX)
125+
list(APPEND CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include")
126+
list(REMOVE_DUPLICATES CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES)
127+
endif()
128+
109129
########################################
110130
# Catch2 Tests
111131
########################################
@@ -114,30 +134,35 @@ enable_testing()
114134
add_subdirectory(tests)
115135
########################################
116136
add_executable(${CMAKE_PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES} ${IMGUI_SOURCES})
117-
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${EXTERNAL}/ImGui" "${EXTERNAL}/svd3_cuda" ${eigen_SOURCE_DIR})
137+
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${EXTERNAL}/ImGui" "${EXTERNAL}/svd3_cuda" "${EXTERNAL}/include")
118138
target_link_libraries(${CMAKE_PROJECT_NAME}
119139
${LIBRARIES}
120140
CUDA::cudart
121141
CUDA::cusolver
122142
OpenMP::OpenMP_CXX
123143
spdlog::spdlog
144+
Eigen3::Eigen
124145
#stream_compaction # TODO: uncomment if using your stream compaction
125146
)
126147
set_target_properties(${CMAKE_PROJECT_NAME} PROPERTIES
127148
CUDA_SEPARABLE_COMPILATION ON
128149
CUDA_RESOLVE_DEVICE_SYMBOLS ON
129150
)
151+
# Host-compiler warning suppressions forwarded through nvcc's -Xcompiler.
# The list starts empty and receives the /wd flags only under MSVC, so
# non-MSVC host compilers are never handed MSVC-style options. It is expanded
# inside the CUDA-only generator expression of target_compile_options() below.
set(CUDA_HOST_WARNING_SUPPRESSIONS "")
152+
if(MSVC)
153+
list(APPEND CUDA_HOST_WARNING_SUPPRESSIONS
154+
-Xcompiler=/wd4819
155+
-Xcompiler=/wd4068
156+
-Xcompiler=/wd4661)
157+
endif()
158+
130159
target_compile_options(${CMAKE_PROJECT_NAME} PRIVATE
131160
$<$<COMPILE_LANGUAGE:CUDA>:
132161
-Xptxas=-v
133162
--extended-lambda
134163
--expt-relaxed-constexpr
135164
-lineinfo
136-
137-
-Xcompiler=/wd4819
138-
-Xcompiler=/wd4068
139-
-Xcompiler=/wd4661
140-
165+
${CUDA_HOST_WARNING_SUPPRESSIONS}
141166
-Xcudafe=--display_error_number
142167
-Xcudafe=--diag_suppress=20012
143168
-Xcudafe=--diag_suppress=20011

README.md

Lines changed: 69 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
CUDA-Accelerated Soft Body Simulation
2-
================
1+
# CUDA-Accelerated Soft Body Simulation
2+
3+
![CUDA CMake Build (Linux)](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/actions/workflows/cuda-cmake-build-linux.yml/badge.svg)
34

45
**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Final Project**
56

@@ -8,14 +9,17 @@ Hanting Xu
89

910
## Click [here](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/tree/CIS5650-Final) for documentation (CIS5650 Final Project version)
1011

11-
## Requirements
12+
## Overview
13+
14+
This project is a CUDA-accelerated soft body simulation framework originally developed as a final project for **CIS 5650: GPU Programming and Architecture** at UPenn.
1215

13-
- CUDA >= 12.0 (cublas, cusolver)
14-
- CMake >= 3.18
16+
The goal of this project is to explore GPU-based physics simulation by building a **lightweight, extensible simulation framework** with minimal external dependencies. The system is designed to support rapid experimentation with different:
1517

16-
## Description
18+
* physical models,
19+
* numerical solvers,
20+
* GPU-accelerated linear algebra pipelines.
1721

18-
This project is originally a final project for CIS5650 at UPenn. The goal of this toy project is to provide a CUDA-accelerated physical simulation framework with minimal dependencies. The framework is designed to be easily extensible, allowing new simulation algorithms, physical models, linear solvers, and collision detection methods to be added with minimal effort. The currently implemented features are listed below.
22+
---
1923

2024
## Features
2125

@@ -25,6 +29,8 @@ This project is originally a final project for CIS5650 at UPenn. The goal of thi
2529
* [x] Jacobi Solver (Naive)
2630
* [x] Cholesky Decomposition
2731
* [x] Preconditioned Conjugate Gradient
32+
* [x] Incomplete Cholesky Preconditioner
33+
* [x] Jacobi Preconditioner
2834

2935
* FEM
3036
* [x] Projective Dynamics
@@ -41,39 +47,78 @@ This project is originally a final project for CIS5650 at UPenn. The goal of thi
4147
* [x] Neo-Hookean
4248

4349
* Collision Detection
44-
* [x] Real-Time Bvh
45-
* [x] Ccd
46-
* [ ] Robust Collision Handling
50+
* [x] Real-Time BVH Construction
51+
* [x] Continuous Collision Detection (CCD)
4752

4853
## Dependencies
4954

50-
* [CUDA](https://developer.nvidia.com/cuda-downloads)
51-
* [CMake](https://cmake.org/download/)
55+
### System Requirements
56+
57+
* **Operating System**
58+
59+
* Windows
60+
* Linux
61+
* **CUDA Toolkit** ≥ 12.0
62+
(cuBLAS and cuSOLVER required)
63+
* **CMake** ≥ 3.18
64+
* **OpenGL**
65+
66+
### Third-Party Libraries
5267

53-
Below are included in the project:
68+
The following libraries are included directly in the project:
5469

5570
* OpenGL
5671
* ImGui
57-
* spdlog
72+
* GLFW
5873
* Eigen
59-
* glfw
60-
* catch2
74+
* spdlog
75+
* Catch2
76+
77+
External tools:
78+
79+
* [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)
80+
* [CMake](https://cmake.org/download/)
81+
82+
---
83+
84+
## Configuration
85+
86+
### Environment Configuration
87+
88+
The full runtime configuration is specified in `context.json`. This file defines simulation contexts, solver settings, and physical parameters.
89+
90+
---
91+
92+
### Scene Configuration
93+
94+
The framework supports multiple **simulation contexts**, each representing an independent scene. A context may contain:
95+
96+
* one or more soft bodies,
97+
* rigid bodies.
98+
99+
Each context can be configured independently with physical parameters such as time step size, gravity, and damping coefficients. Contexts can be switched **at runtime**.
100+
101+
---
61102

62-
## Note on Configuration
103+
### Solver Configuration
63104

64-
The complete environment configuration is specified in context.json.
105+
Solver behavior is controlled on a per-context basis.
65106

66-
### Scene
107+
* **Single-precision (`float`)**
67108

68-
The framework supports configuration of predefined soft bodies, rigid bodies, and camera parameters. Multiple contexts (scenes) can be loaded simultaneously, where each context may contain different combinations of soft and rigid objects, as well as distinct camera settings.
109+
* Uses the **Projective Dynamics (PD)** solver
110+
* **Double-precision (`double`)**
69111

70-
Each context can be configured independently with physical parameters such as time step size, gravity, damping coefficients, and friction coefficients, and supports real-time switching between contexts.
112+
* Uses the **Incremental Potential Contact (IPC)** solver
71113

72-
### Solver
114+
Only parameters relevant to the active solver are applied.
73115

74-
The behavior of the solver can be adjusted by modifying parameters in each context. Currently, solvers supporting two floating-point precisions are available. When defining a context, setting the precision parameter to float uses the projective dynamics solver, while setting it to double uses the IPC solver. Only the parameters relevant to the active solver take effect.
116+
#### Notes on Solver Usage
75117

76-
The PD solver supports interactive object dragging within the scene. The IPC solver is significantly slower and consumes more GPU memory; therefore, it is not recommended for scenes involving objects with a large number of degrees of freedom. Different solvers expose different global solver and linear solver options in the ImGui combo box, which can be switched in real time. However, since solvers consume a substantial amount of GPU memory, frequent switching may lead to performance degradation. It is recommended to select the desired solver before starting the simulation and avoid switching after the simulation has begun.
118+
* The PD solver supports **interactive object dragging**.
119+
* IPC is **not recommended** for scenes with a large number of degrees of freedom. For large vertex counts, careful parameter tuning is required; otherwise, the simulation may fail to converge and pause.
120+
* For large-scale systems, **Cholesky-based solvers can become prohibitively slow**; **PCG with a Jacobi preconditioner** is recommended instead.
121+
* Linear solvers can be switched via ImGui **before simulation starts**.
77122

78123
## Screenshots
79124

src/collision/aabb.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,12 @@ enum class QueryType {
5858
VF,
5959
EE
6060
};
61-
61+
// Minimal three-component vector of doubles (x, y, z).
struct Vec3d {
62+
double x, y, z;
63+
// Unary minus: returns a new Vec3d with every component negated.
Vec3d operator-() const {
64+
return Vec3d{-x, -y, -z};
65+
}
66+
};
6267
class Query {
6368
public:
6469
QueryType type = QueryType::UNKNOWN;
@@ -69,5 +74,5 @@ class Query {
6974
indexType v3;
7075
double d;
7176
double toi = 0.f;
72-
glm::dvec3 normal = glm::dvec3(0.f);
73-
};
77+
Vec3d normal = Vec3d{0.0, 0.0, 0.0};
78+
};

src/collision/broadphase.cu

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,27 @@
44
#include <collision/bvh.cuh>
55
#include <collision/bvh.h>
66
#include <simulation/simulationContext.h>
7-
#include <distance/distance_type.h>
7+
#include <distance/distance_type.cuh>
88
#include <collision/intersections.h>
99
#include <thrust/sort.h>
1010
#include <thrust/unique.h>
1111
#include <thrust/remove.h>
1212
#include <thrust/device_vector.h>
1313

14+
// File-local helper for atomically adding to a size_t counter on the device.
namespace {
15+
// Atomically adds `val` to `*address` and returns atomicAdd's result converted
// back to size_t. The overload is chosen at compile time from sizeof(size_t):
// when it equals sizeof(unsigned long long) the pointer and value are
// reinterpreted as unsigned long long, otherwise as unsigned int.
// NOTE(review): CUDA documents atomicAdd as returning the value held before
// the addition; callers in this file use the result as the first index of the
// range they just reserved.
__device__ inline size_t atomicAddSizeT(size_t* address, size_t val) {
16+
if constexpr (sizeof(size_t) == sizeof(unsigned long long)) {
17+
return static_cast<size_t>(atomicAdd(
18+
reinterpret_cast<unsigned long long*>(address),
19+
static_cast<unsigned long long>(val)));
20+
} else {
21+
return static_cast<size_t>(atomicAdd(
22+
reinterpret_cast<unsigned int*>(address),
23+
static_cast<unsigned int>(val)));
24+
}
25+
}
26+
} // namespace
27+
1428
//input the aabb box of a Tetrahedron
1529
//generate a 30-bit morton code
1630
template<typename Scalar>
@@ -341,7 +355,7 @@ __global__ void traverseTree(int numTris, const BVHNode<Scalar>* nodes, const in
341355
// 1 faces * 3 verts + 3 edges * 3 edges
342356
if ((!ignoreSelfCollision || triFathers[myNode.TriangleIndex] != triFathers[leftChild.TriangleIndex]) && myNode.TriangleIndex != leftChild.TriangleIndex && !isAdjacentTriangle(tris[myNode.TriangleIndex * 3 + 0], tris[myNode.TriangleIndex * 3 + 1], tris[myNode.TriangleIndex * 3 + 2],
343357
tris[leftChild.TriangleIndex * 3 + 0], tris[leftChild.TriangleIndex * 3 + 1], tris[leftChild.TriangleIndex * 3 + 2])) {
344-
int qIdx = atomicAdd(queryCount, 12);
358+
size_t qIdx = atomicAddSizeT(queryCount, static_cast<size_t>(12));
345359
if (qIdx + 12 < maxNumQueries) {
346360
Query* qBegin = &queries[qIdx];
347361
fillQuery(qBegin, myNode.TriangleIndex, leftChild.TriangleIndex, tris);
@@ -365,7 +379,7 @@ __global__ void traverseTree(int numTris, const BVHNode<Scalar>* nodes, const in
365379
{
366380
if ((!ignoreSelfCollision || triFathers[myNode.TriangleIndex] != triFathers[rightChild.TriangleIndex]) && myNode.TriangleIndex != rightChild.TriangleIndex && !isAdjacentTriangle(tris[myNode.TriangleIndex * 3 + 0], tris[myNode.TriangleIndex * 3 + 1], tris[myNode.TriangleIndex * 3 + 2],
367381
tris[rightChild.TriangleIndex * 3 + 0], tris[rightChild.TriangleIndex * 3 + 1], tris[rightChild.TriangleIndex * 3 + 2])) {
368-
int qIdx = atomicAdd(queryCount, 12);
382+
size_t qIdx = atomicAddSizeT(queryCount, static_cast<size_t>(12));
369383
if (qIdx + 12 < maxNumQueries) {
370384
Query* qBegin = &queries[qIdx];
371385
fillQuery(qBegin, myNode.TriangleIndex, rightChild.TriangleIndex, tris);
@@ -401,7 +415,7 @@ bool CollisionDetection<Scalar>::DetectCollisionCandidates(const BVHNode<Scalar>
401415
overflowHappened = true;
402416
maxNumQueries *= 2;
403417
std::cerr << "Query buffer overflow, resizing to " << maxNumQueries << std::endl;
404-
if (maxNumQueries > 1 << 31) {
418+
if (maxNumQueries > (static_cast<size_t>(1) << 31)) {
405419
std::cerr << "Number of queries exceeds 2^31, aborting" << std::endl;
406420
exit(1);
407421
return false;

0 commit comments

Comments
 (0)