Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
cmake_minimum_required(VERSION 3.17)
project(SofaCUDALinearSolver VERSION 1.0 LANGUAGES CXX)
cmake_minimum_required(VERSION 3.24)
project(SofaCUDALinearSolver VERSION 1.0 LANGUAGES CUDA CXX)


# Find and load CMake configuration of packages containing this plugin's dependencies
## Mandatory dependencies
find_package(Sofa.Core REQUIRED)
sofa_find_package(Sofa.Component.LinearSolver.Iterative REQUIRED)
find_package(CUDA REQUIRED)
find_package(CUDAToolkit REQUIRED)
sofa_find_package(CUDAToolkit 12.0 REQUIRED)
sofa_find_package(cudss QUIET)

if(cudss_FOUND)
message("cudss has been found, enabling CUDACholeksySparseSolverCUDSS.")
else()
message("cudss has not been found, cudss-based Solver will not be compiled.")
endif()

# List all files
set(SOFACUDALINEARSOLVER_SRC_DIR src/${PROJECT_NAME})
Expand All @@ -25,16 +31,27 @@ set(README_FILES
README.md
)

if(cudss_FOUND)
list(APPEND HEADER_FILES
${SOFACUDALINEARSOLVER_SRC_DIR}/CUDASparseCholeskySolverCUDSS.h
${SOFACUDALINEARSOLVER_SRC_DIR}/CUDASparseCholeskySolverCUDSS.inl
)
list(APPEND SOURCE_FILES
${SOFACUDALINEARSOLVER_SRC_DIR}/CUDASparseCholeskySolverCUDSS.cpp
)
endif()

cuda_include_directories("${SOFACUDALINEARSOLVER_SRC_DIR}")
cuda_include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
# Create the plugin library.
cuda_add_library(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SOURCE_FILES} ${README_FILES})
add_library(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SOURCE_FILES} ${README_FILES})

# Link the plugin library to its dependency(ies).
target_link_libraries(${PROJECT_NAME} Sofa.Core)
target_link_libraries(${PROJECT_NAME} Sofa.Component.LinearSolver.Iterative)
target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cusparse CUDA::cusolver)
target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cusparse CUDA::cusolver cudss)

if(cudss_FOUND)
target_link_libraries(${PROJECT_NAME} cudss)
endif()

target_include_directories(${PROJECT_NAME} PUBLIC "/usr/include/cuda/")

Expand Down
7 changes: 7 additions & 0 deletions example/FEMBAR_CUDASparseCholeskySolverCUDSS.scn
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<Node name="root" dt="0.02" gravity="0 -10 0">

<include href="FEMBAR-common.xml"/>

<CUDASparseCholeskySolverCUDSS template="CompressedRowSparseMatrixMat3x3d" permutation="METIS"/>

</Node>
6 changes: 3 additions & 3 deletions src/SofaCUDALinearSolver/CUDACholeksySparseSolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class CUDASparseCholeskySolver : public sofa::component::linearsolver::MatrixLin
int singularity;

// csr format
int* host_RowPtr;
int* host_RowPtr;
int* host_ColsInd;
Real* host_values;

Expand Down Expand Up @@ -97,7 +97,7 @@ class CUDASparseCholeskySolver : public sofa::component::linearsolver::MatrixLin

void* buffer_gpu;
void* buffer_cpu;

bool notSameShape;

int previous_n;
Expand All @@ -116,7 +116,7 @@ class CUDASparseCholeskySolver : public sofa::component::linearsolver::MatrixLin
void symbolicFactorization();

sofa::linearalgebra::CompressedRowSparseMatrix<Real> m_filteredMatrix;

};
// compare the shape of 2 matrices given in csr format, return true if the matrices don't have the same shape
bool compareMatrixShape(int, const int *,const int *, int,const int *,const int *) ;
Expand Down
12 changes: 6 additions & 6 deletions src/SofaCUDALinearSolver/CUDACholeksySparseSolver.inl
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ void CUDASparseCholeskySolver<TMatrix,TVector>:: invert(Matrix& M)
switch( reorder )
{
default:
case 0:// None, this case should not be visited
case 0:// None, this case should not be visited
break;

case 1://RCM, Symmetric Reverse Cuthill-McKee permutation
Expand Down Expand Up @@ -340,8 +340,8 @@ void CUDASparseCholeskySolver<TMatrix,TVector>:: invert(Matrix& M)
const Real* hostValuesToCopy = reorder != 0 ? host_valuePermuted.data() : host_values;
checkCudaErrors( cudaMemcpyAsync( device_values, hostValuesToCopy, sizeof(Real)*nnz, cudaMemcpyHostToDevice, stream ) );
}
// factorize on device LL^t = PAP^t

// factorize on device LL^t = PAP^t
if(notSameShape)
{
createCholeskyInfo();
Expand All @@ -362,7 +362,7 @@ void CUDASparseCholeskySolver<TMatrix,TVector>:: invert(Matrix& M)
checkCudaErrors(cudaMalloc(&buffer_gpu, sizeof(char)*size_work));
}
}

{
sofa::helper::ScopedAdvancedTimer numericTimer("Numeric factorization");
numericFactorization();
Expand Down Expand Up @@ -462,9 +462,9 @@ void CUDASparseCholeskySolver<TMatrix,TVector>::solve(Matrix& M, Vector& x, Vect
inline bool compareMatrixShape(const int s_M,const int * M_colind,const int * M_rowptr,const int s_P,const int * P_colind,const int * P_rowptr)
{
if (s_M != s_P) return true;
if (M_rowptr[s_M] != P_rowptr[s_M] ) return true;
if (M_rowptr[s_M] != P_rowptr[s_M] ) return true;
for (int i=0;i<s_P;i++) {
if (M_rowptr[i]!=P_rowptr[i]) return true;
if (M_rowptr[i]!=P_rowptr[i]) return true;
}
for (int i=0;i<M_rowptr[s_M];i++) {
if (M_colind[i]!=P_colind[i]) return true;
Expand Down
60 changes: 60 additions & 0 deletions src/SofaCUDALinearSolver/CUDASparseCholeskySolverCUDSS.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/******************************************************************************
* SOFA, Simulation Open-Framework Architecture *
* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as published by *
* the Free Software Foundation; either version 2.1 of the License, or (at *
* your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT *
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
* for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
*******************************************************************************
* Authors: The SOFA Team and external contributors (see Authors.txt) *
* *
* Contact information: contact@sofa-framework.org *
******************************************************************************/
#define SOFA_PLUGIN_CUDASPARSECHOLESKYSOLVERCUDSS_CPP
#include <SofaCUDALinearSolver/CUDASparseCholeskySolverCUDSS.inl>
#include <sofa/component/linearsolver/iterative/MatrixLinearSolver.inl>
#include <sofa/component/linearsystem/MatrixProjectionMethod.inl>
#include <sofa/component/linearsystem/MappedMassMatrixObserver.inl>
#include <sofa/core/ObjectFactory.h>

namespace sofa::component::linearsolver::direct
{

using namespace sofa::linearalgebra;

int CUDASparseCholeskySolverCUDSSClass = core::RegisterObject("Direct linear solver based on Sparse Cholesky factorization, implemented with the cuSOLVER library")
.add< CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<float>,FullVector<float> > >()
.add< CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, float> >,FullVector<float> > >()
.add< CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<double>,FullVector<double> > >()
.add< CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, double> >,FullVector<double> > >()
;

template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<float>,FullVector<float> > ;
template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, float> >,FullVector<float> > ;
template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<double>,FullVector<double> > ;
template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, double> >,FullVector<double> > ;

} // namespace sofa::component::linearsolver::direct

using OtherFloatingType = std::conditional_t<std::is_same_v<SReal, double>, float, double>;

namespace sofa::component::linearsystem
{
template struct SOFACUDALINEARSOLVER_API MappedMassMatrixObserver<OtherFloatingType>;
template class SOFACUDALINEARSOLVER_API MatrixProjectionMethod<sofa::linearalgebra::CompressedRowSparseMatrix<OtherFloatingType> >;
}

namespace sofa::component::linearsolver
{
template class SOFACUDALINEARSOLVER_API MatrixLinearSolver< CompressedRowSparseMatrix<OtherFloatingType>,FullVector<OtherFloatingType> > ;
template class SOFACUDALINEARSOLVER_API MatrixLinearSolver< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, OtherFloatingType> >,FullVector<OtherFloatingType> > ;
}
149 changes: 149 additions & 0 deletions src/SofaCUDALinearSolver/CUDASparseCholeskySolverCUDSS.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/******************************************************************************
* SOFA, Simulation Open-Framework Architecture *
* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as published by *
* the Free Software Foundation; either version 2.1 of the License, or (at *
* your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT *
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
* for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
*******************************************************************************
* Authors: The SOFA Team and external contributors (see Authors.txt) *
* *
* Contact information: contact@sofa-framework.org *
******************************************************************************/
#pragma once

#include <SofaCUDALinearSolver/config.h>

#include <sofa/linearalgebra/CompressedRowSparseMatrix.h>
#include <sofa/core/behavior/LinearSolver.h>
#include <sofa/component/linearsolver/iterative/MatrixLinearSolver.h>
#include <sofa/simulation/MechanicalVisitor.h>
#include <sofa/helper/OptionsGroup.h>

// cuDSS for GPU path (modern high-performance sparse direct solver)
#include <cudss.h>

// cusolverSp for CPU path (host functions)
#include <cusolverSp.h>
#include <cusolverSp_LOWLEVEL_PREVIEW.h>

namespace sofa::component::linearsolver::direct
{

// Direct linear solver based on Sparse Cholesky factorization, implemented with the cuDSS library (GPU) or cuSOLVER (CPU)
template<class TMatrix, class TVector>
class CUDASparseCholeskySolverCUDSS : public sofa::component::linearsolver::MatrixLinearSolver<TMatrix,TVector>
{
public:
SOFA_CLASS(SOFA_TEMPLATE2(CUDASparseCholeskySolverCUDSS,TMatrix,TVector),SOFA_TEMPLATE2(sofa::component::linearsolver::MatrixLinearSolver,TMatrix,TVector));

typedef TMatrix Matrix;
typedef TVector Vector;
typedef typename Matrix::Real Real;
typedef sofa::component::linearsolver::MatrixLinearSolver<TMatrix,TVector> Inherit;

void solve (Matrix& M, Vector& x, Vector& b) override;
void invert(Matrix& M) override;

private:

Data<sofa::helper::OptionsGroup> d_typePermutation;
Data<sofa::helper::OptionsGroup> d_hardware;

int rows; ///< number of rows
int cols; ///< number of columns
int nnz; ///< number of non zero elements

// Host pointers to matrix data (points into m_filteredMatrix)
int* host_RowPtr;
int* host_ColsInd;
Real* host_values;

// Device memory for matrix (CSR format)
int* device_RowPtr;
int* device_ColsInd;
Real* device_values;

// Device memory for solution and RHS vectors
Real* device_x;
Real* device_b;

// CUDA stream
cudaStream_t stream;

// ============ cuDSS (GPU path) ============
cudssHandle_t cudssHandle;
cudssConfig_t cudssConfig;
cudssData_t cudssData;
cudssMatrix_t cudssMatrixA;
cudssMatrix_t cudssMatrixX;
cudssMatrix_t cudssMatrixB;
bool cudssInitialized;

// ============ cusolverSp (CPU path) ============
cusolverSpHandle_t cusolverHandle;
cusparseMatDescr_t cusparseDescr;
csrcholInfoHost_t host_info;
void* buffer_cpu;
size_t size_internal_cpu;
size_t size_work_cpu;

// CPU path permutation data
int reorder;
sofa::type::vector<int> host_perm;
sofa::type::vector<int> host_map;
sofa::type::vector<int> host_rowPermuted;
sofa::type::vector<int> host_colPermuted;
sofa::type::vector<Real> host_valuePermuted;
sofa::type::vector<Real> host_bPermuted;
sofa::type::vector<Real> host_xPermuted;
size_t size_perm;
void* buffer_perm;

// ============ Common state ============
bool notSameShape;
int previous_n;
int previous_nnz;

sofa::type::vector<int> previous_ColsInd;
sofa::type::vector<int> previous_RowPtr;

CUDASparseCholeskySolverCUDSS();
~CUDASparseCholeskySolverCUDSS() override;

// GPU path methods (cuDSS)
void initCuDSS();
void cleanupCuDSS();
void invertGPU();
void solveGPU(int n, Real* b_host, Real* x_host);

// CPU path methods (cusolverSp)
void initCuSolverHost();
void cleanupCuSolverHost();
void invertCPU();
void solveCPU(int n, Real* b_host, Real* x_host);

sofa::linearalgebra::CompressedRowSparseMatrix<Real> m_filteredMatrix;

};

// compare the shape of 2 matrices given in csr format, return true if the matrices don't have the same shape
bool compareMatrixShape(int, const int *,const int *, int,const int *,const int *) ;

#if !defined(SOFA_PLUGIN_CUDASPARSECHOLESKYSOLVERCUDSS_CPP)
extern template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<float>,FullVector<float> > ;
extern template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, float> >,FullVector<float> > ;
extern template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<double>,FullVector<double> > ;
extern template class SOFACUDALINEARSOLVER_API CUDASparseCholeskySolverCUDSS< CompressedRowSparseMatrix<sofa::type::Mat<3, 3, double> >,FullVector<double> > ;
#endif

} // namespace sofa::component::linearsolver::direct
Loading