Skip to content

Commit f3a241e

Browse files
committed
Fix clangd Configs and Update Transformer Examples
1 parent fad111e commit f3a241e

8 files changed

Lines changed: 239 additions & 197 deletions

File tree

.clangd

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,3 @@
1-
CompileFlags:
2-
Add:
3-
- -std=c++20
4-
- --no-cuda-version-check
5-
Remove:
6-
- -ccbin
7-
- -forward-unknown-to-host-compiler
8-
- -rdc=true
9-
- -gencode
10-
- --generate-code*
11-
- -Xcudafe
12-
- --diag_suppress=*
13-
- --expt-relaxed-constexpr
14-
- --expt-extended-lambda
15-
- -Xcompiler*
16-
- -arch=*
17-
181
Index:
192
Background: Build
203
StandardLibrary: Yes
@@ -36,6 +19,7 @@ Diagnostics:
3619
readability-identifier-length,
3720
readability-magic-numbers,
3821
readability-function-cognitive-complexity,
22+
readability-redundant-access-specifiers,
3923
modernize-avoid-c-arrays,
4024
readability-math-missing-parentheses,
4125
]
@@ -50,4 +34,32 @@ Diagnostics:
5034
readability-identifier-naming.ClassCase: CamelCase
5135
readability-identifier-naming.StructCase: CamelCase
5236
readability-identifier-naming.FunctionCase: camelBack
53-
readability-identifier-naming.ClassMethodCase: camelBack
37+
readability-identifier-naming.ClassMethodCase: camelBack
38+
39+
CompileFlags:
40+
Add:
41+
- -Wall
42+
- -Wextra
43+
Remove:
44+
- -ccbin
45+
- -forward-unknown-to-host-compiler
46+
- --generate-code*
47+
- -arch*
48+
- -rdc=true
49+
- -Xcudafe
50+
- --diag_suppress=*
51+
- --expt-relaxed-constexpr
52+
- --expt-extended-lambda
53+
- -gencode
54+
- -Xcompiler*
55+
- -fmodules*
56+
- -fmodule-mapper*
57+
- -fdeps-format*
58+
59+
---
60+
61+
If:
62+
PathMatch: [.*\.cu, .*\.cuh]
63+
CompileFlags:
64+
Add:
65+
- --cuda-gpu-arch=sm_89

csrc/include/pmpp/types/concepts.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <iterator>
44
#include <ranges>
55
#include <type_traits>
6+
#include <concepts>
67

78
namespace pmpp
89
{

csrc/include/pmpp/utils/common.cuh

Lines changed: 31 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,62 +4,52 @@
44
#include <cuda_runtime_api.h>
55
#include <stdexcept>
66

7-
#ifdef PMPP_CUDA_ERR_CHECK
8-
#error "PMPP_CUDA_ERR_CHECK already defined."
7+
/**
8+
* @brief Check the given cuda error. Exit with `EXIT_FAILURE` if not
9+
* success.
10+
* The error message is printed to `stderr`.
11+
*/
12+
#define PMPP_CUDA_ERR_CHECK(err) \
13+
do { \
14+
cudaError_t err_ = (err); \
15+
if (err_ != cudaSuccess) { \
16+
::fprintf( \
17+
stderr, "CUDA error at %s:%d; Error code: %d(%s) \"%s\"", \
18+
__FILE__, __LINE__, err, ::cudaGetErrorString(err_), #err); \
19+
::cudaDeviceReset(); \
20+
::std::exit(EXIT_FAILURE); \
21+
} \
22+
} while (0)
23+
24+
#define PMPP_CUDA_ABORT(msg) \
25+
do { \
26+
::fprintf(stderr, "Abort at %s:%d \"%s\"", __FILE__, __LINE__, msg); \
27+
::cudaDeviceReset(); \
28+
::std::abort(); \
29+
} while (0)
30+
31+
#ifdef NDEBUG
32+
/**
33+
* @brief Cuda error check is turned off on Release mode.
34+
*/
35+
#define PMPP_DEBUG_CUDA_ERR_CHECK(err) ((void) 0)
936
#else
1037
/**
1138
* @brief Check the given cuda error. Exit with `EXIT_FAILURE` if not
1239
* success.
1340
* The error message is printed to `stderr`.
1441
*/
15-
#define PMPP_CUDA_ERR_CHECK(err) \
16-
do { \
17-
cudaError_t err_ = (err); \
18-
if (err_ != cudaSuccess) { \
19-
::fprintf(stderr, \
20-
"CUDA error at %s:%d; Error code: %d(%s) \"%s\"", \
21-
__FILE__, __LINE__, err, \
22-
::cudaGetErrorString(err_), #err); \
23-
::cudaDeviceReset(); \
24-
::std::abort(); \
25-
} \
26-
} while (0)
27-
28-
#define PMPP_ABORT(msg) \
29-
do { \
30-
::fprintf(stderr, "Abort at %s:%d \"%s\"", __FILE__, __LINE__, \
31-
msg); \
32-
::cudaDeviceReset(); \
33-
::std::abort(); \
34-
} while (0)
35-
#endif
36-
37-
#ifdef PMPP_DEBUG_CUDA_ERR_CHECK
38-
#error "PMPP_DEBUG_CUDA_ERR_CHECK already defined."
39-
#else
40-
#ifdef NDEBUG
41-
/**
42-
* @brief Cuda error check is turned off on Release mode.
43-
*/
44-
#define PMPP_DEBUG_CUDA_ERR_CHECK(err) ((void) 0)
45-
#else
46-
/**
47-
* @brief Check the given cuda error. Exit with `EXIT_FAILURE` if not
48-
* success.
49-
* The error message is printed to `stderr`.
50-
*/
51-
#define PMPP_DEBUG_CUDA_ERR_CHECK(err) PMPP_CUDA_ERR_CHECK(err)
52-
#endif
42+
#define PMPP_DEBUG_CUDA_ERR_CHECK(err) PMPP_CUDA_ERR_CHECK(err)
5343
#endif
5444

5545
namespace pmpp::cuda
5646
{
47+
5748
template <typename T>
5849
__host__ __device__ void initMemory(T* ptr, size_t n, const T& val)
5950
{
6051
for (size_t i = 0; i < n; ++i) {
6152
ptr[i] = val;
6253
}
6354
}
64-
6555
} // namespace pmpp::cuda

csrc/include/pmpp/utils/math.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ namespace pmpp
1414
* @return The ceiling of the division of `a` by `b`.
1515
*/
1616
template <typename T1, typename T2>
17-
requires std::is_integral_v<T1> && std::is_integral_v<T2>
18-
[[nodiscard]] constexpr auto ceilDiv(T1 a, T2 b) -> T1
17+
[[nodiscard]]
18+
constexpr auto ceilDiv(T1 a, T2 b) -> T1
1919
{
2020
return T1((a + b - 1) / b);
2121
}

csrc/lib/ops/vecAdd/op.cuh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace pmpp::ops::cuda
99
__global__ void vecAddKernelv0(const fp32_t* a, const fp32_t* b, fp32_t* c,
1010
int32_t n)
1111
{
12-
12+
// Coalesced DRAM access
1313
int gtid = threadIdx.x + blockDim.x * blockIdx.x;
1414
if (gtid < n) {
1515
// [DRAM] 2 load, 1 store, 3 inst
@@ -20,9 +20,10 @@ __global__ void vecAddKernelv0(const fp32_t* a, const fp32_t* b, fp32_t* c,
2020
__global__ void vecAddKernelv1(const fp32_t* a, const fp32_t* b, fp32_t* c,
2121
int32_t n)
2222
{
23-
23+
// Coalesced DRAM access
2424
int gtid = threadIdx.x + blockDim.x * blockIdx.x;
2525
gtid = gtid % 2 == 0 ? gtid + 1 : gtid - 1;
26+
gtid = gtid == n ? n - 1 : gtid;
2627
if (gtid < n) {
2728
// [DRAM] 2 load, 1 store, 3 inst
2829
c[gtid] = a[gtid] + b[gtid];
@@ -32,8 +33,10 @@ __global__ void vecAddKernelv1(const fp32_t* a, const fp32_t* b, fp32_t* c,
3233
__global__ void vecAddKernelv2(const fp32_t* a, const fp32_t* b, fp32_t* c,
3334
int32_t n)
3435
{
35-
36-
int gtid = threadIdx.x + blockDim.x * blockIdx.x + 1;
36+
int gtid = threadIdx.x + blockDim.x * blockIdx.x;
37+
if (gtid % warpSize == 0) {
38+
gtid = (gtid + warpSize) % (ceilDiv(n, warpSize) * warpSize);
39+
}
3740
if (gtid < n) {
3841
// [DRAM] 2 load, 1 store, 3 inst
3942
c[gtid] = a[gtid] + b[gtid];
@@ -53,9 +56,9 @@ void launchVecAdd(const fp32_t* d_A, const fp32_t* d_B, fp32_t* d_C, size_t n)
5356
} else if (VERSION == 2) {
5457
vecAddKernelv2<<<gridSize, blockSize>>>(d_A, d_B, d_C, n);
5558
} else {
56-
PMPP_ABORT(std::format("Unsupported version: {}", VERSION).c_str());
59+
PMPP_CUDA_ABORT(
60+
std::format("Unsupported version: {}", VERSION).c_str());
5761
}
58-
5962
PMPP_DEBUG_CUDA_ERR_CHECK(cudaGetLastError());
6063
}
6164

csrc/test/OpTest/vecAdd.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,8 @@ TEST_F(OpTest, VecAddv2)
103103
std::cout << std::format("nElems: {}, cosSim: {}\n", nElems,
104104
cosSim.item<fp32_t>());
105105

106-
// // [NOTE] This won't pass because the kernel is deliberately wrong
107-
// EXPECT_TRUE(matCh.allclose(matCd2h));
108-
// EXPECT_GE(cosSim.item<fp32_t>(), 0.99);
106+
EXPECT_TRUE(matCh.allclose(matCd2h));
107+
EXPECT_GE(cosSim.item<fp32_t>(), 0.99);
109108
}
110109
}
111110
} // namespace pmpp::test::ops

csrc/vcpkg.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
{
22
"dependencies": [
33
"cxxopts",
4-
"fmt",
54
"spdlog",
65
"proxy",
76
"gtest",

0 commit comments

Comments
 (0)