Skip to content

Commit ad01866

Browse files
authored
Merge pull request #1958 from CEED/jeremy/flags-helper
Flags Helper function
2 parents c381141 + a83a12a commit ad01866

5 files changed

Lines changed: 177 additions & 146 deletions

File tree

backends/cuda/ceed-cuda-compile.cpp

Lines changed: 91 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -62,101 +62,121 @@ static int CeedCallSystem_Core(Ceed ceed, const char *command, const char *messa
6262
}
6363

6464
//------------------------------------------------------------------------------
65-
// Compile CUDA kernel
65+
// Build array of JIT flags
6666
//------------------------------------------------------------------------------
67-
using std::ifstream;
68-
using std::ofstream;
69-
using std::ostringstream;
70-
71-
static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_error, bool *is_compile_good, CUmodule *module,
72-
const CeedInt num_defines, va_list args) {
73-
size_t ptx_size;
74-
char *ptx;
75-
const int num_opts = 4;
76-
CeedInt num_jit_source_dirs = 0, num_jit_defines = 0;
77-
const char **opts;
78-
nvrtcProgram prog;
79-
struct cudaDeviceProp prop;
80-
Ceed_Cuda *ceed_data;
81-
82-
cudaFree(0); // Make sure a Context exists for nvrtc
83-
84-
std::ostringstream code;
85-
bool using_clang;
86-
87-
CeedCallBackend(CeedGetIsClang(ceed, &using_clang));
88-
89-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS,
90-
using_clang ? "Compiling CUDA with Clang backend (with Rust QFunction support)"
91-
: "Compiling CUDA with NVRTC backend (without Rust QFunction support).\nTo use the Clang backend, set the environment "
92-
"variable GPU_CLANG=1");
67+
static inline int CeedJitGetOpts_Cuda(Ceed ceed, const char ***opts, int *num_opts) {
68+
int opts_count = 4;
9369

94-
// Get kernel specific options, such as kernel constants
95-
if (num_defines > 0) {
96-
char *name;
97-
int val;
98-
99-
for (int i = 0; i < num_defines; i++) {
100-
name = va_arg(args, char *);
101-
val = va_arg(args, int);
102-
code << "#define " << name << " " << val << "\n";
103-
}
104-
}
105-
106-
// Standard libCEED definitions for CUDA backends
107-
code << "#include <ceed/jit-source/cuda/cuda-jit.h>\n\n";
70+
// Standard options
71+
CeedCallBackend(CeedCalloc(opts_count, opts));
72+
CeedCallBackend(CeedStringAllocCopy("-default-device", (char **)&(*opts)[0]));
73+
{
74+
Ceed_Cuda *ceed_data;
75+
struct cudaDeviceProp prop;
10876

109-
// Non-macro options
110-
CeedCallBackend(CeedCalloc(num_opts, &opts));
111-
opts[0] = "-default-device";
112-
CeedCallBackend(CeedGetData(ceed, &ceed_data));
113-
CeedCallCuda(ceed, cudaGetDeviceProperties(&prop, ceed_data->device_id));
114-
std::string arch_arg =
77+
CeedCallBackend(CeedGetData(ceed, &ceed_data));
78+
CeedCallCuda(ceed, cudaGetDeviceProperties(&prop, ceed_data->device_id));
79+
std::string arch_arg =
11580
#if CUDA_VERSION >= 11010
116-
// NVRTC used to support only virtual architectures through the option
117-
// -arch, since it was only emitting PTX. It will now support actual
118-
// architectures as well to emit SASS.
119-
// https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#dynamic-code-generation
120-
"-arch=sm_"
81+
// NVRTC used to support only virtual architectures through the option
82+
// -arch, since it was only emitting PTX. It will now support actual
83+
// architectures as well to emit SASS.
84+
// https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#dynamic-code-generation
85+
"-arch=sm_"
12186
#else
122-
"-arch=compute_"
87+
"-arch=compute_"
12388
#endif
124-
+ std::to_string(prop.major) + std::to_string(prop.minor);
125-
opts[1] = arch_arg.c_str();
126-
opts[2] = "-Dint32_t=int";
127-
opts[3] = "-DCEED_RUNNING_JIT_PASS=1";
89+
+ std::to_string(prop.major) + std::to_string(prop.minor);
90+
91+
CeedCallBackend(CeedStringAllocCopy(arch_arg.c_str(), (char **)&(*opts)[1]));
92+
}
93+
CeedCallBackend(CeedStringAllocCopy("-Dint32_t=int", (char **)&(*opts)[2]));
94+
CeedCallBackend(CeedStringAllocCopy("-DCEED_RUNNING_JIT_PASS=1", (char **)&(*opts)[3]));
95+
12896
// Additional include dirs
12997
{
13098
const char **jit_source_dirs;
99+
CeedInt num_jit_source_dirs;
131100

132101
CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs));
133-
CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs, &opts));
102+
CeedCallBackend(CeedRealloc(opts_count + num_jit_source_dirs, opts));
134103
for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
135104
std::ostringstream include_dir_arg;
136105

137106
include_dir_arg << "-I" << jit_source_dirs[i];
138-
CeedCallBackend(CeedStringAllocCopy(include_dir_arg.str().c_str(), (char **)&opts[num_opts + i]));
107+
CeedCallBackend(CeedStringAllocCopy(include_dir_arg.str().c_str(), (char **)&(*opts)[opts_count + i]));
139108
}
140109
CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs));
110+
opts_count += num_jit_source_dirs;
141111
}
112+
142113
// User defines
143114
{
144115
const char **jit_defines;
116+
CeedInt num_jit_defines;
145117

146118
CeedCallBackend(CeedGetJitDefines(ceed, &num_jit_defines, &jit_defines));
147-
CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs + num_jit_defines, &opts));
119+
CeedCallBackend(CeedRealloc(opts_count + num_jit_defines, opts));
148120
for (CeedInt i = 0; i < num_jit_defines; i++) {
149121
std::ostringstream define_arg;
150122

151123
define_arg << "-D" << jit_defines[i];
152-
CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i]));
124+
CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&(*opts)[opts_count + i]));
153125
}
154126
CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines));
127+
opts_count += num_jit_defines;
155128
}
129+
*num_opts = opts_count;
130+
return CEED_ERROR_SUCCESS;
131+
}
132+
133+
//------------------------------------------------------------------------------
134+
// Compile CUDA kernel
135+
//------------------------------------------------------------------------------
136+
using std::ifstream;
137+
using std::ofstream;
138+
using std::ostringstream;
139+
140+
static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_error, bool *is_compile_good, CUmodule *module,
141+
const CeedInt num_defines, va_list args) {
142+
bool using_clang;
143+
size_t ptx_size;
144+
char *ptx;
145+
const char **opts;
146+
int num_opts;
147+
nvrtcProgram prog;
148+
std::ostringstream code;
149+
150+
// Make sure a Context exists for nvrtc
151+
cudaFree(0);
152+
153+
CeedCallBackend(CeedGetIsClang(ceed, &using_clang));
154+
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS,
155+
using_clang ? "Compiling CUDA with Clang backend (with Rust QFunction support)"
156+
: "Compiling CUDA with NVRTC backend (without Rust QFunction support)."
157+
"\nTo use the Clang backend, set the environment variable GPU_CLANG=1");
158+
159+
// Get kernel specific options, such as kernel constants
160+
if (num_defines > 0) {
161+
char *name;
162+
int val;
163+
164+
for (int i = 0; i < num_defines; i++) {
165+
name = va_arg(args, char *);
166+
val = va_arg(args, int);
167+
code << "#define " << name << " " << val << "\n\n";
168+
}
169+
}
170+
171+
// Standard libCEED definitions for CUDA backends
172+
code << "#include <ceed/jit-source/cuda/cuda-jit.h>\n\n";
156173

157174
// Add string source argument provided in call
158175
code << source;
159176

177+
// Get compile options
178+
CeedCallBackend(CeedJitGetOpts_Cuda(ceed, &opts, &num_opts));
179+
160180
// Compile kernel
161181
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n");
162182
CeedDebug(ceed, "Source:\n%s\n", code.str().c_str());
@@ -168,23 +188,16 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
168188
if (CeedDebugFlag(ceed)) {
169189
// LCOV_EXCL_START
170190
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- JiT COMPILER OPTIONS ----------\n");
171-
for (CeedInt i = 0; i < num_opts + num_jit_source_dirs + num_jit_defines; i++) {
172-
CeedDebug(ceed, "Option %d: %s", i, opts[i]);
173-
}
191+
for (CeedInt i = 0; i < num_opts; i++) CeedDebug(ceed, "Option %d: %s", i, opts[i]);
174192
CeedDebug(ceed, "");
175193
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JiT COMPILER OPTIONS ----------\n");
176194
// LCOV_EXCL_STOP
177195
}
196+
nvrtcResult result = nvrtcCompileProgram(prog, num_opts, opts);
178197

179-
nvrtcResult result = nvrtcCompileProgram(prog, num_opts + num_jit_source_dirs + num_jit_defines, opts);
180-
181-
for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
182-
CeedCallBackend(CeedFree(&opts[num_opts + i]));
183-
}
184-
for (CeedInt i = 0; i < num_jit_defines; i++) {
185-
CeedCallBackend(CeedFree(&opts[num_opts + num_jit_source_dirs + i]));
186-
}
198+
for (CeedInt i = 0; i < num_opts; i++) CeedCallBackend(CeedFree(&opts[i]));
187199
CeedCallBackend(CeedFree(&opts));
200+
188201
*is_compile_good = result == NVRTC_SUCCESS;
189202
if (!*is_compile_good) {
190203
// LCOV_EXCL_START
@@ -217,13 +230,13 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
217230
CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx));
218231
#endif
219232
CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));
220-
221233
CeedCallCuda(ceed, cuModuleLoadData(module, ptx));
222234
CeedCallBackend(CeedFree(&ptx));
223235
return CEED_ERROR_SUCCESS;
224236
} else {
225237
srand(time(NULL));
226-
const int build_id = rand();
238+
const int build_id = rand();
239+
struct cudaDeviceProp prop;
227240

228241
// Create temp dir if needed
229242
{
@@ -283,6 +296,9 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
283296
}
284297

285298
// Get Clang version
299+
Ceed_Cuda *ceed_data;
300+
301+
CeedCallBackend(CeedGetData(ceed, &ceed_data));
286302
bool use_llvm_version = ceed_data->use_llvm_version;
287303
int llvm_version = ceed_data->llvm_version;
288304

@@ -324,6 +340,7 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
324340
}
325341

326342
// Compile wrapper kernel
343+
CeedCallCuda(ceed, cudaGetDeviceProperties(&prop, ceed_data->device_id));
327344
command = "clang++" + (use_llvm_version ? (std::string("-") + std::to_string(llvm_version)) : "") + " -flto=thin --cuda-gpu-arch=sm_" +
328345
std::to_string(prop.major) + std::to_string(prop.minor) + " --cuda-device-only -emit-llvm -S temp/kernel_" + std::to_string(build_id) +
329346
"_0_source.cu -o temp/kernel_" + std::to_string(build_id) + "_1_wrapped.ll ";

0 commit comments

Comments
 (0)