Skip to content

Commit a58baec

Browse files
committed
cuda - name compile temp files
1 parent b174ae4 commit a58baec

23 files changed

Lines changed: 874 additions & 169 deletions

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -620,9 +620,9 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
620620
code << tab << "// Nothing to do AtPoints\n";
621621
} else {
622622
CeedBasis_Cuda_shared *basis_data;
623-
std::string function_name = is_tensor
624-
? ((dim == 1 ? "Weight" : "WeightTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
625-
: "WeightNonTensor";
623+
std::string function_name = is_tensor
624+
? ((dim == 1 ? "Weight" : "WeightTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
625+
: "WeightNonTensor";
626626

627627
code << tab << "CeedScalar r_q" << var_suffix << "[" << (is_all_tensor && (dim >= 3) ? Q_name : "1") << "];\n";
628628
CeedCallBackend(CeedBasisGetData(basis, &basis_data));
@@ -1637,7 +1637,8 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
16371637
const CeedInt T_1d = CeedIntMax(is_all_tensor ? Q_1d : Q, data->max_P_1d);
16381638

16391639
data->thread_1d = T_1d;
1640-
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), &is_compile_good, &data->module, 1, "OP_T_1D", T_1d));
1640+
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), (std::string("operator_") + qfunction_name).c_str(), &is_compile_good,
1641+
&data->module, 1, "OP_T_1D", T_1d));
16411642
if (is_compile_good) {
16421643
*is_good_build = true;
16431644
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, operator_name.c_str(), &data->op));
@@ -2116,8 +2117,9 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Cuda_gen(CeedOperator op, boo
21162117
const CeedInt T_1d = CeedIntMax(is_all_tensor ? Q_1d : Q, data->max_P_1d);
21172118

21182119
data->thread_1d = T_1d;
2119-
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), &is_compile_good,
2120-
is_full ? &data->module_assemble_full : &data->module_assemble_diagonal, 1, "OP_T_1D", T_1d));
2120+
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), (std::string("operator_assembly_at_points_") + qfunction_name).c_str(),
2121+
&is_compile_good, is_full ? &data->module_assemble_full : &data->module_assemble_diagonal, 1, "OP_T_1D",
2122+
T_1d));
21212123
if (is_compile_good) {
21222124
*is_good_build = true;
21232125
CeedCallBackend(CeedGetKernel_Cuda(ceed, is_full ? data->module_assemble_full : data->module_assemble_diagonal, operator_name.c_str(),
@@ -2711,7 +2713,8 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
27112713
const CeedInt T_1d = CeedIntMax(is_all_tensor ? Q_1d : Q, data->max_P_1d);
27122714

27132715
data->thread_1d = T_1d;
2714-
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), &is_compile_good, &data->module_assemble_qfunction, 1, "OP_T_1D", T_1d));
2716+
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), (std::string("operator_assembly_") + qfunction_name).c_str(), &is_compile_good,
2717+
&data->module_assemble_qfunction, 1, "OP_T_1D", T_1d));
27152718
if (is_compile_good) {
27162719
*is_good_build = true;
27172720
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module_assemble_qfunction, operator_name.c_str(), &data->assemble_qfunction));

backends/cuda-ref/ceed-cuda-ref-basis.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -181,9 +181,9 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
181181

182182
if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints));
183183
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
184-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
185-
Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
186-
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
184+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, "basis_at_points", &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d,
185+
"BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP",
186+
num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
187187
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
188188
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
189189
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
@@ -421,9 +421,9 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
421421
const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-tensor.h>\n";
422422

423423
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
424-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
425-
Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
426-
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
424+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, "basis_h1_tensor", &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d,
425+
"BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP",
426+
num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
427427
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
428428
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad));
429429
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
@@ -477,8 +477,8 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes
477477
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
478478

479479
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
480-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
481-
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
480+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, "basis_h1_nontensor", &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes,
481+
"BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
482482
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
483483
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
484484
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
@@ -532,8 +532,8 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod
532532
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
533533

534534
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
535-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
536-
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
535+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, "basis_h_div", &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes,
536+
"BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
537537
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
538538
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
539539
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
@@ -587,8 +587,8 @@ int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_no
587587
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
588588

589589
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
590-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
591-
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
590+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, "basis_h_curl", &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes,
591+
"BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
592592
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
593593
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
594594
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));

backends/cuda-ref/ceed-cuda-ref-operator.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,9 +1408,10 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op,
14081408
CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp));
14091409
if (basis_in == CEED_BASIS_NONE) num_qpts = num_nodes;
14101410
else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
1411-
CeedCallCuda(ceed, CeedCompile_Cuda(ceed, diagonal_kernel_source, module, 8, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT",
1412-
num_eval_modes_out, "NUM_COMP", num_comp, "NUM_NODES", num_nodes, "NUM_QPTS", num_qpts, "USE_CEEDSIZE",
1413-
use_ceedsize_idx, "USE_POINT_BLOCK", is_point_block ? 1 : 0, "BLOCK_SIZE", num_nodes * elems_per_block));
1411+
CeedCallCuda(ceed, CeedCompile_Cuda(ceed, diagonal_kernel_source, "operator_diagonal_assembly", module, 8, "NUM_EVAL_MODES_IN", num_eval_modes_in,
1412+
"NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP", num_comp, "NUM_NODES", num_nodes, "NUM_QPTS", num_qpts,
1413+
"USE_CEEDSIZE", use_ceedsize_idx, "USE_POINT_BLOCK", is_point_block ? 1 : 0, "BLOCK_SIZE",
1414+
num_nodes * elems_per_block));
14141415
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, *module, "LinearDiagonal", is_point_block ? &diag->LinearPointBlock : &diag->LinearDiagonal));
14151416
CeedCallBackend(CeedDestroy(&ceed));
14161417
CeedCallBackend(CeedBasisDestroy(&basis_in));
@@ -1629,11 +1630,11 @@ static int CeedOperatorAssembleSingleBlockSetup_Cuda(CeedOperator op, CeedInt ac
16291630

16301631
CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
16311632
CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
1632-
CeedCallBackend(CeedCompile_Cuda(ceed, source, &asmb->module, 11, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT", num_eval_modes_out,
1633-
"NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out, "TOTAL_NUM_COMP_OUT", num_output_components,
1634-
"NUM_NODES_IN", elem_size_in, "NUM_NODES_OUT", elem_size_out, "NUM_QPTS", num_qpts_in, "BLOCK_SIZE",
1635-
asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_y,
1636-
"USE_CEEDSIZE", use_ceedsize_idx));
1633+
CeedCallBackend(CeedCompile_Cuda(ceed, source, "operator_block_assembly", &asmb->module, 11, "NUM_EVAL_MODES_IN", num_eval_modes_in,
1634+
"NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out,
1635+
"TOTAL_NUM_COMP_OUT", num_output_components, "NUM_NODES_IN", elem_size_in, "NUM_NODES_OUT", elem_size_out,
1636+
"NUM_QPTS", num_qpts_in, "BLOCK_SIZE", asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block,
1637+
"BLOCK_SIZE_Y", asmb->block_size_y, "USE_CEEDSIZE", use_ceedsize_idx));
16371638
CeedCallBackend(CeedGetKernel_Cuda(ceed, asmb->module, "LinearAssembleBlock", &asmb->LinearAssemble));
16381639

16391640
// Load into B_in, in order that they will be used in eval_modes_in
@@ -1772,9 +1773,9 @@ static int CeedOperatorAssembleSingleSetup_Cuda(CeedOperator op, CeedInt use_cee
17721773

17731774
CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
17741775
CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
1775-
CeedCallBackend(CeedCompile_Cuda(ceed, assembly_kernel_source, &asmb->module, 10, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT",
1776-
num_eval_modes_out, "NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out, "NUM_NODES_IN", elem_size_in,
1777-
"NUM_NODES_OUT", elem_size_out, "NUM_QPTS", num_qpts_in, "BLOCK_SIZE",
1776+
CeedCallBackend(CeedCompile_Cuda(ceed, assembly_kernel_source, "operator_assembly", &asmb->module, 10, "NUM_EVAL_MODES_IN", num_eval_modes_in,
1777+
"NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out, "NUM_NODES_IN",
1778+
elem_size_in, "NUM_NODES_OUT", elem_size_out, "NUM_QPTS", num_qpts_in, "BLOCK_SIZE",
17781779
asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_y,
17791780
"USE_CEEDSIZE", use_ceedsize_idx));
17801781
CeedCallBackend(CeedGetKernel_Cuda(ceed, asmb->module, "LinearAssemble", &asmb->LinearAssemble));

backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ extern "C" int CeedQFunctionBuildKernel_Cuda_ref(CeedQFunction qf) {
106106
code << "}\n";
107107

108108
// Compile kernel
109-
CeedCallBackend(CeedCompile_Cuda(ceed, code.str().c_str(), &data->module, 0));
109+
CeedCallBackend(CeedCompile_Cuda(ceed, code.str().c_str(), (std::string("qfunction_") + qfunction_name).c_str(), &data->module, 0));
110110
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, kernel_name.c_str(), &data->QFunction));
111111
CeedCallBackend(CeedDestroy(&ceed));
112112
return CEED_ERROR_SUCCESS;

0 commit comments

Comments
 (0)