We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 6a92419, commit b918fa7 (Copy full SHA for b918fa7)
1 file changed
source/lib/src/gelu.cc
@@ -6,6 +6,7 @@
6
7
template <typename FPTYPE>
8
void deepmd::gelu_cpu(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
9
+#pragma omp parallel for
10
for (int ii = 0; ii < size; ii++) {
11
out[ii] = xx[ii] * (FPTYPE)0.5 *
12
((FPTYPE)1.0 +
@@ -19,6 +20,7 @@ void deepmd::gelu_grad_cpu(FPTYPE* out,
19
20
const FPTYPE* xx,
21
const FPTYPE* dy,
22
const int_64 size) {
23
24
25
const FPTYPE var =
26
tanh((FPTYPE)SQRT_2_PI *
@@ -36,6 +38,7 @@ void deepmd::gelu_grad_grad_cpu(FPTYPE* out,
36
38
37
39
const FPTYPE* dy_2,
40
41
42
43
const FPTYPE var1 =
44
0 commit comments