Skip to content

Commit b918fa7

Browse files
authored
enable openmp for gelu (deepmodeling#2375)
Fix deepmodeling#2373.
1 parent 6a92419 commit b918fa7

1 file changed

Lines changed: 3 additions & 0 deletions

File tree

source/lib/src/gelu.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
template <typename FPTYPE>
88
void deepmd::gelu_cpu(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
9+
#pragma omp parallel for
910
for (int ii = 0; ii < size; ii++) {
1011
out[ii] = xx[ii] * (FPTYPE)0.5 *
1112
((FPTYPE)1.0 +
@@ -19,6 +20,7 @@ void deepmd::gelu_grad_cpu(FPTYPE* out,
1920
const FPTYPE* xx,
2021
const FPTYPE* dy,
2122
const int_64 size) {
23+
#pragma omp parallel for
2224
for (int ii = 0; ii < size; ii++) {
2325
const FPTYPE var =
2426
tanh((FPTYPE)SQRT_2_PI *
@@ -36,6 +38,7 @@ void deepmd::gelu_grad_grad_cpu(FPTYPE* out,
3638
const FPTYPE* dy,
3739
const FPTYPE* dy_2,
3840
const int_64 size) {
41+
#pragma omp parallel for
3942
for (int ii = 0; ii < size; ii++) {
4043
const FPTYPE var1 =
4144
tanh((FPTYPE)SQRT_2_PI *

0 commit comments

Comments (0)