This repository was archived by the owner on Mar 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 42
Expand file tree
/
Copy pathsolve_core.cpp
More file actions
84 lines (72 loc) · 2.6 KB
/
solve_core.cpp
File metadata and controls
84 lines (72 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/
#include "coreneuron/nrnconf.h"
#include "coreneuron/permute/cellorder.hpp"
#include "coreneuron/sim/multicore.hpp"
namespace coreneuron {
bool use_solve_interleave;
static void triang(NrnThread*), bksub(NrnThread*);
/* Solve the linear system for this thread's cells: either the permuted
 * interleaved solver (chosen at setup via use_solve_interleave) or the
 * classic two-pass Hines elimination (triang then bksub). */
void nrn_solve_minimal(NrnThread* _nt) {
    if (!use_solve_interleave) {
        /* default path: forward elimination followed by back substitution */
        triang(_nt);
        bksub(_nt);
    } else {
        /* interleaved/permuted layout selected by --cell-permute */
        solve_interleaved(_nt->id);
    }
}
/** @todo OpenACC GPU offload is sequential/slow. Because --cell-permute=0 and
* --gpu is forbidden anyway, no OpenMP target offload equivalent is implemented.
*/
/* triangularization of the matrix equations */
/* Forward-elimination pass over the non-root nodes (indices [ncell, end)),
 * walking from the highest index down toward the roots.  Each iteration folds
 * row i into its parent's row, so the loop carries a dependency through
 * parent_index and is marked `loop seq` on the device (sequential on GPU --
 * see the @todo note above; this path is not meant to be fast with --gpu).
 * Assumes parent_index[i] < i for every non-root node (Hines ordering) --
 * NOTE(review): confirm this holds for all node permutations in use. */
static void triang(NrnThread* _nt) {
int i2 = _nt->ncell; /* first non-root node; roots occupy [0, ncell) */
int i3 = _nt->end; /* one past the last node owned by this thread */
/* Raw pointers into the thread's matrix storage (VEC_* macros index it). */
double* vec_a = &(VEC_A(0)); /* off-diagonal coupling used to eliminate row i */
double* vec_b = &(VEC_B(0)); /* off-diagonal term folded into the parent diagonal */
double* vec_d = &(VEC_D(0)); /* diagonal */
double* vec_rhs = &(VEC_RHS(0)); /* right-hand side */
int* parent_index = _nt->_v_parent_index;
/* `present` asserts the arrays are already on the device; work is queued on
 * this thread's async stream and only synchronized later in bksub. */
nrn_pragma_acc(parallel loop seq present(
vec_a [0:i3], vec_b [0:i3], vec_d [0:i3], vec_rhs [0:i3], parent_index [0:i3])
async(_nt->stream_id) if (_nt->compute_gpu))
nrn_pragma_omp(target if (_nt->compute_gpu))
for (int i = i3 - 1; i >= i2; --i) {
/* Eliminate the coupling of node i to its parent: scale row i and
 * subtract it from the parent's diagonal and right-hand side. */
double p = vec_a[i] / vec_d[i];
vec_d[parent_index[i]] -= p * vec_b[i];
vec_rhs[parent_index[i]] -= p * vec_rhs[i];
}
}
/* back substitution to finish solving the matrix equations */
/* Runs after triang() has reduced the system: first solves the root nodes
 * directly, then sweeps upward through the non-root nodes (ascending index,
 * so a node's parent is always solved first -- assumes parent_index[i] < i,
 * Hines ordering; NOTE(review): confirm for all permutations).  Both loops
 * are `loop seq` on the device; the final wait drains this thread's async
 * stream so results are complete when the function returns. */
static void bksub(NrnThread* _nt) {
int i1 = 0; /* first root node */
int i2 = i1 + _nt->ncell; /* one past the last root; first non-root node */
int i3 = _nt->end; /* one past the last node owned by this thread */
/* Raw pointers into the thread's matrix storage (VEC_* macros index it). */
double* vec_b = &(VEC_B(0));
double* vec_d = &(VEC_D(0));
double* vec_rhs = &(VEC_RHS(0));
int* parent_index = _nt->_v_parent_index;
/* Root rows have no parent coupling left: solve them with a plain divide. */
nrn_pragma_acc(parallel loop seq present(vec_d [0:i2], vec_rhs [0:i2])
async(_nt->stream_id) if (_nt->compute_gpu))
nrn_pragma_omp(target if (_nt->compute_gpu))
for (int i = i1; i < i2; ++i) {
vec_rhs[i] /= vec_d[i];
}
/* Non-root rows: substitute the parent's already-solved value, then divide.
 * Loop-carried dependency through parent_index, hence `loop seq`. */
nrn_pragma_acc(
parallel loop seq present(vec_b [0:i3], vec_d [0:i3], vec_rhs [0:i3], parent_index [0:i3])
async(_nt->stream_id) if (_nt->compute_gpu))
nrn_pragma_omp(target if (_nt->compute_gpu))
for (int i = i2; i < i3; ++i) {
vec_rhs[i] -= vec_b[i] * vec_rhs[parent_index[i]];
vec_rhs[i] /= vec_d[i];
}
/* Synchronize: triang() and the loops above were queued asynchronously on
 * this thread's stream; block until they finish before returning. */
if (_nt->compute_gpu) {
nrn_pragma_acc(wait(_nt->stream_id))
}
}
} // namespace coreneuron