This repository was archived by the owner on Mar 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 42
Expand file tree
/
Copy pathmulticore.hpp
More file actions
208 lines (175 loc) · 7.97 KB
/
multicore.hpp
File metadata and controls
208 lines (175 loc) · 7.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/
#pragma once
#include "coreneuron/nrnconf.h"
#include "coreneuron/mechanism/membfunc.hpp"
#include "coreneuron/utils/memory.h"
#include "coreneuron/mpi/nrnmpi.h"
#include "coreneuron/mpi/core/nrnmpi.hpp"
#include "coreneuron/io/reports/nrnreport.hpp"
#include <vector>
#include <memory>
namespace coreneuron {
class NetCon;
class PreSyn;
extern bool use_solve_interleave;
/*
Point_process._presyn, used only if its NET_RECEIVE sends a net_event, is
eliminated. Needed only by net_event function. Replaced by
PreSyn* = nt->presyns + nt->pnt2presyn_ix[pnttype2presyn[pnt->_type]][pnt->_i_instance];
*/
/**
 * Singly-linked-list node tying one mechanism's instance data (Memb_list)
 * to its mechanism type and inter-mechanism scheduling dependencies.
 */
struct NrnThreadMembList { /* patterned after CvMembList in cvodeobj.h */
    NrnThreadMembList* next; /* next mechanism in this thread's list; nullptr terminates */
    Memb_list* ml;           /* instance data for all copies of this mechanism in the thread */
    int index;               /* mechanism type index */
    int* dependencies;       /* list of mechanism types that this mechanism depends on*/
    int ndependencies;       /* for scheduling we need to know the dependency count */
};
/**
 * Build a NrnThreadMembList node for mechanism `mech_id` in thread `nt`.
 * NOTE(review): defined elsewhere; presumably allocates the node/Memb_list and
 * updates `shadow_rhs_cnt` when the mechanism needs shadow vectors -- confirm
 * against the implementation.
 */
NrnThreadMembList* create_tml(NrnThread& nt,
                              int mech_id,
                              Memb_func& memb_func,
                              int& shadow_rhs_cnt,
                              const std::vector<int>& mech_types,
                              const std::vector<int>& nodecounts);
/**
 * Singly-linked-list node pairing a BEFORE/AFTER block (BAMech) with the
 * Memb_list it applies to; NrnThread::tbl holds one list per BA phase.
 */
struct NrnThreadBAList {
    Memb_list* ml; /* an item in the NrnThreadMembList */
    BAMech* bam;
    NrnThreadBAList* next; /* nullptr terminates the list */
};
/** Saved RHS/D vectors used for the fast membrane-current (i_membrane) calculation. */
struct NrnFastImem {
    double* nrn_sav_rhs; /* saved right-hand side per node */
    double* nrn_sav_d;   /* saved diagonal per node */
};
/**
 * Bookkeeping for per-time-step trajectory values that CoreNEURON returns
 * to NEURON, either immediately each step (bsize == 0) or buffered into
 * Vector arrays (bsize > 0).
 */
struct TrajectoryRequests {
    void** vpr;        /* PlayRecord Objects known by NEURON */
    double** scatter;  /* if bsize == 0, each time step */
    double** varrays;  /* if bsize > 0, the Vector data pointers. */
    double** gather;   /* pointers to values that get scattered to NEURON */
    int n_pr;          /* number of PlayRecord instances */
    int n_trajec;      /* number of trajectories requested */
    int bsize;         /* buffer size of the Vector data */
    int vsize;         /* number of elements in varrays so far */
};
/* for OpenACC, in order to avoid an error while update PreSyn, with virtual base
 * class, we are adding helper with flag variable which could be updated on GPU
 */
struct PreSynHelper {
    int flag_; /* GPU-updatable flag standing in for state of the matching PreSyn */
};
/**
 * Per-thread simulation state. Each NrnThread owns a group of cells: their
 * node/solver vectors, mechanism instance data, synapses, event-delivery
 * structures and GPU bookkeeping. Inherits UnifiedMemManaged<> so instances
 * can be placed in unified (CPU/GPU) memory.
 */
struct NrnThread: UnifiedMemManaged<> {
    double _t = 0;     /* current simulation time of this thread */
    double _dt = -1e9; /* integration time step (sentinel -1e9 = not yet set) */
    double cj = 0.0;   /* NOTE(review): presumably the 1/dt (or 2/dt) matrix coefficient -- confirm */
    NrnThreadMembList* tml = nullptr; /* linked list of mechanisms present in this thread */
    Memb_list** _ml_list = nullptr;   /* Memb_list lookup; assumed indexed by mechanism type -- TODO confirm */
    Point_process* pntprocs = nullptr;  // synapses and artificial cells with and without gid
    PreSyn* presyns = nullptr;          // all the output PreSyn with and without gid
    PreSynHelper* presyns_helper = nullptr; /* GPU-updatable helper flags paired with presyns */
    int** pnt2presyn_ix = nullptr;  // eliminates Point_process._presyn used only by net_event
                                    // sender.
    NetCon* netcons = nullptr;
    double* weights = nullptr;  // size n_weight. NetCon.weight_ points into this array.
    int n_pntproc = 0;       /* number of entries in pntprocs */
    int n_weight = 0;        /* number of entries in weights */
    int n_netcon = 0;        /* number of entries in netcons */
    int n_input_presyn = 0;
    int n_presyn = 0;        // only for model_size
    int n_real_output = 0;   // for checking their thresholds.
    int ncell = 0;           /* analogous to old rootnodecount */
    int end = 0;             /* 1 + position of last in v_node array. Now v_node_count. */
    int id = 0;              /* this is nrn_threads[id] */
    int _stop_stepping = 0;  /* NOTE(review): looks like a flag to abort fixed stepping -- confirm */
    int n_vecplay = 0;       /* number of instances of VecPlayContinuous */
    size_t _ndata = 0;
    size_t _nvdata = 0;
    size_t _nidata = 0; /* sizes */
    double* _data = nullptr;   /* all the other double* and Datum to doubles point into here*/
    int* _idata = nullptr;     /* all the Datum to ints index into here */
    void** _vdata = nullptr;   /* all the Datum to pointers index into here */
    void** _vecplay = nullptr; /* array of instances of VecPlayContinuous */
    /* solver/node vectors */
    double* _actual_rhs = nullptr;
    double* _actual_d = nullptr;
    double* _actual_a = nullptr;
    double* _actual_b = nullptr;
    double* _actual_v = nullptr;
    double* _actual_area = nullptr;
    double* _actual_diam = nullptr; /* nullptr if no mechanism has dparam with diam semantics */
    double* _shadow_rhs = nullptr;  /* Not pointer into _data. Avoid race for multiple POINT_PROCESS
                                       in same compartment */
    double* _shadow_d = nullptr;    /* Not pointer into _data. Avoid race for multiple POINT_PROCESS in
                                       same compartment */
    /* Fast membrane current calculation struct */
    NrnFastImem* nrn_fast_imem = nullptr;
    int* _v_parent_index = nullptr;
    int* _permute = nullptr;
    char* _sp13mat = nullptr;              /* handle to general sparse matrix */
    Memb_list* _ecell_memb_list = nullptr; /* normally nullptr */
    double _ctime = 0.0;                   /* computation time in seconds (using nrnmpi_wtime) */
    NrnThreadBAList* tbl[BEFORE_AFTER_SIZE]; /* wasteful since almost all empty */
    int shadow_rhs_cnt = 0; /* added to facilitate the NrnThread transfer to GPU */
    int compute_gpu = 0;    /* define whether to compute with gpus */
    int stream_id = 0;      /* define where the kernel will be launched on GPU stream */
    int _net_send_buffer_size = 0;
    int _net_send_buffer_cnt = 0;
    int* _net_send_buffer = nullptr;
    int* _watch_types = nullptr; /* nullptr or 0 terminated array of integers */
    void* mapping = nullptr;     /* section to segment mapping information */
    std::unique_ptr<SummationReportMapping> summation_report_handler_; /* report to ALU (values of
                                                                         the current summation */
    TrajectoryRequests* trajec_requests = nullptr; /* per time step values returned to NEURON */
    /* Needed in case there are FOR_NETCON statements in use. */
    std::size_t _fornetcon_perm_indices_size{}; /* length of _fornetcon_perm_indices */
    size_t* _fornetcon_perm_indices{};          /* displacement like list of indices */
    std::size_t _fornetcon_weight_perm_size{};  /* length of _fornetcon_weight_perm */
    size_t* _fornetcon_weight_perm{};           /* permutation indices into weight */
    std::vector<int> _pnt_offset; /* for SelfEvent queue transfer */
};
/* Global thread table: nrn_threads is an array of nrn_nthread NrnThread objects. */
extern void nrn_threads_create(int n); /* allocate/initialize the table with n threads */
extern int nrn_nthread;
extern NrnThread* nrn_threads;
/**
 * Invoke `job` once per NrnThread, in parallel when built with OpenMP.
 *
 * @param job   callable invoked as job(NrnThread*, args...) for each of the
 *              nrn_nthread entries of the global nrn_threads array
 * @param args  extra arguments passed to every invocation
 *
 * The arguments are deliberately passed as lvalues (`args...`) rather than
 * with std::forward: the loop calls `job` nrn_nthread times, and forwarding
 * an rvalue would move from it on the first iteration, handing moved-from
 * objects to the remaining threads (this was the old FIXME about "multiple
 * forwarding of the same arguments").
 */
template <typename F, typename... Args>
void nrn_multithread_job(F&& job, Args&&... args) {
    int i;
    // clang-format off
    #pragma omp parallel for private(i) shared(nrn_threads, job, nrn_nthread, \
        nrnmpi_myid) schedule(static, 1)
    for (i = 0; i < nrn_nthread; ++i) {
        // Pass args as lvalues: safe to reuse across iterations/OpenMP threads.
        job(nrn_threads + i, args...);
    }
    // clang-format on
}
/* --- Simulation driver entry points; all defined in the corresponding .cpp files. --- */
extern void nrn_thread_table_check(void);
extern void nrn_threads_free(void);
extern bool _nrn_skip_initmodel;
extern void dt2thread(double);
extern void clear_event_queue(void);
extern void nrn_ba(NrnThread*, int);
extern void* nrn_fixed_step_lastpart(NrnThread*);
extern void nrn_solve_minimal(NrnThread*);
extern void nrncore2nrn_send_init();
extern void* setup_tree_matrix_minimal(NrnThread*);
extern void nrncore2nrn_send_values(NrnThread*);
/* NOTE(review): presumably `total_sim_steps` counts fixed dt steps and `tstop`
 * is the absolute stop time -- confirm against the implementations. */
extern void nrn_fixed_step_group_minimal(int total_sim_steps);
extern void nrn_fixed_single_steps_minimal(int total_sim_steps, double tstop);
extern void nrn_fixed_step_minimal(void);
extern void nrn_finitialize(int setv, double v);
extern void direct_mode_initialize();
extern void nrn_mk_table_check(void);
extern void nonvint(NrnThread* _nt);
extern void update(NrnThread*);
// NOTE : this implementation is duplicated in "coreneuron/mechanism/nrnoc_ml.ispc"
// for the ISPC backend. If changes are required, make sure to change ISPC as well.
/**
 * Return 1 exactly on the time step whose half-open interval
 * (t - dt, t] contains the event time `te`, otherwise 0.
 */
constexpr int at_time(NrnThread* nt, double te) {
    // Nudge te slightly earlier to absorb floating-point round-off.
    const double shifted = te - 1e-11;
    return (shifted <= nt->_t && shifted > (nt->_t - nt->_dt)) ? 1 : 0;
}
} // namespace coreneuron