This repository was archived by the owner on Mar 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 42
Expand file tree
/
Copy pathfadvance_core.cpp
More file actions
412 lines (363 loc) · 12 KB
/
fadvance_core.cpp
File metadata and controls
412 lines (363 loc) · 12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <functional>

#include "coreneuron/coreneuron.hpp"
#include "coreneuron/nrnconf.h"
#include "coreneuron/apps/corenrn_parameters.hpp"
#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/mpi/nrnmpi.h"
#include "coreneuron/sim/fast_imem.hpp"
#include "coreneuron/gpu/nrn_acc_manager.hpp"
#include "coreneuron/io/reports/nrnreport.hpp"
#include "coreneuron/network/netcvode.hpp"
#include "coreneuron/network/netpar.hpp"
#include "coreneuron/network/partrans.hpp"
#include "coreneuron/utils/nrnoc_aux.hpp"
#include "coreneuron/utils/progressbar/progressbar.h"
#include "coreneuron/utils/profile/profiler_interface.h"
#include "coreneuron/io/nrn2core_direct.h"
#include "coreneuron/io/nrn_checkpoint.hpp"
// Do an auto checkpoint only if execution lasted longer than this var (secs)
#define CHECKPOINT_MIN_RUNTIME (4 * 3600) // 4h
namespace coreneuron {
extern corenrn_parameters corenrn_param;
static void* nrn_fixed_step_thread(NrnThread*);
static void* nrn_fixed_step_group_thread(NrnThread*, int, int, int&);
static bool nrn_auto_checkpoint();
static time_t sim_start_time;
void dt2thread(double adt) { /* copied from nrnoc/fadvance.c */
if (adt != nrn_threads[0]._dt) {
for (int i = 0; i < nrn_nthread; ++i) {
NrnThread* nt = nrn_threads + i;
nt->_t = t;
nt->_dt = dt;
if (secondorder) {
nt->cj = 2.0 / dt;
} else {
nt->cj = 1.0 / dt;
}
}
}
}
/* Advance all threads by one fixed time step: deliver events, integrate,
   optionally transfer gap-junction voltages between ranks mid-step, then
   exchange spikes and flush reports when stepping stopped. */
void nrn_fixed_step_minimal() { /* not so minimal anymore with gap junctions */
    // If the global t is out of sync with thread 0, pass the -1 sentinel so
    // dt2thread unconditionally resyncs every thread; otherwise pass dt,
    // which is a no-op when nothing changed.
    if (t != nrn_threads->_t) {
        dt2thread(-1.);
    } else {
        dt2thread(dt);
    }
    nrn_thread_table_check();
    nrn_multithread_job(nrn_fixed_step_thread);
    // With gap junctions the step is split in two halves: voltages are
    // transferred between processes mid-step, then the second half runs.
    if (nrn_have_gaps) {
        nrnmpi_v_transfer();
        nrn_multithread_job(nrn_fixed_step_lastpart);
    }
#if NRNMPI
    if (nrn_threads[0]._stop_stepping) {
        nrn_spike_exchange(nrn_threads);
    }
#endif
#if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS)
    nrn_flush_reports(nrn_threads[0]._t);
#endif
    // Publish thread 0's time back to the global clock.
    t = nrn_threads[0]._t;
}
/* better cache efficiency since a thread can do an entire minimum delay
integration interval before joining
*/
/// --> Coreneuron
static progressbar* progress;
// Create the " psolve" console progress bar spanning nstep steps.
// Only MPI rank 0 draws it, and only when --quiet was not requested.
void initialize_progress_bar(int nstep) {
    const bool show_bar = (nrnmpi_myid == 0) && !corenrn_param.is_quiet();
    if (show_bar) {
        printf("\n");
        progress = progressbar_new(" psolve", nstep);
    }
}
// Advance the progress bar to `step`, displaying simulation time `time`.
// No-op on ranks other than 0 or when --quiet was requested.
void update_progress_bar(int step, double time) {
    if (nrnmpi_myid != 0 || corenrn_param.is_quiet()) {
        return;
    }
    progressbar_update(progress, step, time);
}
// Complete and release the progress bar (rank 0 only, unless --quiet).
void finalize_progress_bar() {
    if (nrnmpi_myid != 0 || corenrn_param.is_quiet()) {
        return;
    }
    progressbar_finish(progress);
}
/* Drive the simulation one fixed step at a time until tstop, updating the
   progress bar every few steps and taking an automatic checkpoint when
   stoprun interrupts the run. */
void nrn_fixed_single_steps_minimal(int total_sim_steps, double tstop) {
    const int progressbar_update_interval = 5;
    // NOTE(review): static, so the step counter accumulates across
    // successive calls -- confirm the progress bar is meant to continue
    // rather than restart on repeated psolve invocations.
    static int current_steps = 0;
    initialize_progress_bar(total_sim_steps);
#if NRNMPI
    // MPI build: loop bound is tstop - dt (inclusive).
    double updated_tstop = tstop - dt;
    nrn_assert(nrn_threads->_t <= tstop);
    // It may very well be the case that we do not advance at all
    while (nrn_threads->_t <= updated_tstop) {
#else
    // Non-MPI build: stop within half a step of tstop, presumably to avoid
    // an extra step from floating-point roundoff -- TODO confirm.
    double updated_tstop = tstop - .5 * dt;
    while (nrn_threads->_t < updated_tstop) {
#endif
        nrn_fixed_step_minimal();
        // stoprun is set externally; checkpoint (if the run lasted long
        // enough) and bail out.
        if (stoprun) {
            nrn_auto_checkpoint();
            break;
        }
        current_steps++;
        if (!(current_steps % progressbar_update_interval)) {
            update_progress_bar(current_steps, nrn_threads[0]._t);
        }
    }
    finalize_progress_bar();
}
/* Advance all threads for total_sim_steps steps, letting each thread run an
   entire minimum-delay interval ("step group") before joining -- better
   cache efficiency than synchronizing after every dt. */
void nrn_fixed_step_group_minimal(int total_sim_steps) {
    // NOTE(review): static -- the counter carries over between calls, like
    // in nrn_fixed_single_steps_minimal; confirm intended.
    static int current_steps = 0;
    dt2thread(dt);
    nrn_thread_table_check();
    int step_group_n = total_sim_steps;
    int step_group_begin = 0;
    int step_group_end = 0;
    initialize_progress_bar(step_group_n);
    while (step_group_end < step_group_n) {
        // Workers advance until _stop_stepping is raised; thread 0 writes
        // the index of the last completed step into step_group_end.
        nrn_multithread_job(nrn_fixed_step_group_thread,
                            step_group_n,
                            step_group_begin,
                            step_group_end);
#if NRNMPI
        nrn_spike_exchange(nrn_threads);
#endif
#if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS)
        nrn_flush_reports(nrn_threads[0]._t);
#endif
        if (stoprun) {
            nrn_auto_checkpoint();
            break;
        }
        current_steps++;
        // Next group resumes where this one stopped.
        step_group_begin = step_group_end;
        update_progress_bar(step_group_end, nrn_threads[0]._t);
    }
    // Publish thread 0's time back to the global clock.
    t = nrn_threads[0]._t;
    finalize_progress_bar();
}
/* Worker for nrn_fixed_step_group_minimal: advance one thread by fixed
   steps from step_group_begin up to step_group_max, stopping early when
   _stop_stepping is raised. Thread 0 reports the number of completed steps
   through the step_group_end out-parameter. */
static void* nrn_fixed_step_group_thread(NrnThread* nth,
                                         int step_group_max,
                                         int step_group_begin,
                                         int& step_group_end) {
    nth->_stop_stepping = 0;
    int step = step_group_begin;
    while (step < step_group_max) {
        nrn_fixed_step_thread(nth);
        ++step;
        if (nth->_stop_stepping) {
            // Clear the flag for the next group; only thread 0 publishes
            // how far the group actually got.
            nth->_stop_stepping = 0;
            if (nth->id == 0) {
                step_group_end = step;
            }
            return nullptr;
        }
    }
    if (nth->id == 0) {
        step_group_end = step_group_max;
    }
    return nullptr;
}
/* Update the membrane potential from the solved right-hand side:
   v += 2*rhs for the second-order method, v += rhs otherwise; then refresh
   capacitance currents and (optionally) the fast imem buffers. */
void update(NrnThread* _nt) {
    double* vec_v = &(VEC_V(0));
    double* vec_rhs = &(VEC_RHS(0));
    int i2 = _nt->end;
#if defined(_OPENACC)
    int stream_id = _nt->stream_id;
#endif
    /* do not need to worry about linmod or extracellular*/
    if (secondorder) {
        // clang-format off
        #pragma acc parallel loop present( \
            vec_v[0:i2], vec_rhs[0:i2]) \
            if (_nt->compute_gpu) async(stream_id)
        // clang-format on
        for (int i = 0; i < i2; ++i) {
            vec_v[i] += 2. * vec_rhs[i];
        }
    } else {
        // clang-format off
        #pragma acc parallel loop present( \
            vec_v[0:i2], vec_rhs[0:i2]) \
            if (_nt->compute_gpu) async(stream_id)
        // clang-format on
        for (int i = 0; i < i2; ++i) {
            vec_v[i] += vec_rhs[i];
        }
    }
    // update_matrix_to_gpu(_nt);
    // The capacitance mechanism is expected to be first in the thread's
    // mechanism list; recompute its currents from the new voltages.
    if (_nt->tml) {
        assert(_nt->tml->index == CAP);
        nrn_cur_capacitance(_nt, _nt->tml->ml, _nt->tml->index);
    }
    if (nrn_use_fast_imem) {
        nrn_calc_fast_imem(_nt);
    }
}
/* Integrate the STATE equations of every mechanism on this thread,
   preceded by the intra-process gap-junction voltage transfer. */
void nonvint(NrnThread* _nt) {
    if (nrn_have_gaps) {
        Instrumentor::phase p("gap-v-transfer");
        nrnthread_v_transfer(_nt);
    }
    // Cleared so the DEBUG check below can attribute errno to state code.
    errno = 0;
    Instrumentor::phase_begin("state-update");
    // Walk the mechanism list; each mechanism's state function runs inside
    // its own named instrumentation phase ("state-<mechname>").
    for (auto tml = _nt->tml; tml; tml = tml->next)
        if (corenrn.get_memb_func(tml->index).state) {
            mod_f_t s = corenrn.get_memb_func(tml->index).state;
            std::string ss("state-");
            ss += nrn_get_mechname(tml->index);
            {
                Instrumentor::phase p(ss.c_str());
                (*s)(_nt, tml->ml, tml->index);
            }
#ifdef DEBUG
            // Debug builds only: warn if mechanism code set errno.
            if (errno) {
                hoc_warning("errno set during calculation of states", nullptr);
            }
#endif
        }
    Instrumentor::phase_end("state-update");
}
// Run every BEFORE/AFTER block function of kind `bat` (e.g. AFTER_SOLVE,
// BEFORE_STEP) registered on thread `nt`, in list order.
void nrn_ba(NrnThread* nt, int bat) {
    for (auto entry = nt->tbl[bat]; entry != nullptr; entry = entry->next) {
        // Each list entry carries the block function, its mechanism type
        // and the mechanism instance data it operates on.
        entry->bam->f(nt, entry->ml, entry->bam->type);
    }
}
// Reset trajectory bookkeeping at the start of a NEURON-driven run.
void nrncore2nrn_send_init() {
    if (nrn2core_trajectory_values_ == nullptr) {
        // standalone execution : no callbacks
        return;
    }
    // tid = -1 asks NEURON to call nrn_record_init() on its side.
    // When full trajectories are stored in CoreNEURON, the per-thread
    // buffer sizes must also be reset.
    (*nrn2core_trajectory_values_)(-1, 0, nullptr, 0.0);
    for (int tid = 0; tid < nrn_nthread; ++tid) {
        auto* requests = nrn_threads[tid].trajec_requests;
        if (requests != nullptr) {
            requests->vsize = 0;
        }
    }
}
/* Record this step's requested trajectory values: either append them to
   per-step Vector buffers (varrays) or scatter them back to NEURON and
   invoke the per-step notification callback. */
void nrncore2nrn_send_values(NrnThread* nth) {
    if (nrn2core_trajectory_values_ == nullptr) {
        // standalone execution : no callbacks
        return;
    }
    TrajectoryRequests* tr = nth->trajec_requests;
    if (tr == nullptr) {
        return;
    }
    // \todo Check if user has requested voltages for this NrnThread object.
    // Currently we are updating voltages if there is any trajectory
    // requested by NEURON.
    update_voltage_from_gpu(nth);
    if (tr->varrays) {
        // Full trajectories: store this step's samples into Vector data.
        int sample = tr->vsize++;
        assert(sample < tr->bsize);
        for (int j = 0; j < tr->n_trajec; ++j) {
            tr->varrays[j][sample] = *(tr->gather[j]);
        }
    } else if (tr->scatter) {
        // Per-step transfer: scatter to NEURON and notify each step.
        nrn_assert(nrn2core_trajectory_values_);
        for (int j = 0; j < tr->n_trajec; ++j) {
            *(tr->scatter[j]) = *(tr->gather[j]);
        }
        (*nrn2core_trajectory_values_)(nth->id, tr->n_pr, tr->vpr, nth->_t);
    }
}
/* First part of a fixed step on one thread (the whole step when there are
   no gap junctions): deliver events, advance to the half-step time,
   assemble and solve the tree matrix, and update the membrane potential. */
static void* nrn_fixed_step_thread(NrnThread* nth) {
    /* check thresholds and deliver all (including binqueue)
       events up to t+dt/2 */
    Instrumentor::phase_begin("timestep");
    {
        Instrumentor::phase p("deliver_events");
        deliver_net_events(nth);
    }
    // First half-step advance of this thread's clock.
    nth->_t += .5 * nth->_dt;
    if (nth->ncell) {
#if defined(_OPENACC)
        int stream_id = nth->stream_id;
        /*@todo: do we need to update nth->_t on GPU: Yes (Michael, but can launch kernel) */
        // clang-format off
        #pragma acc update device(nth->_t) if (nth->compute_gpu) async(stream_id)
        #pragma acc wait(stream_id)
        // clang-format on
#endif
        fixed_play_continuous(nth);
        {
            Instrumentor::phase p("setup_tree_matrix");
            setup_tree_matrix_minimal(nth);
        }
        {
            Instrumentor::phase p("matrix-solver");
            nrn_solve_minimal(nth);
        }
        {
            Instrumentor::phase p("second_order_cur");
            second_order_cur(nth, secondorder);
        }
        {
            Instrumentor::phase p("update");
            update(nth);
        }
    }
    // Without gap junctions the second half can run immediately; otherwise
    // the caller interleaves an inter-process voltage transfer first.
    if (!nrn_have_gaps) {
        nrn_fixed_step_lastpart(nth);
    }
    Instrumentor::phase_end("timestep");
    return nullptr;
}
/* Second half of the fixed step: advance the thread clock to the end of
   the step, integrate mechanism states, send trajectory values, run
   AFTER_SOLVE/BEFORE_STEP blocks and deliver events up to the new t. */
void* nrn_fixed_step_lastpart(NrnThread* nth) {
    nth->_t += .5 * nth->_dt;
    if (nth->ncell) {
#if defined(_OPENACC)
        int stream_id = nth->stream_id;
        /*@todo: do we need to update nth->_t on GPU */
        // clang-format off
        #pragma acc update device(nth->_t) if (nth->compute_gpu) async(stream_id)
        #pragma acc wait(stream_id)
        // clang-format on
#endif
        fixed_play_continuous(nth);
        nonvint(nth);
        nrncore2nrn_send_values(nth);
        nrn_ba(nth, AFTER_SOLVE);
        nrn_ba(nth, BEFORE_STEP);
    } else {
        // No cells on this thread: still report trajectory values so the
        // per-step callbacks to NEURON stay in step.
        nrncore2nrn_send_values(nth);
    }
    {
        Instrumentor::phase p("deliver_events");
        nrn_deliver_events(nth); /* up to but not past texit */
    }
    return nullptr;
}
/**
 * \brief Does a checkpoint of the simulation if enough time has passed
 * \return True if a checkpoint was performed. False otherwise (not enough elapsed time)
 */
static bool nrn_auto_checkpoint() {
    const time_t cur_time = time(nullptr);
    // difftime(end, beginning) returns end - beginning. The previous code
    // passed the arguments swapped, which always produced a negative elapsed
    // time, so the auto-checkpoint could never trigger.
    // NOTE(review): sim_start_time is never assigned anywhere in this file --
    // confirm it is set to the simulation start time elsewhere.
    const double elapsed_secs = difftime(cur_time, sim_start_time);
    if (elapsed_secs < CHECKPOINT_MIN_RUNTIME) {
        return false;
    }
    // Write to tmp location first because allocated time may not be enough to complete
    const auto ckpt_tmp = corenrn_param.outpath + "/_corenrn_ckpt_dirty",
               ckpt_dir = corenrn_param.outpath + "/_corenrn_ckpt";
    Instrumentor::phase p("Checkpointing");
    write_checkpoint(nrn_threads, nrn_nthread, ckpt_tmp.c_str());
    // Atomically-ish replace any previous checkpoint with the new one.
    const int rc = system(
        ("/bin/rm -rf '" + ckpt_dir + "'; " + "/bin/mv '" + ckpt_tmp + "' '" + ckpt_dir + "'")
            .c_str());
    if (rc != 0) {
        // Best effort: the data was written, only the final move/cleanup failed.
        fprintf(stderr, "WARNING: could not finalize checkpoint at %s\n", ckpt_dir.c_str());
    }
    return true;
}
} // namespace coreneuron