@@ -79,7 +79,7 @@ void dt2thread(double adt) { /* copied from nrnoc/fadvance.c */
7979 nt->cj = 1.0 / dt;
8080 }
8181 nrn_pragma_acc (update device (nt->_t , nt->_dt , nt->cj )
82- async (nt->stream_id ) if (nt->compute_gpu ))
82+ async (nt->streams [nt-> stream_id ] ) if (nt->compute_gpu ))
8383 // clang-format off
8484 nrn_pragma_omp (target update to (nt->_t , nt->_dt , nt->cj )
8585 if (nt->compute_gpu ))
@@ -206,14 +206,14 @@ void update(NrnThread* _nt) {
206206 /* do not need to worry about linmod or extracellular*/
207207 if (secondorder) {
208208 nrn_pragma_acc (parallel loop present (vec_v [0 :i2], vec_rhs [0 :i2]) if (_nt->compute_gpu )
209- async (_nt->stream_id ) )
209+ async (_nt->streams [_nt-> stream_id ] )
210210 nrn_pragma_omp (target teams distribute parallel for simd if (_nt->compute_gpu ))
211211 for (int i = 0 ; i < i2; ++i) {
212212 vec_v[i] += 2 . * vec_rhs[i];
213213 }
214214 } else {
215215 nrn_pragma_acc (parallel loop present (vec_v [0 :i2], vec_rhs [0 :i2]) if (_nt->compute_gpu )
216- async (_nt->stream_id ) )
216+ async (_nt->streams [_nt-> stream_id ] )
217217 nrn_pragma_omp (target teams distribute parallel for simd if (_nt->compute_gpu ))
218218 for (int i = 0 ; i < i2; ++i) {
219219 vec_v[i] += vec_rhs[i];
@@ -295,7 +295,7 @@ void nrncore2nrn_send_values(NrnThread* nth) {
295295 assert (vs < tr->bsize );
296296
297297 nrn_pragma_acc (parallel loop present (tr [0 :1 ]) if (nth->compute_gpu )
298- async (nth->stream_id ))
298+ async (nth->streams [nth-> stream_id ] ))
299299 nrn_pragma_omp (target teams distribute parallel for simd if (nth->compute_gpu ))
300300 for (int i = 0 ; i < tr->n_trajec ; ++i) {
301301 tr->varrays [i][vs] = *tr->gather [i];
@@ -316,10 +316,10 @@ void nrncore2nrn_send_values(NrnThread* nth) {
316316 for (int i = 0 ; i < tr->n_trajec ; ++i) {
317317 double * gather_i = tr->gather [i];
318318 nrn_pragma_acc (update self (gather_i [0 :1 ]) if (nth->compute_gpu )
319- async (nth->stream_id ))
319+ async (nth->streams [nth-> stream_id ] ))
320320 nrn_pragma_omp (target update from (gather_i [0 :1 ]) if (nth->compute_gpu ))
321321 }
322- nrn_pragma_acc (wait (nth->stream_id ))
322+ nrn_pragma_acc (wait (nth->streams [nth-> stream_id ] ))
323323 nrn_pragma_omp (taskwait)
324324 for (int i = 0 ; i < tr->n_trajec ; ++i) {
325325 *(tr->scatter [i]) = *(tr->gather [i]);
@@ -342,8 +342,8 @@ static void* nrn_fixed_step_thread(NrnThread* nth) {
342342 if (nth->ncell ) {
343343 /* @todo: do we need to update nth->_t on GPU: Yes (Michael, but can
344344 launch kernel) */
345- nrn_pragma_acc (update device (nth->_t ) if (nth->compute_gpu ) async (nth->stream_id ))
346- nrn_pragma_acc (wait (nth->stream_id ))
345+ nrn_pragma_acc (update device (nth->_t ) if (nth->compute_gpu ) async (nth->streams [nth-> stream_id ] ))
346+ nrn_pragma_acc (wait (nth->streams [nth-> stream_id ] ))
347347 nrn_pragma_omp (target update to (nth->_t ) if (nth->compute_gpu ))
348348 fixed_play_continuous (nth);
349349
@@ -378,8 +378,8 @@ void* nrn_fixed_step_lastpart(NrnThread* nth) {
378378
379379 if (nth->ncell ) {
380380 /* @todo: do we need to update nth->_t on GPU */
381- nrn_pragma_acc (update device (nth->_t ) if (nth->compute_gpu ) async (nth->stream_id ))
382- nrn_pragma_acc (wait (nth->stream_id ))
381+ nrn_pragma_acc (update device (nth->_t ) if (nth->compute_gpu ) async (nth->streams [nth-> stream_id ] ))
382+ nrn_pragma_acc (wait (nth->streams [nth-> stream_id ] ))
383383 nrn_pragma_omp (target update to (nth->_t ) if (nth->compute_gpu ))
384384 fixed_play_continuous (nth);
385385 nonvint (nth);
0 commit comments