@@ -87,8 +87,6 @@ contains
8787 if (p /= 0 ) then
8888 $:GPU_UPDATE(device= ' [z_cc, dz, z_domain, ib_bc_z%beg]' )
8989 end if
90- ib_gbl_idx_lookup = - 1
91- $:GPU_UPDATE(device= ' [ib_gbl_idx_lookup]' )
9290 call s_update_ib_lookup()
9391
9492 ! recompute the new ib_patch locations and broadcast them.
@@ -1263,8 +1261,8 @@ contains
12631261
12641262 buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib)
12651263 allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3))
1266- @:ALLOCATE(send_ids(num_ibs), send_ft(num_ibs, 6 ))
1267- allocate (recv_ids(size(patch_ib)), recv_ft(size(patch_ib), 6 ))
1264+ @:ALLOCATE(send_ids(num_ibs), send_ft(6, num_ibs ))
1265+ allocate (recv_ids(size(patch_ib)), recv_ft(6, size(patch_ib)))
12681266
12691267 ! Accumulation phase: propagate contributions toward the high-index corner.
12701268 #:for X, ID in [(' x' , 1), (' y' , 2), (' z' , 3)]
@@ -1282,8 +1280,8 @@ contains
12821280 $:GPU_PARALLEL_LOOP(private=' [i]' , copyin=' [forces, torques]' )
12831281 do i = 1, num_ibs
12841282 send_ids(i) = patch_ib(i)%gbl_patch_id
1285- send_ft(i, 1:3) = forces(i,:)
1286- send_ft(i, 4:6) = torques(i,:)
1283+ send_ft(1:3,i ) = forces(i,:)
1284+ send_ft(4:6,i ) = torques(i,:)
12871285 end do
12881286 $:END_GPU_PARALLEL_LOOP()
12891287 $:GPU_UPDATE(host=' [send_ids, send_ft]' )
@@ -1298,19 +1296,19 @@ contains
12981296 call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
12991297 call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, MPI_COMM_WORLD, ierr)
13001298 call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
1301- ! $:GPU_PARALLEL_LOOP(private=' [i, j]' , copyin=' [recv_ft, recv_ids]' , copy=' [forces, torques,
1302- ! recv_forces_snap, recv_torques_snap]' )
1299+ $:GPU_PARALLEL_LOOP(private=' [i, j]' , copyin=' [recv_ft, recv_ids]' , copy=' [forces, torques, &
1300+ & recv_forces_snap, recv_torques_snap]' )
13031301 do i = 1, recv_count
13041302 call s_get_neighborhood_idx(recv_ids(i), j)
13051303 if (j > 0) then
13061304 ! add forces and subtract recv_snap prevent double-counting
1307- forces(j,:) = forces(j,:) + recv_ft(i, 1:3) - recv_forces_snap(j,:)
1308- torques(j,:) = torques(j,:) + recv_ft(i, 4:6) - recv_torques_snap(j,:)
1309- recv_forces_snap(j,:) = recv_ft(i, 1:3)
1310- recv_torques_snap(j,:) = recv_ft(i, 4:6)
1305+ forces(j,:) = forces(j,:) + recv_ft(1:3,i ) - recv_forces_snap(j,:)
1306+ torques(j,:) = torques(j,:) + recv_ft(4:6,i ) - recv_torques_snap(j,:)
1307+ recv_forces_snap(j,:) = recv_ft(1:3,i )
1308+ recv_torques_snap(j,:) = recv_ft(4:6,i )
13111309 end if
13121310 end do
1313- ! $:END_GPU_PARALLEL_LOOP()
1311+ $:END_GPU_PARALLEL_LOOP()
13141312 end if
13151313 tag = tag + 2
13161314 end do
@@ -1325,13 +1323,14 @@ contains
13251323
13261324 do k = 1, 2*ib_neighborhood_radius
13271325 pack_pos = 0
1328- $:GPU_PARALLEL_LOOP(private=' [i]' , copy= ' [send_ids, send_ft] ' , copyin=' [forces, torques]' )
1326+ $:GPU_PARALLEL_LOOP(private=' [i]' , copyin=' [forces, torques]' )
13291327 do i = 1, num_ibs
13301328 send_ids(i) = patch_ib(i)%gbl_patch_id
1331- send_ft(i, 1:3) = forces(i,:)
1332- send_ft(i, 4:6) = torques(i,:)
1329+ send_ft(1:3,i ) = forces(i,:)
1330+ send_ft(4:6,i ) = torques(i,:)
13331331 end do
13341332 $:END_GPU_PARALLEL_LOOP()
1333+ $:GPU_UPDATE(host=' [send_ids, send_ft]' )
13351334 call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
13361335 call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
13371336 call MPI_PACK(send_ft, 6*num_ibs, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
@@ -1342,15 +1341,15 @@ contains
13421341 call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
13431342 call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, MPI_COMM_WORLD, ierr)
13441343 call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
1345- ! $:GPU_PARALLEL_LOOP(private=' [i, j]' , copyin=' [recv_ft, recv_ids]' , copy=' [forces, torques]' )
1344+ $:GPU_PARALLEL_LOOP(private=' [i, j]' , copyin=' [recv_ft, recv_ids]' , copy=' [forces, torques]' )
13461345 do i = 1, recv_count
13471346 call s_get_neighborhood_idx(recv_ids(i), j)
13481347 if (j > 0) then
1349- forces(j,:) = recv_ft(i, 1:3)
1350- torques(j,:) = recv_ft(i, 4:6)
1348+ forces(j,:) = recv_ft(1:3,i )
1349+ torques(j,:) = recv_ft(4:6,i )
13511350 end if
13521351 end do
1353- ! $:END_GPU_PARALLEL_LOOP()
1352+ $:END_GPU_PARALLEL_LOOP()
13541353 end if
13551354 tag = tag + 2
13561355 end do
@@ -1400,7 +1399,9 @@ contains
14001399 ! delete if not in neighborhood
14011400 if (f_neighborhood_ranks_own_location(centroid)) then
14021401 output_idx = output_idx + 1
1403- if (i /= output_idx) patch_ib(output_idx) = patch_ib(i)
1402+ if (i /= output_idx) then
1403+ patch_ib(output_idx) = patch_ib(i)
1404+ end if
14041405
14051406 ! check if in local domain
14061407 if (f_local_rank_owns_location(centroid)) then
@@ -1411,6 +1412,8 @@ contains
14111412 end do
14121413 num_ibs = output_idx
14131414 num_local_ibs = local_output_idx
1415+ $:GPU_UPDATE(device= ' [patch_ib]' )
1416+ call s_update_ib_lookup()
14141417
14151418 ! Broadcast newly- owned patches to all neighborhood neighbors
14161419 patch_bytes = storage_size(tmp_patch)/ 8
@@ -1497,6 +1500,7 @@ contains
14971500 end do
14981501
14991502 deallocate (send_buf, recv_bufs)
1503+ $:GPU_UPDATE(device= ' [patch_ib]' )
15001504 call s_update_ib_lookup()
15011505 end if
15021506#endif
@@ -1519,11 +1523,15 @@ contains
15191523
15201524 integer :: i
15211525
1526+ ib_gbl_idx_lookup = - 1
1527+ $:GPU_UPDATE(device= ' [ib_gbl_idx_lookup]' )
1528+
15221529 $:GPU_PARALLEL_LOOP(private= ' [i]' )
15231530 do i = 1 , num_ibs
15241531 ib_gbl_idx_lookup(patch_ib(i)%gbl_patch_id) = i
15251532 end do
15261533 $:END_GPU_PARALLEL_LOOP()
1534+
15271535 $:GPU_UPDATE(host= ' [ib_gbl_idx_lookup]' )
15281536
15291537 end subroutine s_update_ib_lookup
0 commit comments