Skip to content

Commit 0f9eb30

Browse files
author
Daniel Vickers
committed
fixed incorrect array sizing
1 parent a631916 commit 0f9eb30

1 file changed

Lines changed: 29 additions & 21 deletions

File tree

src/simulation/m_ibm.fpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,6 @@ contains
8787
if (p /= 0) then
8888
$:GPU_UPDATE(device='[z_cc, dz, z_domain, ib_bc_z%beg]')
8989
end if
90-
ib_gbl_idx_lookup = -1
91-
$:GPU_UPDATE(device='[ib_gbl_idx_lookup]')
9290
call s_update_ib_lookup()
9391

9492
! recompute the new ib_patch locations and broadcast them.
@@ -1263,8 +1261,8 @@ contains
12631261
12641262
buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib)
12651263
allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3))
1266-
@:ALLOCATE(send_ids(num_ibs), send_ft(num_ibs, 6))
1267-
allocate (recv_ids(size(patch_ib)), recv_ft(size(patch_ib), 6))
1264+
@:ALLOCATE(send_ids(num_ibs), send_ft(6, num_ibs))
1265+
allocate (recv_ids(size(patch_ib)), recv_ft(6, size(patch_ib)))
12681266
12691267
! Accumulation phase: propagate contributions toward the high-index corner.
12701268
#:for X, ID in [('x', 1), ('y', 2), ('z', 3)]
@@ -1282,8 +1280,8 @@ contains
12821280
$:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]')
12831281
do i = 1, num_ibs
12841282
send_ids(i) = patch_ib(i)%gbl_patch_id
1285-
send_ft(i,1:3) = forces(i,:)
1286-
send_ft(i,4:6) = torques(i,:)
1283+
send_ft(1:3,i) = forces(i,:)
1284+
send_ft(4:6,i) = torques(i,:)
12871285
end do
12881286
$:END_GPU_PARALLEL_LOOP()
12891287
$:GPU_UPDATE(host='[send_ids, send_ft]')
@@ -1298,19 +1296,19 @@ contains
12981296
call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
12991297
call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, MPI_COMM_WORLD, ierr)
13001298
call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
1301-
! $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques,
1302-
! recv_forces_snap, recv_torques_snap]')
1299+
$:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques, &
1300+
& recv_forces_snap, recv_torques_snap]')
13031301
do i = 1, recv_count
13041302
call s_get_neighborhood_idx(recv_ids(i), j)
13051303
if (j > 0) then
13061304
! add forces and subtract recv_snap prevent double-counting
1307-
forces(j,:) = forces(j,:) + recv_ft(i,1:3) - recv_forces_snap(j,:)
1308-
torques(j,:) = torques(j,:) + recv_ft(i,4:6) - recv_torques_snap(j,:)
1309-
recv_forces_snap(j,:) = recv_ft(i,1:3)
1310-
recv_torques_snap(j,:) = recv_ft(i,4:6)
1305+
forces(j,:) = forces(j,:) + recv_ft(1:3,i) - recv_forces_snap(j,:)
1306+
torques(j,:) = torques(j,:) + recv_ft(4:6,i) - recv_torques_snap(j,:)
1307+
recv_forces_snap(j,:) = recv_ft(1:3,i)
1308+
recv_torques_snap(j,:) = recv_ft(4:6,i)
13111309
end if
13121310
end do
1313-
! $:END_GPU_PARALLEL_LOOP()
1311+
$:END_GPU_PARALLEL_LOOP()
13141312
end if
13151313
tag = tag + 2
13161314
end do
@@ -1325,13 +1323,14 @@ contains
13251323
13261324
do k = 1, 2*ib_neighborhood_radius
13271325
pack_pos = 0
1328-
$:GPU_PARALLEL_LOOP(private='[i]', copy='[send_ids, send_ft]', copyin='[forces, torques]')
1326+
$:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]')
13291327
do i = 1, num_ibs
13301328
send_ids(i) = patch_ib(i)%gbl_patch_id
1331-
send_ft(i,1:3) = forces(i,:)
1332-
send_ft(i,4:6) = torques(i,:)
1329+
send_ft(1:3,i) = forces(i,:)
1330+
send_ft(4:6,i) = torques(i,:)
13331331
end do
13341332
$:END_GPU_PARALLEL_LOOP()
1333+
$:GPU_UPDATE(host='[send_ids, send_ft]')
13351334
call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
13361335
call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
13371336
call MPI_PACK(send_ft, 6*num_ibs, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
@@ -1342,15 +1341,15 @@ contains
13421341
call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
13431342
call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, MPI_COMM_WORLD, ierr)
13441343
call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
1345-
! $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques]')
1344+
$:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques]')
13461345
do i = 1, recv_count
13471346
call s_get_neighborhood_idx(recv_ids(i), j)
13481347
if (j > 0) then
1349-
forces(j,:) = recv_ft(i,1:3)
1350-
torques(j,:) = recv_ft(i,4:6)
1348+
forces(j,:) = recv_ft(1:3,i)
1349+
torques(j,:) = recv_ft(4:6,i)
13511350
end if
13521351
end do
1353-
! $:END_GPU_PARALLEL_LOOP()
1352+
$:END_GPU_PARALLEL_LOOP()
13541353
end if
13551354
tag = tag + 2
13561355
end do
@@ -1400,7 +1399,9 @@ contains
14001399
! delete if not in neighborhood
14011400
if (f_neighborhood_ranks_own_location(centroid)) then
14021401
output_idx = output_idx + 1
1403-
if (i /= output_idx) patch_ib(output_idx) = patch_ib(i)
1402+
if (i /= output_idx) then
1403+
patch_ib(output_idx) = patch_ib(i)
1404+
end if
14041405

14051406
! check if in local domain
14061407
if (f_local_rank_owns_location(centroid)) then
@@ -1411,6 +1412,8 @@ contains
14111412
end do
14121413
num_ibs = output_idx
14131414
num_local_ibs = local_output_idx
1415+
$:GPU_UPDATE(device='[patch_ib]')
1416+
call s_update_ib_lookup()
14141417

14151418
! Broadcast newly-owned patches to all neighborhood neighbors
14161419
patch_bytes = storage_size(tmp_patch)/8
@@ -1497,6 +1500,7 @@ contains
14971500
end do
14981501

14991502
deallocate (send_buf, recv_bufs)
1503+
$:GPU_UPDATE(device='[patch_ib]')
15001504
call s_update_ib_lookup()
15011505
end if
15021506
#endif
@@ -1519,11 +1523,15 @@ contains
15191523

15201524
integer :: i
15211525

1526+
ib_gbl_idx_lookup = -1
1527+
$:GPU_UPDATE(device='[ib_gbl_idx_lookup]')
1528+
15221529
$:GPU_PARALLEL_LOOP(private='[i]')
15231530
do i = 1, num_ibs
15241531
ib_gbl_idx_lookup(patch_ib(i)%gbl_patch_id) = i
15251532
end do
15261533
$:END_GPU_PARALLEL_LOOP()
1534+
15271535
$:GPU_UPDATE(host='[ib_gbl_idx_lookup]')
15281536

15291537
end subroutine s_update_ib_lookup

0 commit comments

Comments
 (0)