@@ -44,6 +44,11 @@ module m_ibm
4444#endif
4545 logical :: moving_immersed_boundary_flag
4646
47+ ! IB MPI buffers
48+ integer , allocatable :: send_ids(:), recv_ids(:)
49+ real (wp), allocatable :: send_ft(:,:), recv_ft(:,:)
50+ real (wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:)
51+
4752contains
4853
4954 !> Allocates memory for the variables in the IBM module
@@ -82,14 +87,23 @@ contains
8287 end do
8388 $:GPU_UPDATE(device= ' [patch_ib(1:num_ibs)]' )
8489
90+ ! allocate some arrays for MPI communication, if required by this simulation
91+ #ifdef MFC_MPI
92+ if (num_procs > 1 ) then
93+ @:ALLOCATE(send_ids(size (patch_ib)), send_ft(6 , size (patch_ib)))
94+ allocate (recv_forces_snap(size (patch_ib), 3 ), recv_torques_snap(size (patch_ib), 3 ), recv_ids(size (patch_ib)), &
95+ & recv_ft(6 , size (patch_ib)))
96+ end if
97+ #endif
98+
8599 ! GPU routines require updated cell centers
86100 $:GPU_UPDATE(device= ' [num_ibs, num_gbl_ibs, x_cc, y_cc, dx, dy, x_domain, y_domain, ib_bc_x%beg, ib_bc_y%beg]' )
87101 if (p /= 0 ) then
88102 $:GPU_UPDATE(device= ' [z_cc, dz, z_domain, ib_bc_z%beg]' )
89103 end if
90104 call s_update_ib_lookup()
91105
92- ! recompute the new ib_patch locations and broadcast them.
106+ ! recompute the new ib_patch locations
93107 ib_markers%sf = 0._wp
94108 $:GPU_UPDATE(device= ' [ib_markers%sf]' )
95109 call s_apply_ib_patches(ib_markers)
@@ -1048,20 +1062,6 @@ contains
10481062
10491063 end subroutine s_compute_ib_forces
10501064
1051- !> Finalize the IBM module
1052- impure subroutine s_finalize_ibm_module ()
1053-
1054- @:DEALLOCATE(ib_markers%sf)
1055- @:DEALLOCATE(ib_gbl_idx_lookup)
1056- if (allocated(airfoil_grid_u)) then
1057- @:DEALLOCATE(airfoil_grid_u)
1058- @:DEALLOCATE(airfoil_grid_l)
1059- end if
1060-
1061- if (collision_model > 0 ) call s_finalize_collisions_module()
1062-
1063- end subroutine s_finalize_ibm_module
1064-
10651065 !> Computes the center of mass for IB patch types where we are unable to determine their center of mass analytically.
10661066 !> These patches include things like NACA airfoils and STL models
10671067 subroutine s_compute_centroid_offset (ib_marker )
@@ -1252,17 +1252,12 @@ contains
12521252#ifdef MFC_MPI
12531253 integer :: i, j, k, pack_pos, unpack_pos, buf_size, ierr
12541254 integer :: send_neighbor, recv_neighbor, recv_count, tag
1255- real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:)
1256- character(len=1), allocatable :: send_buf(:), recv_buf(:)
1257- integer, allocatable :: send_ids(:), recv_ids(:)
1258- real(wp), allocatable :: send_ft(:,:), recv_ft(:,:)
1255+ character(len=1), allocatable :: ib_force_send_buf(:), ib_force_recv_buf(:)
12591256
12601257 if (num_procs == 1) return
12611258
12621259 buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib)
1263- allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3))
1264- @:ALLOCATE(send_ids(num_ibs), send_ft(6, num_ibs))
1265- allocate (recv_ids(size(patch_ib)), recv_ft(6, size(patch_ib)))
1260+ allocate (ib_force_send_buf(buf_size), ib_force_recv_buf(buf_size))
12661261
12671262 ! Accumulation phase: propagate contributions toward the high-index corner.
12681263 #:for X, ID in [(' x' , 1), (' y' , 2), (' z' , 3)]
@@ -1285,17 +1280,18 @@ contains
12851280 end do
12861281 $:END_GPU_PARALLEL_LOOP()
12871282 $:GPU_UPDATE(host=' [send_ids, send_ft]' )
1288- call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1289- call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1290- call MPI_PACK(send_ft, 6*num_ibs, mpi_p, send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1291- call MPI_SENDRECV(send_buf , pack_pos, MPI_PACKED, send_neighbor, tag, recv_buf , buf_size, MPI_PACKED , &
1292- & recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
1283+ call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1284+ call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1285+ call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1286+ call MPI_SENDRECV(ib_force_send_buf , pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf , buf_size, &
1287+ & MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
12931288
12941289 if (recv_neighbor /= MPI_PROC_NULL) then
12951290 unpack_pos = 0
1296- call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
1297- call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, MPI_COMM_WORLD, ierr)
1298- call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
1291+ call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
1292+ call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, &
1293+ & MPI_COMM_WORLD, ierr)
1294+ call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
12991295 $:GPU_PARALLEL_LOOP(private=' [i, j]' , copyin=' [recv_ft, recv_ids]' , copy=' [forces, torques, &
13001296 & recv_forces_snap, recv_torques_snap]' )
13011297 do i = 1, recv_count
@@ -1331,16 +1327,17 @@ contains
13311327 end do
13321328 $:END_GPU_PARALLEL_LOOP()
13331329 $:GPU_UPDATE(host=' [send_ids, send_ft]' )
1334- call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1335- call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1336- call MPI_PACK(send_ft, 6*num_ibs, mpi_p, send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1337- call MPI_SENDRECV(send_buf , pack_pos, MPI_PACKED, send_neighbor, tag, recv_buf , buf_size, MPI_PACKED , &
1338- & recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
1330+ call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1331+ call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1332+ call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf , buf_size, pack_pos, MPI_COMM_WORLD, ierr)
1333+ call MPI_SENDRECV(ib_force_send_buf , pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf , buf_size, &
1334+ & MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
13391335 if (recv_neighbor /= MPI_PROC_NULL) then
13401336 unpack_pos = 0
1341- call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
1342- call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, MPI_COMM_WORLD, ierr)
1343- call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
1337+ call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
1338+ call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, &
1339+ & MPI_COMM_WORLD, ierr)
1340+ call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
13441341 $:GPU_PARALLEL_LOOP(private=' [i, j]' , copyin=' [recv_ft, recv_ids]' , copy=' [forces, torques]' )
13451342 do i = 1, recv_count
13461343 call s_get_neighborhood_idx(recv_ids(i), j)
@@ -1355,9 +1352,6 @@ contains
13551352 end do
13561353 end if
13571354 #:endfor
1358-
1359- @:DEALLOCATE(send_ids, send_ft)
1360- deallocate (send_buf, recv_buf, recv_forces_snap, recv_torques_snap, recv_ids, recv_ft)
13611355#endif
13621356
13631357 end subroutine s_communicate_ib_forces
@@ -1536,4 +1530,25 @@ contains
15361530
15371531 end subroutine s_update_ib_lookup
15381532
1533+ !> Finalize the IBM module
1534+ impure subroutine s_finalize_ibm_module ()
1535+
1536+ @:DEALLOCATE(ib_markers%sf)
1537+ @:DEALLOCATE(ib_gbl_idx_lookup)
1538+ if (allocated(airfoil_grid_u)) then
1539+ @:DEALLOCATE(airfoil_grid_u)
1540+ @:DEALLOCATE(airfoil_grid_l)
1541+ end if
1542+
1543+ if (collision_model > 0 ) call s_finalize_collisions_module()
1544+
1545+ #ifdef MFC_MPI
1546+ if (num_procs > 1 ) then
1547+ @:DEALLOCATE(send_ids, send_ft)
1548+ deallocate (recv_forces_snap, recv_torques_snap, recv_ids, recv_ft)
1549+ end if
1550+ #endif
1551+
1552+ end subroutine s_finalize_ibm_module
1553+
15391554end module m_ibm
0 commit comments