Skip to content

Commit bfcc593

Browse files
image points computed on the GPU for x4 performance in that subroutine
1 parent f0085c9 commit bfcc593

1 file changed

Lines changed: 80 additions & 129 deletions

File tree

src/simulation/m_ibm.fpp

Lines changed: 80 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,9 @@ contains
131131
$:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]')
132132
call s_find_ghost_points(ghost_points, inner_points)
133133
call s_apply_levelset(ghost_points, num_gps)
134-
$:GPU_UPDATE(host='[ghost_points, inner_points]')
135134

136135
call s_compute_image_points(ghost_points)
137-
$:GPU_UPDATE(device='[ghost_points]')
138-
139136
call s_compute_interpolation_coeffs(ghost_points)
140-
$:GPU_UPDATE(device='[ghost_points]')
141137

142138
call nvtxEndRange
143139

@@ -458,6 +454,7 @@ contains
458454
integer :: dir
459455
integer :: index
460456
457+
$:GPU_PARALLEL_LOOP(private='[q,gp,i,j,k,physical_loc,patch_id,dist,norm,dim,bound,dir,index,temp_loc,s_cc]')
461458
do q = 1, num_gps
462459
gp = ghost_points_in(q)
463460
i = gp%loc(1)
@@ -507,6 +504,7 @@ contains
507504
do while ((temp_loc < s_cc(index) &
508505
.or. temp_loc > s_cc(index + 1)))
509506
index = index + dir
507+
#if !defined(MFC_OpenACC) && !defined(MFC_OpenMP)
510508
if (index < -buff_size .or. index > bound) then
511509
print *, "A required image point is not located in this computational domain."
512510
print *, "Ghost Point is located at ", [x_cc(i), y_cc(j), z_cc(k)], " while moving in dimension ", dim
@@ -518,6 +516,7 @@ contains
518516
print *, "A short term fix may include increasing buff_size further in m_helper_basic (currently set to a minimum of 10)"
519517
error stop "Ghost Point and Image Point on Different Processors"
520518
end if
519+
#endif
521520
end do
522521
ghost_points_in(q)%ip_grid(dim) = index
523522
if (ghost_points_in(q)%DB(dim) == -1) then
@@ -528,6 +527,7 @@ contains
528527
end if
529528
end do
530529
end do
530+
$:END_GPU_PARALLEL_LOOP()
531531
532532
end subroutine s_compute_image_points
533533
@@ -685,134 +685,90 @@ contains
685685
real(wp) :: buf
686686
real(wp), dimension(2, 2, 2) :: eta
687687
type(ghost_point) :: gp
688-
integer :: i !< Iterator variables
689-
integer :: i1, i2, j1, j2, k1, k2 !< Grid indexes
688+
integer :: q, i, j, k, ii, jj, kk !< Grid indexes and iterators
690689
integer :: patch_id
690+
logical is_cell_center
691691
692-
! 2D
693-
if (p <= 0) then
694-
do i = 1, num_gps
695-
gp = ghost_points_in(i)
696-
! Get the interpolation points
697-
i1 = gp%ip_grid(1); i2 = i1 + 1
698-
j1 = gp%ip_grid(2); j2 = j1 + 1
699-
700-
dist = 0._wp
701-
buf = 1._wp
702-
dist(1, 1, 1) = sqrt( &
703-
(x_cc(i1) - gp%ip_loc(1))**2 + &
704-
(y_cc(j1) - gp%ip_loc(2))**2)
705-
dist(2, 1, 1) = sqrt( &
706-
(x_cc(i2) - gp%ip_loc(1))**2 + &
707-
(y_cc(j1) - gp%ip_loc(2))**2)
708-
dist(1, 2, 1) = sqrt( &
709-
(x_cc(i1) - gp%ip_loc(1))**2 + &
710-
(y_cc(j2) - gp%ip_loc(2))**2)
711-
dist(2, 2, 1) = sqrt( &
712-
(x_cc(i2) - gp%ip_loc(1))**2 + &
713-
(y_cc(j2) - gp%ip_loc(2))**2)
714-
715-
interp_coeffs = 0._wp
716-
717-
if (dist(1, 1, 1) <= 1.e-16_wp) then
718-
interp_coeffs(1, 1, 1) = 1._wp
719-
else if (dist(2, 1, 1) <= 1.e-16_wp) then
720-
interp_coeffs(2, 1, 1) = 1._wp
721-
else if (dist(1, 2, 1) <= 1.e-16_wp) then
722-
interp_coeffs(1, 2, 1) = 1._wp
723-
else if (dist(2, 2, 1) <= 1.e-16_wp) then
724-
interp_coeffs(2, 2, 1) = 1._wp
725-
else
692+
$:GPU_PARALLEL_LOOP(private='[q,i,j,k,ii,jj,kk,dist,buf,gp,interp_coeffs,eta,alpha,patch_id,is_cell_center]')
693+
do q = 1, num_gps
694+
gp = ghost_points_in(q)
695+
! Get the interpolation points
696+
i = gp%ip_grid(1)
697+
j = gp%ip_grid(2)
698+
if (p /= 0) then
699+
k = gp%ip_grid(3)
700+
else
701+
k = 0;
702+
end if
703+
704+
! get the distance to a cell in each direction
705+
dist = 0._wp
706+
buf = 1._wp
707+
do ii = 0, 1
708+
do jj = 0, 1
709+
if (p == 0) then
710+
dist(1 + ii, 1 + jj, 1) = sqrt( &
711+
(x_cc(i + ii) - gp%ip_loc(1))**2 + &
712+
(y_cc(j + jj) - gp%ip_loc(2))**2)
713+
else
714+
do kk = 0, 1
715+
dist(1 + ii, 1 + jj, 1 + kk) = sqrt( &
716+
(x_cc(i + ii) - gp%ip_loc(1))**2 + &
717+
(y_cc(j + jj) - gp%ip_loc(2))**2 + &
718+
(z_cc(k + kk) - gp%ip_loc(3))**2)
719+
end do
720+
end if
721+
end do
722+
end do
723+
724+
! check if we are arbitrarily close to a cell center
725+
interp_coeffs = 0._wp
726+
is_cell_center = .false.
727+
check_is_cell_center: do ii = 0, 1
728+
do jj = 0, 1
729+
if (dist(ii + 1, jj + 1, 1) <= 1.e-16_wp) then
730+
interp_coeffs(ii + 1, jj + 1, 1) = 1._wp
731+
is_cell_center = .true.
732+
exit check_is_cell_center
733+
else
734+
if (p /= 0) then
735+
if (dist(ii + 1, jj + 1, 2) <= 1.e-16_wp) then
736+
interp_coeffs(ii + 1, jj + 1, 2) = 1._wp
737+
is_cell_center = .true.
738+
exit check_is_cell_center
739+
end if
740+
end if
741+
end if
742+
end do
743+
end do check_is_cell_center
744+
745+
if (.not. is_cell_center) then
746+
! if we are not arbitrarily close, interpolate
747+
alpha = 1._wp
748+
patch_id = gp%ib_patch_id
749+
if (ib_markers%sf(i, j, k) /= 0) alpha(1, 1, 1) = 0._wp
750+
if (ib_markers%sf(i + 1, j, k) /= 0) alpha(2, 1, 1) = 0._wp
751+
if (ib_markers%sf(i, j + 1, k) /= 0) alpha(1, 2, 1) = 0._wp
752+
if (ib_markers%sf(i + 1, j + 1, k) /= 0) alpha(2, 2, 1) = 0._wp
753+
754+
if (p == 0) then
726755
eta(:, :, 1) = 1._wp/dist(:, :, 1)**2
727-
alpha = 1._wp
728-
patch_id = gp%ib_patch_id
729-
if (ib_markers%sf(i1, j1, 0) /= 0) alpha(1, 1, 1) = 0._wp
730-
if (ib_markers%sf(i2, j1, 0) /= 0) alpha(2, 1, 1) = 0._wp
731-
if (ib_markers%sf(i1, j2, 0) /= 0) alpha(1, 2, 1) = 0._wp
732-
if (ib_markers%sf(i2, j2, 0) /= 0) alpha(2, 2, 1) = 0._wp
733756
buf = sum(alpha(:, :, 1)*eta(:, :, 1))
734757
if (buf > 0._wp) then
735758
interp_coeffs(:, :, 1) = alpha(:, :, 1)*eta(:, :, 1)/buf
736759
else
737760
buf = sum(eta(:, :, 1))
738761
interp_coeffs(:, :, 1) = eta(:, :, 1)/buf
739762
end if
740-
end if
741-
742-
ghost_points_in(i)%interp_coeffs = interp_coeffs
743-
end do
744-
745-
else
746-
do i = 1, num_gps
747-
gp = ghost_points_in(i)
748-
! Get the interpolation points
749-
i1 = gp%ip_grid(1); i2 = i1 + 1
750-
j1 = gp%ip_grid(2); j2 = j1 + 1
751-
k1 = gp%ip_grid(3); k2 = k1 + 1
752-
753-
! Get interpolation weights (Chaudhuri et al. 2011, JCP)
754-
dist(1, 1, 1) = sqrt( &
755-
(x_cc(i1) - gp%ip_loc(1))**2 + &
756-
(y_cc(j1) - gp%ip_loc(2))**2 + &
757-
(z_cc(k1) - gp%ip_loc(3))**2)
758-
dist(2, 1, 1) = sqrt( &
759-
(x_cc(i2) - gp%ip_loc(1))**2 + &
760-
(y_cc(j1) - gp%ip_loc(2))**2 + &
761-
(z_cc(k1) - gp%ip_loc(3))**2)
762-
dist(1, 2, 1) = sqrt( &
763-
(x_cc(i1) - gp%ip_loc(1))**2 + &
764-
(y_cc(j2) - gp%ip_loc(2))**2 + &
765-
(z_cc(k1) - gp%ip_loc(3))**2)
766-
dist(2, 2, 1) = sqrt( &
767-
(x_cc(i2) - gp%ip_loc(1))**2 + &
768-
(y_cc(j2) - gp%ip_loc(2))**2 + &
769-
(z_cc(k1) - gp%ip_loc(3))**2)
770-
dist(1, 1, 2) = sqrt( &
771-
(x_cc(i1) - gp%ip_loc(1))**2 + &
772-
(y_cc(j1) - gp%ip_loc(2))**2 + &
773-
(z_cc(k2) - gp%ip_loc(3))**2)
774-
dist(2, 1, 2) = sqrt( &
775-
(x_cc(i2) - gp%ip_loc(1))**2 + &
776-
(y_cc(j1) - gp%ip_loc(2))**2 + &
777-
(z_cc(k2) - gp%ip_loc(3))**2)
778-
dist(1, 2, 2) = sqrt( &
779-
(x_cc(i1) - gp%ip_loc(1))**2 + &
780-
(y_cc(j2) - gp%ip_loc(2))**2 + &
781-
(z_cc(k2) - gp%ip_loc(3))**2)
782-
dist(2, 2, 2) = sqrt( &
783-
(x_cc(i2) - gp%ip_loc(1))**2 + &
784-
(y_cc(j2) - gp%ip_loc(2))**2 + &
785-
(z_cc(k2) - gp%ip_loc(3))**2)
786-
interp_coeffs = 0._wp
787-
buf = 1._wp
788-
if (dist(1, 1, 1) <= 1.e-16_wp) then
789-
interp_coeffs(1, 1, 1) = 1._wp
790-
else if (dist(2, 1, 1) <= 1.e-16_wp) then
791-
interp_coeffs(2, 1, 1) = 1._wp
792-
else if (dist(1, 2, 1) <= 1.e-16_wp) then
793-
interp_coeffs(1, 2, 1) = 1._wp
794-
else if (dist(2, 2, 1) <= 1.e-16_wp) then
795-
interp_coeffs(2, 2, 1) = 1._wp
796-
else if (dist(1, 1, 2) <= 1.e-16_wp) then
797-
interp_coeffs(1, 1, 2) = 1._wp
798-
else if (dist(2, 1, 2) <= 1.e-16_wp) then
799-
interp_coeffs(2, 1, 2) = 1._wp
800-
else if (dist(1, 2, 2) <= 1.e-16_wp) then
801-
interp_coeffs(1, 2, 2) = 1._wp
802-
else if (dist(2, 2, 2) <= 1.e-16_wp) then
803-
interp_coeffs(2, 2, 2) = 1._wp
804763
else
764+
765+
if (ib_markers%sf(i, j, k + 1) /= 0) alpha(1, 1, 2) = 0._wp
766+
if (ib_markers%sf(i + 1, j, k + 1) /= 0) alpha(2, 1, 2) = 0._wp
767+
if (ib_markers%sf(i, j + 1, k + 1) /= 0) alpha(1, 2, 2) = 0._wp
768+
if (ib_markers%sf(i + 1, j + 1, k + 1) /= 0) alpha(2, 2, 2) = 0._wp
805769
eta = 1._wp/dist**2
806-
alpha = 1._wp
807-
if (ib_markers%sf(i1, j1, k1) /= 0) alpha(1, 1, 1) = 0._wp
808-
if (ib_markers%sf(i2, j1, k1) /= 0) alpha(2, 1, 1) = 0._wp
809-
if (ib_markers%sf(i1, j2, k1) /= 0) alpha(1, 2, 1) = 0._wp
810-
if (ib_markers%sf(i2, j2, k1) /= 0) alpha(2, 2, 1) = 0._wp
811-
if (ib_markers%sf(i1, j1, k2) /= 0) alpha(1, 1, 2) = 0._wp
812-
if (ib_markers%sf(i2, j1, k2) /= 0) alpha(2, 1, 2) = 0._wp
813-
if (ib_markers%sf(i1, j2, k2) /= 0) alpha(1, 2, 2) = 0._wp
814-
if (ib_markers%sf(i2, j2, k2) /= 0) alpha(2, 2, 2) = 0._wp
815770
buf = sum(alpha*eta)
771+
816772
if (buf > 0._wp) then
817773
interp_coeffs = alpha*eta/buf
818774
else
@@ -821,9 +777,11 @@ contains
821777
end if
822778
end if
823779
824-
ghost_points_in(i)%interp_coeffs = interp_coeffs
825-
end do
826-
end if
780+
end if
781+
782+
ghost_points_in(q)%interp_coeffs = interp_coeffs
783+
end do
784+
$:END_GPU_PARALLEL_LOOP()
827785
828786
end subroutine s_compute_interpolation_coeffs
829787
@@ -988,20 +946,13 @@ contains
988946
call nvtxStartRange("COMPUTE-GHOST-POINTS")
989947
! recalculate the ghost point locations and coefficients
990948
call s_find_num_ghost_points(num_gps, num_inner_gps)
991-
$:GPU_UPDATE(device='[num_gps, num_inner_gps]')
992-
993949
call s_find_ghost_points(ghost_points, inner_points)
994950
call nvtxEndRange
995951
996952
call nvtxStartRange("COMPUTE-IMAGE-POINTS")
997953
call s_apply_levelset(ghost_points, num_gps)
998-
$:GPU_UPDATE(host='[ghost_points, inner_points]')
999-
1000954
call s_compute_image_points(ghost_points)
1001-
$:GPU_UPDATE(device='[ghost_points]')
1002-
1003955
call s_compute_interpolation_coeffs(ghost_points)
1004-
$:GPU_UPDATE(device='[ghost_points]')
1005956
call nvtxEndRange
1006957
1007958
call nvtxEndRange

0 commit comments

Comments
 (0)