Skip to content

Commit 6edad1b

Browse files
Finished GPU implementation of all subroutines. Final check before regenerating golden file
1 parent 0fd5492 commit 6edad1b

3 files changed

Lines changed: 115 additions & 70 deletions

File tree

src/common/m_model.fpp

Lines changed: 112 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,7 @@ contains
734734
end do
735735

736736
! Check all edges and count repeated edges
737+
$:GPU_PARALLEL_LOOP(private='[i,j]', copy='[temp_boundary_v,edge_occurrence]', copyin='[threshold_edge_zero]', collapse=2)
737738
do i = 1, edge_count
738739
do j = 1, edge_count
739740
if (i /= j) then
@@ -746,11 +747,13 @@ contains
746747
(abs(temp_boundary_v(i, 2, 1) - temp_boundary_v(j, 1, 1)) < threshold_edge_zero) .and. &
747748
(abs(temp_boundary_v(i, 2, 2) - temp_boundary_v(j, 1, 2)) < threshold_edge_zero))) then
748749

750+
$:GPU_ATOMIC(atomic='update')
749751
edge_occurrence(i) = edge_occurrence(i) + 1
750752
end if
751753
end if
752754
end do
753755
end do
756+
$:END_GPU_PARALLEL_LOOP()
754757

755758
! Count the number of boundary vertices/edges
756759
boundary_vertex_count = 0
@@ -818,7 +821,7 @@ contains
818821

819822
end subroutine f_register_edge
820823

821-
!> This procedure check if interpolates is needed for 2D models.
824+
!> This procedure check if interpolation is needed for 2D models.
822825
!! @param boundary_v Temporary edge end vertex buffer
823826
!! @param spacing Dimensions of the current levelset cell
824827
subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate)
@@ -830,24 +833,29 @@ contains
830833

831834
real(wp) :: l1, cell_width !< Length of each boundary edge and cell width
832835
integer :: j !< Boundary edge index iterator
836+
integer :: interpolate_integer !< integer form of interpolate logical to support GPU checking
833837

834838
cell_width = minval(spacing(1:2))
835-
interpolate = .false.
839+
interpolate_integer = 0
836840

841+
$:GPU_PARALLEL_LOOP(private='[j,l1]', copyin='[boundary_v,cell_width,Ifactor_2D]', copy='[interpolate_integer]')
837842
do j = 1, boundary_edge_count
838-
l1 = sqrt((boundary_v(j, 2, 1) - boundary_v(j, 1, 1))**2 + &
839-
(boundary_v(j, 2, 2) - boundary_v(j, 1, 2))**2)
843+
if (interpolate_integer == 0) then
844+
l1 = sqrt((boundary_v(j, 2, 1) - boundary_v(j, 1, 1))**2 + &
845+
(boundary_v(j, 2, 2) - boundary_v(j, 1, 2))**2)
840846

841-
if ((l1 > cell_width)) then
842-
interpolate = .true.
843-
else
844-
interpolate = .false.
847+
if ((l1*Ifactor_2D > cell_width)) then
848+
interpolate_integer = 1
849+
end if
845850
end if
846851
end do
852+
$:END_GPU_PARALLEL_LOOP()
853+
854+
interpolate = (interpolate_integer == 1)
847855

848856
end subroutine f_check_interpolation_2D
849857

850-
!> This procedure check if interpolates is needed for 3D models.
858+
!> This procedure check if interpolation is needed for 3D models.
851859
!! @param model Model to search in.
852860
!! @param spacing Dimensions of the current levelset cell
853861
!! @param interpolate Logical output
@@ -858,37 +866,45 @@ contains
858866
real(wp), dimension(1:3), intent(in) :: spacing
859867
real(wp), dimension(1:3) :: edge_l
860868
real(wp) :: cell_width
861-
real(wp), dimension(1:3, 1:3) :: tri_v
862-
integer :: i, j !< Loop iterator
869+
real(wp), dimension(1:model%ntrs, 1:3, 1:3) :: tri_v
870+
integer :: i, j, interpolate_integer !< Loop iterator
863871

864872
cell_width = minval(spacing)
865-
interpolate = .false.
873+
interpolate_integer = 0
866874

875+
! load up the array of triangles for GPU packing
867876
do i = 1, model%ntrs
868877
do j = 1, 3
869-
tri_v(1, j) = model%trs(i)%v(1, j)
870-
tri_v(2, j) = model%trs(i)%v(2, j)
871-
tri_v(3, j) = model%trs(i)%v(3, j)
878+
tri_v(i, 1, j) = model%trs(i)%v(1, j)
879+
tri_v(i, 2, j) = model%trs(i)%v(2, j)
880+
tri_v(i, 3, j) = model%trs(i)%v(3, j)
872881
end do
882+
end do
873883

874-
edge_l(1) = sqrt((tri_v(1, 2) - tri_v(1, 1))**2 + &
875-
(tri_v(2, 2) - tri_v(2, 1))**2 + &
876-
(tri_v(3, 2) - tri_v(3, 1))**2)
877-
edge_l(2) = sqrt((tri_v(1, 3) - tri_v(1, 2))**2 + &
878-
(tri_v(2, 3) - tri_v(2, 2))**2 + &
879-
(tri_v(3, 3) - tri_v(3, 2))**2)
880-
edge_l(3) = sqrt((tri_v(1, 1) - tri_v(1, 3))**2 + &
881-
(tri_v(2, 1) - tri_v(2, 3))**2 + &
882-
(tri_v(3, 1) - tri_v(3, 3))**2)
883-
884-
if ((edge_l(1) > cell_width) .or. &
885-
(edge_l(2) > cell_width) .or. &
886-
(edge_l(3) > cell_width)) then
887-
interpolate = .true.
888-
else
889-
interpolate = .false.
884+
! compare the side of all
885+
$:GPU_PARALLEL_LOOP(private='[i,edge_l]', copyin='[cell_width,tri_v,Ifactor_3D]', copy='[interpolate_integer]')
886+
do i = 1, model%ntrs
887+
if (interpolate_integer == 0) then
888+
edge_l(1) = sqrt((tri_v(i, 1, 2) - tri_v(i, 1, 1))**2 + &
889+
(tri_v(i, 2, 2) - tri_v(i, 2, 1))**2 + &
890+
(tri_v(i, 3, 2) - tri_v(i, 3, 1))**2)
891+
edge_l(2) = sqrt((tri_v(i, 1, 3) - tri_v(i, 1, 2))**2 + &
892+
(tri_v(i, 2, 3) - tri_v(i, 2, 2))**2 + &
893+
(tri_v(i, 3, 3) - tri_v(i, 3, 2))**2)
894+
edge_l(3) = sqrt((tri_v(i, 1, 1) - tri_v(i, 1, 3))**2 + &
895+
(tri_v(i, 2, 1) - tri_v(i, 2, 3))**2 + &
896+
(tri_v(i, 3, 1) - tri_v(i, 3, 3))**2)
897+
898+
if ((edge_l(1) > cell_width*Ifactor_3D) .or. &
899+
(edge_l(2) > cell_width*Ifactor_3D) .or. &
900+
(edge_l(3) > cell_width*Ifactor_3D)) then
901+
interpolate_integer = 1
902+
end if
890903
end if
891904
end do
905+
$:END_GPU_PARALLEL_LOOP()
906+
907+
interpolate = (interpolate_integer == 1)
892908

893909
end subroutine f_check_interpolation_3D
894910

@@ -905,7 +921,7 @@ contains
905921
real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v
906922

907923
integer, intent(inout) :: total_vertices, boundary_edge_count
908-
integer :: num_segments
924+
integer :: num_segments, vertex_idx
909925
integer :: i, j
910926

911927
real(wp) :: edge_length, cell_width
@@ -917,6 +933,7 @@ contains
917933

918934
! First pass: Calculate the total number of vertices including interpolated ones
919935
total_vertices = 1
936+
$:GPU_PARALLEL_LOOP(private='[i,edge_x,edge_y,edge_length,num_segments]', copyin='[Ifactor_2D]', copy='[total_vertices]')
920937
do i = 1, boundary_edge_count
921938
! Get the coordinates of the two ends of the current edge
922939
edge_x(1) = boundary_v(i, 1, 1)
@@ -936,14 +953,17 @@ contains
936953
end if
937954

938955
! Each edge contributes num_segments vertices
956+
$:GPU_ATOMIC(atomic='update')
939957
total_vertices = total_vertices + num_segments
940958
end do
959+
$:END_GPU_PARALLEL_LOOP()
941960

942961
! Allocate memory for the new boundary vertices array
943962
allocate (interpolated_boundary_v(1:total_vertices, 1:3))
944963

945964
! Fill the new boundary vertices array with original and interpolated vertices
946965
total_vertices = 1
966+
$:GPU_PARALLEL_LOOP(private='[i,edge_x,edge_y,edge_length,num_segments,edge_del,vertex_idx]', copyin='[Ifactor_2D]', copy='[total_vertices,interpolated_boundary_v]')
947967
do i = 1, boundary_edge_count
948968
! Get the coordinates of the two ends of the current edge
949969
edge_x(1) = boundary_v(i, 1, 1)
@@ -972,18 +992,25 @@ contains
972992

973993
! Add original and interpolated vertices to the output array
974994
do j = 1, num_segments - 1
995+
$:GPU_ATOMIC(atomic='capture')
975996
total_vertices = total_vertices + 1
976-
interpolated_boundary_v(total_vertices, 1) = edge_x(1) + j*edge_del(1)
977-
interpolated_boundary_v(total_vertices, 2) = edge_y(1) + j*edge_del(2)
997+
vertex_idx = total_vertices
998+
$:END_GPU_ATOMIC_CAPTURE()
999+
interpolated_boundary_v(vertex_idx, 1) = edge_x(1) + j*edge_del(1)
1000+
interpolated_boundary_v(vertex_idx, 2) = edge_y(1) + j*edge_del(2)
9781001
end do
9791002

9801003
! Add the last vertex of the edge
9811004
if (num_segments > 0) then
1005+
$:GPU_ATOMIC(atomic='capture')
9821006
total_vertices = total_vertices + 1
983-
interpolated_boundary_v(total_vertices, 1) = edge_x(2)
984-
interpolated_boundary_v(total_vertices, 2) = edge_y(2)
1007+
vertex_idx = total_vertices
1008+
$:END_GPU_ATOMIC_CAPTURE()
1009+
interpolated_boundary_v(vertex_idx, 1) = edge_x(2)
1010+
interpolated_boundary_v(vertex_idx, 2) = edge_y(2)
9851011
end if
9861012
end do
1013+
$:END_GPU_PARALLEL_LOOP()
9871014

9881015
end subroutine f_interpolate_2D
9891016

@@ -998,12 +1025,16 @@ contains
9981025
real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v
9991026
integer, intent(out) :: total_vertices
10001027

1001-
integer :: i, j, k, num_triangles, num_segments, num_inner_vertices
1028+
integer :: i, j, k, num_triangles, num_segments, num_inner_vertices, vertex_idx
10021029
real(wp), dimension(1:3, 1:3) :: tri
10031030
real(wp), dimension(1:3) :: edge_del, cell_area
10041031
real(wp), dimension(1:3) :: bary_coord !< Barycentric coordinates
10051032
real(wp) :: edge_length, cell_width, cell_area_min, tri_area
10061033

1034+
! GPU-friendly flat arrays packed from model
1035+
real(wp), allocatable, dimension(:, :, :) :: flat_v ! (3, 3, num_triangles)
1036+
real(wp), allocatable, dimension(:, :) :: flat_n ! (3, num_triangles)
1037+
10071038
! Number of triangles in the model
10081039
num_triangles = model%ntrs
10091040
cell_width = minval(spacing)
@@ -1015,18 +1046,23 @@ contains
10151046
cell_area_min = minval(cell_area)
10161047
num_inner_vertices = 0
10171048

1049+
! Pack model into flat arrays on CPU
1050+
allocate (flat_v(1:3, 1:3, 1:num_triangles))
1051+
allocate (flat_n(1:3, 1:num_triangles))
1052+
do i = 1, num_triangles
1053+
flat_v(:, :, i) = model%trs(i)%v(:, :)
1054+
flat_n(:, i) = model%trs(i)%n(:)
1055+
end do
1056+
10181057
! Calculate the total number of vertices including interpolated ones
10191058
total_vertices = 0
1059+
$:GPU_PARALLEL_LOOP(private='[i,j,k,tri,edge_length,num_segments,num_inner_vertices]', copyin='[Ifactor_3D,Ifactor_bary_3D,flat_v,flat_n]', copy='[total_vertices]', collapse=1)
10201060
do i = 1, num_triangles
10211061
do j = 1, 3
10221062
! Get the coordinates of the two vertices of the current edge
1023-
tri(1, 1) = model%trs(i)%v(j, 1)
1024-
tri(1, 2) = model%trs(i)%v(j, 2)
1025-
tri(1, 3) = model%trs(i)%v(j, 3)
1063+
tri(1, :) = flat_v(j, :, i)
10261064
! Next vertex in the triangle (cyclic)
1027-
tri(2, 1) = model%trs(i)%v(mod(j, 3) + 1, 1)
1028-
tri(2, 2) = model%trs(i)%v(mod(j, 3) + 1, 2)
1029-
tri(2, 3) = model%trs(i)%v(mod(j, 3) + 1, 3)
1065+
tri(2, :) = flat_v(mod(j, 3) + 1, :, i)
10301066

10311067
! Compute the length of the edge
10321068
edge_length = sqrt((tri(2, 1) - tri(1, 1))**2 + &
@@ -1041,39 +1077,35 @@ contains
10411077
end if
10421078

10431079
! Each edge contributes num_segments vertices
1080+
$:GPU_ATOMIC(atomic='update')
10441081
total_vertices = total_vertices + num_segments + 1
10451082
end do
10461083

10471084
! Add vertices inside the triangle
10481085
do k = 1, 3
1049-
tri(k, 1) = model%trs(i)%v(k, 1)
1050-
tri(k, 2) = model%trs(i)%v(k, 2)
1051-
tri(k, 3) = model%trs(i)%v(k, 3)
1086+
tri(k, :) = flat_v(k, :, i)
10521087
end do
10531088
call f_tri_area(tri, tri_area)
10541089

10551090
if (tri_area > threshold_bary*cell_area_min) then
10561091
num_inner_vertices = Ifactor_bary_3D*ceiling(tri_area/cell_area_min)
1092+
$:GPU_ATOMIC(atomic='update')
10571093
total_vertices = total_vertices + num_inner_vertices
10581094
end if
10591095
end do
1096+
$:END_GPU_PARALLEL_LOOP()
10601097

10611098
! Allocate memory for the new boundary vertices array
10621099
allocate (interpolated_boundary_v(1:total_vertices, 1:3))
10631100

10641101
! Fill the new boundary vertices array with original and interpolated vertices
10651102
total_vertices = 0
1103+
$:GPU_PARALLEL_LOOP(private='[i,j,k,tri,edge_length,num_segments,num_inner_vertices,edge_del,bary_coord,vertex_idx]', copyin='[Ifactor_3D,Ifactor_bary_3D]', copy='[total_vertices]', collapse=1)
10661104
do i = 1, num_triangles
10671105
! Loop through the 3 edges of each triangle
10681106
do j = 1, 3
1069-
! Get the coordinates of the two vertices of the current edge
1070-
tri(1, 1) = model%trs(i)%v(j, 1)
1071-
tri(1, 2) = model%trs(i)%v(j, 2)
1072-
tri(1, 3) = model%trs(i)%v(j, 3)
1073-
! Next vertex in the triangle (cyclic)
1074-
tri(2, 1) = model%trs(i)%v(mod(j, 3) + 1, 1)
1075-
tri(2, 2) = model%trs(i)%v(mod(j, 3) + 1, 2)
1076-
tri(2, 3) = model%trs(i)%v(mod(j, 3) + 1, 3)
1107+
tri(1, :) = flat_v(j, :, i)
1108+
tri(2, :) = flat_v(mod(j, 3) + 1, :, i)
10771109

10781110
! Compute the length of the edge
10791111
edge_length = sqrt((tri(2, 1) - tri(1, 1))**2 + &
@@ -1093,47 +1125,60 @@ contains
10931125

10941126
! Add original and interpolated vertices to the output array
10951127
do k = 0, num_segments - 1
1128+
$:GPU_ATOMIC(atomic='capture')
10961129
total_vertices = total_vertices + 1
1097-
interpolated_boundary_v(total_vertices, 1) = tri(1, 1) + k*edge_del(1)
1098-
interpolated_boundary_v(total_vertices, 2) = tri(1, 2) + k*edge_del(2)
1099-
interpolated_boundary_v(total_vertices, 3) = tri(1, 3) + k*edge_del(3)
1130+
vertex_idx = total_vertices
1131+
$:END_GPU_ATOMIC_CAPTURE()
1132+
interpolated_boundary_v(vertex_idx, 1) = tri(1, 1) + k*edge_del(1)
1133+
interpolated_boundary_v(vertex_idx, 2) = tri(1, 2) + k*edge_del(2)
1134+
interpolated_boundary_v(vertex_idx, 3) = tri(1, 3) + k*edge_del(3)
11001135
end do
11011136

11021137
! Add the last vertex of the edge
1138+
$:GPU_ATOMIC(atomic='capture')
11031139
total_vertices = total_vertices + 1
1104-
interpolated_boundary_v(total_vertices, 1) = tri(2, 1)
1105-
interpolated_boundary_v(total_vertices, 2) = tri(2, 2)
1106-
interpolated_boundary_v(total_vertices, 3) = tri(2, 3)
1140+
vertex_idx = total_vertices
1141+
$:END_GPU_ATOMIC_CAPTURE()
1142+
interpolated_boundary_v(vertex_idx, 1) = tri(2, 1)
1143+
interpolated_boundary_v(vertex_idx, 2) = tri(2, 2)
1144+
interpolated_boundary_v(vertex_idx, 3) = tri(2, 3)
11071145
end do
11081146

11091147
! Interpolate verties that are not on edges
11101148
do k = 1, 3
1111-
tri(k, 1) = model%trs(i)%v(k, 1)
1112-
tri(k, 2) = model%trs(i)%v(k, 2)
1113-
tri(k, 3) = model%trs(i)%v(k, 3)
1149+
tri(k, :) = flat_v(k, :, i)
11141150
end do
11151151
call f_tri_area(tri, tri_area)
11161152

11171153
if (tri_area > threshold_bary*cell_area_min) then
11181154
num_inner_vertices = Ifactor_bary_3D*ceiling(tri_area/cell_area_min)
11191155
!Use barycentric coordinates for randomly distributed points
1156+
1157+
! Deterministic seed per triangle
1158+
rand_seed = i*73856093 + 19349663
1159+
if (rand_seed == 0) rand_seed = 1
1160+
11201161
do k = 1, num_inner_vertices
1121-
call random_number(bary_coord(1))
1122-
call random_number(bary_coord(2))
1162+
bary_coord(1) = f_model_random_number(rand_seed)
1163+
bary_coord(2) = f_model_random_number(rand_seed)
11231164

11241165
if ((bary_coord(1) + bary_coord(2)) >= 1._wp) then
11251166
bary_coord(1) = 1._wp - bary_coord(1)
11261167
bary_coord(2) = 1._wp - bary_coord(2)
11271168
end if
11281169
bary_coord(3) = 1._wp - bary_coord(1) - bary_coord(2)
11291170

1171+
$:GPU_ATOMIC(atomic='update')
11301172
total_vertices = total_vertices + 1
11311173
interpolated_boundary_v(total_vertices, 1) = dot_product(bary_coord, tri(1:3, 1))
11321174
interpolated_boundary_v(total_vertices, 2) = dot_product(bary_coord, tri(1:3, 2))
11331175
interpolated_boundary_v(total_vertices, 3) = dot_product(bary_coord, tri(1:3, 3))
11341176
end do
11351177
end if
11361178
end do
1179+
$:END_GPU_PARALLEL_LOOP()
1180+
1181+
deallocate (flat_v, flat_n)
11371182

11381183
end subroutine f_interpolate_3D
11391184

@@ -1344,6 +1389,8 @@ contains
13441389
real(wp), dimension(1:3) :: AB, AC, cross
13451390
integer :: i !< Loop iterator
13461391

1392+
$:GPU_ROUTINE(parallelism='[seq]')
1393+
13471394
do i = 1, 3
13481395
AB(i) = tri(2, i) - tri(1, i)
13491396
AC(i) = tri(3, i) - tri(1, i)

src/simulation/m_compute_levelset.fpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,6 @@ contains
722722

723723
! Assign the levelset_norm
724724
gp%levelset_norm = matmul(rotation, normals(1:3))
725-
726725
end if
727726

728727
end subroutine s_model_levelset

0 commit comments

Comments
 (0)