@@ -734,6 +734,7 @@ contains
734734 end do
735735
736736 ! Check all edges and count repeated edges
737+ $:GPU_PARALLEL_LOOP(private= ' [i,j]' , copy= ' [temp_boundary_v,edge_occurrence]' , copyin= ' [threshold_edge_zero]' , collapse= 2 )
737738 do i = 1 , edge_count
738739 do j = 1 , edge_count
739740 if (i /= j) then
@@ -746,11 +747,13 @@ contains
746747 (abs (temp_boundary_v(i, 2 , 1 ) - temp_boundary_v(j, 1 , 1 )) < threshold_edge_zero) .and. &
747748 (abs (temp_boundary_v(i, 2 , 2 ) - temp_boundary_v(j, 1 , 2 )) < threshold_edge_zero))) then
748749
750+ $:GPU_ATOMIC(atomic= ' update' )
749751 edge_occurrence(i) = edge_occurrence(i) + 1
750752 end if
751753 end if
752754 end do
753755 end do
756+ $:END_GPU_PARALLEL_LOOP()
754757
755758 ! Count the number of boundary vertices/ edges
756759 boundary_vertex_count = 0
@@ -818,7 +821,7 @@ contains
818821
819822 end subroutine f_register_edge
820823
821- !> This procedure check if interpolates is needed for 2D models.
824+ !> This procedure check if interpolation is needed for 2D models.
822825 !! @param boundary_v Temporary edge end vertex buffer
823826 !! @param spacing Dimensions of the current levelset cell
824827 subroutine f_check_interpolation_2D (boundary_v , boundary_edge_count , spacing , interpolate )
@@ -830,24 +833,29 @@ contains
830833
831834 real (wp) :: l1, cell_width !< Length of each boundary edge and cell width
832835 integer :: j !< Boundary edge index iterator
836+ integer :: interpolate_integer !< integer form of interpolate logical to support GPU checking
833837
834838 cell_width = minval (spacing (1 :2 ))
835- interpolate = .false.
839+ interpolate_integer = 0
836840
841+ $:GPU_PARALLEL_LOOP(private= ' [j,l1]' , copyin= ' [boundary_v,cell_width,Ifactor_2D]' , copy= ' [interpolate_integer]' )
837842 do j = 1 , boundary_edge_count
838- l1 = sqrt ((boundary_v(j, 2 , 1 ) - boundary_v(j, 1 , 1 ))** 2 + &
839- (boundary_v(j, 2 , 2 ) - boundary_v(j, 1 , 2 ))** 2 )
843+ if (interpolate_integer == 0 ) then
844+ l1 = sqrt ((boundary_v(j, 2 , 1 ) - boundary_v(j, 1 , 1 ))** 2 + &
845+ (boundary_v(j, 2 , 2 ) - boundary_v(j, 1 , 2 ))** 2 )
840846
841- if ((l1 > cell_width)) then
842- interpolate = .true.
843- else
844- interpolate = .false.
847+ if ((l1* Ifactor_2D > cell_width)) then
848+ interpolate_integer = 1
849+ end if
845850 end if
846851 end do
852+ $:END_GPU_PARALLEL_LOOP()
853+
854+ interpolate = (interpolate_integer == 1 )
847855
848856 end subroutine f_check_interpolation_2D
849857
850- !> This procedure check if interpolates is needed for 3D models.
858+ !> This procedure check if interpolation is needed for 3D models.
851859 !! @param model Model to search in .
852860 !! @param spacing Dimensions of the current levelset cell
853861 !! @param interpolate Logical output
@@ -858,37 +866,45 @@ contains
858866 real (wp), dimension (1 :3 ), intent (in ) :: spacing
859867 real (wp), dimension (1 :3 ) :: edge_l
860868 real (wp) :: cell_width
861- real (wp), dimension (1 :3 , 1 :3 ) :: tri_v
862- integer :: i, j !< Loop iterator
869+ real (wp), dimension (1 :model%ntrs, 1 : 3 , 1 :3 ) :: tri_v
870+ integer :: i, j, interpolate_integer !< Loop iterator
863871
864872 cell_width = minval (spacing)
865- interpolate = .false.
873+ interpolate_integer = 0
866874
875+ ! load up the array of triangles for GPU packing
867876 do i = 1 , model%ntrs
868877 do j = 1 , 3
869- tri_v(1 , j) = model%trs(i)%v(1 , j)
870- tri_v(2 , j) = model%trs(i)%v(2 , j)
871- tri_v(3 , j) = model%trs(i)%v(3 , j)
878+ tri_v(i, 1 , j) = model%trs(i)%v(1 , j)
879+ tri_v(i, 2 , j) = model%trs(i)%v(2 , j)
880+ tri_v(i, 3 , j) = model%trs(i)%v(3 , j)
872881 end do
882+ end do
873883
874- edge_l(1 ) = sqrt ((tri_v(1 , 2 ) - tri_v(1 , 1 ))** 2 + &
875- (tri_v(2 , 2 ) - tri_v(2 , 1 ))** 2 + &
876- (tri_v(3 , 2 ) - tri_v(3 , 1 ))** 2 )
877- edge_l(2 ) = sqrt ((tri_v(1 , 3 ) - tri_v(1 , 2 ))** 2 + &
878- (tri_v(2 , 3 ) - tri_v(2 , 2 ))** 2 + &
879- (tri_v(3 , 3 ) - tri_v(3 , 2 ))** 2 )
880- edge_l(3 ) = sqrt ((tri_v(1 , 1 ) - tri_v(1 , 3 ))** 2 + &
881- (tri_v(2 , 1 ) - tri_v(2 , 3 ))** 2 + &
882- (tri_v(3 , 1 ) - tri_v(3 , 3 ))** 2 )
883-
884- if ((edge_l(1 ) > cell_width) .or. &
885- (edge_l(2 ) > cell_width) .or. &
886- (edge_l(3 ) > cell_width)) then
887- interpolate = .true.
888- else
889- interpolate = .false.
884+ ! compare the side of all
885+ $:GPU_PARALLEL_LOOP(private= ' [i,edge_l]' , copyin= ' [cell_width,tri_v,Ifactor_3D]' , copy= ' [interpolate_integer]' )
886+ do i = 1 , model%ntrs
887+ if (interpolate_integer == 0 ) then
888+ edge_l(1 ) = sqrt ((tri_v(i, 1 , 2 ) - tri_v(i, 1 , 1 ))** 2 + &
889+ (tri_v(i, 2 , 2 ) - tri_v(i, 2 , 1 ))** 2 + &
890+ (tri_v(i, 3 , 2 ) - tri_v(i, 3 , 1 ))** 2 )
891+ edge_l(2 ) = sqrt ((tri_v(i, 1 , 3 ) - tri_v(i, 1 , 2 ))** 2 + &
892+ (tri_v(i, 2 , 3 ) - tri_v(i, 2 , 2 ))** 2 + &
893+ (tri_v(i, 3 , 3 ) - tri_v(i, 3 , 2 ))** 2 )
894+ edge_l(3 ) = sqrt ((tri_v(i, 1 , 1 ) - tri_v(i, 1 , 3 ))** 2 + &
895+ (tri_v(i, 2 , 1 ) - tri_v(i, 2 , 3 ))** 2 + &
896+ (tri_v(i, 3 , 1 ) - tri_v(i, 3 , 3 ))** 2 )
897+
898+ if ((edge_l(1 ) > cell_width* Ifactor_3D) .or. &
899+ (edge_l(2 ) > cell_width* Ifactor_3D) .or. &
900+ (edge_l(3 ) > cell_width* Ifactor_3D)) then
901+ interpolate_integer = 1
902+ end if
890903 end if
891904 end do
905+ $:END_GPU_PARALLEL_LOOP()
906+
907+ interpolate = (interpolate_integer == 1 )
892908
893909 end subroutine f_check_interpolation_3D
894910
@@ -905,7 +921,7 @@ contains
905921 real (wp), allocatable, intent (inout ), dimension (:, :) :: interpolated_boundary_v
906922
907923 integer , intent (inout ) :: total_vertices, boundary_edge_count
908- integer :: num_segments
924+ integer :: num_segments, vertex_idx
909925 integer :: i, j
910926
911927 real (wp) :: edge_length, cell_width
@@ -917,6 +933,7 @@ contains
917933
918934 ! First pass: Calculate the total number of vertices including interpolated ones
919935 total_vertices = 1
936+ $:GPU_PARALLEL_LOOP(private= ' [i,edge_x,edge_y,edge_length,num_segments]' , copyin= ' [Ifactor_2D]' , copy= ' [total_vertices]' )
920937 do i = 1 , boundary_edge_count
921938 ! Get the coordinates of the two ends of the current edge
922939 edge_x(1 ) = boundary_v(i, 1 , 1 )
@@ -936,14 +953,17 @@ contains
936953 end if
937954
938955 ! Each edge contributes num_segments vertices
956+ $:GPU_ATOMIC(atomic= ' update' )
939957 total_vertices = total_vertices + num_segments
940958 end do
959+ $:END_GPU_PARALLEL_LOOP()
941960
942961 ! Allocate memory for the new boundary vertices array
943962 allocate (interpolated_boundary_v(1 :total_vertices, 1 :3 ))
944963
945964 ! Fill the new boundary vertices array with original and interpolated vertices
946965 total_vertices = 1
966+ $:GPU_PARALLEL_LOOP(private= ' [i,edge_x,edge_y,edge_length,num_segments,edge_del,vertex_idx]' , copyin= ' [Ifactor_2D]' , copy= ' [total_vertices,interpolated_boundary_v]' )
947967 do i = 1 , boundary_edge_count
948968 ! Get the coordinates of the two ends of the current edge
949969 edge_x(1 ) = boundary_v(i, 1 , 1 )
@@ -972,18 +992,25 @@ contains
972992
973993 ! Add original and interpolated vertices to the output array
974994 do j = 1 , num_segments - 1
995+ $:GPU_ATOMIC(atomic= ' capture' )
975996 total_vertices = total_vertices + 1
976- interpolated_boundary_v(total_vertices, 1 ) = edge_x(1 ) + j* edge_del(1 )
977- interpolated_boundary_v(total_vertices, 2 ) = edge_y(1 ) + j* edge_del(2 )
997+ vertex_idx = total_vertices
998+ $:END_GPU_ATOMIC_CAPTURE()
999+ interpolated_boundary_v(vertex_idx, 1 ) = edge_x(1 ) + j* edge_del(1 )
1000+ interpolated_boundary_v(vertex_idx, 2 ) = edge_y(1 ) + j* edge_del(2 )
9781001 end do
9791002
9801003 ! Add the last vertex of the edge
9811004 if (num_segments > 0 ) then
1005+ $:GPU_ATOMIC(atomic= ' capture' )
9821006 total_vertices = total_vertices + 1
983- interpolated_boundary_v(total_vertices, 1 ) = edge_x(2 )
984- interpolated_boundary_v(total_vertices, 2 ) = edge_y(2 )
1007+ vertex_idx = total_vertices
1008+ $:END_GPU_ATOMIC_CAPTURE()
1009+ interpolated_boundary_v(vertex_idx, 1 ) = edge_x(2 )
1010+ interpolated_boundary_v(vertex_idx, 2 ) = edge_y(2 )
9851011 end if
9861012 end do
1013+ $:END_GPU_PARALLEL_LOOP()
9871014
9881015 end subroutine f_interpolate_2D
9891016
@@ -998,12 +1025,16 @@ contains
9981025 real (wp), allocatable, intent (inout ), dimension (:, :) :: interpolated_boundary_v
9991026 integer , intent (out ) :: total_vertices
10001027
1001- integer :: i, j, k, num_triangles, num_segments, num_inner_vertices
1028+ integer :: i, j, k, num_triangles, num_segments, num_inner_vertices, vertex_idx
10021029 real (wp), dimension (1 :3 , 1 :3 ) :: tri
10031030 real (wp), dimension (1 :3 ) :: edge_del, cell_area
10041031 real (wp), dimension (1 :3 ) :: bary_coord !< Barycentric coordinates
10051032 real (wp) :: edge_length, cell_width, cell_area_min, tri_area
10061033
1034+ ! GPU- friendly flat arrays packed from model
1035+ real (wp), allocatable, dimension (:, :, :) :: flat_v ! (3 , 3 , num_triangles)
1036+ real (wp), allocatable, dimension (:, :) :: flat_n ! (3 , num_triangles)
1037+
10071038 ! Number of triangles in the model
10081039 num_triangles = model%ntrs
10091040 cell_width = minval (spacing)
@@ -1015,18 +1046,23 @@ contains
10151046 cell_area_min = minval (cell_area)
10161047 num_inner_vertices = 0
10171048
1049+ ! Pack model into flat arrays on CPU
1050+ allocate (flat_v(1 :3 , 1 :3 , 1 :num_triangles))
1051+ allocate (flat_n(1 :3 , 1 :num_triangles))
1052+ do i = 1 , num_triangles
1053+ flat_v(:, :, i) = model%trs(i)%v(:, :)
1054+ flat_n(:, i) = model%trs(i)%n(:)
1055+ end do
1056+
10181057 ! Calculate the total number of vertices including interpolated ones
10191058 total_vertices = 0
1059+ $:GPU_PARALLEL_LOOP(private= ' [i,j,k,tri,edge_length,num_segments,num_inner_vertices]' , copyin= ' [Ifactor_3D,Ifactor_bary_3D,flat_v,flat_n]' , copy= ' [total_vertices]' , collapse= 1 )
10201060 do i = 1 , num_triangles
10211061 do j = 1 , 3
10221062 ! Get the coordinates of the two vertices of the current edge
1023- tri(1 , 1 ) = model%trs(i)%v(j, 1 )
1024- tri(1 , 2 ) = model%trs(i)%v(j, 2 )
1025- tri(1 , 3 ) = model%trs(i)%v(j, 3 )
1063+ tri(1 , :) = flat_v(j, :, i)
10261064 ! Next vertex in the triangle (cyclic)
1027- tri(2 , 1 ) = model%trs(i)%v(mod (j, 3 ) + 1 , 1 )
1028- tri(2 , 2 ) = model%trs(i)%v(mod (j, 3 ) + 1 , 2 )
1029- tri(2 , 3 ) = model%trs(i)%v(mod (j, 3 ) + 1 , 3 )
1065+ tri(2 , :) = flat_v(mod (j, 3 ) + 1 , :, i)
10301066
10311067 ! Compute the length of the edge
10321068 edge_length = sqrt ((tri(2 , 1 ) - tri(1 , 1 ))** 2 + &
@@ -1041,39 +1077,35 @@ contains
10411077 end if
10421078
10431079 ! Each edge contributes num_segments vertices
1080+ $:GPU_ATOMIC(atomic= ' update' )
10441081 total_vertices = total_vertices + num_segments + 1
10451082 end do
10461083
10471084 ! Add vertices inside the triangle
10481085 do k = 1 , 3
1049- tri(k, 1 ) = model%trs(i)%v(k, 1 )
1050- tri(k, 2 ) = model%trs(i)%v(k, 2 )
1051- tri(k, 3 ) = model%trs(i)%v(k, 3 )
1086+ tri(k, :) = flat_v(k, :, i)
10521087 end do
10531088 call f_tri_area(tri, tri_area)
10541089
10551090 if (tri_area > threshold_bary* cell_area_min) then
10561091 num_inner_vertices = Ifactor_bary_3D* ceiling(tri_area/ cell_area_min)
1092+ $:GPU_ATOMIC(atomic= ' update' )
10571093 total_vertices = total_vertices + num_inner_vertices
10581094 end if
10591095 end do
1096+ $:END_GPU_PARALLEL_LOOP()
10601097
10611098 ! Allocate memory for the new boundary vertices array
10621099 allocate (interpolated_boundary_v(1 :total_vertices, 1 :3 ))
10631100
10641101 ! Fill the new boundary vertices array with original and interpolated vertices
10651102 total_vertices = 0
1103+ $:GPU_PARALLEL_LOOP(private= ' [i,j,k,tri,edge_length,num_segments,num_inner_vertices,edge_del,bary_coord,vertex_idx]' , copyin= ' [Ifactor_3D,Ifactor_bary_3D]' , copy= ' [total_vertices]' , collapse= 1 )
10661104 do i = 1 , num_triangles
10671105 ! Loop through the 3 edges of each triangle
10681106 do j = 1 , 3
1069- ! Get the coordinates of the two vertices of the current edge
1070- tri(1 , 1 ) = model%trs(i)%v(j, 1 )
1071- tri(1 , 2 ) = model%trs(i)%v(j, 2 )
1072- tri(1 , 3 ) = model%trs(i)%v(j, 3 )
1073- ! Next vertex in the triangle (cyclic)
1074- tri(2 , 1 ) = model%trs(i)%v(mod (j, 3 ) + 1 , 1 )
1075- tri(2 , 2 ) = model%trs(i)%v(mod (j, 3 ) + 1 , 2 )
1076- tri(2 , 3 ) = model%trs(i)%v(mod (j, 3 ) + 1 , 3 )
1107+ tri(1 , :) = flat_v(j, :, i)
1108+ tri(2 , :) = flat_v(mod (j, 3 ) + 1 , :, i)
10771109
10781110 ! Compute the length of the edge
10791111 edge_length = sqrt ((tri(2 , 1 ) - tri(1 , 1 ))** 2 + &
@@ -1093,47 +1125,60 @@ contains
10931125
10941126 ! Add original and interpolated vertices to the output array
10951127 do k = 0 , num_segments - 1
1128+ $:GPU_ATOMIC(atomic= ' capture' )
10961129 total_vertices = total_vertices + 1
1097- interpolated_boundary_v(total_vertices, 1 ) = tri(1 , 1 ) + k* edge_del(1 )
1098- interpolated_boundary_v(total_vertices, 2 ) = tri(1 , 2 ) + k* edge_del(2 )
1099- interpolated_boundary_v(total_vertices, 3 ) = tri(1 , 3 ) + k* edge_del(3 )
1130+ vertex_idx = total_vertices
1131+ $:END_GPU_ATOMIC_CAPTURE()
1132+ interpolated_boundary_v(vertex_idx, 1 ) = tri(1 , 1 ) + k* edge_del(1 )
1133+ interpolated_boundary_v(vertex_idx, 2 ) = tri(1 , 2 ) + k* edge_del(2 )
1134+ interpolated_boundary_v(vertex_idx, 3 ) = tri(1 , 3 ) + k* edge_del(3 )
11001135 end do
11011136
11021137 ! Add the last vertex of the edge
1138+ $:GPU_ATOMIC(atomic= ' capture' )
11031139 total_vertices = total_vertices + 1
1104- interpolated_boundary_v(total_vertices, 1 ) = tri(2 , 1 )
1105- interpolated_boundary_v(total_vertices, 2 ) = tri(2 , 2 )
1106- interpolated_boundary_v(total_vertices, 3 ) = tri(2 , 3 )
1140+ vertex_idx = total_vertices
1141+ $:END_GPU_ATOMIC_CAPTURE()
1142+ interpolated_boundary_v(vertex_idx, 1 ) = tri(2 , 1 )
1143+ interpolated_boundary_v(vertex_idx, 2 ) = tri(2 , 2 )
1144+ interpolated_boundary_v(vertex_idx, 3 ) = tri(2 , 3 )
11071145 end do
11081146
11091147 ! Interpolate verties that are not on edges
11101148 do k = 1 , 3
1111- tri(k, 1 ) = model%trs(i)%v(k, 1 )
1112- tri(k, 2 ) = model%trs(i)%v(k, 2 )
1113- tri(k, 3 ) = model%trs(i)%v(k, 3 )
1149+ tri(k, :) = flat_v(k, :, i)
11141150 end do
11151151 call f_tri_area(tri, tri_area)
11161152
11171153 if (tri_area > threshold_bary* cell_area_min) then
11181154 num_inner_vertices = Ifactor_bary_3D* ceiling(tri_area/ cell_area_min)
11191155 !Use barycentric coordinates for randomly distributed points
1156+
1157+ ! Deterministic seed per triangle
1158+ rand_seed = i* 73856093 + 19349663
1159+ if (rand_seed == 0 ) rand_seed = 1
1160+
11201161 do k = 1 , num_inner_vertices
1121- call random_number ( bary_coord(1 ))
1122- call random_number ( bary_coord(2 ))
1162+ bary_coord(1 ) = f_model_random_number(rand_seed )
1163+ bary_coord(2 ) = f_model_random_number(rand_seed )
11231164
11241165 if ((bary_coord(1 ) + bary_coord(2 )) >= 1._wp ) then
11251166 bary_coord(1 ) = 1._wp - bary_coord(1 )
11261167 bary_coord(2 ) = 1._wp - bary_coord(2 )
11271168 end if
11281169 bary_coord(3 ) = 1._wp - bary_coord(1 ) - bary_coord(2 )
11291170
1171+ $:GPU_ATOMIC(atomic= ' update' )
11301172 total_vertices = total_vertices + 1
11311173 interpolated_boundary_v(total_vertices, 1 ) = dot_product (bary_coord, tri(1 :3 , 1 ))
11321174 interpolated_boundary_v(total_vertices, 2 ) = dot_product (bary_coord, tri(1 :3 , 2 ))
11331175 interpolated_boundary_v(total_vertices, 3 ) = dot_product (bary_coord, tri(1 :3 , 3 ))
11341176 end do
11351177 end if
11361178 end do
1179+ $:END_GPU_PARALLEL_LOOP()
1180+
1181+ deallocate (flat_v, flat_n)
11371182
11381183 end subroutine f_interpolate_3D
11391184
@@ -1344,6 +1389,8 @@ contains
13441389 real (wp), dimension (1 :3 ) :: AB, AC, cross
13451390 integer :: i !< Loop iterator
13461391
1392+ $:GPU_ROUTINE(parallelism= ' [seq]' )
1393+
13471394 do i = 1 , 3
13481395 AB(i) = tri(2 , i) - tri(1 , i)
13491396 AC(i) = tri(3 , i) - tri(1 , i)
0 commit comments