@@ -25,7 +25,7 @@ module m_ib_patches
2525
2626 implicit none
2727
28- private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart
28+ private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart, gpu_ntrs, gpu_trs_v, gpu_trs_n
2929
3030 real (wp) :: x_centroid, y_centroid, z_centroid
3131 real (wp) :: length_x, length_y, length_z
@@ -55,9 +55,12 @@ module m_ib_patches
5555
5656 character (len= 5 ) :: istr ! string to store int to string result for error checking
5757
58- type(t_model_array), allocatable, target :: models(:)
59- $:GPU_DECLARE(create= ' [models]' )
6058 !! array of STL models that can be allocated and then used in IB marker and levelset compute
59+ type(t_model_array), allocatable, target :: models(:)
60+ !! GPU- friendly flat arrays for STL model data
61+ integer , allocatable :: gpu_ntrs(:)
62+ real (wp), allocatable, dimension (:,:,:,:) :: gpu_trs_v
63+ real (wp), allocatable, dimension (:,:,:) :: gpu_trs_n
6164
6265contains
6366
@@ -140,7 +143,7 @@ contains
140143
141144 do patch_id = 1 , num_ibs
142145 if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12 ) then
143- @:ALLOCATE (models(patch_id)%model)
146+ allocate (models(patch_id)%model)
144147 print * , " * Reading model: " // trim (patch_ib(patch_id)%model_filepath)
145148
146149 model = f_model_read(patch_ib(patch_id)%model_filepath)
@@ -229,38 +232,52 @@ contains
229232 models(patch_id)%model = model
230233 models(patch_id)%boundary_v = boundary_v
231234 models(patch_id)%boundary_edge_count = boundary_edge_count
232- models(patch_id)%interpolate = interpolate
235+ if (interpolate) then
236+ models(patch_id)%interpolate = 1
237+ else
238+ models(patch_id)%interpolate = 0
239+ end if
233240 if (interpolate) then
234241 models(patch_id)%interpolated_boundary_v = interpolated_boundary_v
235242 models(patch_id)%total_vertices = total_vertices
236243 end if
237244
238- ! update allocatables
239- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)]' )
240- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)%model]' )
241- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)%model%trs]' )
242- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)%model%ntrs]' )
243- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)%boundary_v]' )
244- do i = 1 , models(patch_id)%model%ntrs
245- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)%model%trs(i)]' )
246- end do
247- if (interpolate) then
248- $:GPU_ENTER_DATA(copyin= ' [models(patch_id)%interpolated_boundary_v]' )
245+ end if
246+ end do
247+
248+ ! Pack and upload flat arrays for GPU (AFTER the loop)
249+ block
250+ integer :: pid, max_ntrs
251+
252+ max_ntrs = 0
253+ do pid = 1 , num_ibs
254+ if (allocated(models(pid)%model)) then
255+ call s_pack_model_for_gpu(models(pid))
256+ max_ntrs = max (max_ntrs, models(pid)%ntrs)
249257 end if
258+ end do
259+
260+ if (max_ntrs > 0 ) then
261+ allocate(gpu_ntrs(1 :num_ibs))
262+ allocate(gpu_trs_v(1 :3 , 1 :3 , 1 :max_ntrs, 1 :num_ibs))
263+ allocate(gpu_trs_n(1 :3 , 1 :max_ntrs, 1 :num_ibs))
250264
251- print * , " Entering GPU loop to read"
265+ gpu_ntrs = 0
266+ gpu_trs_v = 0._wp
267+ gpu_trs_n = 0._wp
252268
253- ! TODO :: REMOVE ME
254- $:GPU_PARALLEL_LOOP(private = ' [i] ' )
255- do i = 1 , 3
256- print * , " Num Triangles " , models(1 )%model% ntrs
257- ! print * , " interpolate " , models(i)%interpolate
258- ! print * , " Vertices " , models(i)%model%trs( 1 )%v
269+ do pid = 1 , num_ibs
270+ if (allocated(models(pid)%model)) then
271+ gpu_ntrs(pid) = models(pid)%ntrs
272+ gpu_trs_v(:, :, 1 : models(pid)% ntrs, pid) = models(pid)%trs_v
273+ gpu_trs_n(:, 1 :models(pid)%ntrs, pid) = models(pid)%trs_n
274+ end if
259275 end do
260- $:END_GPU_PARALLEL_LOOP()
276+
277+ $:GPU_ENTER_DATA(copyin= ' [gpu_ntrs, gpu_trs_v, gpu_trs_n]' )
261278
262279 end if
263- end do
280+ end block
264281
265282 end subroutine s_instantiate_STL_models
266283
@@ -986,23 +1003,25 @@ contains
9861003 integer, intent(in) :: patch_id
9871004 type(integer_field), intent(inout) :: ib_markers
9881005
989- integer :: i, j, k !< Generic loop iterators
1006+ integer :: i, j, k !< Generic loop iterators
1007+ integer :: spc
9901008
991- type(t_model), pointer :: model
992-
993- real(wp) :: eta
1009+ real(wp) :: eta, threshold
9941010 real(wp), dimension(1:3) :: point, local_point, offset
9951011 real(wp), dimension(1:3) :: center, xy_local
9961012 real(wp), dimension(1:3, 1:3) :: inverse_rotation
9971013
998- model => models(patch_id)%model
9991014 center = 0._wp
10001015 center(1) = patch_ib(patch_id)%x_centroid
10011016 center(2) = patch_ib(patch_id)%y_centroid
10021017 inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :)
10031018 offset(:) = patch_ib(patch_id)%centroid_offset(:)
1019+ spc = patch_ib(patch_id)%model_spc
1020+ threshold = patch_ib(patch_id)%model_threshold
10041021
1005- do i = 0 - gp_layers, m + gp_layers
1022+ $:GPU_PARALLEL_LOOP(private=' [i,j, xy_local, eta]' ,&
1023+ & copyin=' [patch_id,center,inverse_rotation, offset, spc, threshold]' , collapse=2)
1024+ do i = -gp_layers, m + gp_layers
10061025 do j = -gp_layers, n + gp_layers
10071026
10081027 xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp]
@@ -1013,16 +1032,30 @@ contains
10131032 xy_local = f_convert_cyl_to_cart(xy_local)
10141033 end if
10151034
1016- eta = f_model_is_inside(model, xy_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc)
1035+ if (i == 13 .and. j == 16) then
1036+ print *, "spc:", spc
1037+ print *, "ntrs:", gpu_ntrs(patch_id)
1038+ print *, "threshold:", threshold
1039+ print *, "dx:", dx(i), dy(j)
1040+ print *, "xy_local:", xy_local(1)
1041+
1042+ end if
1043+
1044+ eta = f_model_is_inside_flat(gpu_ntrs(patch_id), &
1045+ gpu_trs_v, gpu_trs_n, &
1046+ patch_id, &
1047+ xy_local, (/dx(i), dy(j), 0._wp/), &
1048+ spc)
10171049
10181050 ! Reading STL boundary vertices and compute the levelset and levelset_norm
1019- if (eta > patch_ib(patch_id)%model_threshold ) then
1051+ if (eta > threshold ) then
10201052 print *, eta, i, j
10211053 ib_markers%sf(i, j, 0) = patch_id
10221054 end if
10231055
10241056 end do
10251057 end do
1058+ $:END_GPU_PARALLEL_LOOP()
10261059
10271060 end subroutine s_ib_model
10281061
@@ -1035,10 +1068,9 @@ contains
10351068 type(integer_field), intent(inout) :: ib_markers
10361069
10371070 integer :: i, j, k !< Generic loop iterators
1071+ integer :: spc
10381072
1039- type(t_model), pointer :: model
1040-
1041- real(wp) :: eta
1073+ real(wp) :: eta, threshold
10421074 real(wp), dimension(1:3) :: point, local_point, offset
10431075 real(wp), dimension(1:3) :: center, xyz_local
10441076 real(wp), dimension(1:3, 1:3) :: inverse_rotation
@@ -1050,8 +1082,12 @@ contains
10501082 center(3) = patch_ib(patch_id)%z_centroid
10511083 inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :)
10521084 offset(:) = patch_ib(patch_id)%centroid_offset(:)
1085+ spc = patch_ib(patch_id)%model_spc
1086+ threshold = patch_ib(patch_id)%model_threshold
10531087
1054- do i = 0 - gp_layers, m + gp_layers
1088+ $:GPU_PARALLEL_LOOP(private=' [i,j,k, xyz_local, eta]' ,&
1089+ & copyin=' [patch_id,center,inverse_rotation, offset, spc, threshold]' , collapse=2)
1090+ do i = -gp_layers, m + gp_layers
10551091 do j = -gp_layers, n + gp_layers
10561092 do k = -gp_layers, p + gp_layers
10571093
@@ -1063,7 +1099,11 @@ contains
10631099 xyz_local = f_convert_cyl_to_cart(xyz_local)
10641100 end if
10651101
1066- eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), dz(k)/), patch_ib(patch_id)%model_spc)
1102+ eta = f_model_is_inside_flat(gpu_ntrs(patch_id), &
1103+ gpu_trs_v, gpu_trs_n, &
1104+ patch_id, &
1105+ xyz_local, (/dx(i), dy(j), dz(k)/), &
1106+ spc)
10671107
10681108 ! Reading STL boundary vertices and compute the levelset and levelset_norm
10691109 if (eta > patch_ib(patch_id)%model_threshold) then
0 commit comments