Skip to content

Commit 5e58655

Browse files
STLs appear to be working on the GPU with NVHPC!
1 parent a1769d0 commit 5e58655

4 files changed

Lines changed: 134 additions & 36 deletions

File tree

src/common/m_derived_types.fpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,15 +184,15 @@ module m_derived_types
184184
end type t_model
185185

186186
type :: t_model_array
187-
! --- Original CPU-side fields (unchanged) ---
187+
! Original CPU-side fields (unchanged)
188188
type(t_model), allocatable :: model
189189
real(wp), allocatable, dimension(:, :, :) :: boundary_v
190190
real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v
191191
integer :: boundary_edge_count
192192
integer :: total_vertices
193193
integer :: interpolate
194194

195-
! --- GPU-friendly flattened arrays ---
195+
! GPU-friendly flattened arrays
196196
integer :: ntrs ! copy of model%ntrs
197197
real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices
198198
real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals

src/common/m_model.fpp

Lines changed: 117 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,27 @@ module m_model
1717

1818
private
1919

20-
public :: s_instantiate_STL_models, f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, gpu_trs_v, gpu_trs_n
20+
public :: s_instantiate_STL_models, f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, &
21+
gpu_trs_v, gpu_trs_n, gpu_boundary_v, gpu_interpolated_boundary_v, gpu_interpolate, gpu_boundary_edge_count, &
22+
gpu_total_vertices
2123

2224
! Subroutines for STL immersed boundaries
2325
public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, &
2426
f_check_interpolation_3D, f_interpolate_2D, f_interpolate_3D, &
2527
f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, &
26-
f_model_is_inside_flat
28+
f_model_is_inside_flat, f_distance_normals_3d_flat
2729

2830
!! array of STL models that can be allocated and then used in IB marker and levelset compute
2931
type(t_model_array), allocatable, target :: models(:)
3032
!! GPU-friendly flat arrays for STL model data
3133
integer, allocatable :: gpu_ntrs(:)
3234
real(wp), allocatable, dimension(:, :, :, :) :: gpu_trs_v
3335
real(wp), allocatable, dimension(:, :, :) :: gpu_trs_n
34-
real(wp), allocatable, dimension(:, :, :) :: gpu_boundary_v
36+
real(wp), allocatable, dimension(:, :, :, :) :: gpu_boundary_v
3537
real(wp), allocatable, dimension(:, :, :) :: gpu_interpolated_boundary_v
38+
integer, allocatable :: gpu_interpolate(:)
39+
integer, allocatable :: gpu_boundary_edge_count(:)
40+
integer, allocatable :: gpu_total_vertices(:)
3641

3742
contains
3843

@@ -174,34 +179,80 @@ contains
174179
! Pack and upload flat arrays for GPU (AFTER the loop)
175180
block
176181
integer :: pid, max_ntrs
182+
integer :: max_bv1, max_bv2, max_bv3, max_iv1, max_iv2
177183

178184
max_ntrs = 0
185+
max_bv1 = 0; max_bv2 = 0; max_bv3 = 0
186+
max_iv1 = 0; max_iv2 = 0
187+
179188
do pid = 1, num_ibs
180189
if (allocated(models(pid)%model)) then
181190
call s_pack_model_for_gpu(models(pid))
182191
max_ntrs = max(max_ntrs, models(pid)%ntrs)
183192
end if
193+
if (allocated(models(pid)%boundary_v)) then
194+
max_bv1 = max(max_bv1, size(models(pid)%boundary_v, 1))
195+
max_bv2 = max(max_bv2, size(models(pid)%boundary_v, 2))
196+
max_bv3 = max(max_bv3, size(models(pid)%boundary_v, 3))
197+
end if
198+
if (allocated(models(pid)%interpolated_boundary_v)) then
199+
max_iv1 = max(max_iv1, size(models(pid)%interpolated_boundary_v, 1))
200+
max_iv2 = max(max_iv2, size(models(pid)%interpolated_boundary_v, 2))
201+
end if
184202
end do
185203

186204
if (max_ntrs > 0) then
187205
allocate (gpu_ntrs(1:num_ibs))
188206
allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs))
189207
allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs))
208+
allocate (gpu_interpolate(1:num_ibs))
209+
allocate (gpu_boundary_edge_count(1:num_ibs))
210+
allocate (gpu_total_vertices(1:num_ibs))
190211

191212
gpu_ntrs = 0
192213
gpu_trs_v = 0._wp
193214
gpu_trs_n = 0._wp
215+
gpu_interpolate = 0
216+
gpu_boundary_edge_count = 0
217+
gpu_total_vertices = 0
218+
219+
if (max_bv1 > 0) then
220+
allocate (gpu_boundary_v(1:max_bv1, 1:max_bv2, 1:max_bv3, 1:num_ibs))
221+
gpu_boundary_v = 0._wp
222+
end if
223+
224+
if (max_iv1 > 0) then
225+
allocate (gpu_interpolated_boundary_v(1:max_iv1, 1:max_iv2, 1:num_ibs))
226+
gpu_interpolated_boundary_v = 0._wp
227+
end if
194228

195229
do pid = 1, num_ibs
196230
if (allocated(models(pid)%model)) then
197231
gpu_ntrs(pid) = models(pid)%ntrs
198232
gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v
199233
gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n
234+
gpu_interpolate(pid) = models(pid)%interpolate
235+
gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count
236+
gpu_total_vertices(pid) = models(pid)%total_vertices
237+
end if
238+
if (allocated(models(pid)%boundary_v)) then
239+
gpu_boundary_v(1:size(models(pid)%boundary_v, 1), &
240+
1:size(models(pid)%boundary_v, 2), &
241+
1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v
242+
end if
243+
if (allocated(models(pid)%interpolated_boundary_v)) then
244+
gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), &
245+
1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v
200246
end if
201247
end do
202248

203-
$:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n]')
204-
249+
$:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_interpolate, gpu_boundary_edge_count, gpu_total_vertices]')
250+
if (allocated(gpu_boundary_v)) then
251+
$:GPU_ENTER_DATA(copyin='[gpu_boundary_v]')
252+
end if
253+
if (allocated(gpu_interpolated_boundary_v)) then
254+
$:GPU_ENTER_DATA(copyin='[gpu_interpolated_boundary_v]')
255+
end if
205256
end if
206257
end block
207258

@@ -731,7 +782,7 @@ contains
731782
end if
732783
end do
733784

734-
! if thje ray hits an odd number of triangles on its way out, then
785+
! if the ray hits an odd number of triangles on its way out, then
735786
! it must be on the inside of the model
736787
nInOrOut = nInOrOut + mod(nHits, 2)
737788
end do
@@ -1371,6 +1422,66 @@ contains
13711422

13721423
end subroutine f_distance_normals_3D
13731424

1425+
subroutine f_distance_normals_3D_flat(ntrs, trs_v, trs_n, pid, point, normals, distance)
1426+
1427+
$:GPU_ROUTINE(parallelism='[seq]')
1428+
1429+
integer, intent(in) :: ntrs
1430+
real(wp), dimension(:, :, :, :), intent(in) :: trs_v
1431+
real(wp), dimension(:, :, :), intent(in) :: trs_n
1432+
integer, intent(in) :: pid
1433+
real(wp), dimension(1:3), intent(in) :: point
1434+
real(wp), dimension(1:3), intent(out) :: normals
1435+
real(wp), intent(out) :: distance
1436+
1437+
real(wp), dimension(1:3, 1:3) :: tri
1438+
real(wp) :: dist_min, dist_t_min
1439+
real(wp) :: dist_min_normal, dist_buffer_normal
1440+
real(wp), dimension(1:3) :: midp
1441+
real(wp), dimension(1:3) :: dist_buffer
1442+
integer :: i, j, tri_idx
1443+
1444+
dist_min = 1.e12_wp
1445+
dist_min_normal = 1.e12_wp
1446+
distance = 0._wp
1447+
1448+
tri_idx = 0
1449+
do i = 1, ntrs
1450+
do j = 1, 3
1451+
tri(j, 1) = trs_v(j, 1, i, pid)
1452+
tri(j, 2) = trs_v(j, 2, i, pid)
1453+
tri(j, 3) = trs_v(j, 3, i, pid)
1454+
dist_buffer(j) = sqrt((point(1) - tri(j, 1))**2 + &
1455+
(point(2) - tri(j, 2))**2 + &
1456+
(point(3) - tri(j, 3))**2)
1457+
end do
1458+
1459+
do j = 1, 3
1460+
midp(j) = (tri(1, j) + tri(2, j) + tri(3, j))/3
1461+
end do
1462+
1463+
dist_t_min = minval(dist_buffer(1:3))
1464+
dist_buffer_normal = sqrt((point(1) - midp(1))**2 + &
1465+
(point(2) - midp(2))**2 + &
1466+
(point(3) - midp(3))**2)
1467+
1468+
if (dist_t_min < dist_min) then
1469+
dist_min = dist_t_min
1470+
end if
1471+
1472+
if (dist_buffer_normal < dist_min_normal) then
1473+
dist_min_normal = dist_buffer_normal
1474+
tri_idx = i
1475+
end if
1476+
end do
1477+
1478+
normals(1) = trs_n(1, tri_idx, pid)
1479+
normals(2) = trs_n(2, tri_idx, pid)
1480+
normals(3) = trs_n(3, tri_idx, pid)
1481+
distance = dist_min
1482+
1483+
end subroutine f_distance_normals_3D_flat
1484+
13741485
!> This procedure determines the levelset distance of 2D models without interpolation.
13751486
!! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation
13761487
!! @param boundary_vertex_count Output the total number of boundary vertices

src/simulation/m_compute_levelset.fpp

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ contains
5050
call s_cylinder_levelset(gps(i))
5151
elseif (patch_geometry == 11) then
5252
call s_3d_airfoil_levelset(gps(i))
53+
elseif (patch_geometry == 12) then
54+
call s_model_levelset(gps(i))
5355
end if
5456
end do
5557
$:END_GPU_PARALLEL_LOOP()
@@ -70,6 +72,8 @@ contains
7072
call s_rectangle_levelset(gps(i))
7173
elseif (patch_geometry == 4) then
7274
call s_airfoil_levelset(gps(i))
75+
elseif (patch_geometry == 5) then
76+
call s_model_levelset(gps(i))
7377
elseif (patch_geometry == 6) then
7478
call s_ellipse_levelset(gps(i))
7579
end if
@@ -79,17 +83,6 @@ contains
7983

8084
end if
8185

82-
! STL models computed on the CPU for now
83-
do i = 1, num_gps
84-
patch_id = gps(i)%ib_patch_id
85-
patch_geometry = patch_ib(patch_id)%geometry
86-
87-
if (patch_geometry == 5 .or. patch_geometry == 12) then
88-
call s_model_levelset(gps(i))
89-
$:GPU_UPDATE(device='[gps(i)]')
90-
end if
91-
end do
92-
9386
end subroutine s_apply_levelset
9487

9588
subroutine s_circle_levelset(gp)
@@ -651,7 +644,7 @@ contains
651644
type(ghost_point), intent(inout) :: gp
652645

653646
integer :: i, j, k, patch_id, boundary_edge_count, total_vertices
654-
logical :: interpolate
647+
integer :: interpolate
655648
real(wp), dimension(1:3) :: center, xyz_local
656649
real(wp) :: normals(1:3) !< Boundary normal buffer
657650
real(wp) :: distance
@@ -663,16 +656,17 @@ contains
663656
k = gp%loc(3)
664657

665658
! load in model values
666-
interpolate = models(patch_id)%interpolate
667-
boundary_edge_count = models(patch_id)%boundary_edge_count
668-
total_vertices = models(patch_id)%total_vertices
659+
interpolate = gpu_interpolate(patch_id)
660+
boundary_edge_count = gpu_boundary_edge_count(patch_id)
661+
total_vertices = gpu_total_vertices(patch_id)
669662

670663
center = 0._wp
671664
if (.not. f_is_default(patch_ib(patch_id)%x_centroid)) center(1) = patch_ib(patch_id)%x_centroid
672665
if (.not. f_is_default(patch_ib(patch_id)%y_centroid)) center(2) = patch_ib(patch_id)%y_centroid
673666
if (p > 0) then
674667
if (.not. f_is_default(patch_ib(patch_id)%z_centroid)) center(3) = patch_ib(patch_id)%z_centroid
675668
end if
669+
676670
inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :)
677671
rotation(:, :) = patch_ib(patch_id)%rotation_matrix(:, :)
678672

@@ -691,11 +685,11 @@ contains
691685
if (p > 0) then
692686

693687
! Get the boundary normals and shortest distance between the cell center and the model boundary
694-
call f_distance_normals_3D(models(patch_id)%model, xyz_local, normals, distance)
688+
call f_distance_normals_3D_flat(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance)
695689

696690
! Get the shortest distance between the cell center and the interpolated model boundary
697691
if (interpolate) then
698-
gp%levelset = f_interpolated_distance(models(patch_id)%interpolated_boundary_v, total_vertices, xyz_local)
692+
gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local)
699693
else
700694
gp%levelset = distance
701695
end if
@@ -707,19 +701,18 @@ contains
707701
gp%levelset_norm = matmul(rotation, normals(1:3))
708702
else
709703
! 2D models
710-
if (interpolate) then
704+
if (interpolate == 1) then
711705
! Get the shortest distance between the cell center and the model boundary
712-
gp%levelset = f_interpolated_distance(models(patch_id)%interpolated_boundary_v, total_vertices, xyz_local)
706+
gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) ! Change 7
713707
else
714-
! Get the shortest distance between the cell center and the interpolated model boundary
715-
gp%levelset = f_distance(models(patch_id)%boundary_v, boundary_edge_count, xyz_local)
708+
gp%levelset = f_distance(gpu_boundary_v(:, :, :, patch_id), boundary_edge_count, xyz_local) ! Change 8
716709
end if
717710

718711
! Correct the sign of the levelset
719712
gp%levelset = -abs(gp%levelset)
720713

721714
! Get the boundary normals
722-
call f_normals(models(patch_id)%boundary_v, &
715+
call f_normals(gpu_boundary_v(:, :, :, patch_id), &
723716
boundary_edge_count, &
724717
xyz_local, &
725718
normals)
@@ -729,8 +722,6 @@ contains
729722

730723
end if
731724

732-
! print gp%levelset, gp%levelset_norm(1), gp%levelset_norm(2), gp%levelset_norm(3)
733-
734725
end subroutine s_model_levelset
735726

736727
end module m_compute_levelset

src/simulation/m_ib_patches.fpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -877,8 +877,6 @@ contains
877877
end do
878878
$:END_GPU_PARALLEL_LOOP()
879879
880-
$:GPU_UPDATE(host='[ib_markers%sf]')
881-
882880
end subroutine s_ib_model
883881
884882
!> The STL patch is a 3D geometry that is imported from an STL file.
@@ -935,8 +933,6 @@ contains
935933
end do
936934
end do
937935
938-
$:GPU_UPDATE(host='[ib_markers%sf]')
939-
940936
end subroutine s_ib_3d_model
941937
942938
!> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary

0 commit comments

Comments
 (0)