Skip to content

Commit 64bc348

Browse files
Ib markers computed on GPU working
1 parent 804a286 commit 64bc348

5 files changed

Lines changed: 160 additions & 43 deletions

File tree

src/common/m_derived_types.fpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,18 @@ module m_derived_types
184184
end type t_model
185185

186186
type :: t_model_array
187+
! --- Original CPU-side fields (unchanged) ---
187188
type(t_model), allocatable :: model
188189
real(wp), allocatable, dimension(:, :, :) :: boundary_v
189190
real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v
190191
integer :: boundary_edge_count
191192
integer :: total_vertices
192-
logical :: interpolate
193+
integer :: interpolate
194+
195+
! --- GPU-friendly flattened arrays ---
196+
integer :: ntrs ! copy of model%ntrs
197+
real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices
198+
real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals
193199
end type t_model_array
194200

195201
!> Derived type adding initial condition (ic) patch parameters as attributes

src/common/m_helper.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,8 @@ contains
328328
!! @return The cross product of the two vectors.
329329
pure function f_cross(a, b) result(c)
330330
331+
$:GPU_ROUTINE(parallelism='[seq]')
332+
331333
real(wp), dimension(3), intent(in) :: a, b
332334
real(wp), dimension(3) :: c
333335

src/common/m_model.fpp

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ module m_model
2222
! Subroutines for STL immersed boundaries
2323
public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, &
2424
f_check_interpolation_3D, f_interpolate_2D, f_interpolate_3D, &
25-
f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area
25+
f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, &
26+
f_model_is_inside_flat
2627

2728
contains
2829

@@ -484,7 +485,7 @@ contains
484485
!! from GPU routines/functions
485486
function f_model_random_number(seed) result(rval)
486487

487-
! $:GPU_ROUTINE(parallelism='[seq]')
488+
$:GPU_ROUTINE(parallelism='[seq]')
488489

489490
integer, intent(inout) :: seed
490491
real(wp) :: rval
@@ -559,13 +560,67 @@ contains
559560

560561
end function f_model_is_inside
561562

563+
impure function f_model_is_inside_flat(ntrs, trs_v, trs_n, pid, point, spacing, spc) result(fraction)
564+
565+
$:GPU_ROUTINE(parallelism='[seq]')
566+
567+
integer, intent(in) :: ntrs
568+
real(wp), dimension(:,:,:,:), intent(in) :: trs_v
569+
real(wp), dimension(:,:,:), intent(in) :: trs_n
570+
integer, intent(in) :: pid
571+
real(wp), dimension(1:3), intent(in) :: point
572+
real(wp), dimension(1:3), intent(in) :: spacing
573+
integer, intent(in) :: spc
574+
575+
real(wp) :: fraction
576+
real(wp) :: origin(1:3), dir(1:3), dir_mag
577+
type(t_ray) :: ray
578+
type(t_triangle) :: tri
579+
integer :: i, j, k, nInOrOut, nHits
580+
integer :: rand_seed
581+
582+
rand_seed = int(point(1) * 73856093_wp) + &
583+
int(point(2) * 19349663_wp) + &
584+
int(point(3) * 83492791_wp)
585+
if (rand_seed == 0) rand_seed = 1
586+
587+
! generate our random collection of rays
588+
nInOrOut = 0
589+
do i = 1, spc
590+
! Generate one ray at a time — no arrays needed
591+
do k = 1, 3
592+
origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k)
593+
dir(k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp
594+
end do
595+
dir_mag = sqrt(dir(1)*dir(1) + dir(2)*dir(2) + dir(3)*dir(3))
596+
dir(:) = dir(:) / dir_mag
597+
598+
ray%o = origin
599+
ray%d = dir
600+
601+
nHits = 0
602+
do j = 1, ntrs
603+
tri%v(:, :) = trs_v(:, :, j, pid)
604+
tri%n(:) = trs_n(:, j, pid)
605+
if (f_intersects_triangle(ray, tri)) then
606+
nHits = nHits + 1
607+
end if
608+
end do
609+
nInOrOut = nInOrOut + mod(nHits, 2)
610+
end do
611+
612+
fraction = real(nInOrOut)/real(spc)
613+
end function f_model_is_inside_flat
614+
562615
! From https://www.scratchapixel.com/lessons/3e-basic-rendering/ray-tracing-rendering-a-triangle/ray-triangle-intersection-geometric-solution.html
563616
!> This procedure checks if a ray intersects a triangle.
564617
!! @param ray Ray.
565618
!! @param triangle Triangle.
566619
!! @return True if the ray intersects the triangle, false otherwise.
567620
elemental function f_intersects_triangle(ray, triangle) result(intersects)
568621

622+
$:GPU_ROUTINE(parallelism='[seq]')
623+
569624
type(t_ray), intent(in) :: ray
570625
type(t_triangle), intent(in) :: triangle
571626

@@ -1269,4 +1324,18 @@ contains
12691324

12701325
end function f_interpolated_distance
12711326

1327+
subroutine s_pack_model_for_gpu(ma)
1328+
type(t_model_array), intent(inout) :: ma
1329+
integer :: i
1330+
1331+
ma%ntrs = ma%model%ntrs
1332+
allocate(ma%trs_v(1:3, 1:3, 1:ma%ntrs))
1333+
allocate(ma%trs_n(1:3, 1:ma%ntrs))
1334+
1335+
do i = 1, ma%ntrs
1336+
ma%trs_v(:, :, i) = ma%model%trs(i)%v(:, :)
1337+
ma%trs_n(:, i) = ma%model%trs(i)%n(:)
1338+
end do
1339+
end subroutine
1340+
12721341
end module m_model

src/simulation/m_ib_patches.fpp

Lines changed: 78 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ module m_ib_patches
2525

2626
implicit none
2727

28-
private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart
28+
private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart, gpu_ntrs, gpu_trs_v, gpu_trs_n
2929

3030
real(wp) :: x_centroid, y_centroid, z_centroid
3131
real(wp) :: length_x, length_y, length_z
@@ -55,9 +55,12 @@ module m_ib_patches
5555

5656
character(len=5) :: istr ! string to store int to string result for error checking
5757

58-
type(t_model_array), allocatable, target :: models(:)
59-
$:GPU_DECLARE(create='[models]')
6058
!! array of STL models that can be allocated and then used in IB marker and levelset compute
59+
type(t_model_array), allocatable, target :: models(:)
60+
!! GPU-friendly flat arrays for STL model data
61+
integer, allocatable :: gpu_ntrs(:)
62+
real(wp), allocatable, dimension(:,:,:,:) :: gpu_trs_v
63+
real(wp), allocatable, dimension(:,:,:) :: gpu_trs_n
6164

6265
contains
6366

@@ -140,7 +143,7 @@ contains
140143

141144
do patch_id = 1, num_ibs
142145
if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then
143-
@:ALLOCATE(models(patch_id)%model)
146+
allocate(models(patch_id)%model)
144147
print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath)
145148

146149
model = f_model_read(patch_ib(patch_id)%model_filepath)
@@ -229,38 +232,52 @@ contains
229232
models(patch_id)%model = model
230233
models(patch_id)%boundary_v = boundary_v
231234
models(patch_id)%boundary_edge_count = boundary_edge_count
232-
models(patch_id)%interpolate = interpolate
235+
if (interpolate) then
236+
models(patch_id)%interpolate = 1
237+
else
238+
models(patch_id)%interpolate = 0
239+
end if
233240
if (interpolate) then
234241
models(patch_id)%interpolated_boundary_v = interpolated_boundary_v
235242
models(patch_id)%total_vertices = total_vertices
236243
end if
237244

238-
! update allocatables
239-
$:GPU_ENTER_DATA(copyin='[models(patch_id)]')
240-
$:GPU_ENTER_DATA(copyin='[models(patch_id)%model]')
241-
$:GPU_ENTER_DATA(copyin='[models(patch_id)%model%trs]')
242-
$:GPU_ENTER_DATA(copyin='[models(patch_id)%model%ntrs]')
243-
$:GPU_ENTER_DATA(copyin='[models(patch_id)%boundary_v]')
244-
do i = 1, models(patch_id)%model%ntrs
245-
$:GPU_ENTER_DATA(copyin='[models(patch_id)%model%trs(i)]')
246-
end do
247-
if (interpolate) then
248-
$:GPU_ENTER_DATA(copyin='[models(patch_id)%interpolated_boundary_v]')
245+
end if
246+
end do
247+
248+
! Pack and upload flat arrays for GPU (AFTER the loop)
249+
block
250+
integer :: pid, max_ntrs
251+
252+
max_ntrs = 0
253+
do pid = 1, num_ibs
254+
if (allocated(models(pid)%model)) then
255+
call s_pack_model_for_gpu(models(pid))
256+
max_ntrs = max(max_ntrs, models(pid)%ntrs)
249257
end if
258+
end do
259+
260+
if (max_ntrs > 0) then
261+
allocate(gpu_ntrs(1:num_ibs))
262+
allocate(gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs))
263+
allocate(gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs))
250264

251-
print *, "Entering GPU loop to read"
265+
gpu_ntrs = 0
266+
gpu_trs_v = 0._wp
267+
gpu_trs_n = 0._wp
252268

253-
! TODO :: REMOVE ME
254-
$:GPU_PARALLEL_LOOP(private='[i]')
255-
do i = 1, 3
256-
print *, "Num Triangles", models(1)%model%ntrs
257-
! print *, "interpolate", models(i)%interpolate
258-
! print *, "Vertices", models(i)%model%trs(1)%v
269+
do pid = 1, num_ibs
270+
if (allocated(models(pid)%model)) then
271+
gpu_ntrs(pid) = models(pid)%ntrs
272+
gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v
273+
gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n
274+
end if
259275
end do
260-
$:END_GPU_PARALLEL_LOOP()
276+
277+
$:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n]')
261278

262279
end if
263-
end do
280+
end block
264281

265282
end subroutine s_instantiate_STL_models
266283

@@ -986,23 +1003,25 @@ contains
9861003
integer, intent(in) :: patch_id
9871004
type(integer_field), intent(inout) :: ib_markers
9881005
989-
integer :: i, j, k !< Generic loop iterators
1006+
integer :: i, j, k !< Generic loop iterators
1007+
integer :: spc
9901008
991-
type(t_model), pointer :: model
992-
993-
real(wp) :: eta
1009+
real(wp) :: eta, threshold
9941010
real(wp), dimension(1:3) :: point, local_point, offset
9951011
real(wp), dimension(1:3) :: center, xy_local
9961012
real(wp), dimension(1:3, 1:3) :: inverse_rotation
9971013
998-
model => models(patch_id)%model
9991014
center = 0._wp
10001015
center(1) = patch_ib(patch_id)%x_centroid
10011016
center(2) = patch_ib(patch_id)%y_centroid
10021017
inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :)
10031018
offset(:) = patch_ib(patch_id)%centroid_offset(:)
1019+
spc = patch_ib(patch_id)%model_spc
1020+
threshold = patch_ib(patch_id)%model_threshold
10041021
1005-
do i = 0 - gp_layers, m + gp_layers
1022+
$:GPU_PARALLEL_LOOP(private='[i,j, xy_local, eta]',&
1023+
& copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=2)
1024+
do i = -gp_layers, m + gp_layers
10061025
do j = -gp_layers, n + gp_layers
10071026
10081027
xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp]
@@ -1013,16 +1032,30 @@ contains
10131032
xy_local = f_convert_cyl_to_cart(xy_local)
10141033
end if
10151034
1016-
eta = f_model_is_inside(model, xy_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc)
1035+
if (i == 13 .and. j == 16) then
1036+
print *, "spc:", spc
1037+
print *, "ntrs:", gpu_ntrs(patch_id)
1038+
print *, "threshold:", threshold
1039+
print *, "dx:", dx(i), dy(j)
1040+
print *, "xy_local:", xy_local(1)
1041+
1042+
end if
1043+
1044+
eta = f_model_is_inside_flat(gpu_ntrs(patch_id), &
1045+
gpu_trs_v, gpu_trs_n, &
1046+
patch_id, &
1047+
xy_local, (/dx(i), dy(j), 0._wp/), &
1048+
spc)
10171049
10181050
! Reading STL boundary vertices and compute the levelset and levelset_norm
1019-
if (eta > patch_ib(patch_id)%model_threshold) then
1051+
if (eta > threshold) then
10201052
print *, eta, i, j
10211053
ib_markers%sf(i, j, 0) = patch_id
10221054
end if
10231055
10241056
end do
10251057
end do
1058+
$:END_GPU_PARALLEL_LOOP()
10261059
10271060
end subroutine s_ib_model
10281061
@@ -1035,10 +1068,9 @@ contains
10351068
type(integer_field), intent(inout) :: ib_markers
10361069
10371070
integer :: i, j, k !< Generic loop iterators
1071+
integer :: spc
10381072
1039-
type(t_model), pointer :: model
1040-
1041-
real(wp) :: eta
1073+
real(wp) :: eta, threshold
10421074
real(wp), dimension(1:3) :: point, local_point, offset
10431075
real(wp), dimension(1:3) :: center, xyz_local
10441076
real(wp), dimension(1:3, 1:3) :: inverse_rotation
@@ -1050,8 +1082,12 @@ contains
10501082
center(3) = patch_ib(patch_id)%z_centroid
10511083
inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :)
10521084
offset(:) = patch_ib(patch_id)%centroid_offset(:)
1085+
spc = patch_ib(patch_id)%model_spc
1086+
threshold = patch_ib(patch_id)%model_threshold
10531087
1054-
do i = 0 - gp_layers, m + gp_layers
1088+
$:GPU_PARALLEL_LOOP(private='[i,j,k, xyz_local, eta]',&
1089+
& copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=2)
1090+
do i = -gp_layers, m + gp_layers
10551091
do j = -gp_layers, n + gp_layers
10561092
do k = -gp_layers, p + gp_layers
10571093
@@ -1063,7 +1099,11 @@ contains
10631099
xyz_local = f_convert_cyl_to_cart(xyz_local)
10641100
end if
10651101
1066-
eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), dz(k)/), patch_ib(patch_id)%model_spc)
1102+
eta = f_model_is_inside_flat(gpu_ntrs(patch_id), &
1103+
gpu_trs_v, gpu_trs_n, &
1104+
patch_id, &
1105+
xyz_local, (/dx(i), dy(j), dz(k)/), &
1106+
spc)
10671107
10681108
! Reading STL boundary vertices and compute the levelset and levelset_norm
10691109
if (eta > patch_ib(patch_id)%model_threshold) then

src/simulation/m_ibm.fpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@ contains
9898
$:GPU_UPDATE(device='[patch_ib(1:num_ibs)]')
9999

100100
! GPU routines require updated cell centers
101-
$:GPU_UPDATE(device='[x_cc, y_cc]')
101+
$:GPU_UPDATE(device='[x_cc, y_cc, dx, dy]')
102102
if (p /= 0) then
103-
$:GPU_UPDATE(device='[z_cc]')
103+
$:GPU_UPDATE(device='[z_cc, dz]')
104104
end if
105105

106106
! allocate STL models

0 commit comments

Comments
 (0)