Skip to content

Commit bc972ca

Browse files
Passes STL tests with GPU compute for IB markers (not added levelset yet)
1 parent 64bc348 commit bc972ca

5 files changed

Lines changed: 44 additions & 47 deletions

File tree

src/common/m_derived_types.fpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ module m_derived_types
195195
! --- GPU-friendly flattened arrays ---
196196
integer :: ntrs ! copy of model%ntrs
197197
real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices
198-
real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals
198+
real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals
199199
end type t_model_array
200200

201201
!> Derived type adding initial condition (ic) patch parameters as attributes

src/common/m_model.fpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -484,17 +484,17 @@ contains
484484
!! generator because the native generator is not compatible with being called
485485
!! from GPU routines/functions
486486
function f_model_random_number(seed) result(rval)
487-
487+
488488
$:GPU_ROUTINE(parallelism='[seq]')
489-
489+
490490
integer, intent(inout) :: seed
491491
real(wp) :: rval
492-
492+
493493
seed = ieor(seed, ishft(seed, 13))
494494
seed = ieor(seed, ishft(seed, -17))
495495
seed = ieor(seed, ishft(seed, 5))
496-
497-
rval = abs(real(seed, wp)) / real(huge(seed), wp)
496+
497+
rval = abs(real(seed, wp))/real(huge(seed), wp)
498498
end function f_model_random_number
499499

500500
!> This procedure, recursively, finds whether a point is inside an octree.
@@ -521,16 +521,16 @@ contains
521521

522522
real(wp), dimension(1:spc, 1:3) :: ray_origins, ray_dirs
523523

524-
rand_seed = int(point(1) * 73856093_wp) + &
525-
int(point(2) * 19349663_wp) + &
526-
int(point(3) * 83492791_wp)
524+
rand_seed = int(point(1)*73856093_wp) + &
525+
int(point(2)*19349663_wp) + &
526+
int(point(3)*83492791_wp)
527527
if (rand_seed == 0) rand_seed = 1
528528

529529
! generate our random collection of rays
530530
do i = 1, spc
531531
do k = 1, 3
532532
! random jitter in the ray origin lets us estimate the volume fraction over the whole cell instead of sampling only at the cell center
533-
ray_origins(i, k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k)
533+
ray_origins(i, k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp)*spacing(k)
534534
! cast sample rays in all directions
535535
ray_dirs(i, k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp
536536
end do
@@ -561,12 +561,12 @@ contains
561561
end function f_model_is_inside
562562

563563
impure function f_model_is_inside_flat(ntrs, trs_v, trs_n, pid, point, spacing, spc) result(fraction)
564-
564+
565565
$:GPU_ROUTINE(parallelism='[seq]')
566566

567567
integer, intent(in) :: ntrs
568-
real(wp), dimension(:,:,:,:), intent(in) :: trs_v
569-
real(wp), dimension(:,:,:), intent(in) :: trs_n
568+
real(wp), dimension(:, :, :, :), intent(in) :: trs_v
569+
real(wp), dimension(:, :, :), intent(in) :: trs_n
570570
integer, intent(in) :: pid
571571
real(wp), dimension(1:3), intent(in) :: point
572572
real(wp), dimension(1:3), intent(in) :: spacing
@@ -579,21 +579,21 @@ contains
579579
integer :: i, j, k, nInOrOut, nHits
580580
integer :: rand_seed
581581

582-
rand_seed = int(point(1) * 73856093_wp) + &
583-
int(point(2) * 19349663_wp) + &
584-
int(point(3) * 83492791_wp)
582+
rand_seed = int(point(1)*73856093_wp) + &
583+
int(point(2)*19349663_wp) + &
584+
int(point(3)*83492791_wp)
585585
if (rand_seed == 0) rand_seed = 1
586586

587587
! generate our random collection of rays
588588
nInOrOut = 0
589589
do i = 1, spc
590590
! Generate one ray at a time — no arrays needed
591591
do k = 1, 3
592-
origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k)
592+
origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp)*spacing(k)
593593
dir(k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp
594594
end do
595595
dir_mag = sqrt(dir(1)*dir(1) + dir(2)*dir(2) + dir(3)*dir(3))
596-
dir(:) = dir(:) / dir_mag
596+
dir(:) = dir(:)/dir_mag
597597

598598
ray%o = origin
599599
ray%d = dir
@@ -1327,14 +1327,14 @@ contains
13271327
subroutine s_pack_model_for_gpu(ma)
13281328
type(t_model_array), intent(inout) :: ma
13291329
integer :: i
1330-
1330+
13311331
ma%ntrs = ma%model%ntrs
1332-
allocate(ma%trs_v(1:3, 1:3, 1:ma%ntrs))
1333-
allocate(ma%trs_n(1:3, 1:ma%ntrs))
1334-
1332+
allocate (ma%trs_v(1:3, 1:3, 1:ma%ntrs))
1333+
allocate (ma%trs_n(1:3, 1:ma%ntrs))
1334+
13351335
do i = 1, ma%ntrs
13361336
ma%trs_v(:, :, i) = ma%model%trs(i)%v(:, :)
1337-
ma%trs_n(:, i) = ma%model%trs(i)%n(:)
1337+
ma%trs_n(:, i) = ma%model%trs(i)%n(:)
13381338
end do
13391339
end subroutine
13401340

src/simulation/m_compute_levelset.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,8 @@ contains
729729

730730
end if
731731

732+
! print gp%levelset, gp%levelset_norm(1), gp%levelset_norm(2), gp%levelset_norm(3)
733+
732734
end subroutine s_model_levelset
733735

734736
end module m_compute_levelset

src/simulation/m_ib_patches.fpp

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ module m_ib_patches
6161
integer, allocatable :: gpu_ntrs(:)
6262
real(wp), allocatable, dimension(:,:,:,:) :: gpu_trs_v
6363
real(wp), allocatable, dimension(:,:,:) :: gpu_trs_n
64+
real(wp), allocatable, dimension(:,:,:) :: gpu_boundary_v
65+
real(wp), allocatable, dimension(:,:,:) :: gpu_interpolated_boundary_v
6466

6567
contains
6668

@@ -1032,15 +1034,6 @@ contains
10321034
xy_local = f_convert_cyl_to_cart(xy_local)
10331035
end if
10341036
1035-
if (i == 13 .and. j == 16) then
1036-
print *, "spc:", spc
1037-
print *, "ntrs:", gpu_ntrs(patch_id)
1038-
print *, "threshold:", threshold
1039-
print *, "dx:", dx(i), dy(j)
1040-
print *, "xy_local:", xy_local(1)
1041-
1042-
end if
1043-
10441037
eta = f_model_is_inside_flat(gpu_ntrs(patch_id), &
10451038
gpu_trs_v, gpu_trs_n, &
10461039
patch_id, &
@@ -1049,14 +1042,15 @@ contains
10491042
10501043
! Read the STL boundary vertices and compute the levelset and levelset_norm
10511044
if (eta > threshold) then
1052-
print *, eta, i, j
10531045
ib_markers%sf(i, j, 0) = patch_id
10541046
end if
10551047
10561048
end do
10571049
end do
10581050
$:END_GPU_PARALLEL_LOOP()
10591051
1052+
$:GPU_UPDATE(host='[ib_markers%sf]')
1053+
10601054
end subroutine s_ib_model
10611055
10621056
!> The STL patch is a 3D geometry that is imported from an STL file.
@@ -1075,7 +1069,6 @@ contains
10751069
real(wp), dimension(1:3) :: center, xyz_local
10761070
real(wp), dimension(1:3, 1:3) :: inverse_rotation
10771071
1078-
model => models(patch_id)%model
10791072
center = 0._wp
10801073
center(1) = patch_ib(patch_id)%x_centroid
10811074
center(2) = patch_ib(patch_id)%y_centroid
@@ -1114,6 +1107,8 @@ contains
11141107
end do
11151108
end do
11161109
1110+
$:GPU_UPDATE(host='[ib_markers%sf]')
1111+
11171112
end subroutine s_ib_3d_model
11181113
11191114
!> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary

src/simulation/m_igr.fpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ contains
269269

270270
subroutine s_igr_iterative_solve(q_cons_vf, bc_type, t_step)
271271
#ifdef _CRAYFTN
272-
!DIR$ OPTIMIZE (-haggress)
272+
!DIR$ OPTIMIZE (-haggress)
273273
#endif
274274
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
275275
type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
@@ -370,7 +370,7 @@ contains
370370

371371
subroutine s_igr_sigma_x(q_cons_vf, rhs_vf)
372372
#ifdef _CRAYFTN
373-
!DIR$ OPTIMIZE (-haggress)
373+
!DIR$ OPTIMIZE (-haggress)
374374
#endif
375375
type(scalar_field), &
376376
dimension(sys_size), &
@@ -455,7 +455,7 @@ contains
455455

456456
subroutine s_igr_riemann_solver(q_cons_vf, rhs_vf, idir)
457457
#ifdef _CRAYFTN
458-
!DIR$ OPTIMIZE (-haggress)
458+
!DIR$ OPTIMIZE (-haggress)
459459
#endif
460460
type(scalar_field), &
461461
dimension(sys_size), &
@@ -495,9 +495,9 @@ contains
495495

496496
#:if MFC_CASE_OPTIMIZATION
497497
#:if igr_order == 5
498-
!DIR$ unroll 6
498+
!DIR$ unroll 6
499499
#:elif igr_order == 3
500-
!DIR$ unroll 4
500+
!DIR$ unroll 4
501501
#:endif
502502
#:endif
503503
$:GPU_LOOP(parallelism='[seq]')
@@ -912,9 +912,9 @@ contains
912912

913913
#:if MFC_CASE_OPTIMIZATION
914914
#:if igr_order == 5
915-
!DIR$ unroll 6
915+
!DIR$ unroll 6
916916
#:elif igr_order == 3
917-
!DIR$ unroll 4
917+
!DIR$ unroll 4
918918
#:endif
919919
#:endif
920920
$:GPU_LOOP(parallelism='[seq]')
@@ -1429,9 +1429,9 @@ contains
14291429

14301430
#:if MFC_CASE_OPTIMIZATION
14311431
#:if igr_order == 5
1432-
!DIR$ unroll 6
1432+
!DIR$ unroll 6
14331433
#:elif igr_order == 3
1434-
!DIR$ unroll 4
1434+
!DIR$ unroll 4
14351435
#:endif
14361436
#:endif
14371437
$:GPU_LOOP(parallelism='[seq]')
@@ -1826,9 +1826,9 @@ contains
18261826

18271827
#:if MFC_CASE_OPTIMIZATION
18281828
#:if igr_order == 5
1829-
!DIR$ unroll 6
1829+
!DIR$ unroll 6
18301830
#:elif igr_order == 3
1831-
!DIR$ unroll 4
1831+
!DIR$ unroll 4
18321832
#:endif
18331833
#:endif
18341834
$:GPU_LOOP(parallelism='[seq]')
@@ -2311,9 +2311,9 @@ contains
23112311

23122312
#:if MFC_CASE_OPTIMIZATION
23132313
#:if igr_order == 5
2314-
!DIR$ unroll 6
2314+
!DIR$ unroll 6
23152315
#:elif igr_order == 3
2316-
!DIR$ unroll 4
2316+
!DIR$ unroll 4
23172317
#:endif
23182318
#:endif
23192319
$:GPU_LOOP(parallelism='[seq]')

0 commit comments

Comments
 (0)