@@ -619,7 +619,7 @@ contains
619619 !! @param stage Current stage in the time- stepper algorithm
620620 subroutine s_compute_bubble_EL_dynamics (q_prim_vf , bc_type , stage )
621621#ifdef MFC_OpenMP
622- !DIR$ OPTIMIZE (- O1)
622+ !DIR$ OPTIMIZE (- O1)
623623#endif
624624 type(scalar_field), dimension (sys_size), intent (inout ) :: q_prim_vf
625625 type(integer_field), dimension (1 :num_dims, 1 :2 ), intent (in ) :: bc_type
@@ -829,7 +829,7 @@ contains
829829 do l = 1 , E_idx
830830 if (q_beta(1 )%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then
831831 rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + &
832- (q_cons_vf(l)%sf(i, j, k)/ q_beta(1 )%sf(i, j, k)) * &
832+ (q_cons_vf(l)%sf(i, j, k)/ q_beta(1 )%sf(i, j, k))* &
833833 q_beta(2 )%sf(i, j, k)
834834 end if
835835 end do
@@ -932,37 +932,6 @@ contains
932932
933933 type(integer_field), dimension (1 :num_dims, 1 :2 ), intent (in ) :: bc_type
934934 integer :: i, j, k, l
935- integer , save :: smear_call_count = 0
936-
937- smear_call_count = smear_call_count + 1
938-
939- ! DEBUG: bubble state checksum before smearing
940- $:GPU_UPDATE(host= ' [intfc_rad, intfc_vel, mtn_pos, n_el_bubs_loc]' )
941- block
942- real (wp) :: rad_loc, rad_glb, vel_loc, vel_glb
943- real (wp) :: posx_loc, posx_glb, posy_loc, posy_glb, posz_loc, posz_glb
944- integer :: kk, ierr2, nbubs_glb_chk
945- rad_loc = 0._wp ; vel_loc = 0._wp
946- posx_loc = 0._wp ; posy_loc = 0._wp ; posz_loc = 0._wp
947- do kk = 1 , n_el_bubs_loc
948- rad_loc = rad_loc + intfc_rad(kk, 2 )
949- vel_loc = vel_loc + intfc_vel(kk, 2 )
950- posx_loc = posx_loc + mtn_pos(kk, 1 , 2 )
951- posy_loc = posy_loc + mtn_pos(kk, 2 , 2 )
952- posz_loc = posz_loc + mtn_pos(kk, 3 , 2 )
953- end do
954- call MPI_Allreduce(rad_loc, rad_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
955- call MPI_Allreduce(vel_loc, vel_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
956- call MPI_Allreduce(posx_loc, posx_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
957- call MPI_Allreduce(posy_loc, posy_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
958- call MPI_Allreduce(posz_loc, posz_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
959- call MPI_Allreduce(n_el_bubs_loc, nbubs_glb_chk, 1 , MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr2)
960- if (proc_rank == 0 ) print * , " DEBUG BUBSTATE call=" , smear_call_count, &
961- " n_bubs=" , nbubs_glb_chk, &
962- " sum_R=" , rad_glb, " sum_Rdot=" , vel_glb, &
963- " sum_x=" , posx_glb, " sum_y=" , posy_glb, " sum_z=" , posz_glb
964- end block
965- $:GPU_UPDATE(device= ' [intfc_rad, intfc_vel, mtn_pos, n_el_bubs_loc]' )
966935
967936 call nvtxStartRange(" BUBBLES-LAGRANGE-SMEARING" )
968937 $:GPU_PARALLEL_LOOP(private= ' [i,j,k,l]' , collapse= 4 )
@@ -984,46 +953,6 @@ contains
984953 call s_smoothfunction(n_el_bubs_loc, intfc_rad, intfc_vel, &
985954 mtn_s, mtn_pos, q_beta, kahan_comp)
986955
987- ! DEBUG: checksum after Gaussian smearing (before communication)
988- ! all_cells = sum over entire grid (interior + buffer) per rank, then MPI_SUM.
989- ! This is the total Gaussian integral and MUST be decomposition- invariant.
990- ! If it differs between 1 - rank and multi- rank, the smearing kernel itself is wrong.
991- ! interior = sum over interior cells only (0 :m, 0 :n, 0 :p).
992- $:GPU_UPDATE(host= ' [q_beta(1)%sf]' )
993- block
994- real (wp) :: all_loc, all_glb
995- real (wp) :: int_loc, int_glb
996- real (wp) :: max_loc, max_glb
997- integer :: ierr2
998-
999- all_loc = 0._wp
1000- int_loc = 0._wp
1001- max_loc = 0._wp
1002-
1003- do l = idwbuff(3 )%beg, idwbuff(3 )%end
1004- do k = idwbuff(2 )%beg, idwbuff(2 )%end
1005- do j = idwbuff(1 )%beg, idwbuff(1 )%end
1006- all_loc = all_loc + real (q_beta(1 )%sf(j, k, l), kind= wp)
1007- if (j >= 0 .and. j <= m .and. k >= 0 .and. k <= n .and. &
1008- l >= 0 .and. l <= p) then
1009- int_loc = int_loc + real (q_beta(1 )%sf(j, k, l), kind= wp)
1010- max_loc = max (max_loc, abs (real (q_beta(1 )%sf(j, k, l), kind= wp)))
1011- end if
1012- end do
1013- end do
1014- end do
1015-
1016- call MPI_Allreduce(all_loc, all_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
1017- call MPI_Allreduce(int_loc, int_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
1018- call MPI_Allreduce(max_loc, max_glb, 1 , MPI_DOUBLE_PRECISION, MPI_MAX, MPI_COMM_WORLD, ierr2)
1019-
1020- if (proc_rank == 0 ) print * , " DEBUG PRE_COMM call=" , smear_call_count, &
1021- " all_cells=" , all_glb, " interior=" , int_glb, &
1022- " buf=" , all_glb - int_glb, " max=" , max_glb, &
1023- " n_bubs=" , n_el_bubs_glb
1024- end block
1025- $:GPU_UPDATE(device= ' [q_beta(1)%sf]' )
1026-
1027956 call nvtxStartRange(" BUBBLES-LAGRANGE-BETA-COMM" )
1028957 if (lag_params%cluster_type >= 4 ) then
1029958 call s_populate_beta_buffers(q_beta, bc_type, 3 , kahan_comp)
@@ -1032,48 +961,6 @@ contains
1032961 end if
1033962 call nvtxEndRange
1034963
1035- ! DEBUG: checksum after communication (before 1 - beta conversion)
1036- ! interior = sum over interior cells — this is what the source term uses.
1037- ! weighted = position- weighted checksum to detect spatial redistribution errors.
1038- $:GPU_UPDATE(host= ' [q_beta(1)%sf, q_beta(2)%sf]' )
1039- block
1040- real (wp) :: int_loc, int_glb
1041- real (wp) :: weighted_loc, weighted_glb
1042- real (wp) :: max_loc, max_glb
1043- real (wp) :: dbeta_loc, dbeta_glb
1044- integer :: gj, gk, gl, ierr2
1045-
1046- int_loc = 0._wp
1047- weighted_loc = 0._wp
1048- max_loc = 0._wp
1049- dbeta_loc = 0._wp
1050-
1051- do l = 0 , p
1052- do k = 0 , n
1053- do j = 0 , m
1054- int_loc = int_loc + real (q_beta(1 )%sf(j, k, l), kind= wp)
1055- max_loc = max (max_loc, abs (real (q_beta(1 )%sf(j, k, l), kind= wp)))
1056- dbeta_loc = dbeta_loc + real (q_beta(2 )%sf(j, k, l), kind= wp)
1057- gj = j + start_idx(1 )
1058- gk = k + merge (start_idx(2 ), 0 , num_dims >= 2 )
1059- gl = l + merge (start_idx(num_dims), 0 , num_dims >= 3 )
1060- weighted_loc = weighted_loc + &
1061- real (q_beta(1 )%sf(j, k, l), kind= wp) * real (gj + gk* 1000 + gl* 1000000 , kind= wp)
1062- end do
1063- end do
1064- end do
1065-
1066- call MPI_Allreduce(int_loc, int_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
1067- call MPI_Allreduce(max_loc, max_glb, 1 , MPI_DOUBLE_PRECISION, MPI_MAX, MPI_COMM_WORLD, ierr2)
1068- call MPI_Allreduce(weighted_loc, weighted_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
1069- call MPI_Allreduce(dbeta_loc, dbeta_glb, 1 , MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
1070-
1071- if (proc_rank == 0 ) print * , " DEBUG POST_COMM call=" , smear_call_count, &
1072- " interior=" , int_glb, " max=" , max_glb, " weighted=" , weighted_glb, &
1073- " dbeta_dt=" , dbeta_glb
1074- end block
1075- $:GPU_UPDATE(device= ' [q_beta(1)%sf, q_beta(2)%sf]' )
1076-
1077964 !Store 1 - beta
1078965 $:GPU_PARALLEL_LOOP(private= ' [j,k,l]' , collapse= 3 )
1079966 do l = idwbuff(3 )%beg, idwbuff(3 )%end
0 commit comments