precheck and remove debug prints

wilfonba · wilfonba · commit 8e5faaabdeb5 · 2026-02-27T16:50:26.000-05:00
diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp
@@ -619,7 +619,7 @@ contains
         !! @param stage Current stage in the time-stepper algorithm
     subroutine s_compute_bubble_EL_dynamics(q_prim_vf, bc_type, stage)
 #ifdef MFC_OpenMP
-    !DIR$ OPTIMIZE (-O1)
+        !DIR$ OPTIMIZE (-O1)
 #endif
         type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
         type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
@@ -829,7 +829,7 @@ contains
                         do l = 1, E_idx
                             if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then
                                 rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + &
-                                                        (q_cons_vf(l)%sf(i, j, k)/q_beta(1)%sf(i, j, k)) * &
+                                                        (q_cons_vf(l)%sf(i, j, k)/q_beta(1)%sf(i, j, k))* &
                                                         q_beta(2)%sf(i, j, k)
                             end if
                         end do
@@ -932,37 +932,6 @@ contains
 
         type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
         integer :: i, j, k, l
-        integer, save :: smear_call_count = 0
-
-        smear_call_count = smear_call_count + 1
-
-        ! DEBUG: bubble state checksum before smearing
-        $:GPU_UPDATE(host='[intfc_rad, intfc_vel, mtn_pos, n_el_bubs_loc]')
-        block
-            real(wp) :: rad_loc, rad_glb, vel_loc, vel_glb
-            real(wp) :: posx_loc, posx_glb, posy_loc, posy_glb, posz_loc, posz_glb
-            integer :: kk, ierr2, nbubs_glb_chk
-            rad_loc = 0._wp; vel_loc = 0._wp
-            posx_loc = 0._wp; posy_loc = 0._wp; posz_loc = 0._wp
-            do kk = 1, n_el_bubs_loc
-                rad_loc = rad_loc + intfc_rad(kk, 2)
-                vel_loc = vel_loc + intfc_vel(kk, 2)
-                posx_loc = posx_loc + mtn_pos(kk, 1, 2)
-                posy_loc = posy_loc + mtn_pos(kk, 2, 2)
-                posz_loc = posz_loc + mtn_pos(kk, 3, 2)
-            end do
-            call MPI_Allreduce(rad_loc, rad_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(vel_loc, vel_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(posx_loc, posx_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(posy_loc, posy_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(posz_loc, posz_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(n_el_bubs_loc, nbubs_glb_chk, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            if (proc_rank == 0) print *, "DEBUG BUBSTATE call=", smear_call_count, &
-                " n_bubs=", nbubs_glb_chk, &
-                " sum_R=", rad_glb, " sum_Rdot=", vel_glb, &
-                " sum_x=", posx_glb, " sum_y=", posy_glb, " sum_z=", posz_glb
-        end block
-        $:GPU_UPDATE(device='[intfc_rad, intfc_vel, mtn_pos, n_el_bubs_loc]')
 
         call nvtxStartRange("BUBBLES-LAGRANGE-SMEARING")
         $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4)
@@ -984,46 +953,6 @@ contains
         call s_smoothfunction(n_el_bubs_loc, intfc_rad, intfc_vel, &
                               mtn_s, mtn_pos, q_beta, kahan_comp)
 
-        ! DEBUG: checksum after Gaussian smearing (before communication)
-        ! all_cells = sum over entire grid (interior + buffer) per rank, then MPI_SUM.
-        !   This is the total Gaussian integral and MUST be decomposition-invariant.
-        !   If it differs between 1-rank and multi-rank, the smearing kernel itself is wrong.
-        ! interior = sum over interior cells only (0:m, 0:n, 0:p).
-        $:GPU_UPDATE(host='[q_beta(1)%sf]')
-        block
-            real(wp) :: all_loc, all_glb
-            real(wp) :: int_loc, int_glb
-            real(wp) :: max_loc, max_glb
-            integer :: ierr2
-
-            all_loc = 0._wp
-            int_loc = 0._wp
-            max_loc = 0._wp
-
-            do l = idwbuff(3)%beg, idwbuff(3)%end
-                do k = idwbuff(2)%beg, idwbuff(2)%end
-                    do j = idwbuff(1)%beg, idwbuff(1)%end
-                        all_loc = all_loc + real(q_beta(1)%sf(j, k, l), kind=wp)
-                        if (j >= 0 .and. j <= m .and. k >= 0 .and. k <= n .and. &
-                            l >= 0 .and. l <= p) then
-                            int_loc = int_loc + real(q_beta(1)%sf(j, k, l), kind=wp)
-                            max_loc = max(max_loc, abs(real(q_beta(1)%sf(j, k, l), kind=wp)))
-                        end if
-                    end do
-                end do
-            end do
-
-            call MPI_Allreduce(all_loc, all_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(int_loc, int_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(max_loc, max_glb, 1, MPI_DOUBLE_PRECISION, MPI_MAX, MPI_COMM_WORLD, ierr2)
-
-            if (proc_rank == 0) print *, "DEBUG PRE_COMM call=", smear_call_count, &
-                " all_cells=", all_glb, " interior=", int_glb, &
-                " buf=", all_glb - int_glb, " max=", max_glb, &
-                " n_bubs=", n_el_bubs_glb
-        end block
-        $:GPU_UPDATE(device='[q_beta(1)%sf]')
-
         call nvtxStartRange("BUBBLES-LAGRANGE-BETA-COMM")
         if (lag_params%cluster_type >= 4) then
             call s_populate_beta_buffers(q_beta, bc_type, 3, kahan_comp)
@@ -1032,48 +961,6 @@ contains
         end if
         call nvtxEndRange
 
-        ! DEBUG: checksum after communication (before 1-beta conversion)
-        ! interior = sum over interior cells — this is what the source term uses.
-        ! weighted = position-weighted checksum to detect spatial redistribution errors.
-        $:GPU_UPDATE(host='[q_beta(1)%sf, q_beta(2)%sf]')
-        block
-            real(wp) :: int_loc, int_glb
-            real(wp) :: weighted_loc, weighted_glb
-            real(wp) :: max_loc, max_glb
-            real(wp) :: dbeta_loc, dbeta_glb
-            integer :: gj, gk, gl, ierr2
-
-            int_loc = 0._wp
-            weighted_loc = 0._wp
-            max_loc = 0._wp
-            dbeta_loc = 0._wp
-
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        int_loc = int_loc + real(q_beta(1)%sf(j, k, l), kind=wp)
-                        max_loc = max(max_loc, abs(real(q_beta(1)%sf(j, k, l), kind=wp)))
-                        dbeta_loc = dbeta_loc + real(q_beta(2)%sf(j, k, l), kind=wp)
-                        gj = j + start_idx(1)
-                        gk = k + merge(start_idx(2), 0, num_dims >= 2)
-                        gl = l + merge(start_idx(num_dims), 0, num_dims >= 3)
-                        weighted_loc = weighted_loc + &
-                            real(q_beta(1)%sf(j, k, l), kind=wp) * real(gj + gk*1000 + gl*1000000, kind=wp)
-                    end do
-                end do
-            end do
-
-            call MPI_Allreduce(int_loc, int_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(max_loc, max_glb, 1, MPI_DOUBLE_PRECISION, MPI_MAX, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(weighted_loc, weighted_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-            call MPI_Allreduce(dbeta_loc, dbeta_glb, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr2)
-
-            if (proc_rank == 0) print *, "DEBUG POST_COMM call=", smear_call_count, &
-                " interior=", int_glb, " max=", max_glb, " weighted=", weighted_glb, &
-                " dbeta_dt=", dbeta_glb
-        end block
-        $:GPU_UPDATE(device='[q_beta(1)%sf, q_beta(2)%sf]')
-
         !Store 1-beta
         $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3)
         do l = idwbuff(3)%beg, idwbuff(3)%end
diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp
@@ -147,7 +147,7 @@ contains
                     ! Loop over bubbles in this cell
                     $:GPU_LOOP(parallelism='[seq]')
                     do lb = cell_list_start(i, j, k), &
-                            cell_list_start(i, j, k) + cell_list_count(i, j, k) - 1
+                        cell_list_start(i, j, k) + cell_list_count(i, j, k) - 1
 
                         bub_idx = cell_list_idx(lb)
 
@@ -246,7 +246,7 @@ contains
                             do di = di_beg, di_end
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do lb = cell_list_start(di, dj, dk), &
-                                        cell_list_start(di, dj, dk) + cell_list_count(di, dj, dk) - 1
+                                    cell_list_start(di, dj, dk) + cell_list_count(di, dj, dk) - 1
 
                                     bub_idx = cell_list_idx(lb)
 
@@ -344,11 +344,11 @@ contains
             else
                 !< 2D cartesian function: Equation (48) from Madea and Colonius 2018
                 ! We smear particles considering a virtual depth (lag_params%charwidth) with lag_params%charNz cells
-                dzp = (lag_params%charwidth / (lag_params%charNz + 1._wp))
+                dzp = (lag_params%charwidth/(lag_params%charNz + 1._wp))
 
                 func = 0._wp
                 do i = 0, lag_params%charNz
-                    zc = (-lag_params%charwidth/2._wp + dzp * (0.5_wp + i)) ! Center of virtual cell i in z-direction
+                    zc = (-lag_params%charwidth/2._wp + dzp*(0.5_wp + i)) ! Center of virtual cell i in z-direction
                     Lz2 = (center(3) - zc)**2._wp
                     distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2)
                     func = func + dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
@@ -1203,7 +1203,7 @@ contains
         ! carried out if the simulation is in fact not truly executed in parallel.
 
         call s_mpi_bcast_user_inputs()
-print*, proc_rank, lag_params%charwidth, lag_params%charNz
+        print *, proc_rank, lag_params%charwidth, lag_params%charNz
         call s_initialize_parallel_io()
 
         call s_mpi_decompose_computational_domain()