@@ -38,18 +38,19 @@ contains
3838 !! @param lbk_s Computational coordinates of the bubbles
3939 !! @param lbk_pos Spatial coordinates of the bubbles
4040 !! @param updatedvar Eulerian variable to be updated
41- subroutine s_smoothfunction (nBubs , lbk_rad , lbk_vel , lbk_s , lbk_pos , updatedvar )
41+ subroutine s_smoothfunction (nBubs , lbk_rad , lbk_vel , lbk_s , lbk_pos , updatedvar , kcomp )
4242
4343 integer , intent (in ) :: nBubs
4444 real (wp), dimension (1 :lag_params%nBubs_glb, 1 :3 , 1 :2 ), intent (in ) :: lbk_s, lbk_pos
4545 real (wp), dimension (1 :lag_params%nBubs_glb, 1 :2 ), intent (in ) :: lbk_rad, lbk_vel
4646 type(scalar_field), dimension (:), intent (inout ) :: updatedvar
47+ type(scalar_field), dimension (:), intent (inout ) :: kcomp
4748
4849 smoothfunc:select case(lag_params%smooth_type)
4950 case (1 )
50- call s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar)
51+ call s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp )
5152 case (2 )
52- call s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar)
53+ call s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar, kcomp )
5354 end select smoothfunc
5455
5556 end subroutine s_smoothfunction
@@ -118,18 +119,20 @@ contains
118119 !> Cell- centric delta- function smearing using the cell list (no GPU atomics).
119120 !! Each bubble only affects the cell it resides in . The outer GPU loop
120121 !! iterates over interior cells and sums contributions from resident bubbles.
121- subroutine s_deltafunc (nBubs , lbk_rad , lbk_vel , lbk_s , updatedvar )
122+ subroutine s_deltafunc (nBubs , lbk_rad , lbk_vel , lbk_s , updatedvar , kcomp )
122123
123124 integer , intent (in ) :: nBubs
124125 real (wp), dimension (1 :lag_params%nBubs_glb, 1 :3 , 1 :2 ), intent (in ) :: lbk_s
125126 real (wp), dimension (1 :lag_params%nBubs_glb, 1 :2 ), intent (in ) :: lbk_rad, lbk_vel
126127 type(scalar_field), dimension (:), intent (inout ) :: updatedvar
128+ type(scalar_field), dimension (:), intent (inout ) :: kcomp
127129
128130 real (wp) :: strength_vel, strength_vol
129131 real (wp) :: volpart, Vol
132+ real (wp) :: y_kahan, t_kahan
130133 integer :: i, j, k, lb, bub_idx
131134
132- $:GPU_PARALLEL_LOOP(collapse= 3 , private= ' [i,j,k,lb,bub_idx,volpart,Vol,strength_vel,strength_vol]' )
135+ $:GPU_PARALLEL_LOOP(collapse= 3 , private= ' [i,j,k,lb,bub_idx,volpart,Vol,strength_vel,strength_vol,y_kahan,t_kahan ]' )
133136 do k = 0 , p
134137 do j = 0 , n
135138 do i = 0 , m
@@ -153,16 +156,24 @@ contains
153156 strength_vol = volpart
154157 strength_vel = 4._wp * pi* lbk_rad(bub_idx, 2 )** 2._wp * lbk_vel(bub_idx, 2 )
155158
156- ! Update void fraction field — no atomics needed
157- updatedvar(1 )%sf(i, j, k) = updatedvar(1 )%sf(i, j, k) + real (strength_vol/ Vol, kind= stp)
159+ ! Kahan summation for void fraction
160+ y_kahan = real (strength_vol/ Vol, kind= wp) - kcomp(1 )%sf(i, j, k)
161+ t_kahan = updatedvar(1 )%sf(i, j, k) + y_kahan
162+ kcomp(1 )%sf(i, j, k) = (t_kahan - updatedvar(1 )%sf(i, j, k)) - y_kahan
163+ updatedvar(1 )%sf(i, j, k) = t_kahan
158164
159- ! Update time derivative of void fraction
160- updatedvar(2 )%sf(i, j, k) = updatedvar(2 )%sf(i, j, k) + real (strength_vel/ Vol, kind= stp)
165+ ! Kahan summation for time derivative of void fraction
166+ y_kahan = real (strength_vel/ Vol, kind= wp) - kcomp(2 )%sf(i, j, k)
167+ t_kahan = updatedvar(2 )%sf(i, j, k) + y_kahan
168+ kcomp(2 )%sf(i, j, k) = (t_kahan - updatedvar(2 )%sf(i, j, k)) - y_kahan
169+ updatedvar(2 )%sf(i, j, k) = t_kahan
161170
162171 ! Product of two smeared functions
163172 if (lag_params%cluster_type >= 4 ) then
164- updatedvar(5 )%sf(i, j, k) = updatedvar(5 )%sf(i, j, k) + &
165- real ((strength_vol* strength_vel)/ Vol, kind= stp)
173+ y_kahan = real ((strength_vol* strength_vel)/ Vol, kind= wp) - kcomp(5 )%sf(i, j, k)
174+ t_kahan = updatedvar(5 )%sf(i, j, k) + y_kahan
175+ kcomp(5 )%sf(i, j, k) = (t_kahan - updatedvar(5 )%sf(i, j, k)) - y_kahan
176+ updatedvar(5 )%sf(i, j, k) = t_kahan
166177 end if
167178 end do
168179
@@ -176,18 +187,20 @@ contains
176187 !> Cell- centric gaussian smearing using the cell list (no GPU atomics).
177188 !! Each grid cell accumulates contributions from nearby bubbles looked up
178189 !! via cell_list_start/ count/ idx.
179- subroutine s_gaussian (nBubs , lbk_rad , lbk_vel , lbk_s , lbk_pos , updatedvar )
190+ subroutine s_gaussian (nBubs , lbk_rad , lbk_vel , lbk_s , lbk_pos , updatedvar , kcomp )
180191
181192 integer , intent (in ) :: nBubs
182193 real (wp), dimension (1 :lag_params%nBubs_glb, 1 :3 , 1 :2 ), intent (in ) :: lbk_s, lbk_pos
183194 real (wp), dimension (1 :lag_params%nBubs_glb, 1 :2 ), intent (in ) :: lbk_rad, lbk_vel
184195 type(scalar_field), dimension (:), intent (inout ) :: updatedvar
196+ type(scalar_field), dimension (:), intent (inout ) :: kcomp
185197
186198 real (wp), dimension (3 ) :: center, nodecoord, s_coord
187199 integer , dimension (3 ) :: cell, cellijk
188200 real (wp) :: stddsv, volpart
189201 real (wp) :: strength_vel, strength_vol
190202 real (wp) :: func, func2
203+ real (wp) :: y_kahan, t_kahan
191204 integer :: i, j, k, di, dj, dk, lb, bub_idx
192205 integer :: di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end
193206 integer :: smear_x_beg, smear_x_end
@@ -203,7 +216,7 @@ contains
203216 smear_z_end = merge (p + mapCells + 1 , p, p > 0 )
204217
205218 $:GPU_PARALLEL_LOOP(collapse= 3 , &
206- & private= ' [i,j,k,di,dj,dk,lb,bub_idx,center,nodecoord,s_coord,cell,cellijk,stddsv,volpart,strength_vel,strength_vol,func,func2,di_beg,di_end,dj_beg,dj_end,dk_beg,dk_end]' , &
219+ & private= ' [i,j,k,di,dj,dk,lb,bub_idx,center,nodecoord,s_coord,cell,cellijk,stddsv,volpart,strength_vel,strength_vol,func,func2,y_kahan,t_kahan, di_beg,di_end,dj_beg,dj_end,dk_beg,dk_end]' , &
207220 & copyin= ' [smear_x_beg,smear_x_end,smear_y_beg,smear_y_end,smear_z_beg,smear_z_end]' )
208221 do k = smear_z_beg, smear_z_end
209222 do j = smear_y_beg, smear_y_end
@@ -253,14 +266,24 @@ contains
253266
254267 call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp , func)
255268
256- ! Accumulate — no atomics needed (each thread owns its (i,j,k))
257- updatedvar(1 )%sf(i, j, k) = updatedvar(1 )%sf(i, j, k) + real (func* strength_vol, kind= stp)
258- updatedvar(2 )%sf(i, j, k) = updatedvar(2 )%sf(i, j, k) + real (func* strength_vel, kind= stp)
269+ ! Kahan summation for void fraction
270+ y_kahan = real (func* strength_vol, kind= wp) - kcomp(1 )%sf(i, j, k)
271+ t_kahan = updatedvar(1 )%sf(i, j, k) + y_kahan
272+ kcomp(1 )%sf(i, j, k) = (t_kahan - updatedvar(1 )%sf(i, j, k)) - y_kahan
273+ updatedvar(1 )%sf(i, j, k) = t_kahan
274+
275+ ! Kahan summation for time derivative of void fraction
276+ y_kahan = real (func* strength_vel, kind= wp) - kcomp(2 )%sf(i, j, k)
277+ t_kahan = updatedvar(2 )%sf(i, j, k) + y_kahan
278+ kcomp(2 )%sf(i, j, k) = (t_kahan - updatedvar(2 )%sf(i, j, k)) - y_kahan
279+ updatedvar(2 )%sf(i, j, k) = t_kahan
259280
260281 if (lag_params%cluster_type >= 4 ) then
261282 call s_applygaussian(center, cellijk, nodecoord, stddsv, 1._wp , func2)
262- updatedvar(5 )%sf(i, j, k) = updatedvar(5 )%sf(i, j, k) + &
263- real (func2* strength_vol* strength_vel, kind= stp)
283+ y_kahan = real (func2* strength_vol* strength_vel, kind= wp) - kcomp(5 )%sf(i, j, k)
284+ t_kahan = updatedvar(5 )%sf(i, j, k) + y_kahan
285+ kcomp(5 )%sf(i, j, k) = (t_kahan - updatedvar(5 )%sf(i, j, k)) - y_kahan
286+ updatedvar(5 )%sf(i, j, k) = t_kahan
264287 end if
265288
266289 end do
0 commit comments