127127 # approaching particles and turn it off for receding particles. In this way, the
128128 # viscosity is used for shocks and not rarefactions."
129129 if vr < 0
130- mu = h * vr / (distance^ 2 + epsilon * h^ 2 )
131- return (alpha * c * mu + beta * mu^ 2 ) / rho_mean * grad_kernel
130+ # Since this is one of the most performance-critical functions, using fast divisions
131+ # here gives a significant speedup on GPUs.
132+ # See the docs page "Development" for more details on `div_fast`.
133+ mu = div_fast (h * vr, distance^ 2 + epsilon * h^ 2 )
134+ return div_fast (alpha * c * mu + beta * mu^ 2 , rho_mean) * grad_kernel
132135 end
133136
134137 return zero (v_diff)
142145 mu_a = nu_a * rho_a
143146 mu_b = nu_b * rho_b
144147
145- return (mu_a + mu_b) / (rho_a * rho_b) * dot (pos_diff, grad_kernel) /
146- (distance^ 2 + epsilon * h^ 2 ) * v_diff
148+ # Since this is one of the most performance-critical functions, using fast divisions
149+ # here gives a significant speedup on GPUs.
150+ # See the docs page "Development" for more details on `div_fast`.
151+ return div_fast ((mu_a + mu_b) * dot (pos_diff, grad_kernel),
152+ rho_a * rho_b * (distance^ 2 + epsilon * h^ 2 )) * v_diff
147153end
148154
149155# See, e.g.,
@@ -177,17 +183,20 @@ struct ViscosityAdami{ELTYPE}
177183 end
178184end
179185
180- function adami_viscosity_force (smoothing_length_average , pos_diff, distance, grad_kernel,
186+ function adami_viscosity_force (h , pos_diff, distance, grad_kernel,
181187 m_a, m_b, rho_a, rho_b, v_diff, nu_a, nu_b, epsilon)
182188 eta_a = nu_a * rho_a
183189 eta_b = nu_b * rho_b
184190
185- eta_tilde = 2 * (eta_a * eta_b) / (eta_a + eta_b)
191+ # Since this is one of the most performance-critical functions, using fast divisions
192+ # here gives a significant speedup on GPUs.
193+ # See the docs page "Development" for more details on `div_fast`.
194+ volume_a = div_fast (m_a, rho_a)
195+ volume_b = div_fast (m_b, rho_b)
186196
187- tmp = eta_tilde / (distance^ 2 + epsilon * smoothing_length_average^ 2 )
188-
189- volume_a = m_a / rho_a
190- volume_b = m_b / rho_b
197+ # eta_tilde = 2 * (eta_a * eta_b) / (eta_a + eta_b)
198+ # tmp = eta_tilde / (distance^2 + epsilon * h^2) / m_a
199+ tmp = div_fast (2 * eta_a * eta_b, (eta_a + eta_b) * (distance^ 2 + epsilon * h^ 2 ) * m_a)
191200
192201 # This formulation was introduced by Hu and Adams (2006). https://doi.org/10.1016/j.jcp.2005.09.001
193202 # They argued that the formulation is more flexible because of the possibility to formulate
@@ -198,9 +207,9 @@ function adami_viscosity_force(smoothing_length_average, pos_diff, distance, gra
198207 # Because when using this formulation for the pressure acceleration, it is not
199208 # energy conserving.
200209 # See issue: https://github.com/trixi-framework/TrixiParticles.jl/issues/394
201- visc = (volume_a^ 2 + volume_b^ 2 ) * dot (grad_kernel, pos_diff) * tmp / m_a
210+ visc = (volume_a^ 2 + volume_b^ 2 ) * dot (grad_kernel, pos_diff) * tmp
202211
203- return visc . * v_diff
212+ return visc * v_diff
204213end
205214
206215@inline function (viscosity:: ViscosityAdami )(particle_system, neighbor_system,
@@ -334,7 +343,10 @@ ViscosityAdamiSGS(; nu, C_S=0.1, epsilon=0.001) = ViscosityAdamiSGS(nu, C_S, eps
334343 # and then the Smagorinsky eddy viscosity:
335344 # ν_SGS = (C_S * h̄)^2 * S_mag.
336345 #
337- S_mag = norm (v_diff) / (distance + epsilon)
346+ # Since this is one of the most performance-critical functions, using fast divisions
347+ # here gives a significant speedup on GPUs.
348+ # See the docs page "Development" for more details on `div_fast`.
349+ S_mag = div_fast (sqrt (dot (v_diff, v_diff)), (distance + epsilon))
338350 nu_SGS = (viscosity. C_S * smoothing_length_average)^ 2 * S_mag
339351
340352 # Effective kinematic viscosity is the sum of the standard and SGS parts.
@@ -412,7 +424,7 @@ ViscosityMorrisSGS(; nu, C_S=0.1, epsilon=0.001) = ViscosityMorrisSGS(nu, C_S, e
412424
413425 smoothing_length_particle = smoothing_length (particle_system, particle)
414426 smoothing_length_neighbor = smoothing_length (particle_system, neighbor)
415- smoothing_length_average = (smoothing_length_particle + smoothing_length_neighbor) / 2
427+ h = (smoothing_length_particle + smoothing_length_neighbor) / 2
416428
417429 nu_a = kinematic_viscosity (particle_system,
418430 viscosity_model (neighbor_system, particle_system),
@@ -427,8 +439,11 @@ ViscosityMorrisSGS(; nu, C_S=0.1, epsilon=0.001) = ViscosityMorrisSGS(nu, C_S, e
427439
428440 # SGS part: Compute the subgrid-scale eddy viscosity.
429441 # See comments above for `ViscosityAdamiSGS`.
430- S_mag = norm (v_diff) / (distance + epsilon)
431- nu_SGS = (viscosity. C_S * smoothing_length_average)^ 2 * S_mag
442+ # Since this is one of the most performance-critical functions, using fast divisions
443+ # here gives a significant speedup on GPUs.
444+ # See the docs page "Development" for more details on `div_fast`.
445+ S_mag = div_fast (sqrt (dot (v_diff, v_diff)), (distance + epsilon))
446+ nu_SGS = (viscosity. C_S * h)^ 2 * S_mag
432447
433448 # Effective viscosities include the SGS term.
434449 nu_a_eff = nu_a + nu_SGS
@@ -438,9 +453,11 @@ ViscosityMorrisSGS(; nu, C_S=0.1, epsilon=0.001) = ViscosityMorrisSGS(nu, C_S, e
438453 mu_a = nu_a_eff * rho_a
439454 mu_b = nu_b_eff * rho_b
440455
441- force_Morris = (mu_a + mu_b) / (rho_a * rho_b) * (dot (pos_diff, grad_kernel)) /
442- (distance^ 2 + epsilon * smoothing_length_average^ 2 ) * v_diff
443- return m_b * force_Morris
456+ # Since this is one of the most performance-critical functions, using fast divisions
457+ # here gives a significant speedup on GPUs.
458+ # See the docs page "Development" for more details on `div_fast`.
459+ return div_fast (m_b * (mu_a + mu_b) * dot (pos_diff, grad_kernel),
460+ rho_a * rho_b * (distance^ 2 + epsilon * h^ 2 )) * v_diff
444461end
445462
446463function kinematic_viscosity (system, viscosity:: ViscosityMorrisSGS , smoothing_length,
496513 v_b = viscous_velocity (v_neighbor_system, neighbor_system, neighbor)
497514 v_diff = v_a - v_b
498515
499- gamma_dot = norm (v_diff) / (distance + epsilon)
516+ # Since this is one of the most performance-critical functions, using fast divisions
517+ # here gives a significant speedup on GPUs.
518+ # See the docs page "Development" for more details on `div_fast`.
519+ gamma_dot = div_fast (sqrt (dot (v_diff, v_diff)), (distance + epsilon))
500520
501521 # Compute Carreau-Yasuda effective viscosity
502522 (; nu0, nu_inf, lambda, a, n) = viscosity
0 commit comments