1+ # Since `@muladd` can fuse multiply-add operations and thus improve performance in
2+ # the flux differencing loops, we opt-in explicitly.
3+ # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
4+ @muladd begin
5+ # ! format: noindent
6+
17# by default, return an empty tuple for volume integral caches
28function create_cache (mesh:: DGMultiMesh{NDIMS} , equations,
39 volume_integral:: VolumeIntegralShockCapturingHGType ,
@@ -12,40 +18,52 @@ function create_cache(mesh::DGMultiMesh{NDIMS}, equations,
1218 # create sparse hybridized operators for low order scheme
1319 Qrst, E = StartUpDG. sparse_low_order_SBP_operators (dg. basis)
1420 Brst = map (n -> Diagonal (n .* dg. basis. wf), dg. basis. nrstJ)
15- sparse_hybridized_SBP_operators = map ((Q, B) -> 0.5 * [Q- Q' E' * B; - B* E zeros (size (B))],
16- Qrst, Brst)
21+ sparse_SBP_operators = map ((Q, B) -> 0.5f0 * [Q- Q' E' * B; - B* E zeros (size (B))],
22+ Qrst, Brst)
1723
1824 # Find the joint sparsity pattern of the entire matrix. We store the sparsity pattern as
1925 # an adjoint for faster iteration through the rows.
20- sparsity_pattern = sum (map (A -> abs .(A)' , sparse_hybridized_SBP_operators )) .>
26+ sparsity_pattern = sum (map (A -> abs .(A)' , sparse_SBP_operators )) .>
2127 100 * eps ()
2228
23- return (; sparse_hybridized_SBP_operators , sparsity_pattern,
29+ return (; sparse_SBP_operators , sparsity_pattern,
2430 element_to_element_connectivity)
2531end
2632
2733# this method is used when the indicator is constructed for shock-capturing volume integrals
2834function create_cache (:: Type{IndicatorHennemannGassner} , equations:: AbstractEquations ,
29- basis:: RefElemData {NDIMS} ) where {NDIMS}
35+ basis:: DGMultiBasis {NDIMS} ) where {NDIMS}
3036 uEltype = real (basis)
3137 alpha = Vector {uEltype} ()
3238 alpha_tmp = similar (alpha)
3339
34- MVec = MVector{nnodes (basis), uEltype}
35- indicator_threaded = MVec[MVec (undef) for _ in 1 : Threads. maxthreadid ()]
36- modal_threaded = MVec[MVec (undef) for _ in 1 : Threads. maxthreadid ()]
40+ MVec_nodes = MVector{nnodes (basis), uEltype}
41+ indicator_threaded = MVec_nodes[MVec_nodes (undef) for _ in 1 : Threads. maxthreadid ()]
42+ MVec_modes = MVector{nmodes (basis. N, basis. element_type), uEltype}
43+ modal_threaded = MVec_modes[MVec_modes (undef) for _ in 1 : Threads. maxthreadid ()]
44+
45+ inverse_vandermonde = calc_inverse_vandermonde (basis)
3746
47+ return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde)
48+ end
49+
50+ # calculates the inverse of the Vandermonde matrix for shock capturing purposes.
51+ # This version is for tensor product elements (Line, Quad, Hex)
52+ function calc_inverse_vandermonde (basis:: DGMultiBasis{NDIMS, <:Union{Line, Quad, Hex}} ) where {NDIMS}
3853 # initialize inverse Vandermonde matrices at Legendre-Gauss-Lobatto nodes
3954 (; N) = basis
4055 lobatto_node_coordinates_1D, _ = StartUpDG. gauss_lobatto_quad (0 , 0 , N)
4156 VDM_1D = StartUpDG. vandermonde (Line (), N, lobatto_node_coordinates_1D)
4257 inverse_vandermonde = SimpleKronecker (NDIMS, inv (VDM_1D))
4358
44- return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde)
59+ return inverse_vandermonde
4560end
4661
4762function (indicator_hg:: IndicatorHennemannGassner )(u, mesh:: DGMultiMesh ,
48- equations, dg:: DGMulti{NDIMS} , cache;
63+ equations,
64+ dg:: DGMulti {NDIMS,
65+ <: Union{Line, Quad, Hex} },
66+ cache;
4967 kwargs... ) where {NDIMS}
5068 (; alpha_max, alpha_min, alpha_smooth, variable) = indicator_hg
5169 (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) = indicator_hg. cache
@@ -56,7 +74,7 @@ function (indicator_hg::IndicatorHennemannGassner)(u, mesh::DGMultiMesh,
5674 end
5775
5876 # magic parameters
59- threshold = 0.5 * 10 ^ (- 1.8 * (dg. basis. N + 1 )^ 0.25 )
77+ threshold = 0.5f0 * 10 ^ (- 1.8 * (dg. basis. N + 1 )^ 0.25 )
6078 parameter_s = log ((1 - 0.0001 ) / 0.0001 )
6179
6280 @threaded for element in eachelement (mesh, dg)
@@ -101,7 +119,8 @@ function (indicator_hg::IndicatorHennemannGassner)(u, mesh::DGMultiMesh,
101119 energy_frac_1 = zero (total_energy)
102120 end
103121 if ! (iszero (total_energy_clip1))
104- energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
122+ energy_frac_2 = (total_energy_clip1 - total_energy_clip2) /
123+ total_energy_clip1
105124 else
106125 energy_frac_2 = zero (total_energy_clip1)
107126 end
@@ -134,15 +153,15 @@ end
134153# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
135154function apply_smoothing! (mesh:: DGMultiMesh , alpha, alpha_tmp, dg:: DGMulti , cache)
136155
137- # Copy alpha values such that smoothing is indpedenent of the element access order
156+ # Copy alpha values such that smoothing is independent of the element access order
138157 alpha_tmp .= alpha
139158
140159 # smooth alpha with its neighboring value
141- for element in eachelement (mesh, dg)
160+ @threaded for element in eachelement (mesh, dg)
142161 for face in Base. OneTo (StartUpDG. num_faces (dg. basis. element_type))
143162 neighboring_element = cache. element_to_element_connectivity[face, element]
144163 alpha_neighbor = alpha_tmp[neighboring_element]
145- alpha[element] = max (alpha[element], 0.5 * alpha_neighbor)
164+ alpha[element] = max (alpha[element], 0.5f0 * alpha_neighbor)
146165 end
147166 end
148167
@@ -194,16 +213,16 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh,
194213end
195214
196215function get_sparse_operator_entries (i, j, mesh:: DGMultiMesh{1} , cache)
197- return SVector (cache. sparse_hybridized_SBP_operators [1 ][i, j])
216+ return SVector (cache. sparse_SBP_operators [1 ][i, j])
198217end
199218
200219function get_sparse_operator_entries (i, j, mesh:: DGMultiMesh{2} , cache)
201- Qr, Qs = cache. sparse_hybridized_SBP_operators
220+ Qr, Qs = cache. sparse_SBP_operators
202221 return SVector (Qr[i, j], Qs[i, j])
203222end
204223
205224function get_sparse_operator_entries (i, j, mesh:: DGMultiMesh{3} , cache)
206- Qr, Qs, Qt = cache. sparse_hybridized_SBP_operators
225+ Qr, Qs, Qt = cache. sparse_SBP_operators
207226 return SVector (Qr[i, j], Qs[i, j], Qt[i, j])
208227end
209228
@@ -244,7 +263,7 @@ function get_contravariant_matrix(i, element, mesh::DGMultiMesh{3}, cache)
244263end
245264
246265function get_avg_contravariant_matrix (i, j, element, mesh:: DGMultiMesh , cache)
247- return 0.5 * (get_contravariant_matrix (i, element, mesh, cache) +
266+ return 0.5f0 * (get_contravariant_matrix (i, element, mesh, cache) +
248267 get_contravariant_matrix (j, element, mesh, cache))
249268end
250269
@@ -290,7 +309,8 @@ function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh,
290309 u_j = u_local[j]
291310
292311 # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j
293- geometric_matrix = get_low_order_geometric_matrix (i, j, element, mesh, cache)
312+ geometric_matrix = get_low_order_geometric_matrix (i, j, element,
313+ mesh, cache)
294314 reference_operator_entries = get_sparse_operator_entries (i, j, mesh, cache)
295315 normal_direction_ij = geometric_matrix * reference_operator_entries
296316
@@ -305,3 +325,4 @@ function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh,
305325 # TODO : factor this out to avoid calling it twice during calc_volume_integral!
306326 return project_rhs_to_gauss_nodes! (du, rhs_local, element, mesh, dg, cache, alpha)
307327end
328+ end # @muladd
0 commit comments