diff --git a/AUTHORS.md b/AUTHORS.md index b54b8c45f41..bf659a68874 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -37,6 +37,7 @@ are listed in alphabetical order: * Gregor Gassner * Lucas Gemein * Sven Goldberg +* T. Jeremy P. Karpowski * Joshua Lampert * Tristan Montoya * Julia Odenthal diff --git a/src/callbacks_step/analysis_surface_integral_2d.jl b/src/callbacks_step/analysis_surface_integral_2d.jl index 9c1899ef750..003c279f8bd 100644 --- a/src/callbacks_step/analysis_surface_integral_2d.jl +++ b/src/callbacks_step/analysis_surface_integral_2d.jl @@ -250,6 +250,9 @@ function analyze(surface_variable::AnalysisSurfaceIntegral, du, u, t, j_node += j_node_step end end + if mpi_isparallel() + surface_integral = MPI.Allreduce!(Ref(surface_integral), +, mpi_comm())[] + end return surface_integral end @@ -320,6 +323,9 @@ function analyze(surface_variable::AnalysisSurfaceIntegral{Variable}, du, u, t, j_node += j_node_step end end + if mpi_isparallel() + surface_integral = MPI.Allreduce!(Ref(surface_integral), +, mpi_comm())[] + end return surface_integral end end # muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic_parallel.jl new file mode 100644 index 00000000000..f7f95710c63 --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic_parallel.jl @@ -0,0 +1,449 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent +function rhs_parabolic!(du, u, t, + mesh::P4estMeshParallel{2}, + equations_parabolic::AbstractEquationsParabolic, + boundary_conditions_parabolic, source_terms_parabolic, + dg::DG, parabolic_scheme, cache, cache_parabolic) + @unpack parabolic_container = cache_parabolic + @unpack u_transformed, gradients, flux_parabolic = parabolic_container + + @trixi_timeit timer() "transform variables" begin + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, cache) + end + + ### Gradient computation ### + + # Start gradient MPI receive + @trixi_timeit timer() "start MPI receive gradient" begin + start_mpi_receive!(cache.mpi_cache) + end + + @assert isempty(eachmortar(dg, cache)) "Nonconforming meshes are not yet supported on MPI parallel P4estMesh." 
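+    # As in the purely hyperbolic parallel `rhs!`, communication and computation
+    # are overlapped: the MPI receives are already posted above, the data needed
+    # by remote ranks is prolonged to the MPI interfaces and sent next, and all
+    # rank-local work is performed before the remote interface fluxes are evaluated.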
+ # Prolong transformed variables to MPI mortars + # @trixi_timeit timer() "prolong2mpimortars gradient" begin + # prolong2mpimortars!(cache, u_transformed, mesh, equations_parabolic, + # dg.mortar, dg) + # end + + # Prolong transformed variables to MPI interfaces + @trixi_timeit timer() "prolong2mpiinterfaces gradient" begin + prolong2mpiinterfaces!(cache, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end + + # Start gradient MPI send + @trixi_timeit timer() "start MPI send gradient" begin + start_mpi_send!(cache.mpi_cache, mesh, equations_parabolic, dg, cache) + end + + # Local gradient computation + @trixi_timeit timer() "calculate gradient local" begin + calc_gradient_local!(gradients, u_transformed, t, mesh, + equations_parabolic, boundary_conditions_parabolic, + dg, parabolic_scheme, cache) + end + + # Finish gradient MPI receive + @trixi_timeit timer() "finish MPI receive gradient" begin + finish_mpi_receive!(cache.mpi_cache, mesh, equations_parabolic, dg, cache) + end + + # MPI interface fluxes for gradients + @trixi_timeit timer() "MPI interface flux gradient" begin + calc_mpi_interface_flux_gradient!(cache.elements.surface_flux_values, + mesh, equations_parabolic, + dg, parabolic_scheme, cache) + end + + # MPI mortar fluxes for gradients + # @trixi_timeit timer() "MPI mortar flux gradient" begin + # calc_mpi_mortar_flux_gradient!(cache.elements.surface_flux_values, + # mesh, equations_parabolic, dg.mortar, + # dg, parabolic_scheme, cache) + # end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral_gradient!(gradients, mesh, equations_parabolic, + dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients, mesh, equations_parabolic, dg, + cache) + end + + # Finish gradient MPI send + @trixi_timeit timer() "finish MPI send gradient" begin + finish_mpi_send!(cache.mpi_cache) + end + + # Local parabolic flux construction + @trixi_timeit timer() "calculate parabolic fluxes" begin + calc_parabolic_fluxes!(flux_parabolic, gradients, u_transformed, mesh, + equations_parabolic, dg, cache) + end + + ### Divergence of parabolic/gradient fluxes ### + + # Start divergence MPI receive + @trixi_timeit timer() "start MPI receive divergence" begin + start_mpi_receive!(cache.mpi_cache) + end + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" begin + set_zero!(du, dg, cache) + end + + # Local volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_parabolic, mesh, equations_parabolic, dg, cache) + end + + # Prolong parabolic fluxes to MPI mortars + # @trixi_timeit timer() "prolong2mpimortars divergence" begin + # prolong2mpimortars_divergence!(cache, flux_parabolic, mesh, equations_parabolic, + # dg.mortar, dg) + # end + + # Prolong parabolic fluxes to MPI interfaces + @trixi_timeit timer() "prolong2mpiinterfaces divergence" begin + prolong2mpiinterfaces!(cache, flux_parabolic, mesh, equations_parabolic, dg) + end + + # Start divergence MPI send + @trixi_timeit timer() "start MPI send divergence" begin + start_mpi_send!(cache.mpi_cache, mesh, equations_parabolic, dg, cache) + end + + # Local interface fluxes + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, flux_parabolic, mesh, equations_parabolic, dg) + end + + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + equations_parabolic, dg, 
parabolic_scheme, cache) + end + + # Local boundary fluxes + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, flux_parabolic, mesh, equations_parabolic, dg) + end + + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) + end + + # Local mortar fluxes + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars_divergence!(cache, flux_parabolic, mesh, equations_parabolic, + dg.mortar, dg) + end + + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux_divergence!(cache.elements.surface_flux_values, + mesh, equations_parabolic, dg.mortar, + dg, parabolic_scheme, cache) + end + + # Finish divergence MPI receive + @trixi_timeit timer() "finish MPI receive divergence" begin + finish_mpi_receive!(cache.mpi_cache, mesh, equations_parabolic, dg, cache) + end + + # MPI interface fluxes for divergence + @trixi_timeit timer() "MPI interface flux divergence" begin + calc_mpi_interface_flux_divergence!(cache.elements.surface_flux_values, + mesh, equations_parabolic, + dg, parabolic_scheme, cache) + end + + # MPI mortar fluxes for divergence + # @trixi_timeit timer() "MPI mortar flux divergence" begin + # calc_mpi_mortar_flux_divergence!(cache.elements.surface_flux_values, + # mesh, equations_parabolic, dg.mortar, + # dg, parabolic_scheme, cache) + # end + + # Finish divergence MPI send + @trixi_timeit timer() "finish MPI send divergence" begin + finish_mpi_send!(cache.mpi_cache) + end + + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache) + end + + @trixi_timeit timer() "source terms parabolic" begin + calc_sources_parabolic!(du, u, gradients, t, source_terms_parabolic, + equations_parabolic, dg, cache) + end + + return nothing +end + +function calc_gradient_local!(gradients, u_transformed, t, + mesh::P4estMeshParallel{2}, + equations_parabolic, boundary_conditions_parabolic, + dg::DG, parabolic_scheme, cache) + # Reset gradients + @trixi_timeit timer() "reset gradients" begin + reset_gradients!(gradients, dg, cache) + end + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral_gradient!(gradients, u_transformed, + mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces. + # This reuses `prolong2interfaces` for the purely hyperbolic case. + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(nothing, cache, u_transformed, mesh, + equations_parabolic, dg) + end + + # Calculate interface fluxes for the gradients + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache.elements + calc_interface_flux_gradient!(surface_flux_values, mesh, equations_parabolic, + dg, parabolic_scheme, cache) + end + + # Prolong solution to boundaries. + # This reuses `prolong2boundaries` for the purely hyperbolic case. + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u_transformed, mesh, + equations_parabolic, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradient!(cache, t, boundary_conditions_parabolic, + mesh, equations_parabolic, dg.surface_integral, + dg) + end + + # Prolong solution to mortars. 
+ # This reuses `prolong2mortars` for the purely hyperbolic case. + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u_transformed, mesh, equations_parabolic, + dg.mortar, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux_gradient!(cache.elements.surface_flux_values, + mesh, equations_parabolic, dg.mortar, + dg, parabolic_scheme, cache) + end + + return nothing +end + +function prolong2mpiinterfaces!(cache, flux_parabolic::Tuple, + mesh::P4estMeshParallel{2}, + equations_parabolic, dg::DG) + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + flux_parabolic_x, flux_parabolic_y = flux_parabolic + + @threaded for interface in eachmpiinterface(dg, cache) + # Copy solution data from the local element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + # store the normal flux with respect to the primary normal direction, + # which is the negative of the secondary normal direction + orientation_factor = local_side == 1 ? 1 : -1 + + i_start, i_step = index_to_start_step_2d(local_indices[1], index_range) + j_start, j_step = index_to_start_step_2d(local_indices[2], index_range) + + i_elem = i_start + j_elem = j_start + + for i in eachnode(dg) + normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_elem, j_elem, + local_element) + + for v in eachvariable(equations_parabolic) + flux_visc = SVector(flux_parabolic_x[v, i_elem, j_elem, local_element], + flux_parabolic_y[v, i_elem, j_elem, local_element]) + # Side 1 and 2 must be consistent, i.e., with their outward-pointing normals. + # Thus, the `orientation_factor` changes the logic such that the + # flux which enters side 1 leaves side 2. 
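+                # As a result, both sides end up storing `dot(flux_visc, n)` with `n`
+                # the outward normal of the primary (side 1) element, so the values
+                # exchanged between ranks refer to the same normal direction.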
+ cache.mpi_interfaces.u[local_side, v, i, interface] = orientation_factor * + dot(flux_visc, + normal_direction) + end + + i_elem += i_step + j_elem += j_step + end + end + + return nothing +end + +function calc_mpi_interface_flux_gradient!(surface_flux_values, + mesh::P4estMeshParallel{2}, + equations_parabolic, + dg::DG, parabolic_scheme, cache) + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + @unpack u = cache.mpi_interfaces + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachmpiinterface(dg, cache) + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + # Create the local i,j indexing on the local element used to pull normal direction information + i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], + index_range) + j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], + index_range) + + i_element = i_element_start + j_element = j_element_start + + # Initiate the node index to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + if :i_backward in local_indices + surface_node = index_end + surface_node_step = -1 + else + surface_node = 1 + surface_node_step = 1 + end + + for i in eachnode(dg) + normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, + local_element) + + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, + i, interface) + + flux_ = flux_parabolic(u_ll, u_rr, normal_direction, Gradient(), + equations_parabolic, parabolic_scheme) + + for v in eachvariable(equations_parabolic) + surface_flux_values[v, surface_node, + local_direction, local_element] = flux_[v] + end + + # Increment local element indices to pull the normal direction + # from the element data + i_element += i_element_step + j_element += j_element_step + + # Increment the surface node index along the local element + surface_node += surface_node_step + end + end + + return nothing +end + +function calc_mpi_interface_flux_divergence!(surface_flux_values, + mesh::P4estMeshParallel{2}, + equations_parabolic, + dg::DG, parabolic_scheme, cache) + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + @unpack u = cache.mpi_interfaces + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachmpiinterface(dg, cache) + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], + index_range) + j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], + index_range) + + i_element = i_element_start + j_element = j_element_start + + # Initiate the node index to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + if :i_backward in local_indices + surface_node = index_end + surface_node_step = -1 + else + surface_node = 1 + surface_node_step = 1 + end + + for i in eachnode(dg) + normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, + local_element) + + 
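+            # `u` already holds the viscous fluxes of both elements projected onto
+            # the primary normal direction (see `prolong2mpiinterfaces!` above), so
+            # only these scalar normal fluxes per variable are exchanged via MPI.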
parabolic_flux_normal_ll, parabolic_flux_normal_rr = get_surface_node_vars(u, + equations_parabolic, + dg, + i, + interface) + + # Sign flip for `local_side = 2` required for divergence calculation since + # the divergence interface flux involves the normal direction. + # `local_side=2` is thus flipped (opposite of primary side) + orientation_factor = local_side == 1 ? 1 : -1 + flux_ = flux_parabolic(parabolic_flux_normal_ll, parabolic_flux_normal_rr, + orientation_factor * normal_direction, Divergence(), + equations_parabolic, parabolic_scheme) + + for v in eachvariable(equations_parabolic) + surface_flux_values[v, surface_node, + local_direction, local_element] = orientation_factor * flux_[v] + end + + i_element += i_element_step + j_element += j_element_step + + surface_node += surface_node_step + end + end + + return nothing +end +end #muladd diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index e7f97d96933..aea544026ef 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -608,4 +608,5 @@ end include("dg_2d_parallel.jl") include("dg_3d_parallel.jl") +include("dg_2d_parabolic_parallel.jl") end # muladd diff --git a/test/test_mpi.jl b/test/test_mpi.jl index 887eb9e01f5..3640ef8637b 100644 --- a/test/test_mpi.jl +++ b/test/test_mpi.jl @@ -26,6 +26,7 @@ CI_ON_WINDOWS = (get(ENV, "GITHUB_ACTIONS", false) == "true") && Sys.iswindows() if !CI_ON_WINDOWS # see comment on `CI_ON_WINDOWS` above include("test_mpi_p4est_3d.jl") include("test_mpi_t8code_3d.jl") + include("test_mpi_p4est_parabolic_2d.jl") end end # MPI diff --git a/test/test_mpi_p4est_parabolic_2d.jl b/test/test_mpi_p4est_parabolic_2d.jl new file mode 100644 index 00000000000..3c97c9b41c5 --- /dev/null +++ b/test/test_mpi_p4est_parabolic_2d.jl @@ -0,0 +1,126 @@ +module TestExamplesMPIP4estMesh2DParabolic + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + +@testset "P4estMesh MPI 2D Parabolic" begin + @trixi_testset "P4estMesh2D: elixir_navierstokes_lid_driven_cavity.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_navierstokes_lid_driven_cavity.jl"), + initial_refinement_level=2, tspan=(0.0, 0.5), + l2=[ + 0.00028716166408816073, + 0.08101204560401647, + 0.02099595625377768, + 0.05008149754143295 + ], + linf=[ + 0.014804500261322406, + 0.9513271652357098, + 0.7223919625994717, + 1.4846907331004786 + ]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1500) + @test_allocations(Trixi.rhs_parabolic!, semi, sol, 1500) + end + + @trixi_testset "P4estMesh2D: elixir_navierstokes_convergence_nonperiodic.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_navierstokes_convergence_nonperiodic.jl"), + initial_refinement_level=1, tspan=(0.0, 0.2), + l2=[ + 0.0004036496258545996, + 0.0005869762480189079, + 0.0009148853742181908, + 0.0011984191532764543 + ], + linf=[ + 0.0024993634989209923, + 0.009487866203496731, + 0.004505829506103787, + 0.011634902753554499 + ]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1500) + @test_allocations(Trixi.rhs_parabolic!, semi, sol, 1500) + end + + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_nonperiodic_curved.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_advection_diffusion_nonperiodic_curved.jl"), + 
trees_per_dimension=(1, 1), initial_refinement_level=2, + tspan=(0.0, 0.5), + l2=[0.00919917034843865], + linf=[0.14186297438393505]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1500) + @test_allocations(Trixi.rhs_parabolic!, semi, sol, 1500) + end + + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_advection_diffusion_periodic.jl"), + trees_per_dimension=(1, 1), initial_refinement_level=2, + tspan=(0.0, 0.5), + l2=[0.0023754695605828443], + linf=[0.008154128363741964]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1500) + @test_allocations(Trixi.rhs_parabolic!, semi, sol, 1500) + end + + @trixi_testset "elixir_navierstokes_NACA0012airfoil_mach08.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_navierstokes_NACA0012airfoil_mach08.jl"), + l2=[0.000186486564226516, + 0.0005076712323400374, + 0.00038074588984354107, + 0.002128177239782089], + linf=[0.5153387072802718, + 1.199362305026636, + 0.9077214424040279, + 5.666071182328691], tspan=(0.0, 0.001), + initial_refinement_level=0) + + u_ode = copy(sol.u[end]) + du_ode = zero(u_ode) # Just a placeholder in this case + + u = Trixi.wrap_array(u_ode, semi) + du = Trixi.wrap_array(du_ode, semi) + + drag_p = Trixi.analyze(drag_coefficient, du, u, tspan[2], mesh, equations, solver, + semi.cache, semi) + lift_p = Trixi.analyze(lift_coefficient, du, u, tspan[2], mesh, equations, solver, + semi.cache, semi) + + drag_f = Trixi.analyze(drag_coefficient_shear_force, du, u, tspan[2], mesh, + equations, equations_parabolic, solver, + semi.cache, semi, semi.cache_parabolic) + lift_f = Trixi.analyze(lift_coefficient_shear_force, du, u, tspan[2], mesh, + equations, equations_parabolic, solver, + semi.cache, semi, semi.cache_parabolic) + + @test isapprox(drag_p, 0.17963843913309516, atol = 1e-13) + @test isapprox(lift_p, 0.26462588007949367, atol = 1e-13) + + @test isapprox(drag_f, 1.5427441885921553, atol = 1e-13) + @test isapprox(lift_f, 0.005621910087395724, atol = 1e-13) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # We move these tests here to avoid modifying values used + # to compute the drag/lift coefficients above. + @test_allocations(Trixi.rhs!, semi, sol, 1500) + @test_allocations(Trixi.rhs_parabolic!, semi, sol, 1500) + end +end +end # module
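For context, the reduction added in `analysis_surface_integral_2d.jl` is a plain scalar MPI sum: each rank integrates over the boundary faces it owns and the partial results are combined with an `Allreduce`. A minimal, self-contained MPI.jl sketch of that idiom (the variable names here are illustrative and not part of this patch):

    using MPI
    MPI.Init()
    comm = MPI.COMM_WORLD
    # Stand-in for the rank-local part of a surface integral
    local_integral = 1.0 + MPI.Comm_rank(comm)
    # Out-of-place form: every rank receives the global sum
    total = MPI.Allreduce(local_integral, +, comm)
    # In-place form used in the patch, via a Ref to the scalar
    total_ref = MPI.Allreduce!(Ref(local_integral), +, comm)[]
    @assert total == total_ref
    MPI.Finalize()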