diff --git a/src/general/buffer.jl b/src/general/buffer.jl index 15a7bc4579..3e1c28e8ca 100644 --- a/src/general/buffer.jl +++ b/src/general/buffer.jl @@ -7,12 +7,7 @@ end function SystemBuffer(active_size, buffer_size::Integer) # Using a `BitVector` is not an option as writing to it is not thread-safe. - # Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation. - # Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl - # does not support atomic operations on `Bool`. - # https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243 - active_particle = vcat(fill(UInt32(true), active_size), - fill(UInt32(false), buffer_size)) + active_particle = vcat(fill(true, active_size), fill(false, buffer_size)) eachparticle = collect(eachindex(active_particle)) return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size) @@ -49,9 +44,8 @@ end # TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810) # Update the number of active particles and the active particle indices - buffer.active_particle_count[] = sum(active_particle) - buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true, - active_particle) + buffer.active_particle_count[] = count(active_particle) + buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(active_particle) return buffer end @@ -64,29 +58,6 @@ end return view(buffer.eachparticle, 1:buffer.active_particle_count[]) end -@inline function activate_next_particle(system) - (; active_particle) = system.buffer - - for particle in eachindex(active_particle) - if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false - # After we go into this condition, another thread might still activate this particle - # before we do. Therefore, we use an atomic swap, which activates the particle, - # but also returns the old value. - # If the old value is `true`, the particle was active before and we need to continue. - # Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored. - # An atomic compare-and-swap operation is probably implemented for Metal.jl here: - # https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42 - was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true - - if was_active == false - return particle - end - end - end - - error("No buffer particles available") -end - @inline function deactivate_particle!(system, particle, u) (; active_particle) = system.buffer @@ -96,7 +67,8 @@ end u[dim, particle] = eltype(system)(1e16) end - # `activate_next_particle!` and `deactivate_particle!` are never called on the same buffer inside a kernel, + # `deactivate_particle!` and `active_particle[particle] = true` + # are never called on the same buffer inside a kernel, # so we don't have any race conditions on this `active_particle` vector. active_particle[particle] = false diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl index ae956e88d1..3e033e447b 100644 --- a/src/general/semidiscretization.jl +++ b/src/general/semidiscretization.jl @@ -964,6 +964,8 @@ function set_system_links(system::OpenBoundarySPHSystem, semi) system.density, system.volume, system.pressure, + system.boundary_candidates, + system.fluid_candidates, system.boundary_zone, system.reference_velocity, system.reference_pressure, diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl index f299fd9286..14034fedc2 100644 --- a/src/schemes/boundary/open_boundary/system.jl +++ b/src/schemes/boundary/open_boundary/system.jl @@ -36,7 +36,7 @@ Open boundary system for in- and outflow particles. This is an experimental feature and may change in future releases. It is GPU-compatible (e.g., with CUDA.jl and AMDGPU.jl), but currently **not** supported with Metal.jl. """ -struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BZ, RV, +struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BC, FC, BZ, RV, RP, RD, B, UCU, C} <: System{NDIMS} boundary_model :: BM initial_condition :: IC @@ -47,6 +47,8 @@ struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BZ, RV, density :: ARRAY1D # Array{ELTYPE, 1}: [particle] volume :: ARRAY1D # Array{ELTYPE, 1}: [particle] pressure :: ARRAY1D # Array{ELTYPE, 1}: [particle] + boundary_candidates :: BC # Array{UInt32, 1}: [particle] + fluid_candidates :: FC # Array{UInt32, 1}: [particle] boundary_zone :: BZ reference_velocity :: RV reference_pressure :: RP @@ -58,18 +60,21 @@ end function OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system, fluid_system_index, smoothing_length, mass, density, volume, - pressure, boundary_zone, reference_velocity, + pressure, boundary_candidates, fluid_candidates, + boundary_zone, reference_velocity, reference_pressure, reference_density, buffer, update_callback_used, cache) OpenBoundarySPHSystem{typeof(boundary_model), eltype(mass), ndims(initial_condition), typeof(initial_condition), typeof(fluid_system), - typeof(fluid_system_index), typeof(mass), typeof(boundary_zone), - typeof(reference_velocity), typeof(reference_pressure), - typeof(reference_density), typeof(buffer), - typeof(update_callback_used), + typeof(fluid_system_index), typeof(mass), + typeof(boundary_candidates), typeof(fluid_candidates), + typeof(boundary_zone), typeof(reference_velocity), + typeof(reference_pressure), typeof(reference_density), + typeof(buffer), typeof(update_callback_used), typeof(cache)}(boundary_model, initial_condition, fluid_system, fluid_system_index, smoothing_length, mass, - density, volume, pressure, boundary_zone, + density, volume, pressure, boundary_candidates, + fluid_candidates, boundary_zone, reference_velocity, reference_pressure, reference_density, buffer, update_callback_used, cache) @@ -153,9 +158,13 @@ function OpenBoundarySPHSystem(boundary_zone::BoundaryZone; smoothing_length = initial_smoothing_length(fluid_system) + boundary_candidates = fill(false, nparticles(initial_condition)) + fluid_candidates = fill(false, nparticles(fluid_system)) + return OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system, fluid_system_index, smoothing_length, mass, density, - volume, pressure, boundary_zone, reference_velocity_, + volume, pressure, boundary_candidates, fluid_candidates, + boundary_zone, reference_velocity_, reference_pressure_, reference_density_, buffer, update_callback_used, cache) end @@ -278,37 +287,67 @@ end update_open_boundary_eachstep!(system, v_ode, u_ode, semi, t) = system function check_domain!(system, v, u, v_ode, u_ode, semi) - (; boundary_zone) = system + (; boundary_zone, boundary_candidates, fluid_candidates) = system fluid_system = corresponding_fluid_system(system, semi) u_fluid = wrap_u(u_ode, fluid_system, semi) v_fluid = wrap_v(v_ode, fluid_system, semi) + boundary_candidates .= false + # Check the boundary particles whether they're leaving the boundary zone @threaded semi for particle in each_moving_particle(system) particle_coords = current_coords(u, system, particle) # Check if boundary particle is outside the boundary zone if !is_in_boundary_zone(boundary_zone, particle_coords) - convert_particle!(system, fluid_system, boundary_zone, particle, - v, u, v_fluid, u_fluid) + boundary_candidates[particle] = true end end + crossed_boundary_particles = findall(boundary_candidates) + available_fluid_particles = findall(==(false), fluid_system.buffer.active_particle) + + @assert length(crossed_boundary_particles)<=length(available_fluid_particles) "Not enough fluid buffer particles available" + + # Convert open boundary particles in the fluid domain to fluid particles + @threaded semi for i in eachindex(crossed_boundary_particles) + particle = crossed_boundary_particles[i] + particle_new = available_fluid_particles[i] + + convert_particle!(system, fluid_system, boundary_zone, particle, particle_new, + v, u, v_fluid, u_fluid) + end + update_system_buffer!(system.buffer, semi) update_system_buffer!(fluid_system.buffer, semi) + fluid_candidates .= false + # Check the fluid particles whether they're entering the boundary zone @threaded semi for fluid_particle in each_moving_particle(fluid_system) fluid_coords = current_coords(u_fluid, fluid_system, fluid_particle) # Check if fluid particle is in boundary zone if is_in_boundary_zone(boundary_zone, fluid_coords) - convert_particle!(fluid_system, system, boundary_zone, fluid_particle, - v, u, v_fluid, u_fluid) + fluid_candidates[fluid_particle] = true end end + crossed_fluid_particles = findall(fluid_candidates) + available_boundary_particles = findall(==(false), system.buffer.active_particle) + + @assert length(crossed_fluid_particles)<=length(available_boundary_particles) "Not enough boundary buffer particles available" + + # Convert fluid particles in the open boundary zone to open boundary particles + @threaded semi for i in eachindex(crossed_fluid_particles) + particle = crossed_fluid_particles[i] + particle_new = available_boundary_particles[i] + + convert_particle!(fluid_system, system, boundary_zone, particle, particle_new, + v, u, v_fluid, u_fluid) + end + update_system_buffer!(system.buffer, semi) update_system_buffer!(fluid_system.buffer, semi) @@ -317,8 +356,8 @@ end # Outflow particle is outside the boundary zone @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system, - boundary_zone::BoundaryZone{OutFlow}, particle, v, u, - v_fluid, u_fluid) + boundary_zone::BoundaryZone{OutFlow}, particle, + particle_new, v, u, v_fluid, u_fluid) deactivate_particle!(system, particle, u) return system @@ -326,12 +365,12 @@ end # Inflow particle is outside the boundary zone @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system, - boundary_zone::BoundaryZone{InFlow}, particle, v, u, - v_fluid, u_fluid) + boundary_zone::BoundaryZone{InFlow}, particle, + particle_new, v, u, v_fluid, u_fluid) (; spanning_set) = boundary_zone # Activate a new particle in simulation domain - transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u) + transfer_particle!(fluid_system, system, particle, particle_new, v_fluid, u_fluid, v, u) # Reset position of boundary particle for dim in 1:ndims(system) @@ -343,8 +382,8 @@ end # Buffer particle is outside the boundary zone @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system, - boundary_zone::BoundaryZone{BidirectionalFlow}, particle, - v, u, v_fluid, u_fluid) + boundary_zone::BoundaryZone{BidirectionalFlow}, + particle, particle_new, v, u, v_fluid, u_fluid) relative_position = current_coords(u, system, particle) - boundary_zone.zone_origin # Check if particle is in- or outside the fluid domain. @@ -356,7 +395,7 @@ end end # Activate a new particle in simulation domain - transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u) + transfer_particle!(fluid_system, system, particle, particle_new, v_fluid, u_fluid, v, u) # Reset position of boundary particle for dim in 1:ndims(system) @@ -368,9 +407,10 @@ end # Fluid particle is in boundary zone @inline function convert_particle!(fluid_system::FluidSystem, system, - boundary_zone, particle, v, u, v_fluid, u_fluid) + boundary_zone, particle, particle_new, + v, u, v_fluid, u_fluid) # Activate particle in boundary zone - transfer_particle!(system, fluid_system, particle, v, u, v_fluid, u_fluid) + transfer_particle!(system, fluid_system, particle, particle_new, v, u, v_fluid, u_fluid) # Deactivate particle in interior domain deactivate_particle!(fluid_system, particle, u_fluid) @@ -378,9 +418,10 @@ end return fluid_system end -@inline function transfer_particle!(system_new, system_old, particle_old, +@inline function transfer_particle!(system_new, system_old, particle_old, particle_new, v_new, u_new, v_old, u_old) - particle_new = activate_next_particle(system_new) + # Activate new particle + system_new.buffer.active_particle[particle_new] = true # Transfer densities density = current_density(v_old, system_old, particle_old) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 85d657b678..a9a2e791dd 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -288,42 +288,70 @@ end end # Test open boundaries and steady-state callback - @testset "fluid/pipe_flow_2d.jl - steady state reached (`dt`)" begin - # TODO This currently doesn't work on GPUs due to - # https://github.com/trixi-framework/PointNeighbors.jl/issues/20. - - # # Import variables into scope - # trixi_include_changeprecision(Float32, @__MODULE__, - # joinpath(examples_dir(), "fluid", - # "pipe_flow_2d.jl"), - # sol=nothing, ode=nothing) - - # # Neighborhood search with `FullGridCellList` for GPU compatibility - # min_corner = minimum(pipe.boundary.coordinates, dims=2) .- 8 * particle_spacing - # max_corner = maximum(pipe.boundary.coordinates, dims=2) .+ 8 * particle_spacing - # cell_list = FullGridCellList(; min_corner, max_corner) - # semi_fullgrid = Semidiscretization(fluid_system, boundary_system, - # neighborhood_search=GridNeighborhoodSearch{2}(; - # cell_list), - # parallelization_backend=Main.parallelization_backend) - - # steady_state_reached = SteadyStateReachedCallback(; dt=0.002, interval_size=10) - - # @trixi_test_nowarn trixi_include_changeprecision(Float32, @__MODULE__, - # joinpath(examples_dir(), "fluid", - # "pipe_flow_2d.jl"), - # extra_callback=steady_state_reached, - # tspan=(0.0f0, 1.5f0), - # semi=semi_fullgrid) - - # TODO This currently doesn't work on GPUs due to - # https://github.com/trixi-framework/PointNeighbors.jl/issues/20. - - # Make sure that the simulation is terminated after a reasonable amount of time - @test_skip 0.1 < sol.t[end] < 1.0 - @test_skip sol.retcode == ReturnCode.Terminated - # backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1]) - # @test_skip backend == Main.parallelization_backend + @trixi_testset "fluid/pipe_flow_2d.jl - BoundaryModelLastiwka (WCSPH)" begin + @trixi_test_nowarn trixi_include_changeprecision(Float32, @__MODULE__, + tspan=(0.0f0, 0.5f0), + joinpath(examples_dir(), + "fluid", + "pipe_flow_2d.jl"), + wcsph=true, + parallelization_backend=Main.parallelization_backend) + @test sol.retcode == ReturnCode.Success + backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1]) + @test backend == Main.parallelization_backend + end + + @trixi_testset "fluid/pipe_flow_2d.jl - BoundaryModelLastiwka (EDAC)" begin + @trixi_test_nowarn trixi_include_changeprecision(Float32, @__MODULE__, + tspan=(0.0f0, 0.5f0), + joinpath(examples_dir(), + "fluid", + "pipe_flow_2d.jl"), + parallelization_backend=Main.parallelization_backend) + @test sol.retcode == ReturnCode.Success + backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1]) + @test backend == Main.parallelization_backend + end + + @trixi_testset "fluid/pipe_flow_2d.jl - BoundaryModelTafuni (EDAC)" begin + @trixi_test_nowarn trixi_include_changeprecision(Float32, @__MODULE__, + tspan=(0.0f0, 0.5f0), + joinpath(examples_dir(), + "fluid", + "pipe_flow_2d.jl"), + open_boundary_model=BoundaryModelTafuni(), + boundary_type_in=BidirectionalFlow(), + boundary_type_out=BidirectionalFlow(), + reference_density_in=nothing, + reference_pressure_in=nothing, + reference_density_out=nothing, + reference_velocity_out=nothing, + parallelization_backend=Main.parallelization_backend) + @test sol.retcode == ReturnCode.Success + backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1]) + @test backend == Main.parallelization_backend + end + + @trixi_testset "fluid/pipe_flow_2d.jl - BoundaryModelTafuni (WCSPH)" begin + @trixi_test_nowarn trixi_include_changeprecision(Float32, @__MODULE__, + tspan=(0.0f0, 0.5f0), + joinpath(examples_dir(), + "fluid", + "pipe_flow_2d.jl"), + wcsph=true, sound_speed=20.0f0, + pressure=0.0f0, + open_boundary_model=BoundaryModelTafuni(), + boundary_type_in=BidirectionalFlow(), + boundary_type_out=BidirectionalFlow(), + reference_density_in=nothing, + reference_pressure_in=nothing, + reference_density_out=nothing, + reference_pressure_out=nothing, + reference_velocity_out=nothing, + parallelization_backend=Main.parallelization_backend) + @test sol.retcode == ReturnCode.Success + backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1]) + @test backend == Main.parallelization_backend end end diff --git a/test/general/buffer.jl b/test/general/buffer.jl index bf02bebc1c..18758590ea 100644 --- a/test/general/buffer.jl +++ b/test/general/buffer.jl @@ -2,6 +2,7 @@ # Mock fluid system struct FluidSystemMock3 <: TrixiParticles.FluidSystem{2} end TrixiParticles.initial_smoothing_length(system::FluidSystemMock3) = 1.0 + TrixiParticles.nparticles(system::FluidSystemMock3) = 1 zone = BoundaryZone(; plane=([0.0, 0.0], [0.0, 1.0]), particle_spacing=0.2, open_boundary_layers=2, density=1.0, plane_normal=[1.0, 0.0], @@ -23,7 +24,9 @@ @test TrixiParticles.each_moving_particle(system_buffer) == 1:n_particles - particle_id = TrixiParticles.activate_next_particle(system_buffer) + # Activate a particle + particle_id = findfirst(==(false), system_buffer.buffer.active_particle) + system_buffer.buffer.active_particle[particle_id] = true TrixiParticles.update_system_buffer!(system_buffer.buffer, DummySemidiscretization()) diff --git a/test/systems/open_boundary_system.jl b/test/systems/open_boundary_system.jl index c728432a1c..0fdf98f907 100644 --- a/test/systems/open_boundary_system.jl +++ b/test/systems/open_boundary_system.jl @@ -6,6 +6,7 @@ # Mock fluid system struct FluidSystemMock2 <: TrixiParticles.FluidSystem{2} end TrixiParticles.initial_smoothing_length(system::FluidSystemMock2) = 1.0 + TrixiParticles.nparticles(system::FluidSystemMock2) = 1 inflow = BoundaryZone(; plane, particle_spacing=0.1, plane_normal=flow_direction, density=1.0,