Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"

[extensions]
TrixiParticlesOrdinaryDiffEqExt = ["OrdinaryDiffEq", "OrdinaryDiffEqCore"]
TrixiParticlesCUDAExt = "CUDA"
TrixiParticlesOrdinaryDiffEqExt = ["OrdinaryDiffEq", "OrdinaryDiffEqCore"]

[compat]
Accessors = "0.1.43"
Expand Down
19 changes: 10 additions & 9 deletions src/general/abstract_system.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,18 @@ end
end

# Return `A[:, :, i]` as an `SMatrix`.
@inline function extract_smatrix(A, system, particle)
@boundscheck checkbounds(A, ndims(system), ndims(system), particle)
@propagate_inbounds function extract_smatrix(A, system, particle)
return extract_smatrix(A, Val(ndims(system)), particle)
end

@inline function extract_smatrix(A, ::Val{NDIMS}, particle) where {NDIMS}
@boundscheck checkbounds(A, NDIMS, NDIMS, particle)

# Extract the matrix elements for this particle as a tuple to pass to SMatrix
return SMatrix{ndims(system),
ndims(system)}(ntuple(@inline(i->@inbounds A[mod(i - 1,
ndims(system)) + 1,
div(i - 1,
ndims(system)) + 1,
particle]),
Val(ndims(system)^2)))
return SMatrix{NDIMS, NDIMS}(ntuple(@inline(i->@inbounds A[mod(i - 1, NDIMS) + 1,
div(i - 1, NDIMS) + 1,
particle]),
Val(NDIMS^2)))
end

# Specifically get the current coordinates of a particle for all system types.
Expand Down
6 changes: 6 additions & 0 deletions src/general/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ function PointNeighbors.foreach_point_neighbor(f, system, neighbor_system,
points, parallelization_backend)
end

@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, backend, particle)
PointNeighbors.foreach_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, particle)
Comment thread
efaulhaber marked this conversation as resolved.
end

# === Compact support selection ===
# -- Generic
@inline function compact_support(system, neighbor)
Expand Down
5 changes: 5 additions & 0 deletions src/schemes/boundary/wall_boundary/system.jl
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ end
return kernel(smoothing_kernel, distance, smoothing_length)
end

@inline function smoothing_kernel_unsafe(system::WallBoundarySystem, distance, particle)
(; smoothing_kernel, smoothing_length) = system.boundary_model
return kernel_unsafe(smoothing_kernel, distance, smoothing_length)
end

@inline function smoothing_length(system::WallBoundarySystem, particle)
return smoothing_length(system.boundary_model, particle)
end
Expand Down
65 changes: 40 additions & 25 deletions src/schemes/structure/total_lagrangian_sph/penalty_force.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,38 +14,53 @@ struct PenaltyForceGanzenmueller{ELTYPE}
end
end

@inline function dv_penalty_force(penalty_force::Nothing,
particle, neighbor, initial_pos_diff, initial_distance,
current_pos_diff, current_distance,
system, m_a, m_b, rho_a, rho_b)
return zero(initial_pos_diff)
@inline function dv_penalty_force!(dv_particle, penalty_force::Nothing,
particle, neighbor, initial_pos_diff, initial_distance,
current_pos_diff, current_distance,
system, m_a, m_b, rho_a, rho_b, F_a, F_b)
return dv_particle
end

@propagate_inbounds function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller,
particle, neighbor, initial_pos_diff,
initial_distance,
current_pos_diff, current_distance,
system, m_a, m_b, rho_a, rho_b)
volume_a = m_a / rho_a
volume_b = m_b / rho_b
@propagate_inbounds function dv_penalty_force!(dv_particle,
penalty_force::PenaltyForceGanzenmueller,
particle, neighbor, initial_pos_diff,
initial_distance,
current_pos_diff, current_distance,
system, m_a, m_b, rho_a, rho_b, F_a, F_b)
(; alpha) = penalty_force

kernel_weight = smoothing_kernel(system, initial_distance, particle)
# Since this is one of the most performance critical functions, using fast divisions
# here gives a significant speedup on GPUs.
# See the docs page "Development" for more details on `div_fast`.
volume_a = div_fast(m_a, rho_a)
volume_b = div_fast(m_b, rho_b)

F_a = deformation_gradient(system, particle)
F_b = deformation_gradient(system, neighbor)
# This function is called after a compact support check, so we can use the unsafe
# kernel function, which does not check the distance again.
kernel_weight = smoothing_kernel_unsafe(system, initial_distance, particle)

inv_current_distance = 1 / current_distance
E_a = young_modulus(system, particle)
E_b = young_modulus(system, neighbor)

# Use the symmetry of epsilon to simplify computations
eps_sum = (F_a + F_b) * initial_pos_diff - 2 * current_pos_diff
delta_sum = dot(eps_sum, current_pos_diff) * inv_current_distance
# Note that this is actually ϵ^b_ab = -ϵ^b_ba in the paper, so we later compute
# δ^b_ab instead of δ^b_ba, but δ^b_ab = δ^b_ba because of antisymmetry of x_ab and ϵ_ab.
eps_a = F_a * initial_pos_diff - current_pos_diff
eps_b = F_b * initial_pos_diff - current_pos_diff

E = young_modulus(system, particle)
# This is (E_a * delta_a + E_b * delta_b) * current_distance.
# Pulling the division by `current_distance` out allows us to do one division by
# `current_distance^2` instead.
delta_sum = E_a * dot(eps_a, current_pos_diff) + E_b * dot(eps_b, current_pos_diff)

f = (penalty_force.alpha / 2) * volume_a * volume_b *
kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff *
inv_current_distance
# The division contains all scalar factors, which are then multiplied by
# the vector `current_pos_diff` at the end.
# We already divide by `m_a` to obtain an acceleration.
# Since this is one of the most performance critical functions, using fast divisions
# here gives a significant speedup on GPUs.
# See the docs page "Development" for more details on `div_fast`.
dv_particle[] += div_fast((alpha / 2) * volume_a * volume_b * kernel_weight * delta_sum,
initial_distance^2 * current_distance^2 * m_a) *
current_pos_diff

# Divide force by mass to obtain acceleration
return f / m_a
return dv_particle
end
93 changes: 54 additions & 39 deletions src/schemes/structure/total_lagrangian_sph/rhs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ end

# Everything here is done in the initial coordinates
system_coords = initial_coordinates(system)
neighborhood_search = get_neighborhood_search(system, semi)
backend = semi.parallelization_backend

# For `distance == 0`, the analytical gradient is zero, but the unsafe gradient
# and the density diffusion divide by zero.
Expand All @@ -29,48 +31,61 @@ end
h = initial_smoothing_length(system)
almostzero = sqrt(eps(h^2))

# Loop over all pairs of particles and neighbors within the kernel cutoff.
# For structure-structure interaction, this has to happen in the initial coordinates.
foreach_point_neighbor(system, system, system_coords, system_coords, semi;
points=each_integrated_particle(system)) do particle, neighbor,
initial_pos_diff,
initial_distance
# Skip neighbors with the same position because the kernel gradient is zero.
# Note that `return` only exits the closure, i.e., skips the current neighbor.
skip_zero_distance(system) && initial_distance < almostzero && return

# Now that we know that `distance` is not zero, we can safely call the unsafe
# version of the kernel gradient to avoid redundant zero checks.
grad_kernel = smoothing_kernel_grad_unsafe(system, initial_pos_diff,
initial_distance, particle)

rho_a = @inbounds system.material_density[particle]
rho_b = @inbounds system.material_density[neighbor]

@threaded semi for particle in each_integrated_particle(system)
# We are looping over the particles of `system`, so it is guaranteed
# that `particle` is in bounds of `system`.
m_a = @inbounds system.mass[particle]
m_b = @inbounds system.mass[neighbor]

rho_a = @inbounds system.material_density[particle]
# PK1 / rho^2
pk1_rho2_a = @inbounds pk1_rho2(system, particle)
pk1_rho2_b = @inbounds pk1_rho2(system, neighbor)

current_pos_diff_ = @inbounds current_coords(system, particle) -
current_coords(system, neighbor)
# On GPUs, convert `Float64` coordinates to `Float32` after computing the difference
current_pos_diff = convert.(eltype(system), current_pos_diff_)
current_distance = norm(current_pos_diff)

dv_stress = m_b * (pk1_rho2_a + pk1_rho2_b) * grad_kernel

dv_penalty_force_ = @inbounds dv_penalty_force(penalty_force, particle, neighbor,
initial_pos_diff, initial_distance,
current_pos_diff, current_distance,
system, m_a, m_b, rho_a, rho_b)

dv_particle = Ref(dv_stress + dv_penalty_force_)
@inbounds dv_viscosity_tlsph!(dv_particle, system, v_system, particle, neighbor,
current_pos_diff, current_distance,
m_a, m_b, rho_a, rho_b, grad_kernel)
current_coords_a = @inbounds current_coords(system, particle)
F_a = @inbounds deformation_gradient(system, particle)

# Accumulate the RHS contributions over all neighbors before writing to `dv`
# to reduce the number of memory writes.
# Note that we need a `Ref` in order to be able to update these variables
# inside the closure in the `foreach_neighbor` loop.
dv_particle = Ref(zero(current_coords_a))

# Loop over all neighbors within the kernel cutoff
@inbounds foreach_neighbor(system_coords, system_coords,
neighborhood_search, backend,
particle) do particle, neighbor,
initial_pos_diff, initial_distance
# Skip neighbors with the same position because the kernel gradient is zero.
# Note that `return` only exits the closure, i.e., skips the current neighbor.
skip_zero_distance(system) && initial_distance < almostzero && return

# Now that we know that `distance` is not zero, we can safely call the unsafe
# version of the kernel gradient to avoid redundant zero checks.
grad_kernel = smoothing_kernel_grad_unsafe(system, initial_pos_diff,
initial_distance, particle)

rho_b = @inbounds system.material_density[neighbor]
m_b = @inbounds system.mass[neighbor]
# PK1 / rho^2
pk1_rho2_b = @inbounds pk1_rho2(system, neighbor)
current_coords_b = @inbounds current_coords(system, neighbor)

# The compiler is smart enough to optimize this away if no penalty force is used
F_b = @inbounds deformation_gradient(system, neighbor)
Comment thread
efaulhaber marked this conversation as resolved.

current_pos_diff_ = current_coords_a - current_coords_b
# On GPUs, convert `Float64` coordinates to `Float32` after computing the difference
current_pos_diff = convert.(eltype(system), current_pos_diff_)
current_distance = norm(current_pos_diff)

dv_particle[] += m_b * (pk1_rho2_a + pk1_rho2_b) * grad_kernel

@inbounds dv_penalty_force!(dv_particle, penalty_force, particle, neighbor,
initial_pos_diff, initial_distance,
current_pos_diff, current_distance,
system, m_a, m_b, rho_a, rho_b, F_a, F_b)

@inbounds dv_viscosity_tlsph!(dv_particle, system, v_system, particle, neighbor,
current_pos_diff, current_distance,
m_a, m_b, rho_a, rho_b, F_a, grad_kernel)
end

for i in 1:ndims(system)
@inbounds dv[i, particle] += dv_particle[][i]
Expand Down
61 changes: 40 additions & 21 deletions src/schemes/structure/total_lagrangian_sph/system.jl
Original file line number Diff line number Diff line change
Expand Up @@ -489,30 +489,49 @@ end

# Loop over all pairs of particles and neighbors within the kernel cutoff
initial_coords = initial_coordinates(system)
foreach_point_neighbor(system, system, initial_coords, initial_coords,
semi) do particle, neighbor, initial_pos_diff, initial_distance
# Skip neighbors with the same position because the kernel gradient is zero.
# Note that `return` only exits the closure, i.e., skips the current neighbor.
skip_zero_distance(system) && initial_distance < almostzero && return
neighborhood_search = get_neighborhood_search(system, semi)
backend = semi.parallelization_backend

# Now that we know that `distance` is not zero, we can safely call the unsafe
# version of the kernel gradient to avoid redundant zero checks.
grad_kernel = smoothing_kernel_grad_unsafe(system, initial_pos_diff,
initial_distance, particle)

volume = @inbounds mass[neighbor] / material_density[neighbor]
pos_diff_ = @inbounds current_coords(system, particle) -
current_coords(system, neighbor)
# On GPUs, convert `Float64` coordinates to `Float32` after computing the difference
pos_diff = convert.(eltype(system), pos_diff_)

# Multiply by L_{0a}
L = @inbounds correction_matrix(system, particle)

result = volume * pos_diff * grad_kernel' * L'
# The deformation gradient is computed for all particles, including the clamped ones
@threaded semi for particle in eachparticle(system)
# We are looping over the particles of `system`, so it is guaranteed
# that `particle` is in bounds of `system`.
current_coords_a = @inbounds current_coords(system, particle)
L_a = @inbounds correction_matrix(system, particle)

# Accumulate the contributions over all neighbors before writing
# to `deformation_grad` to reduce the number of memory writes.
# Note that we need a `Ref` in order to be able to update these variables
# inside the closure in the `foreach_neighbor` loop.
result = Ref(zero(L_a))

# Loop over all neighbors within the kernel cutoff
@inbounds foreach_neighbor(initial_coords, initial_coords,
neighborhood_search, backend,
particle) do particle, neighbor,
initial_pos_diff, initial_distance
# Skip neighbors with the same position because the kernel gradient is zero.
# Note that `return` only exits the closure, i.e., skips the current neighbor.
skip_zero_distance(system) && initial_distance < almostzero && return

# Now that we know that `distance` is not zero, we can safely call the unsafe
# version of the kernel gradient to avoid redundant zero checks.
grad_kernel = smoothing_kernel_grad_unsafe(system, initial_pos_diff,
initial_distance, particle)

volume = @inbounds mass[neighbor] / material_density[neighbor]
current_coords_b = @inbounds current_coords(system, neighbor)
pos_diff_ = current_coords_a - current_coords_b
# On GPUs, convert `Float64` coordinates to `Float32` after computing the difference
pos_diff = convert.(eltype(system), pos_diff_)
Comment thread
LasNikas marked this conversation as resolved.

# The tensor product pos_diff ⊗ (L_{0a} * ∇W) is equivalent to multiplication
# by the transpose: pos_diff * (L_{0a} * ∇W)ᵀ = pos_diff * ∇Wᵀ * L_{0a}ᵀ.
result[] -= volume * pos_diff * grad_kernel' * L_a'
end

for j in 1:ndims(system), i in 1:ndims(system)
@inbounds deformation_grad[i, j, particle] -= result[i, j]
@inbounds deformation_grad[i, j, particle] += result[][i, j]
end
end

Expand Down
Loading
Loading