From 92708ef80a0cdf5489349f2c8e88d44f2bcbb0c6 Mon Sep 17 00:00:00 2001 From: Augustin Bussy Date: Fri, 20 Feb 2026 10:08:55 +0100 Subject: [PATCH 01/11] Optimizing XC instantiation for GPUs --- src/density_methods.jl | 37 ++++++++++++++++++++++------ src/elements.jl | 4 +++ src/pseudo/NormConservingPsp.jl | 14 +++++++++++ src/pseudo/PspUpf.jl | 13 ++++++++++ src/workarounds/forwarddiff_rules.jl | 26 ++++++++++--------- 5 files changed, 75 insertions(+), 19 deletions(-) diff --git a/src/density_methods.jl b/src/density_methods.jl index f1a5523e6c..864fe5d14c 100644 --- a/src/density_methods.jl +++ b/src/density_methods.jl @@ -180,17 +180,18 @@ function atomic_density_form_factors(basis::PlaneWaveBasis{T}, p = norm(G) iG2ifnorm_cpu[iG] = get!(norm_indices, p, length(norm_indices) + 1) end + iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu) - form_factors_cpu = zeros(T, length(norm_indices), length(basis.model.atom_groups)) - for (p, ifnorm) in norm_indices - for (igroup, group) in enumerate(basis.model.atom_groups) - element = basis.model.atoms[first(group)] - form_factors_cpu[ifnorm, igroup] = atomic_density(element, p, method) - end + ni_pairs = collect(pairs(norm_indices)) + ps = to_device(basis.architecture, first.(ni_pairs)) + indices = to_device(basis.architecture, last.(ni_pairs)) + + form_factors = similar(ps, length(norm_indices), length(basis.model.atom_groups)) + for (igroup, group) in enumerate(basis.model.atom_groups) + element = basis.model.atoms[first(group)] + @inbounds form_factors[indices, igroup] .= atomic_density(element, ps, method) end - form_factors = to_device(basis.architecture, form_factors_cpu) - iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu) (; form_factors, iG2ifnorm) end @@ -214,6 +215,7 @@ function atomic_density(element::Element, Gnorm::T, has_core_density(element) ? core_charge_density_fourier(element, Gnorm) : zero(T) end +#TODO: vectorize and check GPU function atomic_density(element::Element, Gnorm::T, ::CoreKineticEnergyDensity)::T where {T <: Real} if has_core_kinetic_energy_density(element) @@ -223,6 +225,25 @@ function atomic_density(element::Element, Gnorm::T, end end +# Generic vectoriezed version of the above +function atomic_density(element::Element, Gnorms::AbstractVector{T}, + ::AtomicDensity) where {T <: Real} + arch = architecture(Gnorms) + to_device(arch, + map(Gnorm -> gaussian_valence_charge_density_fourier(element, Gnorm), + to_cpu(Gnorms))) +end + +# Vectorized version for CoreDensity, GPU optimized +function atomic_density(element::Element, Gnorms::AbstractVector{T}, + ::CoreDensity) where {T <: Real} + if has_core_density(element) + core_charge_density_fourier(element, Gnorms) + else + zeros_like(Gnorms) + end +end + # Get the lengthscale of the valence density for an atom with `n_elec_core` core # and `n_elec_valence` valence electrons. function atom_decay_length(n_elec_core, n_elec_valence) diff --git a/src/elements.jl b/src/elements.jl index 43771c20e4..a26040cab5 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -194,6 +194,10 @@ end function core_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real} eval_psp_core_density_fourier(el.psp, p) end +# Vectorized version of the above +function core_charge_density_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} + eval_psp_density_core_fourier(el.psp, ps) +end # diff --git a/src/pseudo/NormConservingPsp.jl b/src/pseudo/NormConservingPsp.jl index a600f7d77f..b9b28b6028 100644 --- a/src/pseudo/NormConservingPsp.jl +++ b/src/pseudo/NormConservingPsp.jl @@ -24,12 +24,20 @@ abstract type NormConservingPsp end # eval_psp_energy_correction(T::Type, psp) #### Optional methods: +#TODO: check that the new core_kinetic functions are well listed (i.e. vectorized) # eval_psp_valence_density_real(psp, r::Real) # eval_psp_valence_density_fourier(psp, p::Real) # eval_psp_core_density_real(psp, r::Real) # eval_psp_core_density_fourier(psp, p::Real) # eval_psp_core_kinetic_energy_density_real(psp, r::Real) # eval_psp_core_kinetic_energy_density_fourier(psp, p::Real) +# eval_psp_density_valence_real(psp, r::Real) +# eval_psp_density_valence_fourier(psp, p::Real) +# eval_psp_density_core_real(psp, r::Real) +# eval_psp_density_core_fourier(psp, p::Real) +# eval_psp_density_core_fourier(psp, ps::AbstractArray{ eval_psp_density_core_fourier(psp, p), to_cpu(ps))) +end + #### Methods defined on a NormConservingPsp import Base.Broadcast.broadcastable diff --git a/src/pseudo/PspUpf.jl b/src/pseudo/PspUpf.jl index 804e36c24c..1639b9d783 100644 --- a/src/pseudo/PspUpf.jl +++ b/src/pseudo/PspUpf.jl @@ -281,6 +281,19 @@ function eval_psp_core_density_fourier(psp::PspUpf, p::T) where {T<:Real} return hankel(rgrid, r2_ρcore, 0, p) end +# Vectorized version of the above, GPU compatible +function eval_psp_density_core_fourier(psp::PspUpf, ps::AbstractVector{T}) where {T<:Real} + quadrature = default_psp_quadrature(psp.rgrid) + arch = architecture(ps) + rgrid = to_device(arch, @view psp.rgrid[1:psp.ircut]) + r2_ρcore = to_device(arch, @view psp.r2_ρcore[1:psp.ircut]) + map(ps) do p + # GPU kernels with dynamic function calls do not compile, + # hence the pre-determined explicit integration function + hankel(quadrature, rgrid, r2_ρcore, 0, p) + end +end + function eval_psp_core_kinetic_energy_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_τcore_interp(r) / r^2 # TODO if r is below a threshold, return zero end diff --git a/src/workarounds/forwarddiff_rules.jl b/src/workarounds/forwarddiff_rules.jl index f28fe9cac8..63de69daba 100644 --- a/src/workarounds/forwarddiff_rules.jl +++ b/src/workarounds/forwarddiff_rules.jl @@ -304,6 +304,13 @@ end end function hankel(r::AbstractVector, r2_f::AbstractVector, l::Integer, p::TT) where {TT <: ForwardDiff.Dual} + quadrature = default_psp_quadrature(r) + hankel(quadrature, r, r2_f, l, p) +end + +# For GPU kernels to compile, dynamic function calls must be avoided. Therefore, the quadrature +# must be known and passed ahead of time, and the Dual type explicitly parametrized. +function hankel(quadrature, r::AbstractVector, r2_f::AbstractVector, l::Integer, p::Dual{Tg,V,N}) where {Tg,V,N} # This custom rule uses two properties of the hankel transform: # d H[f] / dp = 4\pi \int_0^∞ r^2 f(r) j_l'(p⋅r)⋅r dr # and that @@ -314,20 +321,17 @@ function hankel(r::AbstractVector, r2_f::AbstractVector, l::Integer, p::TT) wher # the tricky bit is to exploit that one needs both the j_l'(p⋅r) and j_l(p⋅r) values # but one does not want to precompute and allocate them into arrays # TODO Investigate custom rules for bessels and integration - - T = ForwardDiff.valtype(TT) pv = ForwardDiff.value(p) - jl = sphericalbesselj_fast.(l, pv .* r) - value = 4T(π) * simpson((i, r) -> r2_f[i] * jl[i], r) - - if iszero(pv) - return TT(value, zero(T) * ForwardDiff.partials(p)) - end - derivative = 4T(π) * simpson(r) do i, r - (r2_f[i] * (l * jl[i] / pv - r * sphericalbesselj_fast(l+1, pv * r))) + # To reduce allocations, compute value and derivative simultaneously, using a SVector as integrand + res = 4V(π) * quadrature(r) do i, r + jl_i = sphericalbesselj_fast(l, pv * r) + val = r2_f[i] * jl_i + deriv = iszero(pv) ? zero(V) : r2_f[i] * (l * jl_i / pv - r * sphericalbesselj_fast(l+1, pv * r)) + SVector(val, deriv) end - TT(value, derivative * ForwardDiff.partials(p)) + value, derivative = res[1], res[2] + Dual{Tg,V,N}(value, derivative * ForwardDiff.partials(p)) end # other workarounds From af6c331bcf4b20e6ef93c067dc4cdc9710b35bb6 Mon Sep 17 00:00:00 2001 From: Augustin Bussy Date: Wed, 25 Feb 2026 14:17:05 +0100 Subject: [PATCH 02/11] Fix PspLinComb test and generic atomic_density() --- src/density_methods.jl | 4 ++-- src/pseudo/PspLinComb.jl | 11 +++++++++++ src/workarounds/forwarddiff_rules.jl | 3 +-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/density_methods.jl b/src/density_methods.jl index 864fe5d14c..cb889e3e79 100644 --- a/src/density_methods.jl +++ b/src/density_methods.jl @@ -227,10 +227,10 @@ end # Generic vectoriezed version of the above function atomic_density(element::Element, Gnorms::AbstractVector{T}, - ::AtomicDensity) where {T <: Real} + method::AtomicDensity) where {T <: Real} arch = architecture(Gnorms) to_device(arch, - map(Gnorm -> gaussian_valence_charge_density_fourier(element, Gnorm), + map(Gnorm -> atomic_density(element, Gnorm, method), to_cpu(Gnorms))) end diff --git a/src/pseudo/PspLinComb.jl b/src/pseudo/PspLinComb.jl index ac66fcc0b1..ff0ca6a3ac 100644 --- a/src/pseudo/PspLinComb.jl +++ b/src/pseudo/PspLinComb.jl @@ -76,6 +76,16 @@ macro make_psplincomb_call(fn) end end end + +macro make_psplincomb_call_vectorized(fn) + quote + function $fn(psp::PspLinComb, arg::AbstractVector{<:Real}) + sum(c * $fn(pp, arg) for (c, pp) in zip(psp.coefficients, psp.pseudos)) + end + end +end + +#TODO: core kinetic here too @make_psplincomb_call DFTK.eval_psp_local_real @make_psplincomb_call DFTK.eval_psp_local_fourier @make_psplincomb_call DFTK.eval_psp_valence_density_real @@ -84,3 +94,4 @@ end @make_psplincomb_call DFTK.eval_psp_core_density_fourier @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_real @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_fourier +@make_psplincomb_call_vectorized DFTK.eval_psp_density_core_fourier \ No newline at end of file diff --git a/src/workarounds/forwarddiff_rules.jl b/src/workarounds/forwarddiff_rules.jl index 63de69daba..2af87e37dc 100644 --- a/src/workarounds/forwarddiff_rules.jl +++ b/src/workarounds/forwarddiff_rules.jl @@ -324,13 +324,12 @@ function hankel(quadrature, r::AbstractVector, r2_f::AbstractVector, l::Integer, pv = ForwardDiff.value(p) # To reduce allocations, compute value and derivative simultaneously, using a SVector as integrand - res = 4V(π) * quadrature(r) do i, r + value, derivative = 4V(π) * quadrature(r) do i, r jl_i = sphericalbesselj_fast(l, pv * r) val = r2_f[i] * jl_i deriv = iszero(pv) ? zero(V) : r2_f[i] * (l * jl_i / pv - r * sphericalbesselj_fast(l+1, pv * r)) SVector(val, deriv) end - value, derivative = res[1], res[2] Dual{Tg,V,N}(value, derivative * ForwardDiff.partials(p)) end From c65c8adf1e90bb093d9c6f548bf2f5d5e85d3ea6 Mon Sep 17 00:00:00 2001 From: Augustin Bussy Date: Wed, 4 Mar 2026 15:43:41 +0100 Subject: [PATCH 03/11] Uniformization of vectorized functions --- src/elements.jl | 41 +++++++++++++++++++++++---- src/pseudo/NormConservingPsp.jl | 50 +++++++++++++++++++++++---------- src/pseudo/PspHgh.jl | 5 +++- src/pseudo/PspLinComb.jl | 11 +++++++- src/pseudo/PspUpf.jl | 12 ++++++-- 5 files changed, 95 insertions(+), 24 deletions(-) diff --git a/src/elements.jl b/src/elements.jl index a26040cab5..5f9b39e547 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -178,10 +178,6 @@ eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el function local_potential_fourier(el::ElementPsp, p::T) where {T <: Real} eval_psp_local_fourier(el.psp, p) end -# Vectorized version of the above -function local_potential_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} - eval_psp_local_fourier(el.psp, ps) -end local_potential_real(el::ElementPsp, r::Real) = eval_psp_local_real(el.psp, r) function valence_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real} @@ -194,7 +190,21 @@ end function core_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real} eval_psp_core_density_fourier(el.psp, p) end -# Vectorized version of the above + +# Vectorized versions of the above, specific implementation depending on the Psp type +function local_potential_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} + eval_psp_local_fourier(el.psp, ps) +end +function local_potential_real(el::ElementPsp, rs::AbstractVector{T}) where {T <: Real} + eval_psp_local_real(el.psp, rs) +end +function valence_charge_density_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} + if has_valence_density(el.psp) + eval_psp_density_valence_fourier(el.psp, ps) + else + gaussian_valence_charge_density_fourier(el, ps) + end +end function core_charge_density_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} eval_psp_density_core_fourier(el.psp, ps) end @@ -301,6 +311,27 @@ function local_potential_fourier(el::ElementGaussian, p::Real) end # TODO Strictly speaking needs a eval_psp_energy_correction +# Generic API expectations: all element functions taking a real space scalar |r| or a +# reciprocal space scalar |p| should have a vectorized version accepting vectors of |r| or |p|. +# This macro vectorizes element functions by calling existing single-value version elementwise. +# This is GPU safe and generic. Performance critical functions should have their own +# GPU-optimized implementation. +macro vectorize_element_function(fn) + quote + function $fn(el::Element, arg::AbstractVector{T}) where {T <: Real} + arch = architecture(arg) + to_device(arch, map(p -> $fn(el, p), to_cpu(arg))) + end + end +end + +# Generic vectorized element functions +@vectorize_element_function DFTK.valence_charge_density_fourier +@vectorize_element_function DFTK.gaussian_valence_charge_density_fourier +@vectorize_element_function DFTK.core_charge_density_fourier +@vectorize_element_function DFTK.local_potential_fourier +@vectorize_element_function DFTK.local_potential_real + # # Helper functions # diff --git a/src/pseudo/NormConservingPsp.jl b/src/pseudo/NormConservingPsp.jl index b9b28b6028..2a13f119a3 100644 --- a/src/pseudo/NormConservingPsp.jl +++ b/src/pseudo/NormConservingPsp.jl @@ -20,7 +20,6 @@ abstract type NormConservingPsp end # eval_psp_projector_fourier(psp, i, l, ps::AbstrcatArray{ $fn(psp, p), to_cpu(vec))) + end + end +end +macro vectorize_psp_projector_function(fn, PspType) + quote + function $fn(psp::$PspType, i, l, vec::AbstractVector{T}) where {T <: Real} + arch = architecture(vec) + to_device(arch, map(p -> $fn(psp, i, l, p), to_cpu(vec))) + end + end +end + """ eval_psp_projector_real(psp, i, l, r) @@ -105,12 +138,6 @@ V_{\rm loc}(p) &= ∫_{ℝ^3} (V_{\rm loc}(r) - C(r)) e^{-ip·r} dr + F[C(r)] \\ eval_psp_local_fourier(psp::NormConservingPsp, p::AbstractVector) = eval_psp_local_fourier(psp, norm(p)) -# Fallback vectorized implementation for non GPU-optimized code. -function eval_psp_local_fourier(psp::NormConservingPsp, ps::AbstractVector{T}) where {T <: Real} - arch = architecture(ps) - to_device(arch, map(p -> eval_psp_local_fourier(psp, p), to_cpu(ps))) -end - @doc raw""" eval_psp_energy_correction([T=Float64,] psp::NormConservingPsp) eval_psp_energy_correction([T=Float64,] element::Element) @@ -192,13 +219,6 @@ eval_psp_core_kinetic_energy_density_fourier(::NormConservingPsp, ::T) where {T eval_psp_core_kinetic_energy_density_fourier(psp::NormConservingPsp, p::AbstractVector) = eval_psp_core_kinetic_energy_density_fourier(psp, norm(p)) -# Fallback vectorized implementation for non GPU-optimized code. -function eval_density_core_fourier(psp::NormConservingPsp, ps::AbstractVector{T}) where {T <: Real} - arch = architecture(ps) - to_device(arch, map(p -> eval_psp_density_core_fourier(psp, p), to_cpu(ps))) -end - - #### Methods defined on a NormConservingPsp import Base.Broadcast.broadcastable Base.Broadcast.broadcastable(psp::NormConservingPsp) = Ref(psp) @@ -270,4 +290,4 @@ function find_pswfc(psp::NormConservingPsp, label::String) end error("Could not find pseudo atomic orbital with label $label " * "in pseudopotential $(psp).") -end +end \ No newline at end of file diff --git a/src/pseudo/PspHgh.jl b/src/pseudo/PspHgh.jl index 52079c16c6..0e4ffcbfd4 100644 --- a/src/pseudo/PspHgh.jl +++ b/src/pseudo/PspHgh.jl @@ -122,6 +122,7 @@ function eval_psp_local_fourier(psp::PspHgh, p::T) where {T <: Real} 4T(π) * rloc^2 * (-Zion + sqrt(T(π) / 2) * rloc * t^2 * P) * exp(-t^2 / 2) / t^2 end +@vectorize_psp_function DFTK.eval_psp_local_fourier PspHgh # [GTH98] (1) function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real} @@ -133,6 +134,7 @@ function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real} + exp(-rr^2 / 2) * (cloc[1] + cloc[2] * rr^2 + cloc[3] * rr^4 + cloc[4] * rr^6) ) end +@vectorize_psp_function DFTK.eval_psp_local_real PspHgh # [HGH98] (7-15) except they do it with plane waves normalized by 1/sqrt(Ω). @@ -161,7 +163,7 @@ function eval_psp_projector_fourier(psp::PspHgh, i, l, p::T) where {T <: Real} error("Not implemented for l=$l and i=$i") end - +@vectorize_psp_projector_function DFTK.eval_psp_projector_fourier PspHgh # [HGH98] (3) function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real} @@ -169,6 +171,7 @@ function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real} ired = (4i - 1) / T(2) sqrt(T(2)) * r^(l + 2(i - 1)) * exp(-r^2 / 2rp^2) / rp^(l + ired) / sqrt(gamma(l + ired)) end +@vectorize_psp_projector_function DFTK.eval_psp_projector_real PspHgh function eval_psp_energy_correction(T, psp::PspHgh) # By construction we need to compute the DC component of the difference diff --git a/src/pseudo/PspLinComb.jl b/src/pseudo/PspLinComb.jl index ff0ca6a3ac..f3dff3bcd1 100644 --- a/src/pseudo/PspLinComb.jl +++ b/src/pseudo/PspLinComb.jl @@ -77,6 +77,7 @@ macro make_psplincomb_call(fn) end end +<<<<<<< HEAD macro make_psplincomb_call_vectorized(fn) quote function $fn(psp::PspLinComb, arg::AbstractVector{<:Real}) @@ -94,4 +95,12 @@ end @make_psplincomb_call DFTK.eval_psp_core_density_fourier @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_real @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_fourier -@make_psplincomb_call_vectorized DFTK.eval_psp_density_core_fourier \ No newline at end of file + +@vectorize_psp_function DFTK.eval_psp_local_real PspLinComb +@vectorize_psp_function DFTK.eval_psp_local_fourier PspLinComb +@vectorize_psp_function DFTK.eval_psp_valence_density_real PspLinComb +@vectorize_psp_function DFTK.eval_psp_valence_density_fourier PspLinComb +@vectorize_psp_function DFTK.eval_psp_core_density_eal PspLinComb +@vectorize_psp_function DFTK.eval_psp_core_density_ourier PspLinComb +@vectorize_psp_projector_function DFTK.eval_psp_projector_real PspLinComb +@vectorize_psp_projector_function DFTK.eval_psp_projector_fourier PspLinComb \ No newline at end of file diff --git a/src/pseudo/PspUpf.jl b/src/pseudo/PspUpf.jl index 1639b9d783..1934d06f46 100644 --- a/src/pseudo/PspUpf.jl +++ b/src/pseudo/PspUpf.jl @@ -182,6 +182,7 @@ has_core_kinetic_energy_density(psp::PspUpf) = !all(iszero, psp.r2_τcore) function eval_psp_projector_real(psp::PspUpf, i, l, r::T)::T where {T<:Real} psp.r2_projs_interp[l+1][i](r) / r^2 # TODO if r is below a threshold, return zero end +@vectorize_psp_projector_function DFTK.eval_psp_projector_real PspUpf function eval_psp_projector_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # The projectors may have been cut off before the end of the radial mesh @@ -192,6 +193,7 @@ function eval_psp_projector_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} r2_proj = @view psp.r2_projs[l+1][i][1:ircut_proj] hankel(rgrid, r2_proj, l, p) end +@vectorize_psp_projector_function DFTK.eval_psp_projector_fourier PspUpf # Vectorized version of the above, GPU compatible function eval_psp_projector_fourier(psp::PspUpf, i, l, ps::AbstractVector{T}) where {T<:Real} @@ -214,6 +216,7 @@ pswfc_label(psp::PspUpf, i, l) = psp.pswfc_labels[l+1][i] function eval_psp_pswfc_real(psp::PspUpf, i, l, r::T)::T where {T<:Real} psp.r2_pswfcs_interp[l+1][i](r) / r^2 # TODO if r is below a threshold, return zero end +@vectorize_psp_projector_function DFTK.eval_psp_pswfc_real PspUpf function eval_psp_pswfc_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # Pseudo-atomic wavefunctions are _not_ currently cut off like the other @@ -222,8 +225,10 @@ function eval_psp_pswfc_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # If issues arise, try cutting them off too. return hankel(psp.rgrid, psp.r2_pswfcs[l+1][i], l, p) end +@vectorize_psp_projector_function DFTK.eval_psp_pswfc_fourier PspUpf eval_psp_local_real(psp::PspUpf, r::T) where {T<:Real} = psp.vloc_interp(r) +@vectorize_psp_function DFTK.eval_psp_local_real PspUpf # Low-level function for the local part of the pseudopotential in reciprocal space function _eval_psp_local_fourier(quadrature, rgrid, vloc, Zion, p::T)::T where {T<:Real} @@ -247,7 +252,7 @@ function eval_psp_local_fourier(psp::PspUpf, p::T) where {T<:Real} _eval_psp_local_fourier(quadrature, rgrid, vloc, psp.Zion, p) end -# Vectorized version of the above, GPU compatible +# Vectorized version of the above, GPU optimized function eval_psp_local_fourier(psp::PspUpf, ps::AbstractVector{T}) where {T<:Real} quadrature = default_psp_quadrature(psp.rgrid) arch = architecture(ps) @@ -264,16 +269,19 @@ end function eval_psp_valence_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_ρion_interp(r) / r^2 # TODO if r is below a threshold, return zero end +@vectorize_psp_function DFTK.eval_psp_density_valence_real PspUpf function eval_psp_valence_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] r2_ρion = @view psp.r2_ρion[1:psp.ircut] return hankel(rgrid, r2_ρion, 0, p) end +@vectorize_psp_function DFTK.eval_psp_density_valence_fourier PspUpf function eval_psp_core_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_ρcore_interp(r) / r^2 # TODO if r is below a threshold, return zero end +@vectorize_psp_function DFTK.eval_psp_density_core_real PspUpf function eval_psp_core_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] @@ -281,7 +289,7 @@ function eval_psp_core_density_fourier(psp::PspUpf, p::T) where {T<:Real} return hankel(rgrid, r2_ρcore, 0, p) end -# Vectorized version of the above, GPU compatible +# Vectorized version of the above, GPU optimized function eval_psp_density_core_fourier(psp::PspUpf, ps::AbstractVector{T}) where {T<:Real} quadrature = default_psp_quadrature(psp.rgrid) arch = architecture(ps) From 102079ed2b95d20605c88273077f06658284e7e2 Mon Sep 17 00:00:00 2001 From: Augustin Bussy Date: Thu, 23 Apr 2026 16:01:25 +0200 Subject: [PATCH 04/11] Reorganize macros --- src/elements.jl | 65 +++++++++++---------------------- src/pseudo/NormConservingPsp.jl | 4 +- src/pseudo/PspHgh.jl | 8 ++-- src/pseudo/PspLinComb.jl | 18 +++++---- src/pseudo/PspUpf.jl | 16 ++++---- 5 files changed, 45 insertions(+), 66 deletions(-) diff --git a/src/elements.jl b/src/elements.jl index 5f9b39e547..0784224f4e 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -43,7 +43,7 @@ eval_psp_energy_correction(T, ::Element) = zero(T) eval_psp_energy_correction(psp::Element) = eval_psp_energy_correction(Float64, psp) # Fall back to the Gaussian table for Elements without pseudopotentials -function valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real} +function valence_charge_density_fourier(el::Element, p) gaussian_valence_charge_density_fourier(el, p) end @@ -51,6 +51,10 @@ end function gaussian_valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real} charge_ionic(el) * exp(-(p * atom_decay_length(el))^2) end +function gaussian_valence_charge_density_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real} + arch = architecture(ps) + to_device(arch, map(p -> gaussian_valence_charge_density_fourier(el, p), to_cpu(ps))) +end function core_charge_density_fourier(::Element, ::T)::T where {T <: Real} error("Abstract elements do not necesesarily provide core charge density.") @@ -175,40 +179,17 @@ has_core_density(el::ElementPsp) = has_core_density(el.psp) has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp) eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp) -function local_potential_fourier(el::ElementPsp, p::T) where {T <: Real} - eval_psp_local_fourier(el.psp, p) -end -local_potential_real(el::ElementPsp, r::Real) = eval_psp_local_real(el.psp, r) +local_potential_fourier(el::ElementPsp, p) = eval_psp_local_fourier(el.psp, p) +local_potential_real(el::ElementPsp, r) = eval_psp_local_real(el.psp, r) -function valence_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real} +function valence_charge_density_fourier(el::ElementPsp, p) if has_valence_density(el.psp) eval_psp_valence_density_fourier(el.psp, p) else gaussian_valence_charge_density_fourier(el, p) end end -function core_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real} - eval_psp_core_density_fourier(el.psp, p) -end - -# Vectorized versions of the above, specific implementation depending on the Psp type -function local_potential_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} - eval_psp_local_fourier(el.psp, ps) -end -function local_potential_real(el::ElementPsp, rs::AbstractVector{T}) where {T <: Real} - eval_psp_local_real(el.psp, rs) -end -function valence_charge_density_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} - if has_valence_density(el.psp) - eval_psp_density_valence_fourier(el.psp, ps) - else - gaussian_valence_charge_density_fourier(el, ps) - end -end -function core_charge_density_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real} - eval_psp_density_core_fourier(el.psp, ps) -end - +core_charge_density_fourier(el::ElementPsp, p) = eval_psp_density_core_fourier(el.psp, p) # # ElementCohenBergstresser @@ -278,7 +259,6 @@ function local_potential_fourier(el::ElementCohenBergstresser, p::T) where {T <: end # TODO Strictly speaking needs a eval_psp_energy_correction - # # ElementGaussian # @@ -313,25 +293,22 @@ end # Generic API expectations: all element functions taking a real space scalar |r| or a # reciprocal space scalar |p| should have a vectorized version accepting vectors of |r| or |p|. -# This macro vectorizes element functions by calling existing single-value version elementwise. -# This is GPU safe and generic. Performance critical functions should have their own -# GPU-optimized implementation. -macro vectorize_element_function(fn) - quote - function $fn(el::Element, arg::AbstractVector{T}) where {T <: Real} - arch = architecture(arg) - to_device(arch, map(p -> $fn(el, p), to_cpu(arg))) +# The following loop vectorizes element functions by calling the single-value version +# elementwise. This is GPU safe and generic. Performance critical functions should have their +# own GPU-optimized implementation. Note: explicit loop over Element types in order to avoid +# ambiguities with the specific ElementPsp functions. +for fn in [:gaussian_valence_charge_density_fourier, :core_charge_density_fourier, + :local_potential_fourier, :local_potential_real] + for el_type in [ElementCoulomb, ElementCohenBergstresser, ElementGaussian] + @eval begin + function DFTK.$fn(el::$el_type, arg::AbstractVector{T}) where {T <: Real} + arch = architecture(arg) + to_device(arch, map(p -> $fn(el, p), to_cpu(arg))) + end end end end -# Generic vectorized element functions -@vectorize_element_function DFTK.valence_charge_density_fourier -@vectorize_element_function DFTK.gaussian_valence_charge_density_fourier -@vectorize_element_function DFTK.core_charge_density_fourier -@vectorize_element_function DFTK.local_potential_fourier -@vectorize_element_function DFTK.local_potential_real - # # Helper functions # diff --git a/src/pseudo/NormConservingPsp.jl b/src/pseudo/NormConservingPsp.jl index 2a13f119a3..e8f8b133c6 100644 --- a/src/pseudo/NormConservingPsp.jl +++ b/src/pseudo/NormConservingPsp.jl @@ -60,7 +60,7 @@ abstract type NormConservingPsp end # have their own GPU-optimized implementation instead of relying on this macro. The # different norm-conserving pseudopotential types are responsible for the implementation # of vectorized functions, whether by using these macros or not. -macro vectorize_psp_function(fn, PspType) +macro vectorize_psp_function(PspType, fn) quote function $fn(psp::$PspType, vec::AbstractVector{T}) where {T <: Real} arch = architecture(vec) @@ -68,7 +68,7 @@ macro vectorize_psp_function(fn, PspType) end end end -macro vectorize_psp_projector_function(fn, PspType) +macro vectorize_psp_projector_function(PspType, fn) quote function $fn(psp::$PspType, i, l, vec::AbstractVector{T}) where {T <: Real} arch = architecture(vec) diff --git a/src/pseudo/PspHgh.jl b/src/pseudo/PspHgh.jl index 0e4ffcbfd4..ad181e6fca 100644 --- a/src/pseudo/PspHgh.jl +++ b/src/pseudo/PspHgh.jl @@ -122,7 +122,7 @@ function eval_psp_local_fourier(psp::PspHgh, p::T) where {T <: Real} 4T(π) * rloc^2 * (-Zion + sqrt(T(π) / 2) * rloc * t^2 * P) * exp(-t^2 / 2) / t^2 end -@vectorize_psp_function DFTK.eval_psp_local_fourier PspHgh +@vectorize_psp_function PspHgh DFTK.eval_psp_local_fourier # [GTH98] (1) function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real} @@ -134,7 +134,7 @@ function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real} + exp(-rr^2 / 2) * (cloc[1] + cloc[2] * rr^2 + cloc[3] * rr^4 + cloc[4] * rr^6) ) end -@vectorize_psp_function DFTK.eval_psp_local_real PspHgh +@vectorize_psp_function PspHgh DFTK.eval_psp_local_real # [HGH98] (7-15) except they do it with plane waves normalized by 1/sqrt(Ω). @@ -163,7 +163,7 @@ function eval_psp_projector_fourier(psp::PspHgh, i, l, p::T) where {T <: Real} error("Not implemented for l=$l and i=$i") end -@vectorize_psp_projector_function DFTK.eval_psp_projector_fourier PspHgh +@vectorize_psp_projector_function PspHgh DFTK.eval_psp_projector_fourier # [HGH98] (3) function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real} @@ -171,7 +171,7 @@ function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real} ired = (4i - 1) / T(2) sqrt(T(2)) * r^(l + 2(i - 1)) * exp(-r^2 / 2rp^2) / rp^(l + ired) / sqrt(gamma(l + ired)) end -@vectorize_psp_projector_function DFTK.eval_psp_projector_real PspHgh +@vectorize_psp_projector_function PspHgh DFTK.eval_psp_projector_real function eval_psp_energy_correction(T, psp::PspHgh) # By construction we need to compute the DC component of the difference diff --git a/src/pseudo/PspLinComb.jl b/src/pseudo/PspLinComb.jl index f3dff3bcd1..e19f0ed939 100644 --- a/src/pseudo/PspLinComb.jl +++ b/src/pseudo/PspLinComb.jl @@ -96,11 +96,13 @@ end @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_real @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_fourier -@vectorize_psp_function DFTK.eval_psp_local_real PspLinComb -@vectorize_psp_function DFTK.eval_psp_local_fourier PspLinComb -@vectorize_psp_function DFTK.eval_psp_valence_density_real PspLinComb -@vectorize_psp_function DFTK.eval_psp_valence_density_fourier PspLinComb -@vectorize_psp_function DFTK.eval_psp_core_density_eal PspLinComb -@vectorize_psp_function DFTK.eval_psp_core_density_ourier PspLinComb -@vectorize_psp_projector_function DFTK.eval_psp_projector_real PspLinComb -@vectorize_psp_projector_function DFTK.eval_psp_projector_fourier PspLinComb \ No newline at end of file +@vectorize_psp_function PspLinComb DFTK.eval_psp_local_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_local_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_fourier +@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_real +@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_fourier \ No newline at end of file diff --git a/src/pseudo/PspUpf.jl b/src/pseudo/PspUpf.jl index 1934d06f46..6215dfc815 100644 --- a/src/pseudo/PspUpf.jl +++ b/src/pseudo/PspUpf.jl @@ -182,7 +182,7 @@ has_core_kinetic_energy_density(psp::PspUpf) = !all(iszero, psp.r2_τcore) function eval_psp_projector_real(psp::PspUpf, i, l, r::T)::T where {T<:Real} psp.r2_projs_interp[l+1][i](r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_projector_function DFTK.eval_psp_projector_real PspUpf +@vectorize_psp_projector_function PspUpf DFTK.eval_psp_projector_real function eval_psp_projector_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # The projectors may have been cut off before the end of the radial mesh @@ -193,7 +193,7 @@ function eval_psp_projector_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} r2_proj = @view psp.r2_projs[l+1][i][1:ircut_proj] hankel(rgrid, r2_proj, l, p) end -@vectorize_psp_projector_function DFTK.eval_psp_projector_fourier PspUpf +@vectorize_psp_projector_function PspUpf DFTK.eval_psp_projector_fourier # Vectorized version of the above, GPU compatible function eval_psp_projector_fourier(psp::PspUpf, i, l, ps::AbstractVector{T}) where {T<:Real} @@ -216,7 +216,7 @@ pswfc_label(psp::PspUpf, i, l) = psp.pswfc_labels[l+1][i] function eval_psp_pswfc_real(psp::PspUpf, i, l, r::T)::T where {T<:Real} psp.r2_pswfcs_interp[l+1][i](r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_projector_function DFTK.eval_psp_pswfc_real PspUpf +@vectorize_psp_projector_function PspUpf DFTK.eval_psp_pswfc_real function eval_psp_pswfc_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # Pseudo-atomic wavefunctions are _not_ currently cut off like the other @@ -225,10 +225,10 @@ function eval_psp_pswfc_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # If issues arise, try cutting them off too. return hankel(psp.rgrid, psp.r2_pswfcs[l+1][i], l, p) end -@vectorize_psp_projector_function DFTK.eval_psp_pswfc_fourier PspUpf +@vectorize_psp_projector_function PspUpf DFTK.eval_psp_pswfc_fourier eval_psp_local_real(psp::PspUpf, r::T) where {T<:Real} = psp.vloc_interp(r) -@vectorize_psp_function DFTK.eval_psp_local_real PspUpf +@vectorize_psp_function PspUpf DFTK.eval_psp_local_real # Low-level function for the local part of the pseudopotential in reciprocal space function _eval_psp_local_fourier(quadrature, rgrid, vloc, Zion, p::T)::T where {T<:Real} @@ -269,19 +269,19 @@ end function eval_psp_valence_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_ρion_interp(r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_function DFTK.eval_psp_density_valence_real PspUpf +@vectorize_psp_function PspUpf DFTK.eval_psp_density_valence_real function eval_psp_valence_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] r2_ρion = @view psp.r2_ρion[1:psp.ircut] return hankel(rgrid, r2_ρion, 0, p) end -@vectorize_psp_function DFTK.eval_psp_density_valence_fourier PspUpf +@vectorize_psp_function PspUpf DFTK.eval_psp_density_valence_fourier function eval_psp_core_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_ρcore_interp(r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_function DFTK.eval_psp_density_core_real PspUpf +@vectorize_psp_function PspUpf DFTK.eval_psp_density_core_real function eval_psp_core_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] From 3e8d219f8db0a27fd2ab4f011f581034345096cd Mon Sep 17 00:00:00 2001 From: Augustin Bussy Date: Mon, 27 Apr 2026 10:24:44 +0200 Subject: [PATCH 05/11] Rebased onto current master --- src/elements.jl | 8 +------- src/pseudo/NormConservingPsp.jl | 18 ++++++------------ src/pseudo/PspLinComb.jl | 2 -- src/pseudo/PspUpf.jl | 11 ++++++----- 4 files changed, 13 insertions(+), 26 deletions(-) diff --git a/src/elements.jl b/src/elements.jl index 0784224f4e..433f8731e9 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -64,12 +64,6 @@ function core_kinetic_energy_density_fourier(::Element, ::T)::T where {T <: Real error("Abstract elements do not necesesarily provide core kinetic energy density.") end -# Generic vectorized version of local_potential_fourier, GPU-safe -function local_potential_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real} - arch = architecture(ps) - to_device(arch, map(p -> local_potential_fourier(el, p), to_cpu(ps))) -end - Base.show(io::IO, el::Element) = print(io, "$(typeof(el))(:$(species(el)))") # @@ -189,7 +183,7 @@ function valence_charge_density_fourier(el::ElementPsp, p) gaussian_valence_charge_density_fourier(el, p) end end -core_charge_density_fourier(el::ElementPsp, p) = eval_psp_density_core_fourier(el.psp, p) +core_charge_density_fourier(el::ElementPsp, p) = eval_psp_core_density_fourier(el.psp, p) # # ElementCohenBergstresser diff --git a/src/pseudo/NormConservingPsp.jl b/src/pseudo/NormConservingPsp.jl index e8f8b133c6..6efc580007 100644 --- a/src/pseudo/NormConservingPsp.jl +++ b/src/pseudo/NormConservingPsp.jl @@ -23,20 +23,12 @@ abstract type NormConservingPsp end # eval_psp_energy_correction(T::Type, psp) #### Optional methods: -#TODO: check that the new core_kinetic functions are well listed (i.e. vectorized) # eval_psp_valence_density_real(psp, r::Real) # eval_psp_valence_density_fourier(psp, p::Real) # eval_psp_core_density_real(psp, r::Real) # eval_psp_core_density_fourier(psp, p::Real) # eval_psp_core_kinetic_energy_density_real(psp, r::Real) # eval_psp_core_kinetic_energy_density_fourier(psp, p::Real) -# eval_psp_density_valence_real(psp, r::Real) -# eval_psp_density_valence_fourier(psp, p::Real) -# eval_psp_density_core_real(psp, r::Real) -# eval_psp_density_core_fourier(psp, p::Real) -# eval_psp_density_core_fourier(psp, ps::AbstractArray{ Date: Tue, 28 Apr 2026 17:29:04 +0200 Subject: [PATCH 06/11] Fix tests --- examples/custom_potential.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/custom_potential.jl b/examples/custom_potential.jl index 43f166df6f..edf765fafc 100644 --- a/examples/custom_potential.jl +++ b/examples/custom_potential.jl @@ -21,12 +21,12 @@ CustomPotential() = CustomPotential(1.0, 0.5); # We extend the two methods providing access to the real and Fourier # representation of the potential to DFTK. -function DFTK.local_potential_real(el::CustomPotential, r::Real) - -el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2) +function DFTK.local_potential_real(el::CustomPotential, rs::Vector) + map(r -> -el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2), rs) end -function DFTK.local_potential_fourier(el::CustomPotential, p::Real) +function DFTK.local_potential_fourier(el::CustomPotential, ps::Vector) ## = ∫ V(r) exp(-ix⋅p) dx - -el.α * exp(- (p * el.L)^2 / 2) + map(p -> -el.α * exp(- (p * el.L)^2 / 2), ps) end # !!! tip "Gaussian potentials and DFTK" From 370266a03c94cee763f3aec45eff45a512c01d78 Mon Sep 17 00:00:00 2001 From: "Michael F. Herbst" Date: Wed, 3 Jun 2026 13:27:58 +0200 Subject: [PATCH 07/11] Small change to PspLinComb.jl --- src/pseudo/PspLinComb.jl | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/pseudo/PspLinComb.jl b/src/pseudo/PspLinComb.jl index f3e3d1b452..3ccddb4976 100644 --- a/src/pseudo/PspLinComb.jl +++ b/src/pseudo/PspLinComb.jl @@ -77,14 +77,6 @@ macro make_psplincomb_call(fn) end end -macro make_psplincomb_call_vectorized(fn) - quote - function $fn(psp::PspLinComb, arg::AbstractVector{<:Real}) - sum(c * $fn(pp, arg) for (c, pp) in zip(psp.coefficients, psp.pseudos)) - end - end -end - @make_psplincomb_call DFTK.eval_psp_local_real @make_psplincomb_call DFTK.eval_psp_local_fourier @make_psplincomb_call DFTK.eval_psp_valence_density_real @@ -94,13 +86,13 @@ end @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_real @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_local_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_local_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_local_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_local_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_fourier +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_real +@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_fourier @vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_real -@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_fourier \ No newline at end of file +@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_fourier From c7e2f3420936b3b96d4699e061496b60215f5a6e Mon Sep 17 00:00:00 2001 From: "Michael F. Herbst" Date: Wed, 3 Jun 2026 14:16:30 +0200 Subject: [PATCH 08/11] In a simplified test this works, let's see --- src/elements.jl | 65 ++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/src/elements.jl b/src/elements.jl index 433f8731e9..71f42877df 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -7,6 +7,13 @@ using AtomsBase # Very likely `species`, and `charge_ionic` need to be defined as well. abstract type Element end +"""Return the local potential for a passed coordinate or vector of coordinates in frequency space""" +function local_potential_fourier end + +"""Return the local potential for a passed coordinate or vector of coordinates in real space""" +function local_potential_real end + + """Return the chemical species corresponding to an element""" AtomsBase.species(::Element) = ChemicalSpecies(0) # dummy atom @@ -37,6 +44,14 @@ has_core_density(::Element) = false """Check presence of model core kinetic energy density (non-linear core correction for τ).""" has_core_kinetic_energy_density(::Element) = false +function core_charge_density_fourier(::Element, ::T)::T where {T <: Real} + error("Abstract elements do not necesesarily provide core charge density.") +end + +function core_kinetic_energy_density_fourier(::Element, ::T)::T where {T <: Real} + error("Abstract elements do not necesesarily provide core kinetic energy density.") +end + # The preceding functions are fallback implementations that should be altered as needed. eval_psp_energy_correction(T, ::Element) = zero(T) @@ -51,18 +66,18 @@ end function gaussian_valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real} charge_ionic(el) * exp(-(p * atom_decay_length(el))^2) end -function gaussian_valence_charge_density_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real} - arch = architecture(ps) - to_device(arch, map(p -> gaussian_valence_charge_density_fourier(el, p), to_cpu(ps))) -end -function core_charge_density_fourier(::Element, ::T)::T where {T <: Real} - error("Abstract elements do not necesesarily provide core charge density.") +# Vector to single-argument fallbacks in the Elements +for fn in [:gaussian_valence_charge_density_fourier, :core_charge_density_fourier, + :local_potential_fourier, :local_potential_real] + @eval begin + function DFTK.$fn(el::Element, arg::AbstractVector{<:Real}) + arch = architecture(arg) + to_device(arch, map(p -> $fn(el, p), to_cpu(arg))) + end + end end -function core_kinetic_energy_density_fourier(::Element, ::T)::T where {T <: Real} - error("Abstract elements do not necesesarily provide core kinetic energy density.") -end Base.show(io::IO, el::Element) = print(io, "$(typeof(el))(:$(species(el)))") @@ -173,9 +188,22 @@ has_core_density(el::ElementPsp) = has_core_density(el.psp) has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp) eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp) +# Function forwarding for ElementPsp (for each case both versions are needed to resolve ambiguities) local_potential_fourier(el::ElementPsp, p) = eval_psp_local_fourier(el.psp, p) +local_potential_fourier(el::ElementPsp, p::AbstractVector{<:Real}) = eval_psp_local_fourier(el.psp, p) local_potential_real(el::ElementPsp, r) = eval_psp_local_real(el.psp, r) +local_potential_real(el::ElementPsp, r::AbstractVector{<:Real}) = eval_psp_local_real(el.psp, r) +function core_charge_density_fourier(el::ElementPsp, p) + eval_psp_core_density_fourier(el.psp, p) +end +function core_charge_density_fourier(el::ElementPsp, p::AbstractVector{<:Real}) + eval_psp_core_density_fourier(el.psp, p) +end +function gaussian_valence_charge_density_fourier(el::ElementPsp, ps::AbstractVector{<:Real}) + arch = architecture(ps) + to_device(arch, map(p -> gaussian_valence_charge_density_fourier(el, p), to_cpu(ps))) +end function valence_charge_density_fourier(el::ElementPsp, p) if has_valence_density(el.psp) eval_psp_valence_density_fourier(el.psp, p) @@ -183,7 +211,6 @@ function valence_charge_density_fourier(el::ElementPsp, p) gaussian_valence_charge_density_fourier(el, p) end end -core_charge_density_fourier(el::ElementPsp, p) = eval_psp_core_density_fourier(el.psp, p) # # ElementCohenBergstresser @@ -285,24 +312,6 @@ function local_potential_fourier(el::ElementGaussian, p::Real) end # TODO Strictly speaking needs a eval_psp_energy_correction -# Generic API expectations: all element functions taking a real space scalar |r| or a -# reciprocal space scalar |p| should have a vectorized version accepting vectors of |r| or |p|. -# The following loop vectorizes element functions by calling the single-value version -# elementwise. This is GPU safe and generic. Performance critical functions should have their -# own GPU-optimized implementation. Note: explicit loop over Element types in order to avoid -# ambiguities with the specific ElementPsp functions. -for fn in [:gaussian_valence_charge_density_fourier, :core_charge_density_fourier, - :local_potential_fourier, :local_potential_real] - for el_type in [ElementCoulomb, ElementCohenBergstresser, ElementGaussian] - @eval begin - function DFTK.$fn(el::$el_type, arg::AbstractVector{T}) where {T <: Real} - arch = architecture(arg) - to_device(arch, map(p -> $fn(el, p), to_cpu(arg))) - end - end - end -end - # # Helper functions # From 25643f673694b226f86521c0125286b7b545267a Mon Sep 17 00:00:00 2001 From: "Michael F. Herbst" Date: Wed, 3 Jun 2026 16:55:51 +0200 Subject: [PATCH 09/11] Refactor density methods --- examples/custom_potential.jl | 8 ++-- src/density_methods.jl | 88 ++++++++++++++++++++++-------------- src/elements.jl | 47 +------------------ 3 files changed, 60 insertions(+), 83 deletions(-) diff --git a/examples/custom_potential.jl b/examples/custom_potential.jl index edf765fafc..43f166df6f 100644 --- a/examples/custom_potential.jl +++ b/examples/custom_potential.jl @@ -21,12 +21,12 @@ CustomPotential() = CustomPotential(1.0, 0.5); # We extend the two methods providing access to the real and Fourier # representation of the potential to DFTK. -function DFTK.local_potential_real(el::CustomPotential, rs::Vector) - map(r -> -el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2), rs) +function DFTK.local_potential_real(el::CustomPotential, r::Real) + -el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2) end -function DFTK.local_potential_fourier(el::CustomPotential, ps::Vector) +function DFTK.local_potential_fourier(el::CustomPotential, p::Real) ## = ∫ V(r) exp(-ix⋅p) dx - map(p -> -el.α * exp(- (p * el.L)^2 / 2), ps) + -el.α * exp(- (p * el.L)^2 / 2) end # !!! tip "Gaussian potentials and DFTK" diff --git a/src/density_methods.jl b/src/density_methods.jl index cb889e3e79..55431dd79d 100644 --- a/src/density_methods.jl +++ b/src/density_methods.jl @@ -170,8 +170,7 @@ end Returns the form factors at unique values of |G + q| (in Cartesian coordinates). Additionally, returns a mapping from any G index to the corresponding entry in the form_factors array. """ -function atomic_density_form_factors(basis::PlaneWaveBasis{T}, - method::AtomicDensity ) where {T<:Real} +function atomic_density_form_factors(basis::PlaneWaveBasis{T}, method::AtomicDensity) where {T<:Real} G_cart = to_cpu(G_vectors_cart(basis)) iG2ifnorm_cpu = zeros(Int, length(G_cart)) @@ -195,55 +194,76 @@ function atomic_density_form_factors(basis::PlaneWaveBasis{T}, (; form_factors, iG2ifnorm) end -function atomic_density(element::Element, Gnorm::T, - ::ValenceDensityGaussian)::T where {T <: Real} - gaussian_valence_charge_density_fourier(element, Gnorm) -end +# +# Check for presence of certain densities in elements +# -function atomic_density(element::Element, Gnorm::T, - ::ValenceDensityPseudo)::T where {T <: Real} - eval_psp_valence_density_fourier(element.psp, Gnorm) -end +"""Check presence of model valence density (as an initial guess).""" +has_valence_density(::Element) = false -function atomic_density(element::Element, Gnorm::T, - ::ValenceDensityAuto)::T where {T <: Real} - valence_charge_density_fourier(element, Gnorm) -end +"""Check presence of model core charge density (non-linear core correction).""" +has_core_density(::Element) = false + +"""Check presence of model core kinetic energy density (non-linear core correction for τ).""" +has_core_kinetic_energy_density(::Element) = false + +has_core_density(el::ElementPsp) = has_core_density(el.psp) +has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp) +has_valence_density(el::ElementPsp) = has_valence_density(el.psp) -function atomic_density(element::Element, Gnorm::T, - ::CoreDensity)::T where {T <: Real} - has_core_density(element) ? core_charge_density_fourier(element, Gnorm) : zero(T) +# +# Atomic density dispatches +# + +function atomic_density(element::Union{Element,<:ElementPsp}, Gnorm::Real, dens::AtomicDensity) + first(atomic_density(element, [Gnorm], dens)) # Dispatch to vector version end -#TODO: vectorize and check GPU -function atomic_density(element::Element, Gnorm::T, - ::CoreKineticEnergyDensity)::T where {T <: Real} - if has_core_kinetic_energy_density(element) - eval_psp_core_kinetic_energy_density_fourier(element.psp, Gnorm) +"""Gaussian valence charge density using Abinit's coefficient table, in Fourier space.""" +function atomic_density(element::Element, Gnorms::AbstractVector, ::ValenceDensityGaussian) + map(Gnorms) do Gnorm + charge_ionic(element) * exp(-(Gnorm * atom_decay_length(element))^2) + end +end +function atomic_density(element::Element, Gnorms::AbstractVector, ::ValenceDensityAuto) + if has_valence_density(element) + atomic_density(element, Gnorms, ValenceDensityPseudo()) else - zero(T) + atomic_density(element, Gnorms, ValenceDensityGaussian()) end end -# Generic vectoriezed version of the above -function atomic_density(element::Element, Gnorms::AbstractVector{T}, - method::AtomicDensity) where {T <: Real} - arch = architecture(Gnorms) - to_device(arch, - map(Gnorm -> atomic_density(element, Gnorm, method), - to_cpu(Gnorms))) +function atomic_density(element::Element, Gnorms::AbstractVector, ::CoreDensity) + @assert !has_core_density(element) + zeros_like(Gnorms) +end +function atomic_density(element::Element, Gnorms::AbstractVector, ::CoreKineticEnergyDensity) + @assert !has_core_kinetic_energy_density(element) + zeros_like(Gnorms) end -# Vectorized version for CoreDensity, GPU optimized -function atomic_density(element::Element, Gnorms::AbstractVector{T}, - ::CoreDensity) where {T <: Real} +function atomic_density(element::ElementPsp, Gnorms::AbstractVector, ::ValenceDensityPseudo) + eval_psp_valence_density_fourier(element.psp, Gnorms) +end +function atomic_density(element::ElementPsp, Gnorms::AbstractVector, ::CoreDensity) if has_core_density(element) - core_charge_density_fourier(element, Gnorms) + eval_psp_core_density_fourier(element.psp, Gnorms) + else + zeros_like(Gnorms) + end +end +function atomic_density(element::ElementPsp, Gnorms::AbstractVector, ::CoreKineticEnergyDensity) + if has_core_kinetic_energy_density(element) + eval_psp_core_kinetic_energy_density_fourier(element.psp, Gnorms) else zeros_like(Gnorms) end end +# +# Some data we need for the Gaussian densities +# + # Get the lengthscale of the valence density for an atom with `n_elec_core` core # and `n_elec_valence` valence electrons. function atom_decay_length(n_elec_core, n_elec_valence) diff --git a/src/elements.jl b/src/elements.jl index 71f42877df..6bcc5d169c 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -38,38 +38,15 @@ n_elec_core(el::Element) = charge_nuclear(el) - charge_ionic(el) """Return the pseudopotential family for the element if this is known, else `nothing`""" pseudofamily(::Element) = nothing -"""Check presence of model core charge density (non-linear core correction).""" -has_core_density(::Element) = false - -"""Check presence of model core kinetic energy density (non-linear core correction for τ).""" -has_core_kinetic_energy_density(::Element) = false - -function core_charge_density_fourier(::Element, ::T)::T where {T <: Real} - error("Abstract elements do not necesesarily provide core charge density.") -end - -function core_kinetic_energy_density_fourier(::Element, ::T)::T where {T <: Real} - error("Abstract elements do not necesesarily provide core kinetic energy density.") -end - # The preceding functions are fallback implementations that should be altered as needed. eval_psp_energy_correction(T, ::Element) = zero(T) -eval_psp_energy_correction(psp::Element) = eval_psp_energy_correction(Float64, psp) +eval_psp_energy_correction(el::Element) = eval_psp_energy_correction(Float64, el) # Fall back to the Gaussian table for Elements without pseudopotentials -function valence_charge_density_fourier(el::Element, p) - gaussian_valence_charge_density_fourier(el, p) -end - -"""Gaussian valence charge density using Abinit's coefficient table, in Fourier space.""" -function gaussian_valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real} - charge_ionic(el) * exp(-(p * atom_decay_length(el))^2) -end # Vector to single-argument fallbacks in the Elements -for fn in [:gaussian_valence_charge_density_fourier, :core_charge_density_fourier, - :local_potential_fourier, :local_potential_real] +for fn in [:local_potential_fourier, :local_potential_real] @eval begin function DFTK.$fn(el::Element, arg::AbstractVector{<:Real}) arch = architecture(arg) @@ -184,8 +161,6 @@ end AtomsBase.mass(el::ElementPsp) = el.mass AtomsBase.species(el::ElementPsp) = el.species charge_ionic(el::ElementPsp) = charge_ionic(el.psp) -has_core_density(el::ElementPsp) = has_core_density(el.psp) -has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp) eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp) # Function forwarding for ElementPsp (for each case both versions are needed to resolve ambiguities) @@ -193,24 +168,6 @@ local_potential_fourier(el::ElementPsp, p) = eval_psp_local_fourier(el.psp, p) local_potential_fourier(el::ElementPsp, p::AbstractVector{<:Real}) = eval_psp_local_fourier(el.psp, p) local_potential_real(el::ElementPsp, r) = eval_psp_local_real(el.psp, r) local_potential_real(el::ElementPsp, r::AbstractVector{<:Real}) = eval_psp_local_real(el.psp, r) -function core_charge_density_fourier(el::ElementPsp, p) - eval_psp_core_density_fourier(el.psp, p) -end -function core_charge_density_fourier(el::ElementPsp, p::AbstractVector{<:Real}) - eval_psp_core_density_fourier(el.psp, p) -end - -function gaussian_valence_charge_density_fourier(el::ElementPsp, ps::AbstractVector{<:Real}) - arch = architecture(ps) - to_device(arch, map(p -> gaussian_valence_charge_density_fourier(el, p), to_cpu(ps))) -end -function valence_charge_density_fourier(el::ElementPsp, p) - if has_valence_density(el.psp) - eval_psp_valence_density_fourier(el.psp, p) - else - gaussian_valence_charge_density_fourier(el, p) - end -end # # ElementCohenBergstresser From 90b3786af0887017182bb3530790d9d731b98a18 Mon Sep 17 00:00:00 2001 From: "Michael F. Herbst" Date: Wed, 3 Jun 2026 17:13:53 +0200 Subject: [PATCH 10/11] Fix tests --- src/pseudo/NormConservingPsp.jl | 3 +-- test/PspLinComb.jl | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/pseudo/NormConservingPsp.jl b/src/pseudo/NormConservingPsp.jl index 6efc580007..7a25a015de 100644 --- a/src/pseudo/NormConservingPsp.jl +++ b/src/pseudo/NormConservingPsp.jl @@ -17,7 +17,6 @@ abstract type NormConservingPsp end # has_core_kinetic_energy_density(psp) # eval_psp_projector_real(psp, i, l, r::Real) # eval_psp_projector_fourier(psp, i, l, p::Real) -# eval_psp_projector_fourier(psp, i, l, ps::AbstrcatArray{ Date: Wed, 3 Jun 2026 20:36:19 +0200 Subject: [PATCH 11/11] Nuke macros --- src/elements.jl | 6 +- src/pseudo/NormConservingPsp.jl | 128 +++++++++++++++----------------- src/pseudo/PspHgh.jl | 4 - src/pseudo/PspLinComb.jl | 11 --- src/pseudo/PspUpf.jl | 9 --- test/PspLinComb.jl | 2 +- 6 files changed, 64 insertions(+), 96 deletions(-) diff --git a/src/elements.jl b/src/elements.jl index 6bcc5d169c..659692a149 100644 --- a/src/elements.jl +++ b/src/elements.jl @@ -164,9 +164,9 @@ charge_ionic(el::ElementPsp) = charge_ionic(el.psp) eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp) # Function forwarding for ElementPsp (for each case both versions are needed to resolve ambiguities) -local_potential_fourier(el::ElementPsp, p) = eval_psp_local_fourier(el.psp, p) +local_potential_fourier(el::ElementPsp, p::Real) = eval_psp_local_fourier(el.psp, p) local_potential_fourier(el::ElementPsp, p::AbstractVector{<:Real}) = eval_psp_local_fourier(el.psp, p) -local_potential_real(el::ElementPsp, r) = eval_psp_local_real(el.psp, r) +local_potential_real(el::ElementPsp, r::Real) = eval_psp_local_real(el.psp, r) local_potential_real(el::ElementPsp, r::AbstractVector{<:Real}) = eval_psp_local_real(el.psp, r) # @@ -261,7 +261,7 @@ function ElementGaussian(α, L; symbol=:X, mass=nothing) T = promote_type(typeof(α), typeof(L)) ElementGaussian{T}(α, L, symbol, mass) end -function local_potential_real(el::ElementGaussian, r) +function local_potential_real(el::ElementGaussian, r::Real) -el.α / (√(2π) * el.L) * exp(- (r / el.L)^2 / 2) end function local_potential_fourier(el::ElementGaussian, p::Real) diff --git a/src/pseudo/NormConservingPsp.jl b/src/pseudo/NormConservingPsp.jl index 7a25a015de..142552b788 100644 --- a/src/pseudo/NormConservingPsp.jl +++ b/src/pseudo/NormConservingPsp.jl @@ -10,7 +10,7 @@ abstract type NormConservingPsp end # identifier::String # String identifying the PSP # description::String # Descriptive string -#### Methods: +#### Required methods: # charge_ionic(psp) # has_valence_density(psp) # has_core_density(psp) @@ -30,45 +30,12 @@ abstract type NormConservingPsp end # eval_psp_core_kinetic_energy_density_fourier(psp, p::Real) # eval_psp_pswfc_real(psp, i::Int, l::Int, p::Real) # eval_psp_pswfc_fourier(psp, i::Int, l::Int, p::Real) -# count_n_pswfc(psp, l::Integer) # count_n_pswfc_radial(psp, l::Integer) # pswfc_label(psp, i::Integer, l::Integer) -#### Vectorization: all methods are expected to accept vectorized inputs, namely: -# eval_psp_projector_real(psp, i, l, r::AbstractVector{ $fn(psp, p), to_cpu(vec))) - end - end -end -macro vectorize_psp_projector_function(PspType, fn) - quote - function $fn(psp::$PspType, i, l, vec::AbstractVector{T}) where {T <: Real} - arch = architecture(vec) - to_device(arch, map(p -> $fn(psp, i, l, p), to_cpu(vec))) - end - end -end +# +# Function stubs for required methods +# """ eval_psp_projector_real(psp, i, l, r) @@ -76,8 +43,8 @@ end Evaluate the radial part of the `i`-th projector for angular momentum `l` in real-space at the vector with modulus `r`. """ -eval_psp_projector_real(psp::NormConservingPsp, i, l, r::AbstractVector) = - eval_psp_projector_real(psp, i, l, norm(r)) +function eval_psp_projector_real end + @doc raw""" eval_psp_projector_fourier(psp, i, l, p) @@ -91,23 +58,14 @@ at the reciprocal vector with modulus `p`: \end{aligned} ``` """ -eval_psp_projector_fourier(psp::NormConservingPsp, i, l, p::AbstractVector) = - eval_psp_projector_fourier(psp, i, l, norm(p)) - -# Fallback vectorized implementation for non GPU-optimized code. -function eval_psp_projector_fourier(psp::NormConservingPsp, i, l, - ps::AbstractVector{T}) where {T <: Real} - arch = architecture(ps) - to_device(arch, map(p -> eval_psp_projector_fourier(psp, i, l, p), to_cpu(ps))) -end +function eval_psp_projector_fourier end """ eval_psp_local_real(psp, r) Evaluate the local part of the pseudopotential in real space. """ -eval_psp_local_real(psp::NormConservingPsp, r::AbstractVector) = - eval_psp_local_real(psp, norm(r)) +function eval_psp_local_real end @doc raw""" eval_psp_local_fourier(psp, p) @@ -128,8 +86,7 @@ V_{\rm loc}(p) &= ∫_{ℝ^3} (V_{\rm loc}(r) - C(r)) e^{-ip·r} dr + F[C(r)] \\ \end{aligned} ``` """ -eval_psp_local_fourier(psp::NormConservingPsp, p::AbstractVector) = - eval_psp_local_fourier(psp, norm(p)) +function eval_psp_local_fourier end @doc raw""" eval_psp_energy_correction([T=Float64,] psp::NormConservingPsp) @@ -153,18 +110,19 @@ where as discussed above the implementation is expected to return the result for ``N_{\rm elec} = 1``. """ function eval_psp_energy_correction end -# by default, no correction, see PspHgh implementation and tests -# for details on what to implement eval_psp_energy_correction(psp::NormConservingPsp) = eval_psp_energy_correction(Float64, psp) +# +# Function stubs for optional methods +# + """ eval_psp_valence_density_real(psp, r) Evaluate the atomic valence charge density in real space. """ -eval_psp_valence_density_real(psp::NormConservingPsp, r::AbstractVector) = - eval_psp_valence_density_real(psp, norm(r)) +function eval_psp_valence_density_real end @doc raw""" eval_psp_valence_density_fourier(psp, p) @@ -177,8 +135,7 @@ Evaluate the atomic valence charge density in reciprocal space: \end{aligned} ``` """ -eval_psp_valence_density_fourier(psp::NormConservingPsp, p::AbstractVector) = - eval_psp_valence_density_fourier(psp, norm(p)) +function eval_psp_valence_density_fourier end """ eval_psp_core_density_real(psp, r) @@ -186,8 +143,6 @@ eval_psp_valence_density_fourier(psp::NormConservingPsp, p::AbstractVector) = Evaluate the atomic core charge density in real space. """ eval_psp_core_density_real(::NormConservingPsp, ::T) where {T <: Real} = zero(T) -eval_psp_core_density_real(psp::NormConservingPsp, r::AbstractVector) = - eval_psp_core_density_real(psp, norm(r)) @doc raw""" eval_psp_core_density_fourier(psp, p) @@ -201,18 +156,31 @@ Evaluate the atomic core charge density in reciprocal space: ``` """ eval_psp_core_density_fourier(::NormConservingPsp, ::T) where {T <: Real} = zero(T) -eval_psp_core_density_fourier(psp::NormConservingPsp, p::AbstractVector) = - eval_psp_core_density_fourier(psp, norm(p)) eval_psp_core_kinetic_energy_density_real(::NormConservingPsp, ::T) where {T <: Real} = zero(T) -eval_psp_core_kinetic_energy_density_real(psp::NormConservingPsp, r::AbstractVector) = - eval_psp_core_kinetic_energy_density_real(psp, norm(r)) - eval_psp_core_kinetic_energy_density_fourier(::NormConservingPsp, ::T) where {T <: Real} = zero(T) -eval_psp_core_kinetic_energy_density_fourier(psp::NormConservingPsp, p::AbstractVector) = - eval_psp_core_kinetic_energy_density_fourier(psp, norm(p)) -#### Methods defined on a NormConservingPsp + +""" + eval_psp_pswfc_real(psp::PspUpf, i, l, r) + +Evaluate the radial part of the `i`-th pseudo wave function of angular momentum `l` +at real space radius `r`. +""" +function eval_psp_pswfc_real end + +""" + eval_psp_pswfc_fourier(psp::PspUpf, i, l, p) + +Evaluate the radial part of the `i`-th pseudo wave function of angular momentum `l` +at reciprocal space radius `z`. +""" +function eval_psp_pswfc_fourier end + +# +# Other methods defined on a NormConservingPsp +# +# import Base.Broadcast.broadcastable Base.Broadcast.broadcastable(psp::NormConservingPsp) = Ref(psp) @@ -284,3 +252,27 @@ function find_pswfc(psp::NormConservingPsp, label::String) error("Could not find pseudo atomic orbital with label $label " * "in pseudopotential $(psp).") end + +# +# Provide dispatches for vector-valued inputs by calling an existing scalar version +# elementwise. Safe for GPUArray inputs. Performance critical methods should +# have their own GPU-optimized implementation instead of relying on this dispatch. +# +for fn in (:eval_psp_pswfc_fourier, :eval_psp_pswfc_real, + :eval_psp_projector_real, :eval_psp_projector_fourier) + @eval function $fn(psp::NormConservingPsp, i, l, ps::AbstractVector{<:Real}) + arch = architecture(ps) + to_device(arch, map(p -> $fn(psp, i, l, p), to_cpu(ps))) + end +end + +for fn in (:eval_psp_local_real, :eval_psp_local_fourier, + :eval_psp_valence_density_real, :eval_psp_valence_density_fourier, + :eval_psp_core_density_real, :eval_psp_core_density_fourier, + :eval_psp_core_kinetic_energy_density_real, + :eval_psp_core_kinetic_energy_density_fourier) + @eval function $fn(psp::NormConservingPsp, ps::AbstractVector{<:Real}) + arch = architecture(ps) + to_device(arch, map(p -> $fn(psp, p), to_cpu(ps))) + end +end diff --git a/src/pseudo/PspHgh.jl b/src/pseudo/PspHgh.jl index ad181e6fca..c303f29248 100644 --- a/src/pseudo/PspHgh.jl +++ b/src/pseudo/PspHgh.jl @@ -122,7 +122,6 @@ function eval_psp_local_fourier(psp::PspHgh, p::T) where {T <: Real} 4T(π) * rloc^2 * (-Zion + sqrt(T(π) / 2) * rloc * t^2 * P) * exp(-t^2 / 2) / t^2 end -@vectorize_psp_function PspHgh DFTK.eval_psp_local_fourier # [GTH98] (1) function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real} @@ -134,7 +133,6 @@ function eval_psp_local_real(psp::PspHgh, r::T) where {T <: Real} + exp(-rr^2 / 2) * (cloc[1] + cloc[2] * rr^2 + cloc[3] * rr^4 + cloc[4] * rr^6) ) end -@vectorize_psp_function PspHgh DFTK.eval_psp_local_real # [HGH98] (7-15) except they do it with plane waves normalized by 1/sqrt(Ω). @@ -163,7 +161,6 @@ function eval_psp_projector_fourier(psp::PspHgh, i, l, p::T) where {T <: Real} error("Not implemented for l=$l and i=$i") end -@vectorize_psp_projector_function PspHgh DFTK.eval_psp_projector_fourier # [HGH98] (3) function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real} @@ -171,7 +168,6 @@ function eval_psp_projector_real(psp::PspHgh, i, l, r::T) where {T <: Real} ired = (4i - 1) / T(2) sqrt(T(2)) * r^(l + 2(i - 1)) * exp(-r^2 / 2rp^2) / rp^(l + ired) / sqrt(gamma(l + ired)) end -@vectorize_psp_projector_function PspHgh DFTK.eval_psp_projector_real function eval_psp_energy_correction(T, psp::PspHgh) # By construction we need to compute the DC component of the difference diff --git a/src/pseudo/PspLinComb.jl b/src/pseudo/PspLinComb.jl index 3ccddb4976..335450f2e2 100644 --- a/src/pseudo/PspLinComb.jl +++ b/src/pseudo/PspLinComb.jl @@ -85,14 +85,3 @@ end @make_psplincomb_call DFTK.eval_psp_core_density_fourier @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_real @make_psplincomb_call DFTK.eval_psp_core_kinetic_energy_density_fourier - -@vectorize_psp_function PspLinComb DFTK.eval_psp_local_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_local_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_valence_density_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_density_fourier -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_real -@vectorize_psp_function PspLinComb DFTK.eval_psp_core_kinetic_energy_density_fourier -@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_real -@vectorize_psp_projector_function PspLinComb DFTK.eval_psp_projector_fourier diff --git a/src/pseudo/PspUpf.jl b/src/pseudo/PspUpf.jl index 8ab775e43c..859ea8837f 100644 --- a/src/pseudo/PspUpf.jl +++ b/src/pseudo/PspUpf.jl @@ -182,7 +182,6 @@ has_core_kinetic_energy_density(psp::PspUpf) = !all(iszero, psp.r2_τcore) function eval_psp_projector_real(psp::PspUpf, i, l, r::T)::T where {T<:Real} psp.r2_projs_interp[l+1][i](r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_projector_function PspUpf DFTK.eval_psp_projector_real function eval_psp_projector_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # The projectors may have been cut off before the end of the radial mesh @@ -215,7 +214,6 @@ pswfc_label(psp::PspUpf, i, l) = psp.pswfc_labels[l+1][i] function eval_psp_pswfc_real(psp::PspUpf, i, l, r::T)::T where {T<:Real} psp.r2_pswfcs_interp[l+1][i](r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_projector_function PspUpf DFTK.eval_psp_pswfc_real function eval_psp_pswfc_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # Pseudo-atomic wavefunctions are _not_ currently cut off like the other @@ -224,10 +222,8 @@ function eval_psp_pswfc_fourier(psp::PspUpf, i, l, p::T)::T where {T<:Real} # If issues arise, try cutting them off too. return hankel(psp.rgrid, psp.r2_pswfcs[l+1][i], l, p) end -@vectorize_psp_projector_function PspUpf DFTK.eval_psp_pswfc_fourier eval_psp_local_real(psp::PspUpf, r::T) where {T<:Real} = psp.vloc_interp(r) -@vectorize_psp_function PspUpf DFTK.eval_psp_local_real # Low-level function for the local part of the pseudopotential in reciprocal space function _eval_psp_local_fourier(quadrature, rgrid, vloc, Zion, p::T)::T where {T<:Real} @@ -268,19 +264,16 @@ end function eval_psp_valence_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_ρion_interp(r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_function PspUpf DFTK.eval_psp_valence_density_real function eval_psp_valence_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] r2_ρion = @view psp.r2_ρion[1:psp.ircut] return hankel(rgrid, r2_ρion, 0, p) end -@vectorize_psp_function PspUpf DFTK.eval_psp_valence_density_fourier function eval_psp_core_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_ρcore_interp(r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_function PspUpf DFTK.eval_psp_core_density_real function eval_psp_core_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] @@ -304,14 +297,12 @@ end function eval_psp_core_kinetic_energy_density_real(psp::PspUpf, r::T) where {T<:Real} psp.r2_τcore_interp(r) / r^2 # TODO if r is below a threshold, return zero end -@vectorize_psp_function PspUpf DFTK.eval_psp_core_kinetic_energy_density_real function eval_psp_core_kinetic_energy_density_fourier(psp::PspUpf, p::T) where {T<:Real} rgrid = @view psp.rgrid[1:psp.ircut] r2_τcore = @view psp.r2_τcore[1:psp.ircut] return hankel(rgrid, r2_τcore, 0, p) end -@vectorize_psp_function PspUpf DFTK.eval_psp_core_kinetic_energy_density_fourier function eval_psp_energy_correction(T, psp::PspUpf) rgrid = @view psp.rgrid[1:psp.ircut] diff --git a/test/PspLinComb.jl b/test/PspLinComb.jl index 24ff0c9947..2f3b4f7a04 100644 --- a/test/PspLinComb.jl +++ b/test/PspLinComb.jl @@ -29,7 +29,7 @@ for dens in (CoreDensity(), ValenceDensityPseudo()) for p in (0.01, 0.1, 0.2, 0.5, 1., 2., 5., 10.) - @test atomic_density(mix, p, dens) == atomic_density(Si, p, dens) + @test DFTK.atomic_density(mix, p, dens) == DFTK.atomic_density(Si, p, dens) end # p end end