Merge pull request #526 from ChrisRackauckas-Claude/akima-perf-opts

ChrisRackauckas · web-flow · commit 96ab7c8c02c9 · 2026-05-15T13:38:59.000Z
Optimize AkimaInterpolation constructor (3.6-4.1x) and add sorted-batch evaluator (8-9x)
diff --git a/src/interpolation_caches.jl b/src/interpolation_caches.jl
@@ -288,6 +288,66 @@ struct AkimaInterpolation{uType, tType, IType, bType, cType, dType, T} <:
     end
 end
 
+# In-place scalar kernel for computing the Akima / makima coefficients.
+# Allocates a single length-(n+3) buffer for the padded divided differences;
+# every other intermediate (dm, f1, f2, f12, w1, w2, ind, b-default) from the
+# original vectorized formulation is fused into a scalar pass.
+function _akima_init!(
+        b::AbstractVector{T}, c::AbstractVector{T}, d::AbstractVector{T},
+        u::AbstractVector, t::AbstractVector, ::Val{modified}
+    ) where {T, modified}
+    n = length(u)
+    m = Vector{T}(undef, n + 3)
+    @inbounds begin
+        for i in 1:(n - 1)
+            m[i + 2] = (u[i + 1] - u[i]) / (t[i + 1] - t[i])
+        end
+        m[2] = 2 * m[3] - m[4]
+        m[1] = 2 * m[2] - m[3]
+        m[n + 2] = 2 * m[n + 1] - m[n]
+        m[n + 3] = 2 * m[n + 2] - m[n + 1]
+
+        # First pass: maximum weight, used as the small-weight cutoff
+        wmax = zero(T)
+        for i in 1:n
+            if modified
+                w1 = abs(m[i + 3] - m[i + 2]) + abs(m[i + 3] + m[i + 2]) / 2
+                w2 = abs(m[i + 1] - m[i]) + abs(m[i + 1] + m[i]) / 2
+            else
+                w1 = abs(m[i + 3] - m[i + 2])
+                w2 = abs(m[i + 1] - m[i])
+            end
+            w12 = w1 + w2
+            wmax = ifelse(w12 > wmax, w12, wmax)
+        end
+        tol = T(1.0e-9) * wmax
+
+        # Second pass: coefficients
+        for i in 1:n
+            if modified
+                w1 = abs(m[i + 3] - m[i + 2]) + abs(m[i + 3] + m[i + 2]) / 2
+                w2 = abs(m[i + 1] - m[i]) + abs(m[i + 1] + m[i]) / 2
+                bdefault = (m[i + 1] + m[i + 2]) / 2
+            else
+                w1 = abs(m[i + 3] - m[i + 2])
+                w2 = abs(m[i + 1] - m[i])
+                bdefault = (m[i + 3] + m[i]) / 2
+            end
+            w12 = w1 + w2
+            b[i] = w12 > tol ?
+                (w1 * m[i + 1] + w2 * m[i + 2]) / w12 :
+                bdefault
+        end
+
+        for i in 1:(n - 1)
+            dt = t[i + 1] - t[i]
+            c[i] = (3 * m[i + 2] - 2 * b[i] - b[i + 1]) / dt
+            d[i] = (b[i] + b[i + 1] - 2 * m[i + 2]) / (dt * dt)
+        end
+    end
+    return nothing
+end
+
 function AkimaInterpolation(
         u, t; modified::Bool = false,
         extrapolation::ExtrapolationType.T = ExtrapolationType.None,
@@ -301,40 +361,11 @@ function AkimaInterpolation(
     u, t = munge_data(u, t)
     linear_lookup = seems_linear(assume_linear_t, t)
     n = length(t)
-    dt = diff(t)
-    m = Array{eltype(u)}(undef, n + 3)
-    m[3:(end - 2)] = diff(u) ./ dt
-    m[2] = 2m[3] - m[4]
-    m[1] = 2m[2] - m[3]
-    m[end - 1] = 2m[end - 2] - m[end - 3]
-    m[end] = 2m[end - 1] - m[end - 2]
-
-    if modified
-        # Modified Akima (makima): adds |m_{i+1} + m_i| / 2 to each weight, which
-        # reduces overshoot on flat regions. The simple-average fallback still
-        # guards the case where all four neighboring slopes vanish.
-        w1 = abs.(m[4:end] .- m[3:(end - 1)]) .+
-            abs.(m[4:end] .+ m[3:(end - 1)]) ./ 2
-        w2 = abs.(m[2:(end - 2)] .- m[1:(end - 3)]) .+
-            abs.(m[2:(end - 2)] .+ m[1:(end - 3)]) ./ 2
-        w12 = w1 .+ w2
-        b = (m[2:(end - 2)] .+ m[3:(end - 1)]) ./ 2
-        ind = findall(w12 .> 1.0e-9 * maximum(w12))
-        b[ind] = (w1[ind] .* m[ind .+ 1] .+ w2[ind] .* m[ind .+ 2]) ./ w12[ind]
-    else
-        b = (m[4:end] .+ m[1:(end - 3)]) ./ 2
-        dm = abs.(diff(m))
-        f1 = dm[3:(n + 2)]
-        f2 = dm[1:n]
-        f12 = f1 + f2
-        ind = findall(f12 .> 1.0e-9 * maximum(f12))
-        b[ind] = (
-            f1[ind] .* m[ind .+ 1] .+
-                f2[ind] .* m[ind .+ 2]
-        ) ./ f12[ind]
-    end
-    c = (3 .* m[3:(end - 2)] .- 2 .* b[1:(end - 1)] .- b[2:end]) ./ dt
-    d = (b[1:(end - 1)] .+ b[2:end] .- 2 .* m[3:(end - 2)]) ./ dt .^ 2
+    T = eltype(u)
+    b = Vector{T}(undef, n)
+    c = Vector{T}(undef, n - 1)
+    d = Vector{T}(undef, n - 1)
+    _akima_init!(b, c, d, u, t, Val(modified))
 
     A = AkimaInterpolation(
         u, t, nothing, b, c, d, extrapolation_left,
diff --git a/src/interpolation_methods.jl b/src/interpolation_methods.jl
@@ -205,6 +205,134 @@ function _interpolate(A::AkimaInterpolation{<:AbstractVector}, t::Number, iguess
     return @evalpoly wj A.u[idx] A.b[idx] A.c[idx] A.d[idx]
 end
 
+# Sorted-batch fast path: when the query points are already sorted (and the
+# extrapolation modes don't require per-point transformation), walk the knots
+# and queries in lockstep instead of running a binary search per query.
+function (A::AkimaInterpolation{<:AbstractVector})(
+        out::AbstractVector, tt::AbstractVector
+    )
+    if length(out) != length(tt)
+        throw(
+            DimensionMismatch(
+                "number of evaluation points and length of the result vector must be equal"
+            )
+        )
+    end
+    if _akima_eval_fast_applicable(A) && issorted(tt)
+        _akima_eval_sorted!(out, A, tt)
+    else
+        map!(A, out, tt)
+    end
+    return out
+end
+
+@inline function _akima_eval_fast_applicable(A::AkimaInterpolation)
+    el = A.extrapolation_left
+    er = A.extrapolation_right
+    el_ok = el == ExtrapolationType.None ||
+        el == ExtrapolationType.Constant ||
+        el == ExtrapolationType.Linear ||
+        el == ExtrapolationType.Extension
+    er_ok = er == ExtrapolationType.None ||
+        er == ExtrapolationType.Constant ||
+        er == ExtrapolationType.Linear ||
+        er == ExtrapolationType.Extension
+    return el_ok && er_ok
+end
+
+function _akima_eval_sorted!(
+        out::AbstractVector, A::AkimaInterpolation{<:AbstractVector}, tt::AbstractVector
+    )
+    u = A.u
+    t = A.t
+    bv = A.b
+    cv = A.c
+    dv = A.d
+    el = A.extrapolation_left
+    er = A.extrapolation_right
+    n = length(t)
+    m = length(tt)
+    t1 = @inbounds t[1]
+    tn = @inbounds t[n]
+
+    i = 1
+
+    # Left extrapolation
+    if el == ExtrapolationType.None
+        @inbounds if i <= m && tt[i] < t1
+            throw(LeftExtrapolationError())
+        end
+    elseif el == ExtrapolationType.Constant
+        u1 = @inbounds u[1]
+        @inbounds while i <= m && tt[i] < t1
+            out[i] = u1
+            i += 1
+        end
+    elseif el == ExtrapolationType.Linear
+        u1 = @inbounds u[1]
+        b1 = @inbounds bv[1]
+        @inbounds while i <= m && tt[i] < t1
+            out[i] = u1 + b1 * (tt[i] - t1)
+            i += 1
+        end
+    else  # Extension
+        u1 = @inbounds u[1]
+        b1 = @inbounds bv[1]
+        c1 = @inbounds cv[1]
+        d1 = @inbounds dv[1]
+        @inbounds while i <= m && tt[i] < t1
+            wj = tt[i] - t1
+            out[i] = @evalpoly wj u1 b1 c1 d1
+            i += 1
+        end
+    end
+
+    # Interior: walk knots in lockstep
+    idx = 1
+    @inbounds while i <= m && tt[i] <= tn
+        ttt = tt[i]
+        while idx < n - 1 && ttt > t[idx + 1]
+            idx += 1
+        end
+        wj = ttt - t[idx]
+        out[i] = @evalpoly wj u[idx] bv[idx] cv[idx] dv[idx]
+        i += 1
+    end
+
+    # Right extrapolation
+    if er == ExtrapolationType.None
+        @inbounds if i <= m
+            throw(RightExtrapolationError())
+        end
+    elseif er == ExtrapolationType.Constant
+        un = @inbounds u[n]
+        @inbounds while i <= m
+            out[i] = un
+            i += 1
+        end
+    elseif er == ExtrapolationType.Linear
+        un = @inbounds u[n]
+        bn = @inbounds bv[n]
+        @inbounds while i <= m
+            out[i] = un + bn * (tt[i] - tn)
+            i += 1
+        end
+    else  # Extension
+        un1 = @inbounds u[n - 1]
+        bn1 = @inbounds bv[n - 1]
+        cn1 = @inbounds cv[n - 1]
+        dn1 = @inbounds dv[n - 1]
+        tn1 = @inbounds t[n - 1]
+        @inbounds while i <= m
+            wj = tt[i] - tn1
+            out[i] = @evalpoly wj un1 bn1 cn1 dn1
+            i += 1
+        end
+    end
+
+    return nothing
+end
+
 # Constant Interpolation
 function _interpolate(A::ConstantInterpolation{<:AbstractVector}, t::Number, iguess)
     if A.dir === :left
diff --git a/test/interpolation_tests.jl b/test/interpolation_tests.jl
@@ -565,6 +565,82 @@ end
         @test isfinite(DataInterpolations.derivative(A_makima, 5.0))
         @test isfinite(DataInterpolations.integral(A_makima, 0.0, 10.0))
     end
+
+    @testset "Sorted-batch evaluator" begin
+        u = [0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 5.5, 5.5, 2.7, 5.1, 3.0]
+        t = collect(0.0:10.0)
+
+        for modified in (false, true)
+            A = AkimaInterpolation(u, t; modified = modified)
+            # Sorted query: fast path matches the per-point path
+            tt = sort!([0.0, 0.5, 1.0, 2.7, 5.3, 7.9, 10.0])
+            out = similar(tt)
+            A(out, tt)
+            for k in eachindex(tt)
+                @test out[k] ≈ A(tt[k])
+            end
+            # Knot pass-through
+            outk = similar(t)
+            A(outk, t)
+            for k in eachindex(t)
+                @test outk[k] ≈ u[k]
+            end
+            # Unsorted query falls back to per-point and stays consistent
+            tt_unsorted = [3.1, 7.7, 0.2, 5.5, 9.9]
+            out_u = similar(tt_unsorted)
+            A(out_u, tt_unsorted)
+            for k in eachindex(tt_unsorted)
+                @test out_u[k] ≈ A(tt_unsorted[k])
+            end
+        end
+
+        # Each fast-path extrapolation mode matches the per-point path
+        for el in (
+                    ExtrapolationType.Constant,
+                    ExtrapolationType.Linear,
+                    ExtrapolationType.Extension,
+                ),
+                er in (
+                    ExtrapolationType.Constant,
+                    ExtrapolationType.Linear,
+                    ExtrapolationType.Extension,
+                )
+
+            A = AkimaInterpolation(
+                u, t; extrapolation_left = el, extrapolation_right = er
+            )
+            tt = collect(-2.0:0.4:12.0)
+            out = similar(tt)
+            A(out, tt)
+            for k in eachindex(tt)
+                @test out[k] ≈ A(tt[k])
+            end
+        end
+
+        # Periodic/Reflective fall back to the map! path
+        for ext in (ExtrapolationType.Periodic, ExtrapolationType.Reflective)
+            A = AkimaInterpolation(u, t; extrapolation = ext)
+            tt = collect(-3.0:0.5:13.0)
+            out = similar(tt)
+            A(out, tt)
+            for k in eachindex(tt)
+                @test out[k] ≈ A(tt[k])
+            end
+        end
+
+        # ExtrapolationType.None throws when a sorted query is out of range
+        A_none = AkimaInterpolation(u, t)
+        @test_throws DataInterpolations.LeftExtrapolationError A_none(
+            similar([-1.0, 5.0]), [-1.0, 5.0]
+        )
+        @test_throws DataInterpolations.RightExtrapolationError A_none(
+            similar([5.0, 11.0]), [5.0, 11.0]
+        )
+
+        # DimensionMismatch
+        A_dim = AkimaInterpolation(u, t)
+        @test_throws DimensionMismatch A_dim(zeros(3), [1.0, 2.0])
+    end
 end
 
 @testset "ConstantInterpolation" begin