@@ -3,13 +3,9 @@ module DynamicPPLMooncakeExt
 using DynamicPPL: DynamicPPL, is_transformed
 using Mooncake:
     Mooncake,
-    Dual,
     NoTangent,
-    primal,
     prepare_derivative_cache,
     prepare_gradient_cache,
-    tangent,
-    value_and_derivative!!,
     value_and_gradient!!

 # These are purely optimisations (although quite significant ones sometimes, especially for
@@ -27,23 +23,19 @@ using ADTypes: AutoMooncake, AutoMooncakeForward
 using Distributions: Normal, InverseGamma, Beta
 using PrecompileTools: @setup_workload, @compile_workload

-_config(::Union{AutoMooncake{Nothing},AutoMooncakeForward{Nothing}}) = Mooncake.Config()
-_config(adtype::Union{AutoMooncake,AutoMooncakeForward}) = adtype.config
+function _cache_config(::Union{AutoMooncake{Nothing},AutoMooncakeForward{Nothing}})
+    return Mooncake.Config(; friendly_tangents=false)
+end
 function _cache_config(adtype::Union{AutoMooncake,AutoMooncakeForward})
-    config = _config(adtype)
-    # `friendly_tangents=true` rewrites tangent types into named structs at tape-build time,
-    # which is incompatible with a reusable cache (the cached tape would be tied to the
-    # original tangent struct layout). Force it off so the cache stays valid across calls.
+    config = adtype.config
     return Mooncake.Config(;
         debug_mode=config.debug_mode,
         silence_debug_messages=config.silence_debug_messages,
         friendly_tangents=false,
     )
 end

-# LogDensityAt is the function being differentiated through, not a quantity being
-# differentiated with respect to. Declaring NoTangent here tells Mooncake to treat it as
-# a constant, which is correct and avoids unnecessary tangent allocation.
+# LogDensityAt is a constant w.r.t. differentiation; NoTangent avoids tangent allocation.
 Mooncake.tangent_type(::Type{<:DynamicPPL.LogDensityAt}) = NoTangent

 function DynamicPPL._prepare_gradient(
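A quick sketch of what the new `_cache_config` guarantees, as a hypothetical check inside
this extension module (not part of the diff; it assumes the `using` lines above are in
scope, that `Mooncake.Config`'s keywords default when omitted, and that `Config` exposes
its keyword arguments as fields):

    # Hypothetical: user supplies a config with friendly tangents turned on.
    adtype = AutoMooncake(; config=Mooncake.Config(; debug_mode=true, friendly_tangents=true))
    cfg = _cache_config(adtype)
    cfg.debug_mode         # true  -- carried over from the user's config
    cfg.friendly_tangents  # false -- always forced off so the cached tape layout stays valid
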
@@ -69,11 +61,7 @@ function DynamicPPL._prepare_gradient(
     accs::DynamicPPL.AccumulatorTuple,
 )
     f = LogDensityAt(model, getlogdensity, varname_ranges, transform_strategy, accs)
-    return (;
-        cache=prepare_derivative_cache(f, x; config=_cache_config(adtype)),
-        dx=similar(x),
-        grad=similar(x),
-    )
+    return prepare_derivative_cache(f, x; config=_cache_config(adtype))
 end

 function DynamicPPL._value_and_gradient(
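The prep object for the forward-mode path is now just the Mooncake cache itself; the
`dx`/`grad` scratch buffers disappear along with the manual sweep in the next hunk. For
reference, a toy single-direction sketch of the underlying API (toy `f`; the call shapes
are taken from the removed code below, with the `config` keyword left at its default):

    using Mooncake: Dual, NoTangent, prepare_derivative_cache, primal, tangent, value_and_derivative!!

    f(x) = sum(abs2, x)
    x = randn(3)
    cache = prepare_derivative_cache(f, x)  # built once, reusable across directions
    dx = [1.0, 0.0, 0.0]                    # seed: directional derivative along e_1
    dual = value_and_derivative!!(cache, Dual(f, NoTangent()), Dual(x, dx))
    primal(dual)   # f(x)
    tangent(dual)  # directional derivative along dx, here 2 * x[1]
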
@@ -102,32 +90,7 @@ function DynamicPPL._value_and_gradient(
     accs::DynamicPPL.AccumulatorTuple,
 )
     f = LogDensityAt(model, getlogdensity, varname_ranges, transform_strategy, accs)
-    dx = prep.dx
-    grad = prep.grad
-
-    if isempty(grad)
-        # Zero-dimensional parameter vector: evaluate primal only. Use a zero tangent so
-        # value_and_derivative!! returns the function value without computing any derivative.
-        fill!(dx, zero(eltype(dx)))
-        value = primal(
-            value_and_derivative!!(prep.cache, Dual(f, NoTangent()), Dual(params, dx))
-        )
-        return value, copy(grad)
-    end
-
-    # Standard column-by-column forward-mode sweep: set dx to each unit vector in turn,
-    # compute the directional derivative, and accumulate into grad.
-    # Each iteration resets dx[i] to zero after use, so dx is all-zeros at loop exit.
-    value = zero(eltype(grad))
-    @inbounds for i in eachindex(grad, dx)
-        dx[i] = oneunit(eltype(dx))
-        dual_value = value_and_derivative!!(
-            prep.cache, Dual(f, NoTangent()), Dual(params, dx)
-        )
-        value = primal(dual_value)
-        grad[i] = tangent(dual_value)
-        dx[i] = zero(eltype(dx))
-    end
+    value, grad = value_and_gradient!!(prep, f, params)
    return value, copy(grad)
 end

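The column-by-column seeding loop now lives inside Mooncake: `value_and_gradient!!` takes
the prepared cache directly and returns the value and gradient in one call. A toy sketch
of the same call shape (toy `g`; shown with the reverse-mode `prepare_gradient_cache`
pairing, and the exact container holding the gradient depends on the Mooncake version in
use):

    using Mooncake: prepare_gradient_cache, value_and_gradient!!

    g(x) = sum(abs2, x)
    x = randn(3)
    cache = prepare_gradient_cache(g, x)  # one-time cache build, reused across calls
    # Returns the value plus the gradient data in a single pass.
    value, grad = value_and_gradient!!(cache, g, x)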