Commit b3c9ed1

yebaiclaude committed
Address review feedback: correctness, clarity, and robustness fixes
- ForwardDiff: use DiffResults (via ForwardDiff.DiffResults) for single-pass value+gradient, removing the double primal evaluation
- ForwardDiff: remove redundant chunk_size guard in _prepare_gradient (tweak_adtype already normalises it to a concrete positive integer)
- AutoMooncakeForward: handle empty params edge case (loop doesn't execute)
- Mooncake _cache_config: use Accessors.@set to preserve all Config fields when overriding friendly_tangents=false, instead of forwarding only two known fields
- Mooncake @compile_workload: remove redundant single-element for-loop
- EnzymeExt: document that adtype.mode is intentionally ignored (always reverse)
- src/logdensityfunction.jl: add fallback error for _value_and_gradient with unknown AD backends, pointing users to ForwardDiff (the default) or DI
- test/logdensityfunction.jl: revert formatter noise (accumulate_assume!!, accumulate_observe!!, ::Type{T}=... syntax)
- test/Project.toml: remove accidentally-added DynamicPPL dep

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0cb79f8 commit b3c9ed1

7 files changed

Lines changed: 51 additions & 38 deletions


docs/Project.toml

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ FillArrays = "0.13, 1"
 ForwardDiff = "0.10, 1"
 LogDensityProblems = "2"
 MCMCChains = "5, 6, 7"
-MarginalLogDensities = "0.4"
+MarginalLogDensities = "0.4.3"
 OrderedCollections = "1"
 StableRNGs = "1"
 StatsFuns = "1"

ext/DynamicPPLEnzymeExt.jl

Lines changed: 9 additions & 2 deletions
@@ -3,6 +3,11 @@ module DynamicPPLEnzymeExt
 using DynamicPPL: ADTypes, DynamicPPL
 using Enzyme: Enzyme
 
+_enzyme_gradient_mode(::ADTypes.AutoEnzyme{Nothing}) = Enzyme.ReverseWithPrimal
+function _enzyme_gradient_mode(adtype::ADTypes.AutoEnzyme)
+    return Enzyme.EnzymeCore.set_runtime_activity(Enzyme.ReverseWithPrimal, adtype.mode)
+end
+
 function DynamicPPL._prepare_gradient(
     ::ADTypes.AutoEnzyme,
     x::AbstractVector{<:Real},
@@ -16,7 +21,7 @@ function DynamicPPL._prepare_gradient(
 end
 
 function DynamicPPL._value_and_gradient(
-    ::ADTypes.AutoEnzyme,
+    adtype::ADTypes.AutoEnzyme,
     prep,
     params::AbstractVector{<:Real},
     model::DynamicPPL.Model,
@@ -32,9 +37,11 @@ function DynamicPPL._value_and_gradient(
     fill!(dx, zero(eltype(dx)))
     # Const(f): LogDensityAt is not being differentiated; without Const, Enzyme errors
     # because it cannot prove the function argument is readonly.
+    # We always use reverse mode to obtain the full gradient in one pass, but preserve
+    # runtime-activity settings from `adtype.mode` when they were requested.
    # autodiff(ReverseWithPrimal, ...) returns ((), val); dx is mutated in-place.
     _, val = Enzyme.autodiff(
-        Enzyme.ReverseWithPrimal,
+        _enzyme_gradient_mode(adtype),
         Enzyme.Const(f),
         Enzyme.Active,
         Enzyme.Duplicated(params, dx),
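
For readers unfamiliar with Enzyme's calling convention, here is a minimal, self-contained sketch of the call shape used above, outside of DynamicPPL (the function f and the values are illustrative, not DynamicPPL code):

using Enzyme

f(x) = sum(abs2, x)   # toy stand-in for the log-density closure
x = [1.0, 2.0, 3.0]
dx = zero(x)          # shadow buffer; must be zeroed, gradient accumulates here

# ReverseWithPrimal returns (derivatives, primal value); for a Duplicated
# argument the gradient lands in dx rather than in the returned tuple.
_, val = Enzyme.autodiff(
    Enzyme.ReverseWithPrimal,
    Enzyme.Const(f),          # f itself carries no differentiable state
    Enzyme.Active,            # the scalar return is active
    Enzyme.Duplicated(x, dx),
)
# val == 14.0, and dx == [2.0, 4.0, 6.0] (= 2x)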

ext/DynamicPPLForwardDiffExt.jl

Lines changed: 9 additions & 11 deletions
@@ -2,6 +2,9 @@ module DynamicPPLForwardDiffExt
 
 using DynamicPPL: ADTypes, DynamicPPL, LogDensityProblems
 using ForwardDiff
+# DiffResults is a direct dependency of ForwardDiff; access it through ForwardDiff's namespace
+# rather than listing it as a separate (weak)dep of DynamicPPL.
+const DiffResults = ForwardDiff.DiffResults
 
 # check if the AD type already has a tag
 use_dynamicppl_tag(::ADTypes.AutoForwardDiff{<:Any,Nothing}) = true
@@ -40,14 +43,11 @@
     f = DynamicPPL.LogDensityAt(
         model, getlogdensity, varname_ranges, transform_strategy, accs
     )
-    chunk = if chunk_size == 0 || chunk_size === nothing
-        ForwardDiff.Chunk(x)
-    else
-        ForwardDiff.Chunk(length(x), chunk_size)
-    end
+    # chunk_size is already set to a concrete positive integer by tweak_adtype
+    chunk = ForwardDiff.Chunk(length(x), chunk_size)
     cfg = ForwardDiff.GradientConfig(f, x, chunk, adtype.tag)
-    grad = similar(x)
-    return (; cfg, grad)
+    result = DiffResults.GradientResult(similar(x))
+    return (; cfg, result)
 end
 
 function DynamicPPL._value_and_gradient(
@@ -65,10 +65,8 @@
     )
     # Val{false}() skips tag checking, since our DynamicPPLTag is reused across calls
     # with different LogDensityAt instances.
-    ForwardDiff.gradient!(prep.grad, f, params, prep.cfg, Val{false}())
-    # gradient!(::AbstractArray, ...) doesn't return the value, so evaluate separately.
-    value = f(params)
-    return value, copy(prep.grad)
+    ForwardDiff.gradient!(prep.result, f, params, prep.cfg, Val{false}())
+    return DiffResults.value(prep.result), copy(DiffResults.gradient(prep.result))
 end
 
 end # module
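
The DiffResults pattern above generalises beyond this extension. As a minimal sketch (toy function, no DynamicPPL involved), a preallocated GradientResult lets ForwardDiff.gradient! record the primal value and the gradient in a single pass, which is what removes the double primal evaluation:

using ForwardDiff
const DiffResults = ForwardDiff.DiffResults

f(x) = sum(abs2, x)
x = [1.0, 2.0, 3.0]

result = DiffResults.GradientResult(similar(x))  # holds both value and gradient
ForwardDiff.gradient!(result, f, x)              # one pass fills both slots

DiffResults.value(result)     # 14.0, with no second call to f
DiffResults.gradient(result)  # [2.0, 4.0, 6.0]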

ext/DynamicPPLMooncakeExt.jl

Lines changed: 9 additions & 12 deletions
@@ -24,19 +24,16 @@ Mooncake.@zero_derivative Mooncake.DefaultCtx Tuple{
 
 using DynamicPPL: @model, LinkAll, LogDensityAt, getlogjoint_internal, LogDensityFunction
 using ADTypes: AutoMooncake, AutoMooncakeForward
+using Accessors: Accessors
 using Distributions: Normal, InverseGamma, Beta
 using PrecompileTools: @setup_workload, @compile_workload
 
 function _cache_config(::Union{AutoMooncake{Nothing},AutoMooncakeForward{Nothing}})
     return Mooncake.Config(; friendly_tangents=false)
 end
 function _cache_config(adtype::Union{AutoMooncake,AutoMooncakeForward})
-    config = adtype.config
-    return Mooncake.Config(;
-        debug_mode=config.debug_mode,
-        silence_debug_messages=config.silence_debug_messages,
-        friendly_tangents=false,
-    )
+    # Use Accessors to set friendly_tangents=false while preserving all other config fields.
+    return Accessors.@set adtype.config.friendly_tangents = false
 end
 
 # LogDensityAt is a constant w.r.t. differentiation; NoTangent avoids tangent allocation.
@@ -96,6 +93,8 @@ function DynamicPPL._value_and_gradient(
     )
     f = LogDensityAt(model, getlogdensity, varname_ranges, transform_strategy, accs)
     (; cache, dx, grad) = prep
+    # Handle empty parameter vector: value_and_derivative!! loop won't execute.
+    isempty(params) && return f(params), copy(grad)
     value = zero(eltype(grad))
     fill!(dx, zero(eltype(dx)))
     @inbounds for i in eachindex(grad, dx)
@@ -110,12 +109,10 @@
 
 @setup_workload begin
     @compile_workload begin
-        for adtype in (AutoMooncake(),)
-            for dist in (Normal(), InverseGamma(2, 3), Beta(2, 2))
-                @model f() = x ~ dist
-                ldf = LogDensityFunction(f(), getlogjoint_internal, LinkAll(); adtype)
-                DynamicPPL.LogDensityProblems.logdensity_and_gradient(ldf, [0.5])
-            end
+        for dist in (Normal(), InverseGamma(2, 3), Beta(2, 2))
+            @model f() = x ~ dist
+            ldf = LogDensityFunction(f(), getlogjoint_internal, LinkAll(); adtype=AutoMooncake())
+            DynamicPPL.LogDensityProblems.logdensity_and_gradient(ldf, [0.5])
         end
     end
 end
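
The Accessors.@set change is worth a standalone illustration: @set returns a modified copy of an immutable value, so every field not mentioned is carried over automatically, and a new Config field added upstream can never be silently dropped. A minimal sketch with a hypothetical stand-in struct (not Mooncake's actual Config definition):

using Accessors

# Hypothetical config type with several fields.
struct ToyConfig
    debug_mode::Bool
    silence_debug_messages::Bool
    friendly_tangents::Bool
end

cfg = ToyConfig(true, true, true)
# Copy cfg with one field overridden; debug_mode and silence_debug_messages
# are preserved without being named.
cfg2 = Accessors.@set cfg.friendly_tangents = false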

src/logdensityfunction.jl

Lines changed: 12 additions & 0 deletions
@@ -407,6 +407,18 @@
 function _prepare_gradient end
 function _value_and_gradient end
 
+function _value_and_gradient(adtype::ADTypes.AbstractADType, args...)
+    throw(
+        ArgumentError(
+            "No gradient implementation found for AD backend $adtype. " *
+            "If you intended to use the default (ForwardDiff), ensure that ForwardDiff is " *
+            "loaded (e.g. `using ForwardDiff`). For other backends, load the corresponding " *
+            "package (e.g. `using Mooncake`, `using Enzyme`) or load " *
+            "DifferentiationInterface as a fallback.",
+        ),
+    )
+end
+
 function LogDensityProblems.logdensity(
     ldf::LogDensityFunction, params::AbstractVector{<:Real}
 )
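
The fallback works because Julia's dispatch prefers the most specific applicable method: each AD extension defines _value_and_gradient for its concrete ADTypes backend, so the AbstractADType method above is only reached when no extension has claimed the backend. A minimal sketch of the pattern with hypothetical types (not DynamicPPL's actual definitions):

# Hypothetical miniature of the extension-dispatch pattern.
abstract type AbstractADType end
struct AutoToyDiff <: AbstractADType end

# Catch-all: reached only when no more specific method exists.
value_and_gradient(adtype::AbstractADType, f, x) =
    throw(ArgumentError("No gradient implementation found for AD backend $adtype."))

# "Extension" method: loading the backend package would define this, and
# dispatch then routes AutoToyDiff calls here instead of to the fallback.
value_and_gradient(::AutoToyDiff, f, x) = (f(x), 2 .* x)  # toy gradient of sum(abs2, x)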

test/Project.toml

Lines changed: 1 addition & 2 deletions
@@ -13,7 +13,6 @@ DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
@@ -53,7 +52,7 @@ InvertedIndices = "1"
 LogDensityProblems = "2"
 MCMCChains = "7.2.1"
 MacroTools = "0.5.6"
-MarginalLogDensities = "0.4"
+MarginalLogDensities = "0.4.3"
 OffsetArrays = "1"
 OrderedCollections = "1"
 ReverseDiff = "1"

test/logdensityfunction.jl

Lines changed: 10 additions & 10 deletions
@@ -178,12 +178,12 @@
 struct ErrorAccumulatorException <: Exception end
 struct ErrorAccumulator <: DynamicPPL.AbstractAccumulator end
 DynamicPPL.accumulator_name(::ErrorAccumulator) = :ERROR
-DynamicPPL.accumulate_assume!!(::ErrorAccumulator, ::Any, ::Any, ::Any, ::VarName, ::Distribution, ::Any) = throw(
-    ErrorAccumulatorException()
-)
-DynamicPPL.accumulate_observe!!(::ErrorAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any) = throw(
-    ErrorAccumulatorException()
-)
+DynamicPPL.accumulate_assume!!(
+    ::ErrorAccumulator, ::Any, ::Any, ::Any, ::VarName, ::Distribution, ::Any
+) = throw(ErrorAccumulatorException())
+DynamicPPL.accumulate_observe!!(
+    ::ErrorAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any
+) = throw(ErrorAccumulatorException())
 DynamicPPL.reset(ea::ErrorAccumulator) = ea
 Base.copy(ea::ErrorAccumulator) = ea
 # Construct an LDF
@@ -497,7 +497,7 @@
     return LogDensityProblems.logdensity_and_gradient(ldf, m[:])
 end
 
-@model function scalar_matrix_model((::Type{T})=Float64) where {T<:Real}
+@model function scalar_matrix_model(::Type{T}=Float64) where {T<:Real}
     m = Matrix{T}(undef, 2, 3)
     return m ~ filldist(MvNormal(zeros(2), I), 3)
 end
@@ -506,14 +506,14 @@
     scalar_matrix_model, test_m, ref_adtype
 )
 
-@model function matrix_model((::Type{T})=Matrix{Float64}) where {T}
+@model function matrix_model(::Type{T}=Matrix{Float64}) where {T}
     m = T(undef, 2, 3)
     return m ~ filldist(MvNormal(zeros(2), I), 3)
 end
 
 matrix_model_reference = eval_logp_and_grad(matrix_model, test_m, ref_adtype)
 
-@model function scalar_array_model((::Type{T})=Float64) where {T<:Real}
+@model function scalar_array_model(::Type{T}=Float64) where {T<:Real}
     m = Array{T}(undef, 2, 3)
     return m ~ filldist(MvNormal(zeros(2), I), 3)
 end
@@ -522,7 +522,7 @@
     scalar_array_model, test_m, ref_adtype
 )
 
-@model function array_model((::Type{T})=Array{Float64}) where {T}
+@model function array_model(::Type{T}=Array{Float64}) where {T}
     m = T(undef, 2, 3)
     return m ~ filldist(MvNormal(zeros(2), I), 3)
 end
