Don't preallocate GradientConfig in ForwardDiff backend by default #8

Merged Mar 8, 2023 · 21 commits · Changes from 1 commit
57 changes: 44 additions & 13 deletions ext/ForwardDiffExt.jl
@@ -18,24 +18,35 @@ end
# Load DiffResults helpers
include("DiffResults_helpers.jl")

struct ForwardDiffLogDensity{L, C} <: ADGradientWrapper
struct ForwardDiffLogDensity{L, C <: ForwardDiff.Chunk,
G <: Union{Nothing,ForwardDiff.GradientConfig}} <: ADGradientWrapper
"supports zero-order evaluation `logdensity(ℓ, x)`"
ℓ::L
gradientconfig::C
"chunk size for ForwardDiff"
chunk::C
"gradient config, or `nothing` if created for each evaluation"
gradient_config::G
end

function Base.show(io::IO, ℓ::ForwardDiffLogDensity)
print(io, "ForwardDiff AD wrapper for ", ℓ.ℓ,
", w/ chunk size ", length(ℓ.gradientconfig.seeds))
", w/ chunk size ", ForwardDiff.chunksize(ℓ.chunk))
end

_chunk(chunk::ForwardDiff.Chunk) = chunk
_chunk(chunk::Integer) = ForwardDiff.Chunk(chunk)

_default_chunk(ℓ) = _chunk(dimension(ℓ))

_default_gradientconfig(ℓ, chunk, ::Nothing) = _default_gradientconfig(ℓ, chunk, zeros(dimension(ℓ)))
function _default_gradientconfig(ℓ, chunk, x::AbstractVector)
return ForwardDiff.GradientConfig(Base.Fix1(logdensity, ℓ), x, _chunk(chunk))
function _make_gradient_config(::Type{T}, ℓ, chunk) where T
ForwardDiff.GradientConfig(Base.Fix1(logdensity, ℓ), zeros(T, dimension(ℓ)),
_chunk(chunk))
end

function Base.copy(fℓ::ForwardDiffLogDensity{L,C,<:ForwardDiff.GradientConfig{T}}) where {L,C,T}
@unpack ℓ, chunk = fℓ
gradient_config = _make_gradient_config(T, ℓ, chunk)
ForwardDiffLogDensity(ℓ, chunk, gradient_config)
end

"""
@@ -45,21 +56,41 @@ end
Wrap a log density that supports evaluation of `Value` to handle `ValueGradient`, using
`ForwardDiff`.

Keywords are passed on to `ForwardDiff.GradientConfig` to customize the setup. In
particular, chunk size can be set with a `chunk` keyword argument (accepting an integer or a
`ForwardDiff.Chunk`), and the underlying vector used by `ForwardDiff` can be set with the
`x` keyword argument (accepting an `AbstractVector`).
Keyword arguments:

- `chunk` can be used to set the chunk size, an integer or a `ForwardDiff.Chunk`

- `gradient_config_type` can be `nothing` (the default) or a type (eg `Float64`).

The latter preallocates and reuses a `ForwardDiff.GradientConfig` for that type. Note
that **this option is not thread-safe**. You can [`copy`](@ref) the results for
concurrent evaluation:
```julia
∇ℓ1 = ADgradient(:ForwardDiff, ℓ; gradient_config_type = Float64)
∇ℓ2 = copy(∇ℓ1) # you can now use both, in different threads
```
"""
function ADgradient(::Val{:ForwardDiff}, ℓ;
x::Union{Nothing,AbstractVector} = nothing,
chunk::Union{Integer,ForwardDiff.Chunk} = _default_chunk(ℓ),
gradientconfig::ForwardDiff.GradientConfig = _default_gradientconfig(ℓ, chunk, x))
ForwardDiffLogDensity(ℓ, gradientconfig)
gradient_config_type::Union{Nothing,DataType} = nothing)
gradient_config = if gradient_config_type ≡ nothing
nothing
else
T = gradient_config_type
(isconcretetype(T) && (T <: Real)) ||
throw(ArgumentError("gradient_config_type needs to be a concrete subtype of Real."))
_make_gradient_config(T, ℓ, chunk)
end
ForwardDiffLogDensity(ℓ, chunk, gradient_config)
end

function logdensity_and_gradient(fℓ::ForwardDiffLogDensity, x::AbstractVector)
@unpack ℓ, gradientconfig = fℓ
@unpack ℓ, chunk, gradient_config = fℓ
buffer = _diffresults_buffer(x)
if gradient_config ≡ nothing
gradient_config = _make_gradient_config(eltype(x), ℓ, chunk)
end
result = ForwardDiff.gradient!(buffer, Base.Fix1(logdensity, ℓ), x, gradient_config)
_diffresults_extract(result)
end
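
For context, here is a minimal usage sketch of the API introduced by this change; it is not part of the diff. `TestProblem` and its log density are made up for illustration, and the keyword name follows the docstring above.

```julia
using ForwardDiff, LogDensityProblems          # loading ForwardDiff activates the extension
using LogDensityProblemsAD: ADgradient
using LogDensityProblems: logdensity_and_gradient

# a toy log density implementing the LogDensityProblems interface
struct TestProblem end
LogDensityProblems.logdensity(::TestProblem, x) = -sum(abs2, x) / 2
LogDensityProblems.dimension(::TestProblem) = 3
LogDensityProblems.capabilities(::Type{TestProblem}) = LogDensityProblems.LogDensityOrder{0}()

ℓ = TestProblem()

# default: no preallocated GradientConfig, a fresh one is built on every call (thread-safe)
∇ℓ = ADgradient(:ForwardDiff, ℓ)
logdensity_and_gradient(∇ℓ, zeros(3))

# opt into a preallocated config for `Float64` input; faster, but needs `copy` for threads
∇ℓ_pre = ADgradient(:ForwardDiff, ℓ; gradient_config_type = Float64)
logdensity_and_gradient(∇ℓ_pre, zeros(3))
```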
7 changes: 6 additions & 1 deletion src/LogDensityProblemsAD.jl
@@ -11,7 +11,6 @@ using LogDensityProblems: LogDensityOrder

import UnPack


#####
##### AD wrappers --- interface and generic code
#####
@@ -34,6 +33,8 @@ dimension(ℓ::ADGradientWrapper) = dimension(ℓ.ℓ)

Base.parent(ℓ::ADGradientWrapper) = ℓ.ℓ

Base.copy(x::ADGradientWrapper) = x # no-op, except for ForwardDiff

"""
$(SIGNATURES)

@@ -57,6 +58,10 @@ ADgradient(:ForwardDiff, P)
and should mostly be equivalent if the compiler manages to fold the constant.

The function `parent` can be used to retrieve the original argument.

!!! note
With the default options, automatic differentiation preserves thread-safety. See
exceptions and workarounds in the docstring for each backend.
"""
ADgradient(kind::Symbol, P; kwargs...) = ADgradient(Val{kind}(), P; kwargs...)
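
A short sketch of how these generic entry points compose; `ℓ` and `∇ℓ` are assumed to come from a setup like the ForwardDiff example earlier.

```julia
∇ℓ  = ADgradient(:ForwardDiff, ℓ)        # dispatches to ADgradient(Val(:ForwardDiff), ℓ)
∇ℓ′ = ADgradient(Val(:ForwardDiff), ℓ)   # equivalent when the constant folds
parent(∇ℓ) === ℓ                         # recover the original log density
copy(∇ℓ)                                 # no-op here; returns an independent wrapper when
                                         # a backend preallocates buffers (see note above)
```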

4 changes: 2 additions & 2 deletions test/runtests.jl
@@ -115,9 +115,9 @@ end
(test_logdensity(x), test_gradient(x))
end

# Make sure that other types are supported.
# preallocated gradient
x = randexp(Float32, 3)
∇ℓ = ADgradient(:ForwardDiff, ℓ; x=x)
∇ℓ = ADgradient(:ForwardDiff, ℓ; gradient_config_type = Float32)
@test eltype(first(logdensity_and_gradient(∇ℓ, x))) === Float32
@test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x)
@test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅