diff --git a/Project.toml b/Project.toml
index 8e6f00cf..1ae0eb8a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -8,6 +8,8 @@ ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 InplaceOps = "505f98c9-085e-5b2c-8e89-488be7bf1f34"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
@@ -18,10 +20,12 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
 
 [compat]
-AbstractMCMC = "3.2, 4"
+AbstractMCMC = "4.2"
 ArgCheck = "1, 2"
 DocStringExtensions = "0.8, 0.9"
 InplaceOps = "0.3"
+LogDensityProblems = "2"
+LogDensityProblemsAD = "1"
 ProgressMeter = "1"
 Requires = "0.5, 1"
 Setfield = "0.7, 0.8, 1"
diff --git a/README.md b/README.md
index da701fa5..94f43672 100644
--- a/README.md
+++ b/README.md
@@ -50,14 +50,20 @@
 In this section we demonstrate a minimal example of sampling from a multivariate
 
 ```julia
-using AdvancedHMC, Distributions, ForwardDiff
+using AdvancedHMC, ForwardDiff
+using LogDensityProblems
 using LinearAlgebra
 
+# Define the target distribution using the `LogDensityProblem` interface
+struct LogTargetDensity
+    dim::Int
+end
+LogDensityProblems.logdensity(p::LogTargetDensity, θ) = -sum(abs2, θ) / 2 # standard multivariate normal
+LogDensityProblems.dimension(p::LogTargetDensity) = p.dim
+
 # Choose parameter dimensionality and initial parameter value
 D = 10; initial_θ = rand(D)
-
-# Define the target distribution
-ℓπ(θ) = logpdf(MvNormal(zeros(D), I), θ)
+ℓπ = LogTargetDensity(D)
 
 # Set the number of samples to draw and warmup iterations
 n_samples, n_adapts = 2_000, 1_000
diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl
index 9a8b277b..cf327ed2 100644
--- a/src/AdvancedHMC.jl
+++ b/src/AdvancedHMC.jl
@@ -17,7 +17,11 @@ using ArgCheck: @argcheck
 
 using DocStringExtensions
 
+using LogDensityProblems
+using LogDensityProblemsAD: LogDensityProblemsAD
+
 import AbstractMCMC
+using AbstractMCMC: LogDensityModel
 
 import StatsBase: sample
 
@@ -139,9 +143,35 @@ include("sampler.jl")
 export sample
 
 include("abstractmcmc.jl")
-export DifferentiableDensityModel
 
-include("contrib/ad.jl")
+function Hamiltonian(metric::AbstractMetric, ℓ::LogDensityModel)
+    ℓπ = ℓ.logdensity
+
+    # Check we're capable of computing gradients.
+    cap = LogDensityProblems.capabilities(ℓπ)
+    if cap === nothing
+        throw(ArgumentError("The log density function does not support the LogDensityProblems.jl interface"))
+    end
+
+    if cap === LogDensityProblems.LogDensityOrder{0}()
+        throw(ArgumentError("The gradient of the log density function is not defined: Implement `LogDensityProblems.logdensity_and_gradient` or use automatic differentiation by calling `Hamiltonian(metric, model, AD; kwargs...)` where AD is one of the backends supported by LogDensityProblemsAD.jl"))
+    end
+
+    return Hamiltonian(
+        metric,
+        Base.Fix1(LogDensityProblems.logdensity, ℓ.logdensity),
+        Base.Fix1(LogDensityProblems.logdensity_and_gradient, ℓ.logdensity),
+    )
+end
+function Hamiltonian(metric::AbstractMetric, ℓπ::LogDensityModel, kind::Union{Symbol,Val}; kwargs...)
+    ℓ = LogDensityModel(LogDensityProblemsAD.ADgradient(kind, ℓπ.logdensity; kwargs...))
+    return Hamiltonian(metric, ℓ)
+end
+function Hamiltonian(metric::AbstractMetric, ℓπ, kind::Union{Symbol,Val} = Val{:ForwardDiff}(); kwargs...)
+    ℓ = LogDensityModel(LogDensityProblemsAD.ADgradient(kind, ℓπ; kwargs...))
+    return Hamiltonian(metric, ℓ)
+end
+Hamiltonian(metric::AbstractMetric, ℓπ, m::Module; kwargs...) = Hamiltonian(metric, ℓπ, Val(Symbol(m)); kwargs...)
 
 ### Init
 
@@ -153,14 +183,6 @@ function __init__()
         include("contrib/diffeq.jl")
     end
 
-    @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
-        include("contrib/forwarddiff.jl")
-    end
-
-    @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin
-        include("contrib/zygote.jl")
-    end
-
     @require CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" begin
         include("contrib/cuda.jl")
     end
diff --git a/src/abstractmcmc.jl b/src/abstractmcmc.jl
index 037f0ab0..d0adad76 100644
--- a/src/abstractmcmc.jl
+++ b/src/abstractmcmc.jl
@@ -25,38 +25,6 @@ struct HMCSampler{K, M, A} <: AbstractMCMC.AbstractSampler
 end
 HMCSampler(kernel, metric) = HMCSampler(kernel, metric, Adaptation.NoAdaptation())
 
-"""
-    DifferentiableDensityModel(ℓπ, ∂ℓπ∂θ)
-    DifferentiableDensityModel(ℓπ, m::Module)
-
-A `AbstractMCMC.AbstractMCMCModel` representing a differentiable log-density.
-
-If a module `m` is given as the second argument, then `m` is assumed to be an
-automatic-differentiation package and this will be used to compute the gradients.
-
-Note that the module `m` must be imported before usage, e.g.
-```julia
-using Zygote: Zygote
-model = DifferentiableDensityModel(ℓπ, Zygote)
-```
-results in a `model` which will use Zygote.jl as its AD-backend.
-
-# Fields
-$(FIELDS)
-"""
-struct DifferentiableDensityModel{Tlogπ, T∂logπ∂θ} <: AbstractMCMC.AbstractModel
-    "Log-density. Maps `AbstractArray` to value of the log-density."
-    ℓπ::Tlogπ
-    "Gradient of log-density. Returns a tuple of `ℓπ` and the gradient evaluated at the given point."
-    ∂ℓπ∂θ::T∂logπ∂θ
-end
-
-struct DummyMetric <: AbstractMetric end
-function DifferentiableDensityModel(ℓπ, m::Module)
-    h = Hamiltonian(DummyMetric(), ℓπ, m)
-    return DifferentiableDensityModel(h.ℓπ, h.∂ℓπ∂θ)
-end
-
 """
     HMCState
 
@@ -91,7 +59,7 @@ end
 A convenient wrapper around `AbstractMCMC.sample` avoiding explicit construction of [`HMCSampler`](@ref).
 """
 function AbstractMCMC.sample(
-    model::DifferentiableDensityModel,
+    model::LogDensityModel,
     kernel::AbstractMCMCKernel,
     metric::AbstractMetric,
     adaptor::AbstractAdaptor,
@@ -103,7 +71,7 @@
 
 function AbstractMCMC.sample(
     rng::Random.AbstractRNG,
-    model::DifferentiableDensityModel,
+    model::LogDensityModel,
     kernel::AbstractMCMCKernel,
     metric::AbstractMetric,
     adaptor::AbstractAdaptor,
@@ -129,7 +97,7 @@
 
 function AbstractMCMC.sample(
-    model::DifferentiableDensityModel,
+    model::LogDensityModel,
     kernel::AbstractMCMCKernel,
     metric::AbstractMetric,
     adaptor::AbstractAdaptor,
@@ -146,7 +114,7 @@
 
 function AbstractMCMC.sample(
     rng::Random.AbstractRNG,
-    model::DifferentiableDensityModel,
+    model::LogDensityModel,
     kernel::AbstractMCMCKernel,
     metric::AbstractMetric,
     adaptor::AbstractAdaptor,
@@ -175,7 +143,7 @@
 
 function AbstractMCMC.step(
     rng::AbstractRNG,
-    model::DifferentiableDensityModel,
+    model::LogDensityModel,
     spl::HMCSampler;
     init_params = nothing,
     kwargs...
@@ -189,7 +157,7 @@ function AbstractMCMC.step(
     end
 
     # Construct the hamiltonian using the initial metric
-    hamiltonian = Hamiltonian(metric, model.ℓπ, model.∂ℓπ∂θ)
+    hamiltonian = Hamiltonian(metric, model)
 
     # Get an initial sample.
     h, t = AdvancedHMC.sample_init(rng, hamiltonian, init_params)
@@ -203,7 +171,7 @@
 
 function AbstractMCMC.step(
     rng::AbstractRNG,
-    model::DifferentiableDensityModel,
+    model::LogDensityModel,
     spl::HMCSampler,
     state::HMCState;
     nadapts::Int = 0,
@@ -220,7 +188,7 @@ function AbstractMCMC.step(
     metric = state.metric
 
     # Reconstruct hamiltonian.
-    h = Hamiltonian(metric, model.ℓπ, model.∂ℓπ∂θ)
+    h = Hamiltonian(metric, model)
 
     # Make new transition.
     t = transition(rng, h, κ, t_old.z)
diff --git a/src/contrib/ad.jl b/src/contrib/ad.jl
deleted file mode 100644
index 33d6bcc1..00000000
--- a/src/contrib/ad.jl
+++ /dev/null
@@ -1,18 +0,0 @@
-const ADSUPPORT = (:ForwardDiff, :Zygote)
-const ADAVAILABLE = Dict{Module, Function}()
-
-Hamiltonian(metric::AbstractMetric, ℓπ, m::Module) = ADAVAILABLE[m](metric, ℓπ)
-
-function Hamiltonian(metric::AbstractMetric, ℓπ)
-    available = collect(keys(ADAVAILABLE))
-    if length(available) == 0
-        support_list_str = join(ADSUPPORT, " or ")
-        error("MethodError: no method matching Hamiltonian(metric::AbstractMetric, ℓπ) because no backend is loaded. Please load an AD package ($support_list_str) first.")
-    elseif length(available) > 1
-        available_list_str = join(keys(ADAVAILABLE), " and ")
-        constructor_list_str = join(map(m -> "Hamiltonian(metric, ℓπ, $m)", available), "\n ")
-        error("MethodError: Hamiltonian(metric::AbstractMetric, ℓπ) is ambiguous because multiple AD pakcages are available ($available_list_str). Please use AD explictly. Candidates:\n $constructor_list_str")
-    else
-        return Hamiltonian(metric, ℓπ, first(available))
-    end
-end
diff --git a/src/contrib/forwarddiff.jl b/src/contrib/forwarddiff.jl
deleted file mode 100644
index 122dbd7a..00000000
--- a/src/contrib/forwarddiff.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-import .ForwardDiff, .ForwardDiff.DiffResults
-
-function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractVector)
-    res = DiffResults.GradientResult(θ)
-    ForwardDiff.gradient!(res, ℓπ, θ)
-    return DiffResults.value(res), DiffResults.gradient(res)
-end
-
-# Implementation 1
-function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
-    jacob = similar(θ)
-    res = DiffResults.JacobianResult(similar(θ, size(θ, 2)), jacob)
-    ForwardDiff.jacobian!(res, ℓπ, θ)
-    jacob_full = DiffResults.jacobian(res)
-
-    d, n = size(jacob)
-    for i in 1:n
-        jacob[:,i] = jacob_full[i,1+(i-1)*d:i*d]
-    end
-    return DiffResults.value(res), jacob
-end
-
-# Implementation 2
-# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
-#     local densities
-#     f(x) = (densities = ℓπ(x); sum(densities))
-#     res = DiffResults.GradientResult(θ)
-#     ForwardDiff.gradient!(res, f, θ)
-#     return ForwardDiff.value.(densities), DiffResults.gradient(res)
-# end
-
-# Implementation 3
-# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
-#     v = similar(θ, size(θ, 2))
-#     g = similar(θ)
-#     for i in 1:size(θ, 2)
-#         res = GradientResult(θ[:,i])
-#         gradient!(res, ℓπ, θ[:,i])
-#         v[i] = value(res)
-#         g[:,i] = gradient(res)
-#     end
-#     return v, g
-# end
-
-function ForwardDiffHamiltonian(metric::AbstractMetric, ℓπ)
-    ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_forwarddiff(ℓπ, θ)
-    return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ)
-end
-
-ADAVAILABLE[ForwardDiff] = ForwardDiffHamiltonian
diff --git a/src/contrib/zygote.jl b/src/contrib/zygote.jl
deleted file mode 100644
index d89511d3..00000000
--- a/src/contrib/zygote.jl
+++ /dev/null
@@ -1,18 +0,0 @@
-import .Zygote
-
-function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractVector)
-    res, back = Zygote.pullback(ℓπ, θ)
-    return res, first(back(Zygote.sensitivity(res)))
-end
-
-function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractMatrix)
-    res, back = Zygote.pullback(ℓπ, θ)
-    return res, first(back(ones(eltype(res), size(res))))
-end
-
-function ZygoteADHamiltonian(metric::AbstractMetric, ℓπ)
-    ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_zygote(ℓπ, θ)
-    return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ)
-end
-
-ADAVAILABLE[Zygote] = ZygoteADHamiltonian
diff --git a/test/Project.toml b/test/Project.toml
index fb0fe35c..e112465f 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -5,8 +5,11 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Comonicon = "863f3e99-da2a-4334-8734-de3dacbe5542"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
@@ -16,3 +19,6 @@ Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[compat]
+LogDensityProblemsAD = "1.1.1"
\ No newline at end of file
diff --git a/test/abstractmcmc.jl b/test/abstractmcmc.jl
index 3ffb2084..76e7c362 100644
--- a/test/abstractmcmc.jl
+++ b/test/abstractmcmc.jl
@@ -10,7 +10,7 @@ include("common.jl")
 
     θ_init = randn(rng, 2)
 
-    model = AdvancedHMC.DifferentiableDensityModel(ℓπ_gdemo, ForwardDiff)
+    model = AdvancedHMC.LogDensityModel(LogDensityProblemsAD.ADgradient(Val(:ForwardDiff), ℓπ_gdemo))
     init_eps = Leapfrog(1e-3)
     κ = NUTS(init_eps)
     metric = DiagEuclideanMetric(2)
diff --git a/test/adaptation.jl b/test/adaptation.jl
index 91dc941e..3aad5982 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -124,8 +124,7 @@ end
         σ² = 1 .+ abs.(randn(D))
 
         # Diagonal Gaussian
-        target = MvNormal(zeros(D), Diagonal(σ²))
-        ℓπ = θ -> logpdf(target, θ)
+        ℓπ = LogDensityDistribution(MvNormal(Diagonal(σ²)))
 
         res = runnuts(ℓπ, DiagEuclideanMetric(D))
         @test res.adaptor.pc.var ≈ σ² rtol=0.2
@@ -142,8 +141,7 @@
         Σ = m' * m
 
         # Correlated Gaussian
-        target = MvNormal(zeros(D), Σ)
-        ℓπ = θ -> logpdf(target, θ)
+        ℓπ = LogDensityDistribution(MvNormal(Σ))
 
         res = runnuts(ℓπ, DiagEuclideanMetric(D))
         @test res.adaptor.pc.var ≈ diag(Σ) rtol=0.2
@@ -156,8 +154,7 @@
     end
 
     @testset "Initialisation adaptor by metric" begin
-        target = MvNormal(zeros(D), I)
-        ℓπ = θ -> logpdf(target, θ)
+        ℓπ = LogDensityDistribution(MvNormal(Eye(D)))
 
         mass_init = fill(0.5, D)
         res = runnuts(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1)
diff --git a/test/common.jl b/test/common.jl
index e5382732..f516eecf 100644
--- a/test/common.jl
+++ b/test/common.jl
@@ -11,6 +11,21 @@ const DETATOL = 1e-3 * D * TRATIO
 # Random tolerance
 const RNDATOL = 5e-2 * D * TRATIO * 2
 
+# Convenience
+# TODO: Remove this if made available in some other package.
+using Distributions: Distributions
+using Bijectors: Bijectors
+struct LogDensityDistribution{D<:Distributions.Distribution}
+    dist::D
+end
+LogDensityProblems.dimension(d::LogDensityDistribution) = length(d.dist)
+function LogDensityProblems.logdensity(ld::LogDensityDistribution, y)
+    d = ld.dist
+    b = Bijectors.inverse(Bijectors.bijector(d))
+    x, logjac = Bijectors.with_logabsdet_jacobian(b, y)
+    return logpdf(d, x) + logjac
+end
+
 # Hand-coded multivariate Gaussian
 struct Gaussian{Tm, Ts}
@@ -24,6 +39,9 @@ end
 
 ℓπ_gaussian(m, s, x) = ℓπ_gaussian(m .- x, s)
 
+LogDensityProblems.dimension(g::Gaussian) = length(g.m)
+LogDensityProblems.logdensity(g::Gaussian, x) = ℓπ_gaussian(g.m, g.s, x)
+
 function ∇ℓπ_gaussianl(m, s, x)
     g = m .- x
     v = ℓπ_gaussian(g, s)
@@ -76,6 +94,10 @@ function ℓπ_gdemo(θ)
     return logprior + loglikelihood
 end
 
+# Make compat with `LogDensityProblems`.
+LogDensityProblems.dimension(::typeof(ℓπ_gdemo)) = 2
+LogDensityProblems.logdensity(::typeof(ℓπ_gdemo), θ) = ℓπ_gdemo(θ)
+
 test_show(x) = test_show(s -> length(s) > 0, x)
 function test_show(pred, x)
     io = IOBuffer(; append = true)
diff --git a/test/demo.jl b/test/demo.jl
index a38eaf25..e1554189 100644
--- a/test/demo.jl
+++ b/test/demo.jl
@@ -1,13 +1,19 @@
 using ReTest
-using AdvancedHMC, Distributions, ForwardDiff, ComponentArrays
+using AdvancedHMC, Distributions, ForwardDiff, ComponentArrays, AbstractMCMC
 using LinearAlgebra
 
 @testset "Demo" begin
-    # Choose parameter dimensionality and initial parameter value
-    D = 10; initial_θ = rand(D)
+    # Define the target distribution using the `LogDensityProblem` interface
+    struct DemoProblem
+        dim::Int
+    end
+    LogDensityProblems.logdensity(p::DemoProblem, θ) = logpdf(MvNormal(zeros(p.dim), I), θ)
+    LogDensityProblems.dimension(p::DemoProblem) = p.dim
 
-    # Define the target distribution
-    ℓπ(θ) = logpdf(MvNormal(zeros(D), I), θ)
+    # Choose parameter dimensionality and initial parameter value
+    D = 10
+    initial_θ = rand(D)
+    ℓπ = DemoProblem(D)
 
     # Set the number of samples to draw and warmup iterations
     n_samples, n_adapts = 2_000, 1_000
@@ -24,7 +30,7 @@ using LinearAlgebra
     # - multinomial sampling scheme,
     # - generalised No-U-Turn criteria, and
     # - windowed adaption for step-size and diagonal mass matrix
-    proposal = NUTS{MultinomialTS, GeneralisedNoUTurn}(integrator)
+    proposal = NUTS{MultinomialTS,GeneralisedNoUTurn}(integrator)
     adaptor = StanHMCAdaptor(MassMatrixAdaptor(metric), StepSizeAdaptor(0.8, integrator))
 
     # Run the sampler to draw samples from the specified Gaussian, where
@@ -42,15 +48,20 @@ end
 
 @testset "Demo ComponentsArrays" begin
     # target distribution parametrized by ComponentsArray
-    p1 = ComponentVector(μ = 2.0, σ=1)
-    ℓπ(p::ComponentArray) = -(1 .- p.μ)^2/(p.σ)^2
+    p1 = ComponentVector(μ=2.0, σ=1)
+    struct DemoProblemComponentArrays end
+    function LogDensityProblems.logdensity(::DemoProblemComponentArrays, p::ComponentArray)
+        return -((1 - p.μ) / p.σ)^2
+    end
+    LogDensityProblems.dimension(::DemoProblemComponentArrays) = 2
+    ℓπ = DemoProblemComponentArrays()
 
     # Define a Hamiltonian system
     D = length(p1) # number of parameters
     metric = DiagEuclideanMetric(D)
 
     # choose AD framework or provide a function manually
-    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
+    hamiltonian = Hamiltonian(metric, ℓπ, Val(:ForwardDiff); x=p1)
 
     # Define a leapfrog solver, with initial step size chosen heuristically
     initial_ϵ = find_good_stepsize(hamiltonian, p1)
@@ -70,5 +81,4 @@ end
     labels = ComponentArrays.labels(samples[1])
     @test "μ" ∈ labels
     @test "σ" ∈ labels
-
 end
diff --git a/test/integrator.jl b/test/integrator.jl
index fe1279e5..ee350aa6 100644
--- a/test/integrator.jl
+++ b/test/integrator.jl
@@ -106,8 +106,13 @@ using Statistics: mean
     end
 
-    @testset "Analyitcal solution to Eq (2.11) of Neal (2011)" begin
-        negU = q -> -dot(q, q) / 2
+    @testset "Analytical solution to Eq (2.11) of Neal (2011)" begin
+        struct NegU
+            dim::Int
+        end
+        LogDensityProblems.logdensity(d::NegU, x) = -dot(x, x) / 2
+        LogDensityProblems.dimension(d::NegU) = d.dim
+        negU = NegU(1)
 
         ϵ = 0.01
         for lf in [
diff --git a/test/runtests.jl b/test/runtests.jl
index 90a22a0b..df8ff95a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,8 @@
 using Comonicon
+using FillArrays
 using AdvancedHMC: AdvancedHMC
+using LogDensityProblems: LogDensityProblems
+using LogDensityProblemsAD: LogDensityProblemsAD
 
 println("Environment variables for testing")
 println(ENV)
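
Note (not part of the diff): with these changes applied, end-to-end usage looks roughly like the sketch below, assembled from the README and test/demo.jl hunks above. The `LogTargetDensity` struct mirrors the README example; the explicit `capabilities` declaration is an illustrative assumption (it states that only the zeroth-order log density is provided, with gradients supplied by AD) and is not code introduced by this diff.

```julia
using AdvancedHMC, ForwardDiff
using LogDensityProblems
using LinearAlgebra

# Target: standard multivariate normal, implemented via the LogDensityProblems interface
struct LogTargetDensity
    dim::Int
end
LogDensityProblems.logdensity(p::LogTargetDensity, θ) = -sum(abs2, θ) / 2
LogDensityProblems.dimension(p::LogTargetDensity) = p.dim
LogDensityProblems.capabilities(::Type{LogTargetDensity}) = LogDensityProblems.LogDensityOrder{0}()

D = 10; initial_θ = rand(D)
ℓπ = LogTargetDensity(D)

n_samples, n_adapts = 2_000, 1_000

metric = DiagEuclideanMetric(D)
# New constructor added in this diff: wraps ℓπ with LogDensityProblemsAD.ADgradient under the hood
hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)

initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
integrator = Leapfrog(initial_ϵ)
proposal = NUTS{MultinomialTS,GeneralisedNoUTurn}(integrator)
adaptor = StanHMCAdaptor(MassMatrixAdaptor(metric), StepSizeAdaptor(0.8, integrator))

samples, stats = sample(hamiltonian, proposal, initial_θ, n_samples, adaptor, n_adapts; progress=true)
```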
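
A second sketch, for the AbstractMCMC route exercised in test/abstractmcmc.jl: wrap an AD gradient in a `LogDensityModel` and call the `AbstractMCMC.sample` convenience methods updated above. The `ToyTarget` definition and the number-of-samples positional argument after `adaptor` are illustrative assumptions; `init_params` is taken directly from the step signatures in the diff.

```julia
using AdvancedHMC, AbstractMCMC, ForwardDiff
using LogDensityProblems, LogDensityProblemsAD

# Hypothetical 2-dimensional target implementing the LogDensityProblems interface
struct ToyTarget end
LogDensityProblems.logdensity(::ToyTarget, θ) = -sum(abs2, θ) / 2
LogDensityProblems.dimension(::ToyTarget) = 2
LogDensityProblems.capabilities(::Type{ToyTarget}) = LogDensityProblems.LogDensityOrder{0}()

# Wrap with an AD-backed gradient and hand it to AbstractMCMC as a LogDensityModel,
# mirroring test/abstractmcmc.jl above
model = AdvancedHMC.LogDensityModel(LogDensityProblemsAD.ADgradient(Val(:ForwardDiff), ToyTarget()))

metric = DiagEuclideanMetric(2)
integrator = Leapfrog(1e-3)
κ = NUTS(integrator)
adaptor = StanHMCAdaptor(MassMatrixAdaptor(metric), StepSizeAdaptor(0.8, integrator))

# Convenience wrapper updated in src/abstractmcmc.jl; adaptation keywords omitted here
samples = AbstractMCMC.sample(model, κ, metric, adaptor, 1_000; init_params = randn(2))
```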