TuringLang · yebai · Dec 4, 2022 · Sep 11, 2021 · Oct 4, 2021 · Oct 4, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,3 @@
-/Manifest.toml
+Manifest.toml
 *.DS_Store
-*.png
-deprecated
+working_code
diff --git a/Project.toml b/Project.toml
@@ -5,16 +5,19 @@ version = "0.1.1"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
+ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
+ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 
 [compat]
-AbstractMCMC = "3.2"
+AbstractMCMC = "3.2, 4"
+ConcreteStructs = "0.2"
 Distributions = "0.24, 0.25"
+DocStringExtensions = "0.8, 0.9"
+InverseFunctions = "0.1"
+Setfield = "0.7, 0.8, 1"
 julia = "1"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
diff --git a/src/MCMCTempering.jl b/src/MCMCTempering.jl
@@ -4,15 +4,30 @@ import AbstractMCMC
 import Distributions
 import Random
 
+using ProgressLogging: ProgressLogging
+using ConcreteStructs: @concrete
+using Setfield: @set, @set!
+
+using InverseFunctions
+
+using DocStringExtensions
+
 include("adaptation.jl")
-include("tempered.jl")
+include("swapping.jl")
+include("state.jl")
+include("sampler.jl")
+include("sampling.jl")
 include("ladders.jl")
 include("stepping.jl")
 include("model.jl")
-include("swapping.jl")
-include("plotting.jl")
 
-export tempered, TemperedSampler, plot_swaps, plot_ladders, make_tempered_model, get_tempered_loglikelihoods_and_params, make_tempered_loglikelihood, get_params
+export tempered,
+    tempered_sample,
+    TemperedSampler,
+    make_tempered_model,
+    StandardSwap,
+    RandomPermutationSwap,
+    NonReversibleSwap
 
 function AbstractMCMC.bundle_samples(
     ts::Vector,
@@ -22,7 +37,10 @@ function AbstractMCMC.bundle_samples(
     chain_type::Type;
     kwargs...
 )
-    AbstractMCMC.bundle_samples(ts, model, sampler.internal_sampler, state, chain_type; kwargs...)
+    AbstractMCMC.bundle_samples(
+        ts, model, sampler_for_chain(sampler, state, 1), state_for_chain(state, 1), chain_type;
+        kwargs...
+    )
 end
 
 end
diff --git a/src/adaptation.jl b/src/adaptation.jl
@@ -1,54 +1,240 @@
+using Distributions: StatsFuns
 
-struct PolynomialStep
-    η :: Real
-    c :: Real
+@concrete struct PolynomialStep
+    η
+    c
 end
 function get(step::PolynomialStep, k::Real)
-    step.c * (k + 1.) ^ (-step.η)
+    return step.c * (k + 1) ^ (-step.η)
 end
 
+"""
+    Geometric
 
-struct AdaptiveState
-    swap_target_ar :: Real
-    scale          :: Base.RefValue{<:Real}
-    step           :: PolynomialStep
+Specifies a geometric schedule for the inverse temperatures.
+
+See also: [`AdaptiveState`](@ref), [`update_inverse_temperatures`](@ref), and
+[`weight`](@ref).
+"""
+struct Geometric end
+
+defaultscale(::Geometric, Δ) = eltype(Δ)(0.9)
+
+"""
+    InverselyAdditive
+
+Specifies an additive schedule for the temperatures (not _inverse_ temperatures).
+
+See also: [`AdaptiveState`](@ref), [`update_inverse_temperatures`](@ref), and
+[`weight`](@ref).
+"""
+struct InverselyAdditive end
+
+defaultscale(::InverselyAdditive, Δ) = eltype(Δ)(0.9)
+
+struct AdaptiveState{S,T1<:Real,T2<:Real,P<:PolynomialStep}
+    schedule_type::S
+    swap_target_ar::T1
+    scale_unconstrained::T2
+    step::P
+    n::Int
 end
-function AdaptiveState(swap_target::Real, scale::Real, step::PolynomialStep)
-    AdaptiveState(swap_target, Ref(log(scale)), step)
+
+function AdaptiveState(swap_target_ar, scale_unconstrained, step)
+    return AdaptiveState(InverselyAdditive(), swap_target_ar, scale_unconstrained, step)
+end
+
+function AdaptiveState(schedule_type, swap_target_ar, scale_unconstrained, step)
+    return AdaptiveState(schedule_type, swap_target_ar, scale_unconstrained, step, 1)
 end
 
+"""
+    weight(ρ::AdaptiveState{<:Geometric})
+
+Return the weight/scale to be used in the mapping `β[ℓ] ↦ β[ℓ + 1]`.
+
+# Notes
+In Eq. (13) in [^MIAS12] they use the relation
+
+    β[ℓ + 1] = β[ℓ] * w(ρ)
+
+with
+
+    w(ρ) = exp(-exp(ρ))
+
+because we want `w(ρ) ∈ (0, 1)` while `ρ ∈ ℝ`. As an alternative, we use
+`StatsFuns.logistic(ρ)` which is numerically more stable than `exp(-exp(ρ))` and
+leads to less extreme values, i.e. 0 or 1.
+
+This the same approach as mentioned in [^ATCH11].
+
+# References
+[^MIAS12]: Miasojedow, B., Moulines, E., & Vihola, M., Adaptive Parallel Tempering Algorithm, (2012).
+[^ATCH11]: Atchade, Yves F, Roberts, G. O., & Rosenthal, J. S., Towards optimal scaling of metropolis-coupled markov chain monte carlo, Statistics and Computing, 21(4), 555–568 (2011).
+"""
+weight(ρ::AdaptiveState{<:Geometric}) = geometric_weight_constrain(ρ.scale_unconstrained)
+geometric_weight_constrain(x) = StatsFuns.logistic(x)
+geometric_weight_unconstrain(y) = inverse(StatsFuns.logistic)(y)
+
+"""
+    weight(ρ::AdaptiveState{<:InverselyAdditive})
+
+Return the weight/scale to be used in the mapping `β[ℓ] ↦ β[ℓ + 1]`.
+"""
+weight(ρ::AdaptiveState{<:InverselyAdditive}) = inversely_additive_weight_constrain(ρ.scale_unconstrained)
+inversely_additive_weight_constrain(x) = exp(-x)
+inversely_additive_weight_unconstrain(y) = -log(y)
+
+function init_adaptation(
+    schedule::InverselyAdditive,
+    Δ::Vector{<:Real},
+    swap_target::Real,
+    scale::Real,
+    η::Real,
+    stepsize::Real
+)
+    N_it = length(Δ)
+    step = PolynomialStep(η, stepsize)
+    # TODO: One common state or one per temperature?
+    # ρs = [
+    #     AdaptiveState(schedule, swap_target, inversely_additive_weight_unconstrain(scale), step)
+    #     for _ in 1:(N_it - 1)
+    # ]
+    ρs = AdaptiveState(schedule, swap_target, log(scale), step)
+    return ρs
+end
 
 function init_adaptation(
+    schedule::Geometric,
     Δ::Vector{<:Real},
     swap_target::Real,
     scale::Real,
-    γ::Real
+    η::Real,
+    stepsize::Real
 )
-    Nt = length(Δ)
-    step = PolynomialStep(γ, Nt - 1)
-    Ρ = [AdaptiveState(swap_target, scale, step) for _ in 1:(Nt - 1)]
-    return Ρ
+    N_it = length(Δ)
+    step = PolynomialStep(η, stepsize)
+    # TODO: One common state or one per temperature?
+    # ρs = [
+    #     AdaptiveState(schedule, swap_target, geometric_weight_unconstrain(scale), step)
+    #     for _ in 1:(N_it - 1)
+    # ]
+    ρs = AdaptiveState(schedule, swap_target, geometric_weight_unconstrain(scale), step)
+    return ρs
 end
 
 
-function rhos_to_ladder(Ρ, Δ)
-    β′ = Δ[1]
-    for i in 1:length(Ρ)
-        β′ += exp(Ρ[i].scale[])
-        Δ[i + 1] = Δ[1] / β′
+"""
+    adapt!!(ρ::AdaptiveState, swap_ar)
+
+Return updated `ρ` based on swap acceptance ratio `swap_ar`.
+
+See [`update_inverse_temperatures`](@ref) to see how we compute the resulting
+inverse temperatures from the adapted state `ρ`.
+"""
+function adapt!!(ρ::AdaptiveState, swap_ar)
+    swap_diff = ρ.swap_target_ar - swap_ar
+    γ = get(ρ.step, ρ.n)
+    ρ_new = @set ρ.scale_unconstrained = ρ.scale_unconstrained + γ * swap_diff
+    return @set ρ_new.n += 1
+end
+
+"""
+    adapt!!(ρ::AdaptiveState, Δ, k, swap_ar)
+    adapt!!(ρ::AbstractVector{<:AdaptiveState}, Δ, k, swap_ar)
+
+Return adapted state(s) given that we just proposed a swap of the `k`-th
+and `(k + 1)`-th temperatures with acceptance ratio `swap_ar`.
+"""
+adapt!!(ρ::AdaptiveState, Δ, k, swap_ar) = adapt!!(ρ, swap_ar)
+function adapt!!(ρs::AbstractVector{<:AdaptiveState}, Δ, k, swap_ar)
+    ρs[k] = adapt!!(ρs[k], swap_ar)
+    return ρs
+end
+
+"""
+    update_inverse_temperatures(ρ::AdaptiveState{<:Geometric}, Δ_current)
+    update_inverse_temperatures(ρ::AbstractVector{<:AdaptiveState{<:Geometric}}, Δ_current)
+
+Return updated inverse temperatures computed from adaptation state(s) and `Δ_current`.
+
+This update is similar to Eq. (13) in [^MIAS12], with the only possible deviation
+being how we compute the scaling factor from `ρ`: see [`weight`](@ref) for information.
+
+If `ρ` is a `AbstractVector`, then it should be of length `length(Δ_current) - 1`,
+with `ρ[k]` corresponding to the adaptation state for the `k`-th inverse temperature.
+
+# References
+[^MIAS12]: Miasojedow, B., Moulines, E., & Vihola, M., Adaptive Parallel Tempering Algorithm, (2012).
+"""
+function update_inverse_temperatures(ρ::AdaptiveState{<:Geometric}, Δ_current)
+    Δ = similar(Δ_current)
+    β₀ = Δ_current[1]
+    Δ[1] = β₀
+
+    β = inv(β₀)
+    for ℓ in 1:length(Δ) - 1
+        # TODO: Is it worth it to do this on log-scale instead?
+        β *= weight(ρ)
+        @inbounds Δ[ℓ + 1] = β
     end
     return Δ
 end
 
+function update_inverse_temperatures(ρs::AbstractVector{<:AdaptiveState{<:Geometric}}, Δ_current)
+    Δ = similar(Δ_current)
+    N = length(Δ)
+    @assert length(ρs) ≥ N - 1 "number of adaptive states < number of temperatures"
+
+    β₀ = Δ_current[1]
+    Δ[1] = β₀
 
-function adapt_rho(ρ::AdaptiveState, swap_ar, n)
-    swap_diff = swap_ar - ρ.swap_target_ar
-    γ = get(ρ.step, n)
-    return γ * swap_diff
+    β = β₀
+    for ℓ in 1:N - 1
+        # TODO: Is it worth it to do this on log-scale instead?
+        β *= weight(ρs[ℓ])
+        @inbounds Δ[ℓ + 1] = β
+    end
+    return Δ
 end
 
+"""
+    update_inverse_temperatures(ρ::AdaptiveState{<:InverselyAdditive}, Δ_current)
+    update_inverse_temperatures(ρ::AbstractVector{<:AdaptiveState{<:InverselyAdditive}}, Δ_current)
+
+Return updated inverse temperatures computed from adaptation state(s) and `Δ_current`.
+
+This update increments the temperature (not _inverse_ temperature) by a positive constant,
+which is adapted by `ρ`.
+
+If `ρ` is a `AbstractVector`, then it should be of length `length(Δ_current) - 1`,
+with `ρ[k]` corresponding to the adaptation state for the `k`-th inverse temperature.
+"""
+function update_inverse_temperatures(ρ::AdaptiveState{<:InverselyAdditive}, Δ_current)
+    Δ = similar(Δ_current)
+    β₀ = Δ_current[1]
+    Δ[1] = β₀
 
-function adapt_ladder(Ρ, Δ, k, swap_ar, n)
-    Ρ[k].scale[] += adapt_rho(Ρ[k], swap_ar, n)
-    return Ρ, rhos_to_ladder(Ρ, Δ)
-end
+    T = inv(β₀)
+    for ℓ in 1:length(Δ) - 1
+        T += weight(ρ)
+        @inbounds Δ[ℓ + 1] = inv(T)
+    end
+    return Δ
+end
+
+function update_inverse_temperatures(ρs::AbstractVector{<:AdaptiveState{<:InverselyAdditive}}, Δ_current)
+    Δ = similar(Δ_current)
+    N = length(Δ)
+    @assert length(ρs) ≥ N - 1 "number of adaptive states < number of temperatures"
+
+    β₀ = Δ_current[1]
+    Δ[1] = β₀
+
+    T = inv(β₀)
+    for ℓ in 1:N - 1
+        T += weight(ρs[ℓ])
+        @inbounds Δ[ℓ + 1] = inv(T)
+    end
+    return Δ
+end