From 6ed4894dc5077eded1a0a776da30aaf8b429c158 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Wed, 22 Nov 2023 15:23:25 +0100 Subject: [PATCH 01/10] First sketch of liouville copulas --- src/LiouvilleCopula.jl | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/LiouvilleCopula.jl diff --git a/src/LiouvilleCopula.jl b/src/LiouvilleCopula.jl new file mode 100644 index 00000000..fae4ad2c --- /dev/null +++ b/src/LiouvilleCopula.jl @@ -0,0 +1,55 @@ +""" + LiouvilleCopula{d, T, TG} + +Fields: + - \alpha::NTuple{d,T} : the weights for each dimension + - G::TG : the generator <: Generator. + +Constructor: + + LiouvilleCopula(α::Vector{T},G::Generator) + +The Liouville copula has a structure that resembles the [`ArchimedeanCopula`](@ref), when you look at it from it's radial-simplex decomposition. + +Recalling that, for C an archimedean copula with generator ``\\phi``, if ``\\mathbf U \\sim C``, then ``U \\equal R \\mathbf S`` for a random vector ``\\mathbf S \\sim`` `Dirichlet(ones(d))`, that is uniformity on the d-variate simplex, and a non-negative random variable ``R`` that is the Williamson d-transform of `\\phi`. + +The Liouville copula has exactly the same expression but using another Dirichlet distribution instead than uniformity over the simplex. + +References: + McNeil, A. J., & Nešlehová, J. (2010). From archimedean to liouville copulas. Journal of Multivariate Analysis, 101(8), 1772-1790. +""" +struct LiouvilleCopula{d,TG} <: Copula{d} + α::NTuple{d,Int} + G::TG + function LiouvilleCopula(α::Vector{Int},G::Generator) + d = length(α) + @assert sum(α) <= max_monotony(G) "The generator you provided is not monotonous enough (the monotony index must be greater than sum(α), and thus this copula does not exists." + return new{d, typeof(G)}(G) + end +end +williamson_dist(C::LiouvilleCopula{d,TG}) where {d,TG} = williamson_dist(C.G,sum(C.α)) + +function Distributions._rand!(rng::Distributions.AbstractRNG, C, x::AbstractVector{T}) + # By default, we use the williamson sampling. + r = Distributions.rand(williamson_dist(C)) + for i in 1:length(C) + x[i] = Distributions.rand(rng,Gamma(C.α[i],1)) + x[i] = x[i] * r + x[i] = Distributions.cdf(williamson_dist(C.G,C.α[i]),x[i]) + end + return x +end +function _cdf(C::CT,u) where {CT<:LiouvilleCopula} + d = length(C) + + sx = sum(Distributions.quantile(williamson_dist(C.G,C.α[i]),u[i]) for i in 1:d) + r = zero(eltype(u)) + for i in CartesianIndices(α) + ii = (ij-1 for ij in Tuple(i)) + sii = sum(ii) + fii = prod(factorial.(ii)) + # This version is really not efficient as derivatives of the generator ϕ⁽ᵏ⁾(C.G, sii, sx) could be pre-computed since many sii will be the same and sx does never change. + r += (-1)^sii / fii * ϕ⁽ᵏ⁾(C.G, sii, sx) * prod(x .^i) + end + return r +end From 6ac091553e9b4c0e1c7ae6b1692d9fe638176db3 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Thu, 23 Nov 2023 18:05:00 +0100 Subject: [PATCH 02/10] Add docs --- docs/src/Liouville.md | 32 +++++++++++++++++++++++++++----- src/LiouvilleCopula.jl | 6 +++--- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/docs/src/Liouville.md b/docs/src/Liouville.md index 9b68c850..a8391593 100644 --- a/docs/src/Liouville.md +++ b/docs/src/Liouville.md @@ -4,17 +4,39 @@ CurrentModule = Copulas ## Liouville Copulas -!!! note "Not merged yet !" - Liouville copulas are comming in this PR : https://github.com/lrnv/Copulas.jl/pull/83, but this is not merged yet. +Archimedean copulas have been widely used in the literature due to their nice decomposition properties and easy parametrization. The interested reader can refer to the extensive literature [hofert2010,hofert2013a,mcneil2010,cossette2017,cossette2018,genest2011a,dibernardino2013a,dibernardino2013a,dibernardino2016,cooray2018,spreeuw2014](@cite) on Archimedean copulas, their nesting extensions and most importantly their estimation. One major drawback of the Archimedean family is that these copulas have exchangeable marginals (i.e., $C(\bm u) = C(\mathrm{p}(\bm u))$ for any permutation $p(\bm u)$ of $u_1,...,u_d$): the dependence structure is symmetric, which might not be a wanted property. However, from the Radial-simplex expression, we can easily extrapolate a little and take for $\bm S$ a non-uniform distribution on the simplex. -Archimedean copulas have been widely used in the literature due to their nice decomposition properties and easy parametrization. The interested reader can refer to the extensive literature [hofert2010,hofert2013a,mcneil2010,cossette2017,cossette2018,genest2011a,dibernardino2013a,dibernardino2013a,dibernardino2016,cooray2018,spreeuw2014](@cite) on Archimedean copulas, their nesting extensions and most importantly their estimation. - -One major drawback of the Archimedean family is that these copulas have exchangeable marginals (i.e., $C(\bm u) = C(\mathrm{p}(\bm u))$ for any permutation $p(\bm u)$ of $u_1,...,u_d$): the dependence structure is symmetric, which might not be a wanted property. However, from the Radial-simplex expression, we can easily extrapolate a little and take for $\bm S$ a non-uniform distribution on the simplex. +**Definition (Liouville Copulas):** For $R$ a positive random variable, $\bm\alpha \in N^d$ and $S \sim \mathrm{Dirichlet}(\bm \alpha)$ a Dirichlet random vector on the simplex, the copula of the random vector $R\bm S$ is called the Liouville copula with radial part $R$ and Simplex parameters $\alpha$. Liouville's copulas share many properties with Archimedean copulas, but are not exchangeable anymore. This is an easy way to produce non-exchangeable dependence structures. See [cote2019](@cite) for a practical use of this property. Note that Dirichlet distributions are constructed as $\bm S = \frac{\bm G}{\langle \bm 1, \bm G\rangle}$, where $\bm G$ is a vector of independent Gamma distributions with unit scale (and potentially different shapes: taking all shapes equal yields the Archimedean case). +There are still a few properties that are interesting for the implementation, but wich requires a few notations. Let's denote by + +$$\phi_{d}$$ + +the inverse Williamson $d$-transform of $R$ for any integer $d$. + +Then the distribution function of the Liouville copula is given by + +$$C(\bm u) = \sum_{\bm i \le \bm \alpha} \frac{(-1)^{\lvert\bm i\rvert}}{\bm i!} * \phi^{(\lvert\bm i\rvert)}(\lvert \bm x \rvert) * \bm x^{\bm i},$$ + +where $x_i = F_{\phi_{\alpha_i}}^{-1}(u_i)$ for all i. + +And for sampling, the same kind of algorithm is availiable. + +!!! note "Complexity of the matter" + Note that here we need access to the quantile functions of all the (inverse) Williamson $\alpha_i$-transform of $\phi_{\lvert\bm\alpha\rvert}$, which itself is the $\lvert\bm\alpha\rvert$-Williamson transfrom of $R$. This complexity adds up quickly, but the package internally uses fast Faa-di-bruno formula at compile time to overcome much of it. + +You can sample and compute the cdf and pdf of *any* Liouville copula, even one for which you provide the generator and/or the Radial distribution yourself. + + +```@docs +LiouvilleCopula +``` + + ```@bibliography Pages = ["Liouville.md"] Canonical = false diff --git a/src/LiouvilleCopula.jl b/src/LiouvilleCopula.jl index fae4ad2c..0534a52b 100644 --- a/src/LiouvilleCopula.jl +++ b/src/LiouvilleCopula.jl @@ -2,8 +2,8 @@ LiouvilleCopula{d, T, TG} Fields: - - \alpha::NTuple{d,T} : the weights for each dimension - - G::TG : the generator <: Generator. +* `α::NTuple{d,T}` : the weights for each dimension +* `G::TG` : the generator <: Generator. Constructor: @@ -16,7 +16,7 @@ Recalling that, for C an archimedean copula with generator ``\\phi``, if ``\\mat The Liouville copula has exactly the same expression but using another Dirichlet distribution instead than uniformity over the simplex. References: - McNeil, A. J., & Nešlehová, J. (2010). From archimedean to liouville copulas. Journal of Multivariate Analysis, 101(8), 1772-1790. +* [mcneil2010](@cite) McNeil, A. J., & Nešlehová, J. (2010). From archimedean to liouville copulas. Journal of Multivariate Analysis, 101(8), 1772-1790. """ struct LiouvilleCopula{d,TG} <: Copula{d} α::NTuple{d,Int} From 7c21f9d26c9b7ce912a220f994bffdd9fbe3fee2 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Thu, 23 Nov 2023 18:08:51 +0100 Subject: [PATCH 03/10] Export --- src/Copulas.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Copulas.jl b/src/Copulas.jl index a5348721..b71c2490 100644 --- a/src/Copulas.jl +++ b/src/Copulas.jl @@ -79,4 +79,7 @@ module Copulas MCopula, WCopula + include("LiouvilleCopula.jl") + export LiouvilleCopula + end From efbca0c4941afc8013ffb243750077dbd81d7c4e Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Thu, 23 Nov 2023 19:10:29 +0100 Subject: [PATCH 04/10] typo --- src/LiouvilleCopula.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LiouvilleCopula.jl b/src/LiouvilleCopula.jl index 0534a52b..323ebe38 100644 --- a/src/LiouvilleCopula.jl +++ b/src/LiouvilleCopula.jl @@ -29,7 +29,7 @@ struct LiouvilleCopula{d,TG} <: Copula{d} end williamson_dist(C::LiouvilleCopula{d,TG}) where {d,TG} = williamson_dist(C.G,sum(C.α)) -function Distributions._rand!(rng::Distributions.AbstractRNG, C, x::AbstractVector{T}) +function Distributions._rand!(rng::Distributions.AbstractRNG, C::CT, x::AbstractVector{T}) where {T,CT<:LiouvilleCopula} # By default, we use the williamson sampling. r = Distributions.rand(williamson_dist(C)) for i in 1:length(C) From 132856560f6e10fa973df7733e1b8616e68e6dd2 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Fri, 24 Nov 2023 11:53:46 +0100 Subject: [PATCH 05/10] Change my affiliaton --- joss/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss/paper.md b/joss/paper.md index 7d23734d..f2dbba5d 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -12,7 +12,7 @@ authors: orcid: 0000-0002-8198-3656 affiliation: 2 # must use "" if you have more than one. affiliations: - - name: SESSTIM, Aix Marseille University, Marseille, France + - name: Aix Marseille Univ, Inserm, IRD, SESSTIM, Sciences Economiques & Sociales de la Santé & Traitement de l’Information Médicale, ISSPAM, Marseille, France. index: 1 - name: Federal University of Pernambuco index: 2 From a2910fc4d84b3490a115294dc6112ed048e38cc5 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Fri, 24 Nov 2023 15:47:35 +0100 Subject: [PATCH 06/10] Adding a few methods to univariate distributions --- .../ClaytonWilliamsonDistribution.jl | 9 +++++++-- .../WilliamsonFromFrailty.jl | 20 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/UnivariateDistribution/ClaytonWilliamsonDistribution.jl b/src/UnivariateDistribution/ClaytonWilliamsonDistribution.jl index 6eaf3586..6de9e06a 100644 --- a/src/UnivariateDistribution/ClaytonWilliamsonDistribution.jl +++ b/src/UnivariateDistribution/ClaytonWilliamsonDistribution.jl @@ -33,7 +33,12 @@ function Distributions.cdf(D::ClaytonWilliamsonDistribution, x::Real) return 1-rez end end -function Distributions.rand(rng::Distributions.AbstractRNG, d::ClaytonWilliamsonDistribution) - u = rand(rng) +function _quantile(d::ClaytonWilliamsonDistribution,u) Roots.find_zero(x -> (Distributions.cdf(d,x) - u), (0.0, Inf)) +end +function Distributions.rand(rng::Distributions.AbstractRNG, d::ClaytonWilliamsonDistribution) + _quantile(d,rand(rng)) +end +function Distributions.quantile(d::ClaytonWilliamsonDistribution,p::Real) + _quantile(d,p) end \ No newline at end of file diff --git a/src/UnivariateDistribution/WilliamsonFromFrailty.jl b/src/UnivariateDistribution/WilliamsonFromFrailty.jl index 19288d7b..a18b0d7c 100644 --- a/src/UnivariateDistribution/WilliamsonFromFrailty.jl +++ b/src/UnivariateDistribution/WilliamsonFromFrailty.jl @@ -9,13 +9,27 @@ function Distributions.rand(rng::Distributions.AbstractRNG, D::WilliamsonFromFra sy = rand(rng,Distributions.Erlang(d)) return sy/f end -function Distributions.cdf(D::WilliamsonFromFrailty{TF,d}, x::Real) where {TF,d} - # how to compute this cdf ? - return 1 - Distributions.expectation( +function _p(D::WilliamsonFromFrailty{TF,d}, x) where {TF,d} + + # Maybe we need to use a taylor serie approximation near x == 0 + # since this is not working for x too small. + # otherwise, we could simply truncate this: + if x < sqrt(eps(eltype(x))) + return one(x) + end + + return Distributions.expectation( e -> Distributions.cdf(D.frailty_dist,e/x), Distributions.Erlang(d) ) end +function Distributions.cdf(D::WilliamsonFromFrailty{TF,d}, x::Real) where {TF,d} + # how to compute this cdf ? + return 1 - _p(D,x) +end +function Distributions.quantile(D::WilliamsonFromFrailty{TF,d},u::Real) where {TF,d} + return Roots.find_zero(x -> _p(D, x) - (1-u), (0, Inf)) +end function Distributions.pdf(D::WilliamsonFromFrailty{TF,d}, x::Real) where {TF,d} # how to compute this cdf ? return 1/x^2 * Distributions.expectation( From c78f945939be529bba9f63119aa7c0774cfd4650 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Fri, 24 Nov 2023 15:56:37 +0100 Subject: [PATCH 07/10] Changes to the implementation --- src/LiouvilleCopula.jl | 26 +++++++++++++++++++------- test/liouville_tests.jl | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 test/liouville_tests.jl diff --git a/src/LiouvilleCopula.jl b/src/LiouvilleCopula.jl index 323ebe38..8ecdc167 100644 --- a/src/LiouvilleCopula.jl +++ b/src/LiouvilleCopula.jl @@ -24,7 +24,7 @@ struct LiouvilleCopula{d,TG} <: Copula{d} function LiouvilleCopula(α::Vector{Int},G::Generator) d = length(α) @assert sum(α) <= max_monotony(G) "The generator you provided is not monotonous enough (the monotony index must be greater than sum(α), and thus this copula does not exists." - return new{d, typeof(G)}(G) + return new{d, typeof(G)}(Tuple(α), G) end end williamson_dist(C::LiouvilleCopula{d,TG}) where {d,TG} = williamson_dist(C.G,sum(C.α)) @@ -33,23 +33,35 @@ function Distributions._rand!(rng::Distributions.AbstractRNG, C::CT, x::Abstract # By default, we use the williamson sampling. r = Distributions.rand(williamson_dist(C)) for i in 1:length(C) - x[i] = Distributions.rand(rng,Gamma(C.α[i],1)) + x[i] = Distributions.rand(rng,Distributions.Gamma(C.α[i],1)) x[i] = x[i] * r x[i] = Distributions.cdf(williamson_dist(C.G,C.α[i]),x[i]) end return x end -function _cdf(C::CT,u) where {CT<:LiouvilleCopula} +function _cdf(C::CT,v) where {CT<:LiouvilleCopula} d = length(C) - - sx = sum(Distributions.quantile(williamson_dist(C.G,C.α[i]),u[i]) for i in 1:d) + if all(v == 1) + return one(eltype(v)) + end + if any(v == 0) + return zero(eltype(v)) + end + u = 1 .- v # Because survival. + Gis = [williamson_dist(C.G,C.α[i]) for i in 1:d] + x = [Distributions.quantile(Gis[i],u[i]) for i in 1:d] + sx = sum(x) r = zero(eltype(u)) - for i in CartesianIndices(α) + for i in CartesianIndices(C.α) ii = (ij-1 for ij in Tuple(i)) sii = sum(ii) fii = prod(factorial.(ii)) # This version is really not efficient as derivatives of the generator ϕ⁽ᵏ⁾(C.G, sii, sx) could be pre-computed since many sii will be the same and sx does never change. - r += (-1)^sii / fii * ϕ⁽ᵏ⁾(C.G, sii, sx) * prod(x .^i) + r += (-1)^sii / fii * ϕ⁽ᵏ⁾(C.G, sii, sx) * prod(x .^ii) end + + # r is the survival function, not the cdf ! + # but if we use 1-u that might work. + return r end diff --git a/test/liouville_tests.jl b/test/liouville_tests.jl new file mode 100644 index 00000000..06fee840 --- /dev/null +++ b/test/liouville_tests.jl @@ -0,0 +1,24 @@ + +@testitem "Test of Trivariate Liouville Copulas" begin + using Random, Distributions + using StableRNGs + using StatsBase + rng = StableRNG(123) + + for G in ( + Copulas.AMHGenerator(0.6), + Copulas.AMHGenerator(-0.3), + Copulas.ClaytonGenerator(-0.05), + Copulas.IndependentGenerator(), + Copulas.GumbelBarnettGenerator(0.7), + Copulas.InvGaussianGenerator(0.05), + Copulas.InvGaussianGenerator(8), + Copulas.WilliamsonGenerator(LogNormal(),20), + ) + C = LiouvilleCopula(rand(1:6,3),G) + u = rand(C,10) + cdf(C,u) + # check margins uniformity ? + @test true + end +end \ No newline at end of file From b4013855c9bab8604b9533bce83b768cfd6936d7 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Fri, 24 Nov 2023 15:57:02 +0100 Subject: [PATCH 08/10] Add functionality to the logarithmic distributons --- src/UnivariateDistribution/Logarithmic.jl | 27 ++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/UnivariateDistribution/Logarithmic.jl b/src/UnivariateDistribution/Logarithmic.jl index ac009355..24097534 100644 --- a/src/UnivariateDistribution/Logarithmic.jl +++ b/src/UnivariateDistribution/Logarithmic.jl @@ -1,6 +1,6 @@ # Corresponds to https://en.wikipedia.org/wiki/Logarithmic_distribution struct Logarithmic{T<:Real} <: Distributions.DiscreteUnivariateDistribution - α::T # in (0,1), the weight of the logarithmic distribution. + α::T # in (0,1), the weight of the logarithmic distribution, 1-p of the wikipedia parameter p. h::T # = ln(1-α), so in (-Inf,0) function Logarithmic(h::T) where {T <: Real} Tf = promote_type(T,Float64) @@ -12,6 +12,31 @@ Base.eltype(::Logarithmic{T}) where T = promote_type(T,Float64) function Distributions.logpdf(d::Logarithmic{T}, x::Real) where T insupport(d, x) ? x*log1p(-d.α) - log(x) - log(-log(d.α)) : log(zero(T)) end +function _my_beta_inc(α,k) + # this computes \int_{0}^(1-α) t^k/(1-t) dt, for α in [0,1] and k a positive integer. + r = zero(α) + pwα = 1 + sgn = 1 + for l in 0:k + r += sgn * binomial(k,l) * (1 - pwα) / l + sgn *= -1 + pwα *= α + end + return r +end +function Distributions.cdf(d::Logarithmic{T}, x::Real) where T + # Needs cdf to be implemented here. + # quand even more quantile? + # ee her e: https://en.wikipedia.org/wiki/Logarithmic_distribution + # return 1 + incbeta(p,k=1,0)/log1p(-p) + k = Int(trunc(x)) + return 1 + _my_beta_inc(d.α,k)/log(d.α) + # SpecialFunctions.beta_inc(k+1,0,p)/log1p(-p) *SpecialFunctions.beta(k+1,0) +end +function Distributions.quantile(d::Logarithmic{T}, p::Real) where T + return Roots.find_zero(x -> Distributions.cdf(d,x) - p, (0, Inf)) +end + function Distributions.rand(rng::Distributions.AbstractRNG, d::Logarithmic{T}) where T # Sample a Log(p) distribution with the algorithms "LK" and "LS" of Kemp (1981). if d.h > -3 From 1a442e32d85da947e52178ba41cd88b6dda356d5 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Fri, 24 Nov 2023 15:57:15 +0100 Subject: [PATCH 09/10] Add tests of marginal uniformity for Liouville copulas --- test/margins_uniformity.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/margins_uniformity.jl b/test/margins_uniformity.jl index 48b6933f..945375f0 100644 --- a/test/margins_uniformity.jl +++ b/test/margins_uniformity.jl @@ -25,7 +25,18 @@ FGMCopula(2,1), MCopula(4), PlackettCopula(2.0), + ArchimedeanCopula(3,WilliamsonGenerator(LogNormal(),4)), # Others ? Yes probably others too ! + (LiouvilleCopula([1,5],G) for G in ( + Copulas.AMHGenerator(0.6), + Copulas.AMHGenerator(-0.3), + Copulas.ClaytonGenerator(-0.05), + Copulas.IndependentGenerator(), + Copulas.GumbelBarnettGenerator(0.7), + Copulas.InvGaussianGenerator(0.05), + Copulas.InvGaussianGenerator(8), + Copulas.WilliamsonGenerator(LogNormal(),6), + ))... ) n = 1000 U = Uniform(0,1) From 7a5eb42da7c02c5e4bc79802826d3efa196fb32b Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Thu, 30 Nov 2023 10:20:52 +0100 Subject: [PATCH 10/10] revert change on paper --- joss/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss/paper.md b/joss/paper.md index f2dbba5d..7d23734d 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -12,7 +12,7 @@ authors: orcid: 0000-0002-8198-3656 affiliation: 2 # must use "" if you have more than one. affiliations: - - name: Aix Marseille Univ, Inserm, IRD, SESSTIM, Sciences Economiques & Sociales de la Santé & Traitement de l’Information Médicale, ISSPAM, Marseille, France. + - name: SESSTIM, Aix Marseille University, Marseille, France index: 1 - name: Federal University of Pernambuco index: 2