From 5c7c614601cfc71670baa903f916b2f3cc1cbb45 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 10:33:00 +0200 Subject: [PATCH 01/15] adding hillestimator --- README.md | 3 +++ src/ExtremeStats.jl | 16 ++++++------ src/plotrecipes/return_levels.jl | 30 +++++++++++------------ src/stats.jl | 42 +++++++++++++++++++++++--------- 4 files changed, 56 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 16a2af0..e84701c 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,9 @@ returnlevels(xs) # mean excess with previous k values meanexcess(xs, k) + +# Hill estimator for the tail index using the top k largest values +hillestimator(xs, k) ``` ## References diff --git a/src/ExtremeStats.jl b/src/ExtremeStats.jl index 9d828d7..4c66fc4 100644 --- a/src/ExtremeStats.jl +++ b/src/ExtremeStats.jl @@ -21,12 +21,12 @@ include("plotrecipes/mean_excess.jl") include("plotrecipes/pareto_quantile.jl") export - # maxima types - BlockMaxima, - PeakOverThreshold, - - # statistics - returnlevels, - meanexcess - + # maxima types + BlockMaxima, + PeakOverThreshold, + + # statistics + returnlevels, + meanexcess +hill_estimator end diff --git a/src/plotrecipes/return_levels.jl b/src/plotrecipes/return_levels.jl index 3cb1875..49d85f2 100644 --- a/src/plotrecipes/return_levels.jl +++ b/src/plotrecipes/return_levels.jl @@ -5,22 +5,22 @@ @userplot ReturnPlot @recipe function f(rp::ReturnPlot) - # get user input - obj = rp.args[1] + # get user input + obj = rp.args[1] - if obj isa AbstractVector - seriestype --> :scatter - δt, ms = returnlevels(obj) - elseif obj isa GeneralizedExtremeValue - seriestype --> :path - mmin, mmax = rp.args[2:3] - δt, ms = returnlevels(obj, mmin, mmax) - end + if obj isa AbstractVector + seriestype --> :scatter + δt, ms = returnlevels(obj) + elseif obj isa GeneralizedExtremeValue + seriestype --> :path + mmin, mmax = rp.args[2:3] + δt, ms = returnlevels(obj, mmin, mmax) + end - xscale --> :log10 - xguide --> "return period" - yguide --> "return level" - label --> "return plot" + xscale --> :log10 + xguide --> "return period" + yguide --> "return level" + label --> "return plot" - δt, ms + δt, ms end diff --git a/src/stats.jl b/src/stats.jl index 303d91c..73a23e6 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -8,12 +8,12 @@ Return periods and levels of data `xs`. """ function returnlevels(xs::AbstractVector) - ms = sort(xs) - n = length(ms) - p = (1:n) ./ (n + 1) - δt = 1 ./ (1 .- p) + ms = sort(xs) + n = length(ms) + p = (1:n) ./ (n + 1) + δt = 1 ./ (1 .- p) - δt, ms + δt, ms end """ @@ -23,12 +23,12 @@ Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ function returnlevels(gev::GeneralizedExtremeValue, - mmin::Real, mmax::Real; - nlevels::Int=50) - ms = linspace(mmin, mmax, nlevels) - δt = 1 ./ (1 .- cdf.(gev, ms)) + mmin::Real, mmax::Real; + nlevels::Int=50) + ms = linspace(mmin, mmax, nlevels) + δt = 1 ./ (1 .- cdf.(gev, ms)) - δt, ms + δt, ms end @@ -40,6 +40,24 @@ Return mean excess of the data `xs` using previous `k` values. meanexcess(xs::AbstractVector, k::Int) = meanexcess(xs, [k])[1] function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) - ys = sort(xs, rev=true) - [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] + ys = sort(xs, rev=true) + [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] +end + +""" + hillestimator(data, k) + +Return the Hill estimator of the tail index for the data `data` using the top `k` largest values. +""" +function hillestimator(data::AbstractVector, k::Int) + sorted_data = sort(data, rev=true) + + if k >= length(sorted_data) + error("k must be smaller than the number of data points") + end + + sum_log_ratios = sum(log(sorted_data[i]) - log(sorted_data[k+1]) for i in 1:k) + hill_estimate = sum_log_ratios / k + + return hill_estimate end From da8b16f42988bb880a9359057d41dd6889dcd504 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 16:37:36 +0200 Subject: [PATCH 02/15] adding tests --- src/ExtremeStats.jl | 12 +++---- src/fitting.jl | 50 ++++++++++++++---------------- src/maxima.jl | 4 +-- src/plotrecipes/pareto_quantile.jl | 2 +- src/plotrecipes/return_levels.jl | 30 +++++++++--------- src/stats.jl | 39 +++++++++++------------ 6 files changed, 65 insertions(+), 72 deletions(-) diff --git a/src/ExtremeStats.jl b/src/ExtremeStats.jl index 4c66fc4..bfbaf45 100644 --- a/src/ExtremeStats.jl +++ b/src/ExtremeStats.jl @@ -21,12 +21,12 @@ include("plotrecipes/mean_excess.jl") include("plotrecipes/pareto_quantile.jl") export - # maxima types - BlockMaxima, - PeakOverThreshold, + # maxima types + BlockMaxima, + PeakOverThreshold, - # statistics - returnlevels, - meanexcess + # statistics + returnlevels, + meanexcess hill_estimator end diff --git a/src/fitting.jl b/src/fitting.jl index e4cedbc..63f11e3 100644 --- a/src/fitting.jl +++ b/src/fitting.jl @@ -5,16 +5,16 @@ """ log_gpd_pdf(arg, μ, σ, ξ) -Log density function of the generalized Pareto distribution, +Log density function of the generalized Pareto distribution, with an expansion with ξ near zero. """ function log_gpd_pdf(_x, μ, σ, ξ) - x = (_x-μ)/σ + x = (_x - μ) / σ expn = if abs(ξ) < 1e-5 # expansion for ξ near zero. - -x*(ξ+1) + (x^2)*ξ*(ξ+1)/2 - (x^3)*(ξ^2)*(ξ+1)/3 + (x^4)*(ξ^3)*(ξ+1)/4 + -x * (ξ + 1) + (x^2) * ξ * (ξ + 1) / 2 - (x^3) * (ξ^2) * (ξ + 1) / 3 + (x^4) * (ξ^3) * (ξ + 1) / 4 else - (-(ξ+1)/ξ)*log(max(0, 1 + x*ξ)) + (-(ξ + 1) / ξ) * log(max(0, 1 + x * ξ)) end expn - log(σ) end @@ -22,18 +22,18 @@ end """ log_gev_pdf(arg, μ, σ, ξ) -Log density function of the generalized extreme value distribution, +Log density function of the generalized extreme value distribution, with an expansion with ξ near zero. """ function log_gev_pdf(_x, μ, σ, ξ) - x = (_x-μ)/σ + x = (_x - μ) / σ tx = if abs(ξ) < 1e-10 # this cutoff is _not_ the same as for log_gpd_pdf. # expansion near zero. - -x + (x^2)*ξ/2 - (x^3)*(ξ^2)/3 + (x^4)*(ξ^3)/4 + -x + (x^2) * ξ / 2 - (x^3) * (ξ^2) / 3 + (x^4) * (ξ^3) / 4 else - (-1/ξ)*log(max(0, 1 + x*ξ)) + (-1 / ξ) * log(max(0, 1 + x * ξ)) end - (ξ+1)*tx - exp(tx) - log(σ) + (ξ + 1) * tx - exp(tx) - log(σ) end """ @@ -42,23 +42,21 @@ end Fit generalized extreme value distribution `gev` to block maxima `bm` with constrained maximum likelihood estimation. """ -function fit_mle(::Type{GeneralizedExtremeValue}, bm::BlockMaxima; - init=(μ=0.0, σ=1.0, ξ=0.05)) +function fit_mle(::Type{GeneralizedExtremeValue}, bm::BlockMaxima; init=(μ=0.0, σ=1.0, ξ=0.05)) # retrieve maxima values x = collect(bm) n = length(x) # set up the problem - mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level"=>0, - "sb"=>"yes", "max_iter"=>250)) - @variable(mle, μ, start=init.μ) - @variable(mle, σ, start=init.σ) - @variable(mle, ξ, start=init.ξ) + mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0, "sb" => "yes", "max_iter" => 250)) + @variable(mle, μ, start = init.μ) + @variable(mle, σ, start = init.σ) + @variable(mle, ξ, start = init.ξ) JuMP.register(mle, :log_gev_pdf, 4, log_gev_pdf, autodiff=true) @NLobjective(mle, Max, sum(log_gev_pdf(z, μ, σ, ξ) for z in x)) - @NLconstraint(mle, [i=1:n], 1 + ξ*(x[i]-μ)/σ ≥ 0.0) + @NLconstraint(mle, [i = 1:n], 1 + ξ * (x[i] - μ) / σ ≥ 0.0) @constraint(mle, σ ≥ 1e-10) - @constraint(mle, ξ ≥ -1/2) + @constraint(mle, ξ ≥ -1 / 2) # attempt to solve optimize!(mle) @@ -82,28 +80,26 @@ end Fit generalized Pareto distribution `gp` to peak over threshold maxima `pm` with constrained maximum likelihood estimation. """ -function fit_mle(::Type{GeneralizedPareto}, pm::PeakOverThreshold; - init=(σ=1.0, ξ=0.05)) +function fit_mle(::Type{GeneralizedPareto}, pm::PeakOverThreshold; init=(σ=1.0, ξ=0.05)) # retrieve maxima values x = collect(pm) n = length(x) # set up the problem - mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level"=>0, - "sb"=>"yes", "max_iter"=>250)) - @variable(mle, σ, start=init.σ) - @variable(mle, ξ, start=init.ξ) + mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0, "sb" => "yes", "max_iter" => 250)) + @variable(mle, σ, start = init.σ) + @variable(mle, ξ, start = init.ξ) JuMP.register(mle, :log_gpd_pdf, 4, log_gpd_pdf, autodiff=true) @NLobjective(mle, Max, sum(log_gpd_pdf(z, pm.u, σ, ξ) for z in x)) - @NLconstraint(mle, [i=1:n], 1 + ξ*(x[i]-pm.u)/σ ≥ 0.0) + @NLconstraint(mle, [i = 1:n], 1 + ξ * (x[i] - pm.u) / σ ≥ 0.0) @constraint(mle, σ ≥ 1e-10) - @constraint(mle, ξ ≥ -1/2) + @constraint(mle, ξ ≥ -1 / 2) # attempt to solve both cases optimize!(mle) # retrieve solver status - status = termination_status(mle) + status = termination_status(mle) # acceptable statuses OK = (MOI.OPTIMAL, MOI.LOCALLY_SOLVED) diff --git a/src/maxima.jl b/src/maxima.jl index ffa8c1f..c02499f 100644 --- a/src/maxima.jl +++ b/src/maxima.jl @@ -31,8 +31,8 @@ end function Base.collect(m::BlockMaxima) result = Vector{Float64}() - for i in 1:m.n:length(m.x)-m.n+1 - xs = skipmissing(view(m.x, i:i+m.n-1)) + for i in 1:(m.n):(length(m.x) - m.n + 1) + xs = skipmissing(view(m.x, i:(i + m.n - 1))) xmax = -Inf for x in xs diff --git a/src/plotrecipes/pareto_quantile.jl b/src/plotrecipes/pareto_quantile.jl index f83c463..dc582a2 100644 --- a/src/plotrecipes/pareto_quantile.jl +++ b/src/plotrecipes/pareto_quantile.jl @@ -10,7 +10,7 @@ x = sort(data, rev=true) n = length(x) - logp = log.([i/(n+1) for i in 1:n]) + logp = log.([i / (n + 1) for i in 1:n]) logx = log.(x) seriestype --> :scatter diff --git a/src/plotrecipes/return_levels.jl b/src/plotrecipes/return_levels.jl index 49d85f2..69bbd78 100644 --- a/src/plotrecipes/return_levels.jl +++ b/src/plotrecipes/return_levels.jl @@ -5,22 +5,22 @@ @userplot ReturnPlot @recipe function f(rp::ReturnPlot) - # get user input - obj = rp.args[1] + # get user input + obj = rp.args[1] - if obj isa AbstractVector - seriestype --> :scatter - δt, ms = returnlevels(obj) - elseif obj isa GeneralizedExtremeValue - seriestype --> :path - mmin, mmax = rp.args[2:3] - δt, ms = returnlevels(obj, mmin, mmax) - end + if obj isa AbstractVector + seriestype --> :scatter + δt, ms = returnlevels(obj) + elseif obj isa GeneralizedExtremeValue + seriestype --> :path + mmin, mmax = rp.args[2:3] + δt, ms = returnlevels(obj, mmin, mmax) + end - xscale --> :log10 - xguide --> "return period" - yguide --> "return level" - label --> "return plot" + xscale --> :log10 + xguide --> "return period" + yguide --> "return level" + label --> "return plot" - δt, ms + δt, ms end diff --git a/src/stats.jl b/src/stats.jl index 73a23e6..f16a0f8 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -8,12 +8,12 @@ Return periods and levels of data `xs`. """ function returnlevels(xs::AbstractVector) - ms = sort(xs) - n = length(ms) - p = (1:n) ./ (n + 1) - δt = 1 ./ (1 .- p) + ms = sort(xs) + n = length(ms) + p = (1:n) ./ (n + 1) + δt = 1 ./ (1 .- p) - δt, ms + δt, ms end """ @@ -22,16 +22,13 @@ end Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ -function returnlevels(gev::GeneralizedExtremeValue, - mmin::Real, mmax::Real; - nlevels::Int=50) - ms = linspace(mmin, mmax, nlevels) - δt = 1 ./ (1 .- cdf.(gev, ms)) +function returnlevels(gev::GeneralizedExtremeValue, mmin::Real, mmax::Real; nlevels::Int=50) + ms = linspace(mmin, mmax, nlevels) + δt = 1 ./ (1 .- cdf.(gev, ms)) - δt, ms + δt, ms end - """ meanexcess(xs, k) @@ -40,8 +37,8 @@ Return mean excess of the data `xs` using previous `k` values. meanexcess(xs::AbstractVector, k::Int) = meanexcess(xs, [k])[1] function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) - ys = sort(xs, rev=true) - [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] + ys = sort(xs, rev=true) + [mean(log.(ys[1:(k - 1)])) - log(ys[k]) for k in ks] end """ @@ -50,14 +47,14 @@ end Return the Hill estimator of the tail index for the data `data` using the top `k` largest values. """ function hillestimator(data::AbstractVector, k::Int) - sorted_data = sort(data, rev=true) + sorted_data = sort(data, rev=true) - if k >= length(sorted_data) - error("k must be smaller than the number of data points") - end + if k >= length(sorted_data) + error("k must be smaller than the number of data points") + end - sum_log_ratios = sum(log(sorted_data[i]) - log(sorted_data[k+1]) for i in 1:k) - hill_estimate = sum_log_ratios / k + sum_log_ratios = sum(log(sorted_data[i]) - log(sorted_data[k + 1]) for i in 1:k) + hill_estimate = sum_log_ratios / k - return hill_estimate + return hill_estimate end From 9e43ac2a21a77452ef668a5d3b84124f51e28388 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 16:57:57 +0200 Subject: [PATCH 03/15] correcting formating --- src/fitting.jl | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/src/fitting.jl b/src/fitting.jl index 63f11e3..e4cedbc 100644 --- a/src/fitting.jl +++ b/src/fitting.jl @@ -5,16 +5,16 @@ """ log_gpd_pdf(arg, μ, σ, ξ) -Log density function of the generalized Pareto distribution, +Log density function of the generalized Pareto distribution, with an expansion with ξ near zero. """ function log_gpd_pdf(_x, μ, σ, ξ) - x = (_x - μ) / σ + x = (_x-μ)/σ expn = if abs(ξ) < 1e-5 # expansion for ξ near zero. - -x * (ξ + 1) + (x^2) * ξ * (ξ + 1) / 2 - (x^3) * (ξ^2) * (ξ + 1) / 3 + (x^4) * (ξ^3) * (ξ + 1) / 4 + -x*(ξ+1) + (x^2)*ξ*(ξ+1)/2 - (x^3)*(ξ^2)*(ξ+1)/3 + (x^4)*(ξ^3)*(ξ+1)/4 else - (-(ξ + 1) / ξ) * log(max(0, 1 + x * ξ)) + (-(ξ+1)/ξ)*log(max(0, 1 + x*ξ)) end expn - log(σ) end @@ -22,18 +22,18 @@ end """ log_gev_pdf(arg, μ, σ, ξ) -Log density function of the generalized extreme value distribution, +Log density function of the generalized extreme value distribution, with an expansion with ξ near zero. """ function log_gev_pdf(_x, μ, σ, ξ) - x = (_x - μ) / σ + x = (_x-μ)/σ tx = if abs(ξ) < 1e-10 # this cutoff is _not_ the same as for log_gpd_pdf. # expansion near zero. - -x + (x^2) * ξ / 2 - (x^3) * (ξ^2) / 3 + (x^4) * (ξ^3) / 4 + -x + (x^2)*ξ/2 - (x^3)*(ξ^2)/3 + (x^4)*(ξ^3)/4 else - (-1 / ξ) * log(max(0, 1 + x * ξ)) + (-1/ξ)*log(max(0, 1 + x*ξ)) end - (ξ + 1) * tx - exp(tx) - log(σ) + (ξ+1)*tx - exp(tx) - log(σ) end """ @@ -42,21 +42,23 @@ end Fit generalized extreme value distribution `gev` to block maxima `bm` with constrained maximum likelihood estimation. """ -function fit_mle(::Type{GeneralizedExtremeValue}, bm::BlockMaxima; init=(μ=0.0, σ=1.0, ξ=0.05)) +function fit_mle(::Type{GeneralizedExtremeValue}, bm::BlockMaxima; + init=(μ=0.0, σ=1.0, ξ=0.05)) # retrieve maxima values x = collect(bm) n = length(x) # set up the problem - mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0, "sb" => "yes", "max_iter" => 250)) - @variable(mle, μ, start = init.μ) - @variable(mle, σ, start = init.σ) - @variable(mle, ξ, start = init.ξ) + mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level"=>0, + "sb"=>"yes", "max_iter"=>250)) + @variable(mle, μ, start=init.μ) + @variable(mle, σ, start=init.σ) + @variable(mle, ξ, start=init.ξ) JuMP.register(mle, :log_gev_pdf, 4, log_gev_pdf, autodiff=true) @NLobjective(mle, Max, sum(log_gev_pdf(z, μ, σ, ξ) for z in x)) - @NLconstraint(mle, [i = 1:n], 1 + ξ * (x[i] - μ) / σ ≥ 0.0) + @NLconstraint(mle, [i=1:n], 1 + ξ*(x[i]-μ)/σ ≥ 0.0) @constraint(mle, σ ≥ 1e-10) - @constraint(mle, ξ ≥ -1 / 2) + @constraint(mle, ξ ≥ -1/2) # attempt to solve optimize!(mle) @@ -80,26 +82,28 @@ end Fit generalized Pareto distribution `gp` to peak over threshold maxima `pm` with constrained maximum likelihood estimation. """ -function fit_mle(::Type{GeneralizedPareto}, pm::PeakOverThreshold; init=(σ=1.0, ξ=0.05)) +function fit_mle(::Type{GeneralizedPareto}, pm::PeakOverThreshold; + init=(σ=1.0, ξ=0.05)) # retrieve maxima values x = collect(pm) n = length(x) # set up the problem - mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0, "sb" => "yes", "max_iter" => 250)) - @variable(mle, σ, start = init.σ) - @variable(mle, ξ, start = init.ξ) + mle = Model(optimizer_with_attributes(Ipopt.Optimizer, "print_level"=>0, + "sb"=>"yes", "max_iter"=>250)) + @variable(mle, σ, start=init.σ) + @variable(mle, ξ, start=init.ξ) JuMP.register(mle, :log_gpd_pdf, 4, log_gpd_pdf, autodiff=true) @NLobjective(mle, Max, sum(log_gpd_pdf(z, pm.u, σ, ξ) for z in x)) - @NLconstraint(mle, [i = 1:n], 1 + ξ * (x[i] - pm.u) / σ ≥ 0.0) + @NLconstraint(mle, [i=1:n], 1 + ξ*(x[i]-pm.u)/σ ≥ 0.0) @constraint(mle, σ ≥ 1e-10) - @constraint(mle, ξ ≥ -1 / 2) + @constraint(mle, ξ ≥ -1/2) # attempt to solve both cases optimize!(mle) # retrieve solver status - status = termination_status(mle) + status = termination_status(mle) # acceptable statuses OK = (MOI.OPTIMAL, MOI.LOCALLY_SOLVED) From 1b758e325479d9d71bf07ed63f21a4c41f4d0e0b Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 16:58:53 +0200 Subject: [PATCH 04/15] correcting formating --- src/maxima.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/maxima.jl b/src/maxima.jl index c02499f..ffa8c1f 100644 --- a/src/maxima.jl +++ b/src/maxima.jl @@ -31,8 +31,8 @@ end function Base.collect(m::BlockMaxima) result = Vector{Float64}() - for i in 1:(m.n):(length(m.x) - m.n + 1) - xs = skipmissing(view(m.x, i:(i + m.n - 1))) + for i in 1:m.n:length(m.x)-m.n+1 + xs = skipmissing(view(m.x, i:i+m.n-1)) xmax = -Inf for x in xs From a15d618d56ecbf532b055ac709ea11f607205ce7 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 17:00:50 +0200 Subject: [PATCH 05/15] correcting formating --- src/plotrecipes/pareto_quantile.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plotrecipes/pareto_quantile.jl b/src/plotrecipes/pareto_quantile.jl index dc582a2..f83c463 100644 --- a/src/plotrecipes/pareto_quantile.jl +++ b/src/plotrecipes/pareto_quantile.jl @@ -10,7 +10,7 @@ x = sort(data, rev=true) n = length(x) - logp = log.([i / (n + 1) for i in 1:n]) + logp = log.([i/(n+1) for i in 1:n]) logx = log.(x) seriestype --> :scatter From 550cd38cfcad609de2856d9da8a88973d4f7e07f Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 17:02:21 +0200 Subject: [PATCH 06/15] correcting formating --- src/plotrecipes/return_levels.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plotrecipes/return_levels.jl b/src/plotrecipes/return_levels.jl index 69bbd78..7d465a6 100644 --- a/src/plotrecipes/return_levels.jl +++ b/src/plotrecipes/return_levels.jl @@ -20,7 +20,7 @@ xscale --> :log10 xguide --> "return period" yguide --> "return level" - label --> "return plot" + label --> "return plot" - δt, ms + return δt, ms end From 67fcaa6c5f002812e18f14678620b62b37e19e8a Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 17:03:15 +0200 Subject: [PATCH 07/15] correcting formating --- src/plotrecipes/return_levels.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plotrecipes/return_levels.jl b/src/plotrecipes/return_levels.jl index 7d465a6..3cb1875 100644 --- a/src/plotrecipes/return_levels.jl +++ b/src/plotrecipes/return_levels.jl @@ -22,5 +22,5 @@ yguide --> "return level" label --> "return plot" - return δt, ms + δt, ms end From 43bfb0bc4c1c2ef894b561a911a7f0ff723eb770 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 17:06:01 +0200 Subject: [PATCH 08/15] correcting formating --- src/stats.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/stats.jl b/src/stats.jl index f16a0f8..1eba9b1 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -22,13 +22,16 @@ end Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ -function returnlevels(gev::GeneralizedExtremeValue, mmin::Real, mmax::Real; nlevels::Int=50) +function returnlevels(gev::GeneralizedExtremeValue, + mmin::Real, mmax::Real; + nlevels::Int=50) ms = linspace(mmin, mmax, nlevels) δt = 1 ./ (1 .- cdf.(gev, ms)) δt, ms end + """ meanexcess(xs, k) @@ -38,7 +41,7 @@ meanexcess(xs::AbstractVector, k::Int) = meanexcess(xs, [k])[1] function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) ys = sort(xs, rev=true) - [mean(log.(ys[1:(k - 1)])) - log(ys[k]) for k in ks] + [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] end """ From e1baac33ae5a9fb3a5126de900dd63511ec571d4 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:12:22 +0200 Subject: [PATCH 09/15] correcting hillestimator --- src/ExtremeStats.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExtremeStats.jl b/src/ExtremeStats.jl index bfbaf45..fd3bdba 100644 --- a/src/ExtremeStats.jl +++ b/src/ExtremeStats.jl @@ -28,5 +28,5 @@ export # statistics returnlevels, meanexcess -hill_estimator + hill_estimator end From 36cffdf08cb84469429798b927e8f0281edccf4d Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:14:34 +0200 Subject: [PATCH 10/15] correcting names hillestimator --- src/stats.jl | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/stats.jl b/src/stats.jl index 1eba9b1..223ffc4 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -8,12 +8,12 @@ Return periods and levels of data `xs`. """ function returnlevels(xs::AbstractVector) - ms = sort(xs) - n = length(ms) - p = (1:n) ./ (n + 1) - δt = 1 ./ (1 .- p) + ms = sort(xs) + n = length(ms) + p = (1:n) ./ (n + 1) + δt = 1 ./ (1 .- p) - δt, ms + δt, ms end """ @@ -23,12 +23,12 @@ Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ function returnlevels(gev::GeneralizedExtremeValue, - mmin::Real, mmax::Real; - nlevels::Int=50) - ms = linspace(mmin, mmax, nlevels) - δt = 1 ./ (1 .- cdf.(gev, ms)) + mmin::Real, mmax::Real; + nlevels::Int=50) + ms = linspace(mmin, mmax, nlevels) + δt = 1 ./ (1 .- cdf.(gev, ms)) - δt, ms + δt, ms end @@ -40,24 +40,24 @@ Return mean excess of the data `xs` using previous `k` values. meanexcess(xs::AbstractVector, k::Int) = meanexcess(xs, [k])[1] function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) - ys = sort(xs, rev=true) - [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] + ys = sort(xs, rev=true) + [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] end """ hillestimator(data, k) -Return the Hill estimator of the tail index for the data `data` using the top `k` largest values. +Return the Hill estimator of the tail index for the data `xs` using the top `k` largest values. """ -function hillestimator(data::AbstractVector, k::Int) - sorted_data = sort(data, rev=true) +function hillestimator(xs::AbstractVector, k::Int) + sorted_xs = sort(xs, rev=true) - if k >= length(sorted_data) - error("k must be smaller than the number of data points") - end + if k >= length(sorted_xs) + error("k must be smaller than the number of data points") + end - sum_log_ratios = sum(log(sorted_data[i]) - log(sorted_data[k + 1]) for i in 1:k) - hill_estimate = sum_log_ratios / k + sum_log_ratios = sum(log(sorted_xs[i]) - log(sorted_xs[k+1]) for i in 1:k) + hill_estimate = sum_log_ratios / k - return hill_estimate + return hill_estimate end From 8cdceacfce77394ed19912cec1a7233022a01b6d Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:18:51 +0200 Subject: [PATCH 11/15] correcting names hillestimator --- src/stats.jl | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/stats.jl b/src/stats.jl index 223ffc4..4547b4f 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -8,12 +8,12 @@ Return periods and levels of data `xs`. """ function returnlevels(xs::AbstractVector) - ms = sort(xs) - n = length(ms) - p = (1:n) ./ (n + 1) - δt = 1 ./ (1 .- p) + ms = sort(xs) + n = length(ms) + p = (1:n) ./ (n + 1) + δt = 1 ./ (1 .- p) - δt, ms + δt, ms end """ @@ -22,16 +22,13 @@ end Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ -function returnlevels(gev::GeneralizedExtremeValue, - mmin::Real, mmax::Real; - nlevels::Int=50) - ms = linspace(mmin, mmax, nlevels) - δt = 1 ./ (1 .- cdf.(gev, ms)) +function returnlevels(gev::GeneralizedExtremeValue, mmin::Real, mmax::Real; nlevels::Int=50) + ms = linspace(mmin, mmax, nlevels) + δt = 1 ./ (1 .- cdf.(gev, ms)) - δt, ms + δt, ms end - """ meanexcess(xs, k) @@ -40,8 +37,8 @@ Return mean excess of the data `xs` using previous `k` values. meanexcess(xs::AbstractVector, k::Int) = meanexcess(xs, [k])[1] function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) - ys = sort(xs, rev=true) - [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] + ys = sort(xs, rev=true) + [mean(log.(ys[1:(k - 1)])) - log(ys[k]) for k in ks] end """ From 34ef490b4580efa3a7d7208f9423c547980ed213 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:27:35 +0200 Subject: [PATCH 12/15] correcting names hillestimator --- src/stats.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/stats.jl b/src/stats.jl index 4547b4f..0795bff 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -29,6 +29,7 @@ function returnlevels(gev::GeneralizedExtremeValue, mmin::Real, mmax::Real; nlev δt, ms end + """ meanexcess(xs, k) @@ -38,7 +39,7 @@ meanexcess(xs::AbstractVector, k::Int) = meanexcess(xs, [k])[1] function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) ys = sort(xs, rev=true) - [mean(log.(ys[1:(k - 1)])) - log(ys[k]) for k in ks] + [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] end """ From 525ec456cf7cc2db58cd7eabccdf6c816449c3d7 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:29:48 +0200 Subject: [PATCH 13/15] correcting names hillestimator --- src/stats.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/stats.jl b/src/stats.jl index 0795bff..7d52b03 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -22,7 +22,9 @@ end Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ -function returnlevels(gev::GeneralizedExtremeValue, mmin::Real, mmax::Real; nlevels::Int=50) +function returnlevels(gev::GeneralizedExtremeValue, + mmin::Real, mmax::Real; + nlevels::Int=50) ms = linspace(mmin, mmax, nlevels) δt = 1 ./ (1 .- cdf.(gev, ms)) From 4861858cca6ea2494c81d30a361a98fb69cb158d Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:36:42 +0200 Subject: [PATCH 14/15] correcting names hillestimator --- src/stats.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.jl b/src/stats.jl index 7d52b03..d7e8c24 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -23,8 +23,8 @@ Return `nlevels` periods and levels of generalized extreme value distribution `gev` with maxima in the interval `[mmin,mmax]`. """ function returnlevels(gev::GeneralizedExtremeValue, - mmin::Real, mmax::Real; - nlevels::Int=50) + mmin::Real, mmax::Real; + nlevels::Int=50) ms = linspace(mmin, mmax, nlevels) δt = 1 ./ (1 .- cdf.(gev, ms)) From e87bbd765d0c3326c493d40a72f07bd26c3a2063 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Sun, 22 Sep 2024 22:38:08 +0200 Subject: [PATCH 15/15] correcting names hillestimator --- src/ExtremeStats.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ExtremeStats.jl b/src/ExtremeStats.jl index fd3bdba..dc4fd65 100644 --- a/src/ExtremeStats.jl +++ b/src/ExtremeStats.jl @@ -27,6 +27,6 @@ export # statistics returnlevels, - meanexcess - hill_estimator + meanexcess, + hillestimator end