diff --git a/README.md b/README.md index 16a2af0..e84701c 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,9 @@ returnlevels(xs) # mean excess with previous k values meanexcess(xs, k) + +# Hill estimator for the tail index using the top k largest values +hillestimator(xs, k) ``` ## References diff --git a/src/ExtremeStats.jl b/src/ExtremeStats.jl index 9d828d7..dc4fd65 100644 --- a/src/ExtremeStats.jl +++ b/src/ExtremeStats.jl @@ -27,6 +27,6 @@ export # statistics returnlevels, - meanexcess - + meanexcess, + hillestimator end diff --git a/src/stats.jl b/src/stats.jl index 303d91c..d7e8c24 100644 --- a/src/stats.jl +++ b/src/stats.jl @@ -43,3 +43,21 @@ function meanexcess(xs::AbstractVector, ks::AbstractVector{Int}) ys = sort(xs, rev=true) [mean(log.(ys[1:k-1])) - log(ys[k]) for k in ks] end + +""" + hillestimator(data, k) + +Return the Hill estimator of the tail index for the data `xs` using the top `k` largest values. +""" +function hillestimator(xs::AbstractVector, k::Int) + sorted_xs = sort(xs, rev=true) + + if k >= length(sorted_xs) + error("k must be smaller than the number of data points") + end + + sum_log_ratios = sum(log(sorted_xs[i]) - log(sorted_xs[k+1]) for i in 1:k) + hill_estimate = sum_log_ratios / k + + return hill_estimate +end