Skip to content

Commit

Permalink
Remove trailing whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
timholy committed Oct 28, 2024
1 parent e890943 commit cb72365
Show file tree
Hide file tree
Showing 21 changed files with 56 additions and 56 deletions.
Binary file modified docs/src/algo/plap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions docs/src/examples/maxlikenlm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,13 @@ parameters = Optim.minimizer(opt)
numerical_hessian = hessian!(func,parameters)

# Let's find the estimated value of σ, rather than log σ, and its standard error
# To do this, we will use the Delta Method: https://en.wikipedia.org/wiki/Delta_method
# To do this, we will use the Delta Method: https://en.wikipedia.org/wiki/Delta_method

# this function exponentiates log σ
function transform(parameters)
parameters[end] = exp(parameters[end])
parameters
end
end

# get the Jacobian of the transformation
J = ForwardDiff.jacobian(transform, parameters)'
Expand Down
50 changes: 25 additions & 25 deletions docs/src/examples/rasch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ using Optim, Random #hide
# assessment data such as student responses to a standardized
# test. Let $X_{pi}$ be the response accuracy of student $p$
# to item $i$ where $X_{pi}=1$ if the item was answered correctly
# and $X_{pi}=0$ otherwise for $p=1,\ldots,n$ and $i=1,\ldots,m$.
# The model for this accuracy is
# and $X_{pi}=0$ otherwise for $p=1,\ldots,n$ and $i=1,\ldots,m$.
# The model for this accuracy is
# ```math
# P(\mathbf{X}_{p}=\mathbf{x}_{p}|\xi_p, \mathbf\epsilon) = \prod_{i=1}^m \dfrac{(\xi_p \epsilon_i)^{x_{pi}}}{1 + \xi_p\epsilon_i}
# ```
# where $\xi_p > 0$ is the latent ability of person $p$ and $\epsilon_i > 0$
# is the difficulty of item $i$.
# where $\xi_p > 0$ is the latent ability of person $p$ and $\epsilon_i > 0$
# is the difficulty of item $i$.

# We simulate data from this model:

Expand All @@ -28,9 +28,9 @@ theta = randn(n)
delta = randn(m)
r = zeros(n)
s = zeros(m)

for i in 1:n
p = exp.(theta[i] .- delta) ./ (1.0 .+ exp.(theta[i] .- delta))
p = exp.(theta[i] .- delta) ./ (1.0 .+ exp.(theta[i] .- delta))
for j in 1:m
if rand() < p[j] ##correct
r[i] += 1
Expand All @@ -40,9 +40,9 @@ for i in 1:n
end
f = [sum(r.==j) for j in 1:m];

# Since the number of parameters increases
# with sample size, standard maximum likelihood will not provide us
# with consistent estimates. Instead we consider the conditional likelihood.
# Since the number of parameters increases
# with sample size, standard maximum likelihood will not provide us
# with consistent estimates. Instead we consider the conditional likelihood.
# It can be shown that the Rasch model is an exponential family model and
# that the sum score $r_p = \sum_{i} x_{pi}$ is the sufficient statistic for
# $\xi_p$. If we condition on the sum score we should be able to eliminate
Expand All @@ -55,7 +55,7 @@ f = [sum(r.==j) for j in 1:m];
# \gamma_r(\mathbf\epsilon) = \sum_{\mathbf{y} : \mathbf{1}^\intercal \mathbf{y} = r} \prod_{j=1}^m \epsilon_j^{y_j}
# ```
# where the sum is over all possible answer configurations that give a sum
# score of $r$. Algorithms to efficiently compute $\gamma$ and its
# score of $r$. Algorithms to efficiently compute $\gamma$ and its
# derivatives are available in the literature (see, e.g., Baker (1996) for a review
# and Biscarri (2018) for a more modern approach).

Expand All @@ -65,7 +65,7 @@ function esf_sum!(S::AbstractArray{T,1}, x::AbstractArray{T,1}) where T <: Real
S[1] = one(T)
@inbounds for col in 1:n
for r in 1:col
row = col - r + 1
row = col - r + 1
S[row+1] = S[row+1] + x[col] * S[row]
end
end
Expand Down Expand Up @@ -114,20 +114,20 @@ function neglogLC(β)
return -s'log.(ϵ) + f'log.(S[2:end])
end

# Parameter estimation is usually performed with respect to the unconstrained parameter
# $\beta_i = -\log{\epsilon_i}$. Taking the derivative with respect to $\beta_i$
# (and applying the chain rule) one obtains
# Parameter estimation is usually performed with respect to the unconstrained parameter
# $\beta_i = -\log{\epsilon_i}$. Taking the derivative with respect to $\beta_i$
# (and applying the chain rule) one obtains
# ```math
# \dfrac{\partial\log L_C(\mathbf\epsilon|\mathbf{r})}{\partial \beta_i} = -s_i + \epsilon_i\sum_{r=1}^m \dfrac{f_r \gamma_{r-1}^{(i)}}{\gamma_r}
# \dfrac{\partial\log L_C(\mathbf\epsilon|\mathbf{r})}{\partial \beta_i} = -s_i + \epsilon_i\sum_{r=1}^m \dfrac{f_r \gamma_{r-1}^{(i)}}{\gamma_r}
# ```
# where $\gamma_{r-1}^{(i)} = \partial \gamma_{r}(\mathbf\epsilon)/\partial\epsilon_i$.
# where $\gamma_{r-1}^{(i)} = \partial \gamma_{r}(\mathbf\epsilon)/\partial\epsilon_i$.

function g!(storage, β)
calculate_common!(β, last_β)
for j in 1:m
storage[j] = s[j]
for l in 1:m
storage[j] -= ϵ[j] * f[l] * (H[j,j,l+1] / S[l+1])
storage[j] -= ϵ[j] * f[l] * (H[j,j,l+1] / S[l+1])
end
end
end
Expand All @@ -147,25 +147,25 @@ function h!(storage, β)
storage[k,j] = 0.0
for l in 1:m
if j == k
storage[j,j] += f[l] * (ϵ[j]*H[j,j,l+1] / S[l+1]) *
storage[j,j] += f[l] * (ϵ[j]*H[j,j,l+1] / S[l+1]) *
(1 - ϵ[j]*H[j,j,l+1] / S[l+1])
elseif k > j
storage[k,j] += ϵ[j] * ϵ[k] * f[l] *
storage[k,j] += ϵ[j] * ϵ[k] * f[l] *
((H[k,j,l] / S[l+1]) - (H[j,j,l+1] * H[k,k,l+1]) / S[l+1] ^ 2)
else #k < j
storage[k,j] += ϵ[j] * ϵ[k] * f[l] *
storage[k,j] += ϵ[j] * ϵ[k] * f[l] *
((H[j,k,l] / S[l+1]) - (H[j,j,l+1] * H[k,k,l+1]) / S[l+1] ^ 2)
end
end
end
end
end

# The estimates of the item parameters are then obtained via standard optimization
# algorithms (either Newton-Raphson or L-BFGS). One last issue is that the model is
# not identifiable (multiplying the $\xi_p$ by a constant and dividing the $\epsilon_i$
# by the same constant results in the same likelihood). Therefore some kind of constraint
# must be imposed when estimating the parameters. Typically either $\beta_1 = 0$ (i.e. $\epsilon_1 = 1$) or
# The estimates of the item parameters are then obtained via standard optimization
# algorithms (either Newton-Raphson or L-BFGS). One last issue is that the model is
# not identifiable (multiplying the $\xi_p$ by a constant and dividing the $\epsilon_i$
# by the same constant results in the same likelihood). Therefore some kind of constraint
# must be imposed when estimating the parameters. Typically either $\beta_1 = 0$ (i.e. $\epsilon_1 = 1$) or
# $\prod_{i=1}^m \epsilon_i = 1$ (which is equivalent to $\sum_{i=1}^m \beta_i = 0$).

con_c!(c, x) = (c[1] = sum(x); c)
Expand Down
2 changes: 1 addition & 1 deletion docs/src/user/algochoice.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ There are two main settings you must choose in Optim: the algorithm and the line

The first choice to be made is that of the order of the method. Zeroth-order methods do not have gradient information, and are very slow to converge, especially in high dimensions. First-order methods do not have access to curvature information and can take a large number of iterations to converge for badly conditioned problems. Second-order methods can converge very quickly once in the vicinity of a minimizer. Of course, this enhanced performance comes at a cost: the objective function has to be differentiable, you have to supply gradients and Hessians, and, for second-order methods, a linear system has to be solved at each step.

If you can provide analytic gradients and Hessians, and the dimension of the problem is not too large, then second order methods are very efficient. The Newton method with trust region is the method of choice.
If you can provide analytic gradients and Hessians, and the dimension of the problem is not too large, then second order methods are very efficient. The Newton method with trust region is the method of choice.

When you do not have an explicit Hessian or when the dimension becomes large enough that the linear solve in the Newton method becomes the bottleneck, first order methods should be preferred. BFGS is a very efficient method, but also requires a linear system solve. LBFGS usually has a performance very close to that of BFGS, and avoids linear system solves (the parameter `m` can be tweaked: increasing it can improve the convergence, at the expense of memory and time spent in linear algebra operations). The conjugate gradient method usually converges less quickly than LBFGS, but requires less memory. Gradient descent should only be used for testing. Acceleration methods are experimental.

Expand Down
2 changes: 1 addition & 1 deletion docs/src/user/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ In addition to the solver, you can alter the behavior of the Optim package by us
* `show_warnings`: Should warnings due to NaNs or Inf be shown? Defaults to `true`.
* `trace_simplex`: Include the full simplex in the trace for `NelderMead`. Defaults to `false`.
* `show_every`: Trace output is printed every `show_every`th iteration.
* `callback`: A function to be called during tracing. A return value of `true` stops the `optimize` call. The callback function is called every `show_every`th iteration. If `store_trace` is false, the argument to the callback is of the type [`OptimizationState`](https://github.com/JuliaNLSolvers/Optim.jl/blob/a1035134ca1f3ebe855f1cde034e32683178225a/src/types.jl#L155), describing the state of the current iteration. If `store_trace` is true, the argument is a list of all the states from the first iteration to the current.
* `callback`: A function to be called during tracing. A return value of `true` stops the `optimize` call. The callback function is called every `show_every`th iteration. If `store_trace` is false, the argument to the callback is of the type [`OptimizationState`](https://github.com/JuliaNLSolvers/Optim.jl/blob/a1035134ca1f3ebe855f1cde034e32683178225a/src/types.jl#L155), describing the state of the current iteration. If `store_trace` is true, the argument is a list of all the states from the first iteration to the current.
* `time_limit`: A soft upper limit on the total run time. Defaults to `NaN` (unlimited).

Box constrained optimization has additional keywords to alter the behavior of the outer solver:
Expand Down
4 changes: 2 additions & 2 deletions joss/paper.bib
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ @article{damle2018variational
year = 2018,
month = jan,
adsurl = {http://adsabs.harvard.edu/abs/2018arXiv180108572D},
adsnote = {Provided by the SAO/NASA Astrophysics Data System},
adsnote = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{dony2018parametric,
Expand All @@ -119,7 +119,7 @@ @article{rackauckas2017differentialequations
}

@article{regier2016celeste,
author = {{Regier}, J. and {Pamnany}, K. and {Giordano}, R. and {Thomas}, R. and
author = {{Regier}, J. and {Pamnany}, K. and {Giordano}, R. and {Thomas}, R. and
{Schlegel}, D. and {McAuliffe}, J. and {Prabhat}},
title = "{Learning an Astronomical Catalog of the Visible Universe through Scalable Bayesian Inference}",
journal = {ArXiv e-prints},
Expand Down
14 changes: 7 additions & 7 deletions src/multivariate/solvers/constrained/fminbox.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ function in_box(bb::BoxBarrier, x)
all(x->x[1]>=x[2] && x[1]<=x[3], zip(x, bb.lower, bb.upper))
end
in_box(bw::BarrierWrapper, x) = in_box(bw.b, x)
# evaluates the value and gradient components coming from the log barrier
# evaluates the value and gradient components coming from the log barrier
function _barrier_term_value(x::T, l, u) where T
dxl = x - l
dxu = u - x

if dxl <= 0 || dxu <= 0
return T(Inf)
end
Expand All @@ -53,7 +53,7 @@ function _barrier_term_gradient(x::T, l, u) where T
if isfinite(u)
g += one(T)/dxu
end
return g
return g
end
function value_gradient!(bb::BoxBarrier, g, x)
g .= _barrier_term_gradient.(x, bb.lower, bb.upper)
Expand Down Expand Up @@ -134,7 +134,7 @@ end
# position, the gradient of the input function should dominate the
# gradient of the barrier.
function initial_mu(gfunc::AbstractArray{T}, gbarrier::AbstractArray{T}, mu0factor::T = T(1)/1000, mu0::T = convert(T, NaN)) where T

if isnan(mu0)
gbarriernorm = sum(abs, gbarrier)
if gbarriernorm > 0
Expand Down Expand Up @@ -378,7 +378,7 @@ function optimize(
# we need to update the +mu*barrier_grad part. Since we're using the
# value_gradient! not !! as in initial_state, we won't make a superfluous
# evaluation

if !(F.method isa NelderMead)
value_gradient!(dfbox, x)
else
Expand Down Expand Up @@ -412,7 +412,7 @@ function optimize(
println()
println("Decreasing barrier term μ.\n")
end

# Decrease mu
dfbox.mu *= T(F.mufactor)
# Test for convergence
Expand All @@ -429,7 +429,7 @@ function optimize(
stopped_by_time_limit = _time-t0 > options.time_limit ? true : false
stopped = stopped_by_time_limit
end

stopped_by =(#f_limit_reached=f_limit_reached,
#g_limit_reached=g_limit_reached,
#h_limit_reached=h_limit_reached,
Expand Down
2 changes: 1 addition & 1 deletion src/multivariate/solvers/constrained/ipnewton/interior.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ end
function optimize(d, lower::AbstractArray, upper::AbstractArray, initial_x::AbstractArray, method::ConstrainedOptimizer,
options::Options = Options(;default_options(method)...))
twicediffed = d isa TwiceDifferentiable ? d : TwiceDifferentiable(d, initial_x)

bounds = ConstraintBounds(lower, upper, [], [])
constraints = TwiceDifferentiableConstraints(
(c,x)->nothing, (J,x)->nothing, (H,x,λ)->nothing, bounds)
Expand Down
2 changes: 1 addition & 1 deletion src/multivariate/solvers/constrained/samin.jl
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ function optimize(obj_fn, lb::AbstractArray, ub::AbstractArray, x::AbstractArray
# last value close enough to last neps values?
fstar[1] = f_old
f_absΔ = abs.(fopt - f_old) # close enough to best so far?
if all((abs.(fopt .- fstar)) .< f_tol) # within to for last neps trials?
if all((abs.(fopt .- fstar)) .< f_tol) # within to for last neps trials?
f_converged = true
# check for bound narrow enough for parameter convergence
if any(bounds .> x_tol)
Expand Down
2 changes: 1 addition & 1 deletion src/multivariate/solvers/first_order/adam.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ function initial_state(method::Adam, options, d, initial_x::AbstractArray{T}) wh

value_gradient!!(d, initial_x)
α, β₁, β₂ = method.α, method.β₁, method.β₂

m = copy(gradient(d))
u = zero(m)
a = 1 - β₁
Expand Down
2 changes: 1 addition & 1 deletion src/multivariate/solvers/first_order/adamax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function initial_state(method::AdaMax, options, d, initial_x::AbstractArray{T})

value_gradient!!(d, initial_x)
α, β₁, β₂ = method.α, method.β₁, method.β₂

m = copy(gradient(d))
u = zero(m)
a = 1 - β₁
Expand Down
2 changes: 1 addition & 1 deletion src/multivariate/solvers/first_order/cg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ function reset!(cg, cgs::ConjugateGradientState, obj, x)
if cg.P !== nothing
project_tangent!(cg.manifold, cgs.pg, x)
end
cgs.s .= -cgs.pg
cgs.s .= -cgs.pg
cgs.f_x_previous = typeof(cgs.f_x_previous)(NaN)
end
function initial_state(method::ConjugateGradient, options, d, initial_x)
Expand Down
4 changes: 2 additions & 2 deletions src/univariate/optimize/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function optimize(f,
lower::Union{Integer, Real},
upper::Union{Integer, Real};
kwargs...)

T = promote_type(typeof(lower/1), typeof(upper/1))
optimize(f,
T(lower),
Expand All @@ -48,7 +48,7 @@ function optimize(f,
upper::Union{Integer, Real},
method::Union{Brent, GoldenSection};
kwargs...)

T = promote_type(typeof(lower/1), typeof(upper/1))
optimize(f,
T(lower),
Expand Down
2 changes: 1 addition & 1 deletion src/univariate/printing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function Base.show(io::IO, r::UnivariateOptimizationResults)
status_string = ""
if time_run(r) > time_limit(r)
status_string *= "failure (exceeded time limit of $(time_limit(r)))"
else
else
status_string = "success"
end

Expand Down
2 changes: 1 addition & 1 deletion src/univariate/solvers/golden_section.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ function optimize(f, x_lower::T, x_upper::T,
rel_tol,
abs_tol,
tr,
f_calls,
f_calls,
time_limit,
_time)
end
Expand Down
4 changes: 2 additions & 2 deletions test/general/objective_types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
@test Optim.gradient(odad2) == [0.0]
# @test odad3.g == [0.0]
end

for a in (1.0, 5.0)
xa = rand(1)
odad1 = OnceDifferentiable(x->a*x[1], xa; autodiff = :finite)
odad2 = OnceDifferentiable(x->a*x[1], xa; autodiff = :forward)
# odad3 = OnceDifferentiable(x->a*x[1], xa; autodiff = :reverse)
# odad3 = OnceDifferentiable(x->a*x[1], xa; autodiff = :reverse)
Optim.gradient!(odad1, xa)
Optim.gradient!(odad2, xa)
@test Optim.gradient(odad1) [a]
Expand Down
2 changes: 1 addition & 1 deletion test/multivariate/measurements.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ import Measurements
#given an initial value, they should give the exact same answer
@test all(Optim.minimizer(resmes) .|> Measurements.value .== Optim.minimizer(resfloat))
@test Optim.minimum(resmes) .|> Measurements.value .== Optim.minimum(resfloat)

end
2 changes: 1 addition & 1 deletion test/multivariate/optimize/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
end

@testset "#718" begin

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
function g!(G, x)
G[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
Expand Down
6 changes: 3 additions & 3 deletions test/multivariate/solvers/first_order/adam_adamax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# TODO: Check why skip problems fail
skip = ("Large Polynomial", "Parabola", "Paraboloid Random Matrix",
"Paraboloid Diagonal", "Penalty Function I", "Polynomial", "Powell",
"Extended Powell", "Trigonometric", "Himmelblau", "Rosenbrock", "Extended Rosenbrock",
"Quadratic Diagonal", "Beale", "Fletcher-Powell", "Exponential",
"Extended Powell", "Trigonometric", "Himmelblau", "Rosenbrock", "Extended Rosenbrock",
"Quadratic Diagonal", "Beale", "Fletcher-Powell", "Exponential",
)
run_optim_tests(Adam();
skip = skip,
Expand All @@ -38,7 +38,7 @@ end
skip = ("Trigonometric", "Large Polynomial", "Parabola", "Paraboloid Random Matrix",
"Paraboloid Diagonal", "Extended Rosenbrock", "Penalty Function I", "Beale",
"Extended Powell", "Himmelblau", "Large Polynomial", "Polynomial", "Powell",
"Exponential",
"Exponential",
)
run_optim_tests(AdaMax();
skip = skip,
Expand Down
2 changes: 1 addition & 1 deletion test/univariate/solvers/brent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
@test Optim.converged(result)
@test Optim.minimum(result) == -1.0

## time limit
## time limit
function slow_obj(x)
sleep(0.05)
return sin(x)
Expand Down
2 changes: 1 addition & 1 deletion test/univariate/solvers/golden_section.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
@test Optim.minimum(result) == 2.0
@test_throws ErrorException optimize(identity, 2.0, 1.0, GoldenSection())

## time limit
## time limit
function slow_obj(x)
sleep(0.05)
return sin(x)
Expand Down

0 comments on commit cb72365

Please sign in to comment.