From 7b49c383bf9aa48de965e4d4b055f22b0f36acae Mon Sep 17 00:00:00 2001 From: Chris C Date: Wed, 30 Dec 2020 03:56:12 +1000 Subject: [PATCH] improve predorcent stepper and use curve search (#633) --- examples/common_JuMP.jl | 2 +- examples/densityest/native.jl | 2 +- examples/matrixcompletion/JuMP_benchmark.jl | 4 +- src/Cones/Cones.jl | 29 +- src/Cones/epinorminf.jl | 41 +- .../{episumperentropy.jl => epirelentropy.jl} | 109 ++--- src/Cones/epitracerelentropytri.jl | 75 ++-- src/Cones/wsosinterpepinormone.jl | 2 +- src/MathOptInterface/cones.jl | 34 +- src/Solvers/Solvers.jl | 39 +- src/Solvers/linesearch.jl | 112 ----- src/Solvers/point.jl | 24 +- src/Solvers/process.jl | 2 +- src/Solvers/search.jl | 87 ++++ src/Solvers/steppers/combined.jl | 162 +++++++ src/Solvers/steppers/common.jl | 48 ++- src/Solvers/steppers/heurcomb.jl | 168 -------- src/Solvers/steppers/predorcent.jl | 403 ++++-------------- src/Solvers/systemsolvers/common.jl | 30 +- src/Solvers/systemsolvers/naive.jl | 4 +- src/Solvers/systemsolvers/naiveelim.jl | 6 +- src/Solvers/systemsolvers/qrchol.jl | 2 +- test/barrier.jl | 22 +- test/moi.jl | 2 +- test/moicones.jl | 8 +- test/nativeinstances.jl | 202 ++++----- test/nativesets.jl | 21 +- test/runbarriertests.jl | 4 +- test/runexamplestests.jl | 5 +- test/runmoitests.jl | 22 +- test/runnativetests.jl | 90 ++-- 31 files changed, 785 insertions(+), 976 deletions(-) rename src/Cones/{episumperentropy.jl => epirelentropy.jl} (77%) delete mode 100644 src/Solvers/linesearch.jl create mode 100644 src/Solvers/search.jl create mode 100644 src/Solvers/steppers/combined.jl delete mode 100644 src/Solvers/steppers/heurcomb.jl diff --git a/examples/common_JuMP.jl b/examples/common_JuMP.jl index b0271cc69..8aa704c46 100644 --- a/examples/common_JuMP.jl +++ b/examples/common_JuMP.jl @@ -241,7 +241,7 @@ cone_from_hyp(cone::Cones.EpiNormInf) = (Cones.use_dual_barrier(cone) ? MOI.Norm cone_from_hyp(cone::Cones.EpiNormEucl) = MOI.SecondOrderCone(Cones.dimension(cone)) cone_from_hyp(cone::Cones.EpiPerSquare) = MOI.RotatedSecondOrderCone(Cones.dimension(cone)) cone_from_hyp(cone::Cones.HypoPerLog) = (@assert Cones.dimension(cone) == 3; MOI.ExponentialCone()) -cone_from_hyp(cone::Cones.EpiSumPerEntropy) = MOI.RelativeEntropyCone(Cones.dimension(cone)) +cone_from_hyp(cone::Cones.EpiRelEntropy) = MOI.RelativeEntropyCone(Cones.dimension(cone)) cone_from_hyp(cone::Cones.HypoGeoMean) = MOI.GeometricMeanCone(Cones.dimension(cone)) cone_from_hyp(cone::Cones.Power) = (@assert Cones.dimension(cone) == 3; MOI.PowerCone{Float64}(cone.alpha[1])) cone_from_hyp(cone::Cones.EpiNormSpectral) = (Cones.use_dual_barrier(cone) ? MOI.NormNuclearCone : MOI.NormSpectralCone)(cone.n, cone.m) diff --git a/examples/densityest/native.jl b/examples/densityest/native.jl index 1cdac10ea..c352d3bba 100644 --- a/examples/densityest/native.jl +++ b/examples/densityest/native.jl @@ -126,7 +126,7 @@ function build(inst::DensityEstNative{T}) where {T <: Real} G_likl[row_offset + 1, 2:(1 + U)] = -X_pts_polys[i, :] G_likl[row_offset + 2, ext_offset] = -1 row_offset += 3 - push!(cones, Cones.EpiSumPerEntropy{T}(3)) + push!(cones, Cones.EpiRelEntropy{T}(3)) end end else diff --git a/examples/matrixcompletion/JuMP_benchmark.jl b/examples/matrixcompletion/JuMP_benchmark.jl index 91f1dca7f..2c46b2c4a 100644 --- a/examples/matrixcompletion/JuMP_benchmark.jl +++ b/examples/matrixcompletion/JuMP_benchmark.jl @@ -1,7 +1,7 @@ matrixcompletion_insts = [ - [(k, d) for d in vcat(3, 10:5:max_d)] # includes compile run - for (k, max_d) in ((5, 60), (10, 45)) + [(k, d) for d in vcat(2, 5:5:max_d)] # includes compile run + for (k, max_d) in ((10, 45), (20, 35)) ] insts = Dict() diff --git a/src/Cones/Cones.jl b/src/Cones/Cones.jl index 4d61c19ec..3c7b31ba2 100644 --- a/src/Cones/Cones.jl +++ b/src/Cones/Cones.jl @@ -36,8 +36,7 @@ include("nonnegative.jl") include("epinorminf.jl") include("epinormeucl.jl") include("epipersquare.jl") -include("episumperentropy.jl") -include("epitracerelentropytri.jl") +include("epirelentropy.jl") include("hypoperlog.jl") include("power.jl") include("hypopowermean.jl") @@ -50,6 +49,7 @@ include("doublynonnegativetri.jl") include("matrixepipersquare.jl") include("hypoperlogdettri.jl") include("hyporootdettri.jl") +include("epitracerelentropytri.jl") include("wsosinterpnonnegative.jl") include("wsosinterpepinormone.jl") include("wsosinterpepinormeucl.jl") @@ -190,9 +190,6 @@ function in_neighborhood(cone::Cone{T}, rtmu::T, max_nbhd::T) where {T <: Real} if use_heuristic_neighborhood(cone) nbhd = norm(vec1, Inf) / norm(g, Inf) # nbhd = maximum(abs(dj / gj) for (dj, gj) in zip(vec1, g)) # TODO try this neighborhood - # elseif Cones.use_sqrt_oracles(cone) # NOTE can force factorization when we don't need it - better to use inv hess prod - # inv_hess_sqrt_prod!(vec2, vec1, cone) - # nbhd = norm(vec2) else inv_hess_prod!(vec2, vec1, cone) nbhd_sqr = dot(vec2, vec1) @@ -506,23 +503,31 @@ function arrow_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, uu::T, return prod end -sqrt_pos(x::T) where {T <: Real} = sqrt(max(x, eps(T))) - # factorize arrow matrix function arrow_sqrt(uu::T, uw::Vector{T}, ww::Vector{T}, rtuw::Vector{T}, rtww::Vector{T}) where {T <: Real} - @. rtww = sqrt_pos(ww) + tol = sqrt(eps(T)) + any(<(tol), ww) && return zero(T) + @. rtww = sqrt(ww) @. rtuw = uw / rtww - return sqrt_pos(uu - sum(abs2, rtuw)) + diff = uu - sum(abs2, rtuw) + (diff < tol) && return zero(T) + return sqrt(diff) end # 2x2 block case function arrow_sqrt(uu::T, uv::Vector{T}, uw::Vector{T}, vv::Vector{T}, vw::Vector{T}, ww::Vector{T}, rtuv::Vector{T}, rtuw::Vector{T}, rtvv::Vector{T}, rtvw::Vector{T}, rtww::Vector{T}) where {T <: Real} - @. rtww = sqrt_pos(ww) + tol = sqrt(eps(T)) + any(<(tol), ww) && return zero(T) + @. rtww = sqrt(ww) @. rtvw = vw / rtww @. rtuw = uw / rtww - @. rtvv = sqrt_pos(vv - abs2(rtvw)) + @. rtuv = vv - abs2(rtvw) + any(<(tol), rtuv) && return zero(T) + @. rtvv = sqrt(rtuv) @. rtuv = (uv - rtvw * rtuw) / rtvv - return sqrt_pos(uu - sum(abs2, rtuv) - sum(abs2, rtuw)) + diff = uu - sum(abs2, rtuv) - sum(abs2, rtuw) + (diff < tol) && return zero(T) + return sqrt(diff) end # lmul with lower Cholesky factor of arrow matrix diff --git a/src/Cones/epinorminf.jl b/src/Cones/epinorminf.jl index c995329bb..4b12e9062 100644 --- a/src/Cones/epinorminf.jl +++ b/src/Cones/epinorminf.jl @@ -25,6 +25,7 @@ mutable struct EpiNormInf{T <: Real, R <: RealOrComplex{T}} <: Cone{T} inv_hess_updated::Bool hess_aux_updated::Bool hess_sqrt_aux_updated::Bool + use_hess_sqrt::Bool inv_hess_aux_updated::Bool is_feas::Bool hess::Symmetric{T, SparseMatrixCSC{T, Int}} @@ -69,7 +70,12 @@ use_heuristic_neighborhood(cone::EpiNormInf) = false reset_data(cone::EpiNormInf) = (cone.feas_updated = cone.grad_updated = cone.hess_updated = cone.inv_hess_updated = cone.hess_aux_updated = cone.hess_sqrt_aux_updated = cone.inv_hess_aux_updated = false) -use_sqrt_oracles(cone::EpiNormInf) = true +function use_sqrt_oracles(cone::EpiNormInf) + cone.use_hess_sqrt || return false + cone.hess_sqrt_aux_updated || update_hess_sqrt_aux(cone) + !cone.use_hess_sqrt + return cone.use_hess_sqrt +end # TODO only allocate the fields we use function setup_extra_data(cone::EpiNormInf{T, R}) where {R <: RealOrComplex{T}} where {T <: Real} @@ -93,6 +99,7 @@ function setup_extra_data(cone::EpiNormInf{T, R}) where {R <: RealOrComplex{T}} cone.Hiuim = zeros(T, n) cone.idet = zeros(T, n) end + cone.use_hess_sqrt = true return cone end @@ -109,7 +116,7 @@ function update_feas(cone::EpiNormInf{T}) where T u = cone.point[1] @views vec_copy_to!(cone.w, cone.point[2:end]) - cone.is_feas = (u > eps(T) && abs2(u) - maximum(abs2, cone.w) > eps(T)) + cone.is_feas = (u > eps(T) && u - norm(cone.w, Inf) > eps(T)) cone.feas_updated = true return cone.is_feas @@ -137,10 +144,9 @@ function update_grad(cone::EpiNormInf{T, R}) where {R <: RealOrComplex{T}} where w = cone.w den = cone.den - @inbounds for (j, wj) in enumerate(w) - absw = abs(wj) - den[j] = T(0.5) * (u - absw) * (u + absw) - end + usqr = abs2(u) + @. den = usqr - abs2(w) + den .*= T(0.5) @. cone.uden = u / den @. cone.wden = w / den cone.grad[1] = (cone.n - 1) / u - sum(cone.uden) @@ -236,6 +242,9 @@ function update_inv_hess_aux(cone::EpiNormInf{T}) where {T <: Real} schur += inv(u2pwj2) end cone.schur = schur + if schur < zero(T) + @warn("bad schur $schur") + end if cone.is_complex @. cone.idet = cone.Hrere * cone.Himim - abs2(cone.Hreim) @@ -253,23 +262,22 @@ function update_inv_hess(cone::EpiNormInf{T}) where {T <: Real} Hi = cone.inv_hess.data wden = cone.wden u = cone.point[1] + schur = cone.schur - Hi[1, 1] = 1 + Hi[1, 1] = inv(schur) @inbounds for j in 1:cone.n if cone.is_complex - Hi[1, 2j] = cone.Hiure[j] - Hi[1, 2j + 1] = cone.Hiuim[j] + Hi[2j, 1] = cone.Hiure[j] + Hi[2j + 1, 1] = cone.Hiuim[j] else - Hi[1, j + 1] = cone.Hiure[j] + Hi[j + 1, 1] = cone.Hiure[j] end end + @. Hi[1, 2:end] = Hi[2:end, 1] / schur - rtschur = sqrt_pos(cone.schur) - Hi[1, :] ./= rtschur @inbounds for j in 2:cone.dim, i in 2:j - Hi[i, j] = Hi[1, j] * Hi[1, i] + Hi[i, j] = Hi[j, 1] * Hi[1, i] end - Hi[1, :] ./= rtschur if cone.is_complex @inbounds for j in 1:cone.n @@ -330,12 +338,13 @@ function update_hess_sqrt_aux(cone::EpiNormInf) else cone.rtuu = arrow_sqrt(cone.Huu, cone.Hure, cone.Hrere, cone.rture, cone.rtrere) end + cone.use_hess_sqrt = !iszero(cone.rtuu) cone.hess_sqrt_aux_updated = true return end function hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiNormInf) - cone.hess_sqrt_aux_updated || update_hess_sqrt_aux(cone) + @assert cone.hess_sqrt_aux_updated && cone.use_hess_sqrt if cone.is_complex return arrow_sqrt_prod(prod, arr, cone.rtuu, cone.rture, cone.rtuim, cone.rtrere, cone.rtreim, cone.rtimim) else @@ -344,7 +353,7 @@ function hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::Ep end function inv_hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiNormInf) - cone.hess_sqrt_aux_updated || update_hess_sqrt_aux(cone) + @assert cone.hess_sqrt_aux_updated && cone.use_hess_sqrt if cone.is_complex return inv_arrow_sqrt_prod(prod, arr, cone.rtuu, cone.rture, cone.rtuim, cone.rtrere, cone.rtreim, cone.rtimim) else diff --git a/src/Cones/episumperentropy.jl b/src/Cones/epirelentropy.jl similarity index 77% rename from src/Cones/episumperentropy.jl rename to src/Cones/epirelentropy.jl index 588aac039..3ba556051 100644 --- a/src/Cones/episumperentropy.jl +++ b/src/Cones/epirelentropy.jl @@ -6,7 +6,7 @@ barrier from "Primal-Dual Interior-Point Methods for Domain-Driven Formulations" -log(u - sum_i w_i*log(w_i/v_i)) - sum_i (log(v_i) + log(w_i)) =# -mutable struct EpiSumPerEntropy{T <: Real} <: Cone{T} +mutable struct EpiRelEntropy{T <: Real} <: Cone{T} use_dual_barrier::Bool dim::Int w_dim::Int @@ -25,6 +25,7 @@ mutable struct EpiSumPerEntropy{T <: Real} <: Cone{T} inv_hess_updated::Bool inv_hess_aux_updated::Bool inv_hess_sqrt_aux_updated::Bool + use_inv_hess_sqrt::Bool is_feas::Bool hess::Symmetric{T, Matrix{T}} inv_hess::Symmetric{T, SparseMatrixCSC{T, Int}} @@ -47,7 +48,7 @@ mutable struct EpiSumPerEntropy{T <: Real} <: Cone{T} temp1::Vector{T} temp2::Vector{T} - function EpiSumPerEntropy{T}( + function EpiRelEntropy{T}( dim::Int; use_dual::Bool = false, ) where {T <: Real} @@ -63,14 +64,19 @@ mutable struct EpiSumPerEntropy{T <: Real} <: Cone{T} end end -use_heuristic_neighborhood(cone::EpiSumPerEntropy) = false +use_heuristic_neighborhood(cone::EpiRelEntropy) = false -reset_data(cone::EpiSumPerEntropy) = (cone.feas_updated = cone.grad_updated = cone.hess_updated = cone.inv_hess_updated = cone.inv_hess_aux_updated = cone.inv_hess_sqrt_aux_updated = false) +reset_data(cone::EpiRelEntropy) = (cone.feas_updated = cone.grad_updated = cone.hess_updated = cone.inv_hess_updated = cone.inv_hess_aux_updated = cone.inv_hess_sqrt_aux_updated = false) -use_sqrt_oracles(cone::EpiSumPerEntropy) = true +function use_sqrt_oracles(cone::EpiRelEntropy) + cone.use_inv_hess_sqrt || return false + cone.inv_hess_sqrt_aux_updated || update_inv_hess_sqrt_aux(cone) + !cone.use_inv_hess_sqrt + return cone.use_inv_hess_sqrt +end # TODO only allocate the fields we use -function setup_extra_data(cone::EpiSumPerEntropy{T}) where {T <: Real} +function setup_extra_data(cone::EpiRelEntropy{T}) where {T <: Real} w_dim = cone.w_dim cone.lwv = zeros(T, w_dim) cone.tau = zeros(T, w_dim) @@ -86,19 +92,20 @@ function setup_extra_data(cone::EpiSumPerEntropy{T}) where {T <: Real} cone.rtiww = zeros(T, w_dim) cone.temp1 = zeros(T, w_dim) cone.temp2 = zeros(T, w_dim) + cone.use_inv_hess_sqrt = true return cone end -get_nu(cone::EpiSumPerEntropy) = cone.dim +get_nu(cone::EpiRelEntropy) = cone.dim -function set_initial_point(arr::AbstractVector, cone::EpiSumPerEntropy) - (arr[1], v, w) = get_central_ray_episumperentropy(cone.w_dim) +function set_initial_point(arr::AbstractVector, cone::EpiRelEntropy) + (arr[1], v, w) = get_central_ray_epirelentropy(cone.w_dim) @views arr[cone.v_idxs] .= v @views arr[cone.w_idxs] .= w return arr end -function update_feas(cone::EpiSumPerEntropy{T}) where T +function update_feas(cone::EpiRelEntropy{T}) where T @assert !cone.feas_updated u = cone.point[1] @views v = cone.point[cone.v_idxs] @@ -116,19 +123,19 @@ function update_feas(cone::EpiSumPerEntropy{T}) where T return cone.is_feas end -function is_dual_feas(cone::EpiSumPerEntropy{T}) where T +function is_dual_feas(cone::EpiRelEntropy{T}) where T u = cone.dual_point[1] @views v = cone.dual_point[cone.v_idxs] @views w = cone.dual_point[cone.w_idxs] - if all(vi -> vi > eps(T), v) && (u > eps(T)) + if all(vi -> vi > eps(T), v) && u > eps(T) return all(u * (1 + log(vi / u)) + wi > eps(T) for (vi, wi) in zip(v, w)) end return false end -function update_grad(cone::EpiSumPerEntropy) +function update_grad(cone::EpiRelEntropy) @assert cone.is_feas u = cone.point[1] @views v = cone.point[cone.v_idxs] @@ -139,14 +146,14 @@ function update_grad(cone::EpiSumPerEntropy) @. tau = (cone.lwv + 1) / -z g[1] = -inv(z) - @. @views g[cone.v_idxs] = (-w / z - 1) / v + @. @views g[cone.v_idxs] = -(w / z + 1) / v @. @views g[cone.w_idxs] = -inv(w) - tau cone.grad_updated = true return cone.grad end -function update_hess(cone::EpiSumPerEntropy{T}) where T +function update_hess(cone::EpiRelEntropy{T}) where T @assert cone.grad_updated if !isdefined(cone, :hess) cone.hess = Symmetric(zeros(T, cone.dim, cone.dim), :U) @@ -160,6 +167,7 @@ function update_hess(cone::EpiSumPerEntropy{T}) where T tau = cone.tau z = cone.z sigma = cone.temp1 + g = cone.grad H = cone.hess.data # H_u_u, H_u_v, H_u_w parts @@ -175,14 +183,13 @@ function update_hess(cone::EpiSumPerEntropy{T}) where T wi = point[w_idx] taui = tau[i] sigmai = sigma[i] - invvi = inv(vi) - H[v_idx, v_idx] = abs2(sigmai) + (sigmai + invvi) / vi + H[v_idx, v_idx] = abs2(sigmai) - g[v_idx] / vi H[w_idx, w_idx] = abs2(taui) + (zi + inv(wi)) / wi @. H[v_idx, w_idxs] = sigmai * tau @. H[w_idx, v_idxs] = sigma * taui - H[v_idx, w_idx] -= invvi / z + H[v_idx, w_idx] -= zi / vi @inbounds for j in (i + 1):cone.w_dim H[v_idx, v_idxs[j]] = sigmai * sigma[j] @@ -195,7 +202,7 @@ function update_hess(cone::EpiSumPerEntropy{T}) where T end # auxiliary calculations for inverse Hessian -function update_inv_hess_aux(cone::EpiSumPerEntropy{T}) where T +function update_inv_hess_aux(cone::EpiRelEntropy{T}) where T @assert !cone.inv_hess_aux_updated point = cone.point @views v = point[cone.v_idxs] @@ -204,32 +211,33 @@ function update_inv_hess_aux(cone::EpiSumPerEntropy{T}) where T HiuHu = zero(T) @inbounds for i in 1:cone.w_dim - lwv = cone.lwv[i] wi = w[i] vi = v[i] - wlwv = wi * lwv - scal = wi / (z + 2 * wi) - uvi = cone.Hiuv[i] = vi * (wlwv - z) * scal - uwi = cone.Hiuw[i] = wi * (z * (lwv + 1) + wlwv) * scal - HiuHu += uvi * wi / vi + uwi * cone.tau[i] * z + lwvi = cone.lwv[i] + zwi = z + wi + z2wi = zwi + wi + wz2wi = wi / z2wi + vz2wi = vi / z2wi + uvvi = wi * (wi * lwvi - z) + uwwi = wi * (z + lwvi * zwi) * wz2wi + HiuHu += wz2wi * uvvi - uwwi * (lwvi + 1) + cone.Hiuv[i] = vz2wi * uvvi + cone.Hiuw[i] = uwwi + cone.Hivw[i] = wi * vi * wz2wi + cone.Hiww[i] = wi * zwi * wz2wi + cone.Hivv[i] = vi * zwi * vz2wi end cone.Hiuu = abs2(z) - HiuHu - @assert cone.Hiuu > 0 - - denom = cone.temp1 - @. denom = z + 2 * w - @. cone.Hivw = w * v / denom * w - @. cone.Hivv = (z + w) / denom - @. cone.Hiww = w * cone.Hivv * w - @. cone.Hivv *= v - @. cone.Hivv *= v + if cone.Hiuu < zero(T) + @warn("bad Hiuu $(cone.Hiuu)") + end cone.inv_hess_aux_updated = true return end # updates for nonzero values in the inverse Hessian -function update_inv_hess(cone::EpiSumPerEntropy{T}) where T +function update_inv_hess(cone::EpiRelEntropy{T}) where T cone.inv_hess_aux_updated || update_inv_hess_aux(cone) if !isdefined(cone, :inv_hess) @@ -250,7 +258,7 @@ function update_inv_hess(cone::EpiSumPerEntropy{T}) where T return cone.inv_hess end -function hess_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiSumPerEntropy) +function hess_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiRelEntropy) @assert cone.grad_updated v_idxs = cone.v_idxs w_idxs = cone.w_idxs @@ -282,30 +290,31 @@ function hess_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiSumP return prod end -function inv_hess_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiSumPerEntropy) +function inv_hess_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiRelEntropy) cone.inv_hess_aux_updated || update_inv_hess_aux(cone) return arrow_prod(prod, arr, cone.Hiuu, cone.Hiuv, cone.Hiuw, cone.Hivv, cone.Hivw, cone.Hiww) end -function update_inv_hess_sqrt_aux(cone::EpiSumPerEntropy) +function update_inv_hess_sqrt_aux(cone::EpiRelEntropy) cone.inv_hess_aux_updated || update_inv_hess_aux(cone) @assert !cone.inv_hess_sqrt_aux_updated cone.rtiuu = arrow_sqrt(cone.Hiuu, cone.Hiuv, cone.Hiuw, cone.Hivv, cone.Hivw, cone.Hiww, cone.rtiuv, cone.rtiuw, cone.rtivv, cone.rtivw, cone.rtiww) + cone.use_inv_hess_sqrt = !iszero(cone.rtiuu) cone.inv_hess_sqrt_aux_updated = true return end -function hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiSumPerEntropy) - cone.inv_hess_sqrt_aux_updated || update_inv_hess_sqrt_aux(cone) +function hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiRelEntropy) + @assert cone.inv_hess_sqrt_aux_updated && cone.use_inv_hess_sqrt return inv_arrow_sqrt_prod(prod, arr, cone.rtiuu, cone.rtiuv, cone.rtiuw, cone.rtivv, cone.rtivw, cone.rtiww) end -function inv_hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiSumPerEntropy) - cone.inv_hess_sqrt_aux_updated || update_inv_hess_sqrt_aux(cone) +function inv_hess_sqrt_prod!(prod::AbstractVecOrMat, arr::AbstractVecOrMat, cone::EpiRelEntropy) + @assert cone.inv_hess_sqrt_aux_updated && cone.use_inv_hess_sqrt return arrow_sqrt_prod(prod, arr, cone.rtiuu, cone.rtiuv, cone.rtiuw, cone.rtivv, cone.rtivw, cone.rtiww) end -function correction(cone::EpiSumPerEntropy, primal_dir::AbstractVector) +function correction(cone::EpiRelEntropy, primal_dir::AbstractVector) @assert cone.grad_updated tau = cone.tau z = cone.z @@ -346,15 +355,15 @@ function correction(cone::EpiSumPerEntropy, primal_dir::AbstractVector) end # TODO remove this in favor of new hess_nz_count etc functions that directly use uu, uw, ww etc -inv_hess_nz_count(cone::EpiSumPerEntropy) = 3 * cone.dim - 2 + 2 * cone.w_dim -inv_hess_nz_count_tril(cone::EpiSumPerEntropy) = 2 * cone.dim - 1 + cone.w_dim -inv_hess_nz_idxs_col(cone::EpiSumPerEntropy, j::Int) = (j == 1 ? (1:cone.dim) : (iseven(j) ? [1, j, j + 1] : [1, j - 1, j])) -inv_hess_nz_idxs_col_tril(cone::EpiSumPerEntropy, j::Int) = (j == 1 ? (1:cone.dim) : (iseven(j) ? [j, j + 1] : [j])) +inv_hess_nz_count(cone::EpiRelEntropy) = 3 * cone.dim - 2 + 2 * cone.w_dim +inv_hess_nz_count_tril(cone::EpiRelEntropy) = 2 * cone.dim - 1 + cone.w_dim +inv_hess_nz_idxs_col(cone::EpiRelEntropy, j::Int) = (j == 1 ? (1:cone.dim) : (iseven(j) ? [1, j, j + 1] : [1, j - 1, j])) +inv_hess_nz_idxs_col_tril(cone::EpiRelEntropy, j::Int) = (j == 1 ? (1:cone.dim) : (iseven(j) ? [j, j + 1] : [j])) # see analysis in https://github.com/lkapelevich/HypatiaSupplements.jl/tree/master/centralpoints -function get_central_ray_episumperentropy(w_dim::Int) +function get_central_ray_epirelentropy(w_dim::Int) if w_dim <= 10 - return central_rays_episumperentropy[w_dim, :] + return central_rays_epirelentropy[w_dim, :] end # use nonlinear fit for higher dimensions rtwdim = sqrt(w_dim) @@ -370,7 +379,7 @@ function get_central_ray_episumperentropy(w_dim::Int) return [u, v, w] end -const central_rays_episumperentropy = [ +const central_rays_epirelentropy = [ 0.827838399 1.290927714 0.805102005; 0.708612491 1.256859155 0.818070438; 0.622618845 1.231401008 0.829317079; diff --git a/src/Cones/epitracerelentropytri.jl b/src/Cones/epitracerelentropytri.jl index 75e1fe42c..718845fde 100644 --- a/src/Cones/epitracerelentropytri.jl +++ b/src/Cones/epitracerelentropytri.jl @@ -9,17 +9,18 @@ by L. Faybusovich and C. Zhou uses the log-homogeneous but not self-concordant barrier -log(u - tr(W * log(W) - W * log(V))) - logdet(W) - logdet(V) -TODO allocations +TODO reduce allocations =# mutable struct EpiTraceRelEntropyTri{T <: Real} <: Cone{T} use_dual_barrier::Bool + use_heuristic_neighborhood::Bool dim::Int - rt2::T d::Int + rt2::T + point::Vector{T} dual_point::Vector{T} - grad::Vector{T} correction::Vector{T} vec1::Vector{T} @@ -34,6 +35,7 @@ mutable struct EpiTraceRelEntropyTri{T <: Real} <: Cone{T} inv_hess::Symmetric{T, Matrix{T}} hess_fact_cache + # TODO type fields V W Vi @@ -59,11 +61,13 @@ mutable struct EpiTraceRelEntropyTri{T <: Real} <: Cone{T} function EpiTraceRelEntropyTri{T}( dim::Int; use_dual::Bool = false, + use_heuristic_neighborhood::Bool = default_use_heuristic_neighborhood(), hess_fact_cache = hessian_cache(T), ) where {T <: Real} @assert dim > 2 cone = new{T}() cone.use_dual_barrier = use_dual + cone.use_heuristic_neighborhood = use_heuristic_neighborhood cone.dim = dim cone.vw_dim = div(dim - 1, 2) cone.d = round(Int, sqrt(0.25 + 2 * cone.vw_dim) - 0.5) @@ -72,9 +76,7 @@ mutable struct EpiTraceRelEntropyTri{T <: Real} <: Cone{T} end end -use_correction(::EpiTraceRelEntropyTri) = false - -use_heuristic_neighborhood(::EpiTraceRelEntropyTri) = false +use_correction(::EpiTraceRelEntropyTri) = false # TODO # TODO only allocate the fields we use function setup_extra_data(cone::EpiTraceRelEntropyTri{T}) where {T <: Real} @@ -113,8 +115,8 @@ get_nu(cone::EpiTraceRelEntropyTri) = 2 * cone.d + 1 function set_initial_point(arr::AbstractVector, cone::EpiTraceRelEntropyTri{T}) where {T <: Real} arr .= 0 - # at the initial point V and W are diagonal, equivalent to episumperentropy - (arr[1], v, w) = get_central_ray_episumperentropy(cone.d) + # at the initial point V and W are diagonal, equivalent to epirelentropy + (arr[1], v, w) = get_central_ray_epirelentropy(cone.d) k = 1 for i in 1:cone.d arr[1 + k] = v @@ -130,39 +132,30 @@ function update_feas(cone::EpiTraceRelEntropyTri{T}) where {T <: Real} vw_dim = cone.vw_dim @views V = Hermitian(svec_to_smat!(cone.V, point[cone.V_idxs], cone.rt2), :U) @views W = Hermitian(svec_to_smat!(cone.W, point[cone.W_idxs], cone.rt2), :U) + + cone.is_feas = false (V_vals, V_vecs) = cone.V_fact = eigen(V) - (W_vals, W_vecs) = cone.W_fact = eigen(W) - if isposdef(cone.V_fact) && isposdef(cone.W_fact) - @. cone.V_vals_log = log(V_vals) - @. cone.W_vals_log = log(W_vals) - mul!(cone.tmp, V_vecs, Diagonal(cone.V_vals_log)) - V_log = mul!(cone.V_log, cone.tmp, V_vecs') - mul!(cone.tmp, W_vecs, Diagonal(cone.W_vals_log)) - W_log = mul!(cone.W_log, cone.tmp, W_vecs') - @. cone.WV_log = W_log - V_log - cone.z = point[1] - dot(W, Hermitian(cone.WV_log, :U)) - cone.is_feas = (cone.z > 0) - else - cone.is_feas = false + if isposdef(cone.V_fact) + (W_vals, W_vecs) = cone.W_fact = eigen(W) + if isposdef(cone.W_fact) + @. cone.V_vals_log = log(V_vals) + @. cone.W_vals_log = log(W_vals) + mul!(cone.tmp, V_vecs, Diagonal(cone.V_vals_log)) + V_log = mul!(cone.V_log, cone.tmp, V_vecs') + mul!(cone.tmp, W_vecs, Diagonal(cone.W_vals_log)) + W_log = mul!(cone.W_log, cone.tmp, W_vecs') + @. cone.WV_log = W_log - V_log + cone.z = point[1] - dot(W, Hermitian(cone.WV_log, :U)) + cone.is_feas = (cone.z > 0) + end end + cone.feas_updated = true return cone.is_feas end is_dual_feas(::EpiTraceRelEntropyTri) = true -function diff_mat!(mat::Matrix{T}, vals::Vector{T}, log_vals::Vector{T}) where T - rteps = sqrt(eps(T)) - for j in eachindex(vals) - (vj, lvj) = (vals[j], log_vals[j]) - for i in 1:j - (vi, lvi) = (vals[i], log_vals[i]) - mat[i, j] = (abs(vi - vj) < rteps ? inv(vi) : (lvi - lvj) / (vi - vj)) - end - end - return mat -end - function update_grad(cone::EpiTraceRelEntropyTri{T}) where {T <: Real} @assert cone.is_feas d = cone.d @@ -265,15 +258,27 @@ function update_hess(cone::EpiTraceRelEntropyTri{T}) where {T <: Real} return cone.hess end +function diff_mat!(mat::Matrix{T}, vals::Vector{T}, log_vals::Vector{T}) where {T <: Real} + rteps = sqrt(eps(T)) + for j in eachindex(vals) + (vj, lvj) = (vals[j], log_vals[j]) + for i in 1:j + (vi, lvi) = (vals[i], log_vals[i]) + mat[i, j] = (abs(vi - vj) < rteps ? inv(vi) : (lvi - lvj) / (vi - vj)) + end + end + return mat +end + # TODO optimize -function grad_logm!(mat::Matrix{T}, vecs::Matrix{T}, diff_mat::Hermitian{T, Matrix{T}}, rt2::T) where T +function grad_logm!(mat::Matrix{T}, vecs::Matrix{T}, diff_mat::Hermitian{T, Matrix{T}}, rt2::T) where {T <: Real} A = symm_kron(similar(mat), vecs, rt2, upper_only = false) l = smat_to_svec!(zeros(T, size(mat, 1)), diff_mat, one(T)) mat .= A' * Diagonal(l) * A return mat end -function hess_tr_logm!(mat, vecs, mat_inner, diff_tensor, rt2) +function hess_tr_logm!(mat, vecs, mat_inner, diff_tensor, rt2) # TODO type fields d = size(vecs, 1) row_idx = 1 for j in 1:d, i in 1:j diff --git a/src/Cones/wsosinterpepinormone.jl b/src/Cones/wsosinterpepinormone.jl index 3508d4caa..b38047ffd 100644 --- a/src/Cones/wsosinterpepinormone.jl +++ b/src/Cones/wsosinterpepinormone.jl @@ -135,7 +135,7 @@ reset_data(cone::WSOSInterpEpiNormOne) = (cone.feas_updated = cone.grad_updated get_nu(cone::WSOSInterpEpiNormOne) = cone.R * sum(size(Pk, 2) for Pk in cone.Ps) -use_sqrt_oracles(::WSOSInterpEpiNormOne) = false +use_sqrt_oracles(::WSOSInterpEpiNormOne) = false # Hessian is block sparse function set_initial_point(arr::AbstractVector, cone::WSOSInterpEpiNormOne) @views arr[1:cone.U] .= 1 diff --git a/src/MathOptInterface/cones.jl b/src/MathOptInterface/cones.jl index b9fdd7f6e..0f3fbd013 100644 --- a/src/MathOptInterface/cones.jl +++ b/src/MathOptInterface/cones.jl @@ -16,7 +16,7 @@ cone_from_moi(::Type{T}, cone::MOI.DualExponentialCone) where {T <: Real} = Cone cone_from_moi(::Type{T}, cone::MOI.PowerCone{T}) where {T <: Real} = Cones.Power{T}(T[cone.exponent, 1 - cone.exponent], 1) cone_from_moi(::Type{T}, cone::MOI.DualPowerCone{T}) where {T <: Real} = Cones.Power{T}(T[cone.exponent, 1 - cone.exponent], 1, use_dual = true) cone_from_moi(::Type{T}, cone::MOI.GeometricMeanCone) where {T <: Real} = (l = MOI.dimension(cone) - 1; Cones.HypoGeoMean{T}(1 + l)) -cone_from_moi(::Type{T}, cone::MOI.RelativeEntropyCone) where {T <: Real} = Cones.EpiSumPerEntropy{T}(MOI.dimension(cone)) +cone_from_moi(::Type{T}, cone::MOI.RelativeEntropyCone) where {T <: Real} = Cones.EpiRelEntropy{T}(MOI.dimension(cone)) cone_from_moi(::Type{T}, cone::MOI.NormSpectralCone) where {T <: Real} = Cones.EpiNormSpectral{T, T}(extrema((cone.row_dim, cone.column_dim))...) cone_from_moi(::Type{T}, cone::MOI.NormNuclearCone) where {T <: Real} = Cones.EpiNormSpectral{T, T}(extrema((cone.row_dim, cone.column_dim))..., use_dual = true) cone_from_moi(::Type{T}, cone::MOI.PositiveSemidefiniteConeTriangle) where {T <: Real} = Cones.PosSemidefTri{T, T}(MOI.dimension(cone)) @@ -167,14 +167,14 @@ HypoPerLogCone{T}(dim::Int) where {T <: Real} = HypoPerLogCone{T}(dim, false) MOI.dimension(cone::HypoPerLogCone) = cone.dim cone_from_moi(::Type{T}, cone::HypoPerLogCone{T}) where {T <: Real} = Cones.HypoPerLog{T}(cone.dim, use_dual = cone.use_dual) -export EpiSumPerEntropyCone -struct EpiSumPerEntropyCone{T <: Real} <: MOI.AbstractVectorSet +export EpiRelEntropyCone +struct EpiRelEntropyCone{T <: Real} <: MOI.AbstractVectorSet dim::Int use_dual::Bool end -EpiSumPerEntropyCone{T}(dim::Int) where {T <: Real} = EpiSumPerEntropyCone{T}(dim, false) -MOI.dimension(cone::EpiSumPerEntropyCone) = cone.dim -cone_from_moi(::Type{T}, cone::EpiSumPerEntropyCone{T}) where {T <: Real} = Cones.EpiSumPerEntropy{T}(cone.dim, use_dual = cone.use_dual) +EpiRelEntropyCone{T}(dim::Int) where {T <: Real} = EpiRelEntropyCone{T}(dim, false) +MOI.dimension(cone::EpiRelEntropyCone) = cone.dim +cone_from_moi(::Type{T}, cone::EpiRelEntropyCone{T}) where {T <: Real} = Cones.EpiRelEntropy{T}(cone.dim, use_dual = cone.use_dual) export HypoGeoMeanCone struct HypoGeoMeanCone{T <: Real} <: MOI.AbstractVectorSet @@ -244,6 +244,15 @@ MOI.dimension(cone::PosSemidefTriSparseCone{T, T} where {T <: Real}) = length(co MOI.dimension(cone::PosSemidefTriSparseCone{T, Complex{T}} where {T <: Real}) = (2 * length(cone.row_idxs) - cone.side) cone_from_moi(::Type{T}, cone::PosSemidefTriSparseCone{T, R}) where {R <: RealOrComplex{T}} where {T <: Real} = Cones.PosSemidefTriSparse{T, R}(cone.side, cone.row_idxs, cone.col_idxs, use_dual = cone.use_dual) +export DoublyNonnegativeTriCone +struct DoublyNonnegativeTriCone{T <: Real} <: MOI.AbstractVectorSet + dim::Int + use_dual::Bool +end +DoublyNonnegativeTriCone{T}(dim::Int) where {T <: Real} = DoublyNonnegativeTriCone{T}(dim, false) +MOI.dimension(cone::DoublyNonnegativeTriCone where {T <: Real}) = cone.dim +cone_from_moi(::Type{T}, cone::DoublyNonnegativeTriCone{T}) where {T <: Real} = Cones.DoublyNonnegativeTri{T}(cone.dim, use_dual = cone.use_dual) + export HypoPerLogdetTriCone struct HypoPerLogdetTriCone{T <: Real, R <: RealOrComplex{T}} <: MOI.AbstractVectorSet dim::Int @@ -262,15 +271,6 @@ HypoRootdetTriCone{T, R}(dim::Int) where {R <: RealOrComplex{T}} where {T <: Rea MOI.dimension(cone::HypoRootdetTriCone where {T <: Real}) = cone.dim cone_from_moi(::Type{T}, cone::HypoRootdetTriCone{T, R}) where {R <: RealOrComplex{T}} where {T <: Real} = Cones.HypoRootdetTri{T, R}(cone.dim, use_dual = cone.use_dual) -export DoublyNonnegativeTriCone -struct DoublyNonnegativeTriCone{T <: Real} <: MOI.AbstractVectorSet - dim::Int - use_dual::Bool -end -DoublyNonnegativeTriCone{T}(dim::Int) where {T <: Real} = DoublyNonnegativeTriCone{T}(dim, false) -MOI.dimension(cone::DoublyNonnegativeTriCone where {T <: Real}) = cone.dim -cone_from_moi(::Type{T}, cone::DoublyNonnegativeTriCone{T}) where {T <: Real} = Cones.DoublyNonnegativeTri{T}(cone.dim, use_dual = cone.use_dual) - export EpiTraceRelEntropyTriCone struct EpiTraceRelEntropyTriCone{T <: Real} <: MOI.AbstractVectorSet dim::Int @@ -333,7 +333,7 @@ const HypatiaCones{T <: Real} = Union{ EpiPerSquareCone{T}, PowerCone{T}, HypoPerLogCone{T}, - EpiSumPerEntropyCone{T}, + EpiRelEntropyCone{T}, HypoGeoMeanCone{T}, HypoPowerMeanCone{T}, EpiNormSpectralCone{T, T}, @@ -345,11 +345,11 @@ const HypatiaCones{T <: Real} = Union{ PosSemidefTriCone{T, Complex{T}}, PosSemidefTriSparseCone{T, T}, PosSemidefTriSparseCone{T, Complex{T}}, + DoublyNonnegativeTriCone{T}, HypoPerLogdetTriCone{T, T}, HypoPerLogdetTriCone{T, Complex{T}}, HypoRootdetTriCone{T, T}, HypoRootdetTriCone{T, Complex{T}}, - DoublyNonnegativeTriCone{T}, EpiTraceRelEntropyTriCone{T}, WSOSInterpNonnegativeCone{T, T}, WSOSInterpNonnegativeCone{T, Complex{T}}, diff --git a/src/Solvers/Solvers.jl b/src/Solvers/Solvers.jl index ac823712f..aa8fa509d 100644 --- a/src/Solvers/Solvers.jl +++ b/src/Solvers/Solvers.jl @@ -209,7 +209,7 @@ mutable struct Solver{T <: Real} end end -default_stepper(T) = HeurCombStepper{T}() +default_stepper(T) = CombinedStepper{T}() default_system_solver(T) = QRCholDenseSystemSolver{T}() function solve(solver::Solver{T}) where {T <: Real} @@ -256,8 +256,8 @@ function solve(solver::Solver{T}) where {T <: Real} point.y .= init_y point.z .= init_z point.s .= init_s - point.tau[1] = one(T) - point.kap[1] = one(T) + point.tau[] = one(T) + point.kap[] = one(T) calc_mu(solver) if isnan(solver.mu) || abs(one(T) - solver.mu) > sqrt(eps(T)) @warn("initial mu is $(solver.mu) but should be 1 (this could indicate a problem with cone barrier oracles)") @@ -284,13 +284,19 @@ function solve(solver::Solver{T}) where {T <: Real} load(stepper, solver) load(solver.system_solver, solver) + solver.verbose && print_header(stepper, solver) + flush(stdout) + # iterate from initial point while true calc_residual(solver) calc_convergence_params(solver) - solver.verbose && print_iteration_stats(stepper, solver) + if solver.verbose + print_iteration(stepper, solver) + flush(stdout) + end check_convergence(solver) && break @@ -306,13 +312,15 @@ function solve(solver::Solver{T}) where {T <: Real} end step(stepper, solver) || break + flush(stdout) - if point.tau[1] <= zero(T) || point.kap[1] <= zero(T) || solver.mu <= zero(T) - @warn("numerical failure: tau is $(point.tau[1]), kappa is $(point.kap[1]), mu is $(solver.mu); terminating") + if point.tau[] <= zero(T) || point.kap[] <= zero(T) || solver.mu <= zero(T) + @warn("numerical failure: tau is $(point.tau[]), kappa is $(point.kap[]), mu is $(solver.mu); terminating") solver.status = NumericalFailure break end + flush(stdout) solver.num_iters += 1 end @@ -326,20 +334,21 @@ function solve(solver::Solver{T}) where {T <: Real} free_memory(solver.system_solver) solver.verbose && println("\nstatus is $(solver.status) after $(solver.num_iters) iterations and $(trunc(solver.solve_time, digits=3)) seconds\n") + flush(stdout) return solver end function calc_mu(solver::Solver{T}) where {T <: Real} point = solver.point - solver.mu = (dot(point.z, point.s) + point.tau[1] * point.kap[1]) / (solver.model.nu + 1) + solver.mu = (dot(point.z, point.s) + point.tau[] * point.kap[]) / (solver.model.nu + 1) return solver.mu end function calc_residual(solver::Solver{T}) where {T <: Real} model = solver.model point = solver.point - tau = point.tau[1] + tau = point.tau[] # x_residual = -A'*y - G'*z - c*tau x_residual = solver.x_residual @@ -379,8 +388,8 @@ function calc_convergence_params(solver::Solver{T}) where {T <: Real} solver.primal_obj_t = dot(model.c, point.x) solver.dual_obj_t = -dot(model.b, point.y) - dot(model.h, point.z) - solver.primal_obj = solver.primal_obj_t / solver.point.tau[1] + model.obj_offset - solver.dual_obj = solver.dual_obj_t / solver.point.tau[1] + model.obj_offset + solver.primal_obj = solver.primal_obj_t / solver.point.tau[] + model.obj_offset + solver.dual_obj = solver.dual_obj_t / solver.point.tau[] + model.obj_offset solver.gap = dot(point.z, point.s) solver.x_feas = solver.x_norm_res * solver.x_conv_tol @@ -393,7 +402,7 @@ end function check_convergence(solver::Solver{T}) where {T <: Real} # check convergence criteria # TODO nearly primal or dual infeasible or nearly optimal cases? - tau = solver.point.tau[1] + tau = solver.point.tau[] primal_obj_t = solver.primal_obj_t dual_obj_t = solver.dual_obj_t @@ -423,7 +432,7 @@ function check_convergence(solver::Solver{T}) where {T <: Real} end # TODO experiment with ill-posedness check - if solver.mu <= solver.tol_infeas && tau <= solver.tol_infeas * min(one(T), solver.point.kap[1]) + if solver.mu <= solver.tol_infeas && tau <= solver.tol_infeas * min(one(T), solver.point.kap[]) solver.verbose && println("ill-posedness detected; terminating") solver.status = IllPosed return true @@ -487,8 +496,8 @@ get_z(solver::Solver) = copy(solver.result.z) get_x(solver::Solver) = copy(solver.result.x) get_y(solver::Solver) = copy(solver.result.y) -get_tau(solver::Solver) = solver.point.tau[1] -get_kappa(solver::Solver) = solver.point.kap[1] +get_tau(solver::Solver) = solver.point.tau[] +get_kappa(solver::Solver) = solver.point.kap[] get_mu(solver::Solver) = solver.mu function load(solver::Solver{T}, model::Models.Model{T}) where {T <: Real} @@ -500,7 +509,7 @@ end include("process.jl") -include("linesearch.jl") +include("search.jl") include("steppers/common.jl") diff --git a/src/Solvers/linesearch.jl b/src/Solvers/linesearch.jl deleted file mode 100644 index a17c5b0d1..000000000 --- a/src/Solvers/linesearch.jl +++ /dev/null @@ -1,112 +0,0 @@ -#= -line search for s,z -=# - -mutable struct LineSearcher{T <: Real} - z::Vector{T} - s::Vector{T} - dual_views::Vector{SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true}} - primal_views::Vector{SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true}} - skzk::Vector{T} - nup1::T - cone_times::Vector{Float64} - cone_order::Vector{Int} - - function LineSearcher{T}(model::Models.Model{T}) where {T <: Real} - cones = model.cones - line_searcher = new{T}() - z = line_searcher.z = zeros(T, model.q) - s = line_searcher.s = zeros(T, model.q) - line_searcher.dual_views = [view(Cones.use_dual_barrier(cone) ? s : z, idxs) for (cone, idxs) in zip(cones, model.cone_idxs)] - line_searcher.primal_views = [view(Cones.use_dual_barrier(cone) ? z : s, idxs) for (cone, idxs) in zip(cones, model.cone_idxs)] - line_searcher.skzk = zeros(T, length(cones)) - line_searcher.nup1 = T(model.nu + 1) - line_searcher.cone_times = zeros(length(cones)) - line_searcher.cone_order = collect(1:length(cones)) - return line_searcher - end -end - -# backtracking line search to find large distance to step in direction while remaining inside cones and inside a given neighborhood -function find_max_alpha( - point::Point{T}, - dir::Point{T}, - line_searcher::LineSearcher{T}, - model::Models.Model{T}; - prev_alpha::T, - min_alpha::T, - min_nbhd::T = T(0.01), - max_nbhd::T = T(0.99), - ) where {T <: Real} - cones = model.cones - cone_order = line_searcher.cone_order - (tau, kap) = (point.tau[1], point.kap[1]) - (tau_dir, kap_dir) = (dir.tau[1], dir.kap[1]) - skzk = line_searcher.skzk - - alpha_reduce = T(0.95) # TODO tune, maybe try smaller for pred_alpha since heuristic - - # TODO experiment with starting alpha (<1) - alpha = max(T(0.1), min(prev_alpha * T(1.4), one(T))) # TODO option for parameter - - if tau_dir < zero(T) - alpha = min(alpha, -tau / tau_dir) - end - if kap_dir < zero(T) - alpha = min(alpha, -kap / kap_dir) - end - alpha *= T(0.9999) - - alpha /= alpha_reduce - # TODO for feas, as soon as cone is feas, don't test feas again, since line search is backwards - while true - if alpha < min_alpha - # alpha is very small so finish - alpha = zero(T) - break - end - alpha *= alpha_reduce - - taukap_ls = (tau + alpha * tau_dir) * (kap + alpha * kap_dir) - (taukap_ls < eps(T)) && continue - - # order the cones by how long it takes to check neighborhood condition and iterate in that order, to improve efficiency - sortperm!(cone_order, line_searcher.cone_times, initialized = true) # NOTE stochastic - - @. line_searcher.z = point.z + alpha * dir.z - @. line_searcher.s = point.s + alpha * dir.s - - for k in cone_order - skzk[k] = dot(line_searcher.primal_views[k], line_searcher.dual_views[k]) - end - any(<(eps(T)), skzk) && continue - - mu_ls = (sum(skzk) + taukap_ls) / line_searcher.nup1 - (mu_ls < eps(T)) && continue - - min_nbhd_mu = min_nbhd * mu_ls - (taukap_ls < min_nbhd_mu) && continue - any(skzk[k] < min_nbhd_mu * Cones.get_nu(cones[k]) for k in cone_order) && continue - isfinite(max_nbhd) && (abs(taukap_ls - mu_ls) > max_nbhd * mu_ls) && continue - - rtmu = sqrt(mu_ls) - irtmu = inv(rtmu) - in_nbhd = true - for k in cone_order - cone_k = cones[k] - line_searcher.cone_times[k] = @elapsed begin - Cones.load_point(cone_k, line_searcher.primal_views[k], irtmu) - Cones.load_dual_point(cone_k, line_searcher.dual_views[k]) - Cones.reset_data(cone_k) - in_nbhd_k = (Cones.is_feas(cone_k) && Cones.is_dual_feas(cone_k) && (isinf(max_nbhd) || Cones.in_neighborhood(cone_k, rtmu, max_nbhd))) - end - if !in_nbhd_k - in_nbhd = false - break - end - end - in_nbhd && break - end - - return alpha -end diff --git a/src/Solvers/point.jl b/src/Solvers/point.jl index a138b2e9a..36ee13554 100644 --- a/src/Solvers/point.jl +++ b/src/Solvers/point.jl @@ -8,10 +8,11 @@ mutable struct Point{T <: Real} x::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} y::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} z::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} - tau::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} + tau::SubArray{T, 0, Vector{T}, Tuple{Int}, true} s::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} - kap::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} + kap::SubArray{T, 0, Vector{T}, Tuple{Int}, true} + ztsk::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true} z_views::Vector{SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true}} s_views::Vector{SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true}} dual_views::Vector{SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int}}, true}} @@ -20,19 +21,26 @@ mutable struct Point{T <: Real} Point{T}() where {T <: Real} = new{T}() end -function Point(model::Models.Model{T}) where {T <: Real} +function Point( + model::Models.Model{T}; + ztsk_only::Bool = false, + ) where {T <: Real} point = Point{T}() (n, p, q) = (model.n, model.p, model.q) - tau_idx = n + p + q + 1 + dim = tau_idx + q + 1 vec = point.vec = zeros(T, tau_idx + q + 1) + + @views point.ztsk = vec[(n + p + 1):end] + ztsk_only && return point + + point.tau = view(vec, tau_idx) + point.kap = view(vec, dim) @views begin - point.x = vec[1:n] - point.y = vec[n .+ (1:p)] point.z = vec[n + p .+ (1:q)] - point.tau = vec[tau_idx:tau_idx] point.s = vec[tau_idx .+ (1:q)] - point.kap = vec[end:end] + point.x = vec[1:n] + point.y = vec[n .+ (1:p)] end point.z_views = [view(point.z, idxs) for idxs in model.cone_idxs] diff --git a/src/Solvers/process.jl b/src/Solvers/process.jl index fb0df0b4e..606394f54 100644 --- a/src/Solvers/process.jl +++ b/src/Solvers/process.jl @@ -350,7 +350,7 @@ function postprocess(solver::Solver{T}) where {T <: Real} if in(solver.status, (PrimalInfeasible, DualInfeasible)) tau = true else - tau = point.tau[1] + tau = point.tau[] if tau <= 0 result.vec .= NaN return nothing diff --git a/src/Solvers/search.jl b/src/Solvers/search.jl new file mode 100644 index 000000000..d48824928 --- /dev/null +++ b/src/Solvers/search.jl @@ -0,0 +1,87 @@ +#= +step distance search helpers for s,z +=# + +mutable struct StepSearcher{T <: Real} + skzk::Vector{T} + nup1::T + cone_times::Vector{Float64} + cone_order::Vector{Int} + min_nbhd::T + max_nbhd::T + alpha_sched::Vector{T} + + function StepSearcher{T}(model::Models.Model{T}) where {T <: Real} + cones = model.cones + step_searcher = new{T}() + step_searcher.skzk = zeros(T, length(cones)) + step_searcher.nup1 = T(model.nu + 1) + step_searcher.cone_times = zeros(length(cones)) + step_searcher.cone_order = collect(1:length(cones)) + step_searcher.min_nbhd = T(0.01) # TODO tune + step_searcher.max_nbhd = T(0.99) # TODO tune, maybe should be different for cones without third order correction + step_searcher.alpha_sched = T[0.9999, 0.999, 0.99, 0.97, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.3, 0.1, 0.03, 0.01, 0.003, 0.001, 0.0001] # TODO tune + return step_searcher + end +end + +# backwards search on alphas in alpha schedule +function search_alpha( + point::Point{T}, + model::Models.Model{T}, + stepper::Stepper{T}; + # prev_alpha::T = one(T), # TODO so don't try largest alpha first always + min_alpha::T = zero(T), + ) where {T <: Real} + step_searcher = stepper.step_searcher + for alpha in step_searcher.alpha_sched + (alpha < min_alpha) && break # alpha is very small so finish + update_cone_points(alpha, point, stepper) + check_cone_points(stepper.temp, step_searcher, model) && return alpha + end + return zero(T) +end + +function check_cone_points( + cand::Point{T}, + step_searcher::StepSearcher{T}, + model::Models.Model{T}, + ) where {T <: Real} + cone_order = step_searcher.cone_order + skzk = step_searcher.skzk + cones = model.cones + max_nbhd = step_searcher.max_nbhd + + taukap_cand = cand.tau[] * cand.kap[] + (min(cand.tau[], cand.kap[], taukap_cand) < eps(T)) && return false + + for k in cone_order + skzk[k] = dot(cand.primal_views[k], cand.dual_views[k]) + (skzk[k] < eps(T)) && return false + end + mu_cand = (sum(skzk) + taukap_cand) / step_searcher.nup1 + (mu_cand < eps(T)) && return false + + min_nbhd_mu = step_searcher.min_nbhd * mu_cand + (taukap_cand < min_nbhd_mu) && return false + any(skzk[k] < min_nbhd_mu * Cones.get_nu(cones[k]) for k in cone_order) && return false + isfinite(max_nbhd) && (abs(taukap_cand - mu_cand) > max_nbhd * mu_cand) && return false + + # order the cones by how long it takes to check neighborhood condition and iterate in that order, to improve efficiency + sortperm!(cone_order, step_searcher.cone_times, initialized = true) # NOTE stochastic + + rtmu = sqrt(mu_cand) + irtmu = inv(rtmu) + for k in cone_order + cone_k = cones[k] + start_time = time() + Cones.load_point(cone_k, cand.primal_views[k], irtmu) + Cones.load_dual_point(cone_k, cand.dual_views[k]) + Cones.reset_data(cone_k) + in_nbhd_k = (Cones.is_feas(cone_k) && Cones.is_dual_feas(cone_k) && (isinf(max_nbhd) || Cones.in_neighborhood(cone_k, rtmu, max_nbhd))) + step_searcher.cone_times[k] = time() - start_time + in_nbhd_k || return false + end + + return true +end diff --git a/src/Solvers/steppers/combined.jl b/src/Solvers/steppers/combined.jl new file mode 100644 index 000000000..3c1621abe --- /dev/null +++ b/src/Solvers/steppers/combined.jl @@ -0,0 +1,162 @@ +#= +combined predict and center stepper +=# + +mutable struct CombinedStepper{T <: Real} <: Stepper{T} + use_correction::Bool # TODO remove if unused + prev_alpha::T + rhs::Point{T} + dir::Point{T} + temp::Point{T} + dir_cent::Point{T} + dir_centcorr::Point{T} + dir_pred::Point{T} + dir_predcorr::Point{T} + dir_temp::Vector{T} + step_searcher::StepSearcher{T} + cent_only::Bool + uncorr_only::Bool + + function CombinedStepper{T}(; + use_correction::Bool = true, + ) where {T <: Real} + stepper = new{T}() + stepper.use_correction = use_correction + return stepper + end +end + +function load(stepper::CombinedStepper{T}, solver::Solver{T}) where {T <: Real} + model = solver.model + # TODO allow no correction + # if stepper.use_correction && !any(Cones.use_correction, model.cones) + # # model has no cones that use correction + # stepper.use_correction = false + # end + @assert stepper.use_correction + + stepper.prev_alpha = one(T) + stepper.rhs = Point(model) + stepper.dir = Point(model) + stepper.temp = Point(model) + stepper.dir_cent = Point(model, ztsk_only = true) + stepper.dir_pred = Point(model, ztsk_only = true) + if stepper.use_correction + stepper.dir_centcorr = Point(model, ztsk_only = true) + stepper.dir_predcorr = Point(model, ztsk_only = true) + end + stepper.dir_temp = zeros(T, length(stepper.rhs.vec)) + stepper.step_searcher = StepSearcher{T}(model) + stepper.uncorr_only = stepper.cent_only = false + + return stepper +end + +function step(stepper::CombinedStepper{T}, solver::Solver{T}) where {T <: Real} + point = solver.point + model = solver.model + rhs = stepper.rhs + dir = stepper.dir + dir_cent = stepper.dir_cent + dir_centcorr = stepper.dir_centcorr + dir_pred = stepper.dir_pred + dir_predcorr = stepper.dir_predcorr + + # update linear system solver factorization + update_lhs(solver.system_solver, solver) + + # calculate centering direction and correction + update_rhs_cent(solver, rhs) + get_directions(stepper, solver, false, iter_ref_steps = 3) + copyto!(dir_cent.vec, dir.vec) + update_rhs_centcorr(solver, rhs, dir) + get_directions(stepper, solver, false, iter_ref_steps = 3) + copyto!(dir_centcorr.vec, dir.vec) + + # calculate affine/prediction direction and correction + update_rhs_pred(solver, rhs) + get_directions(stepper, solver, true, iter_ref_steps = 3) + copyto!(dir_pred.vec, dir.vec) + update_rhs_predcorr(solver, rhs, dir) + get_directions(stepper, solver, true, iter_ref_steps = 3) + copyto!(dir_predcorr.vec, dir.vec) + + # if stepper.use_correction # TODO? + + stepper.uncorr_only = stepper.cent_only = false + alpha = search_alpha(point, model, stepper) + + if iszero(alpha) + println("trying combined without correction") + stepper.uncorr_only = true + alpha = search_alpha(point, model, stepper) + + if iszero(alpha) + # try centering direction + println("trying centering without correction") + stepper.cent_only = true + alpha = search_alpha(point, model, stepper) + + if iszero(alpha) + @warn("cannot step in centering direction") + solver.status = NumericalFailure + return false + end + + # step + @. point.vec += alpha * dir_cent.vec + else + # step + alpha_m1 = 1 - alpha + @. point.vec += alpha * dir_pred.vec + alpha_m1 * dir_cent.vec + end + else + # step + alpha_sqr = abs2(alpha) + alpha_m1 = 1 - alpha + alpha_m1sqr = abs2(alpha_m1) + @. point.vec += alpha * dir_pred.vec + alpha_sqr * dir_predcorr.vec + alpha_m1 * dir_cent.vec + alpha_m1sqr * dir_centcorr.vec + end + + stepper.prev_alpha = alpha + calc_mu(solver) + + return true +end + +expect_improvement(stepper::CombinedStepper) = true + +function update_cone_points( + alpha::T, + point::Point{T}, + stepper::CombinedStepper{T} + ) where {T <: Real} + cand = stepper.temp + dir_cent = stepper.dir_cent + dir_pred = stepper.dir_pred + + if stepper.cent_only + @assert stepper.uncorr_only + @. cand.ztsk = point.ztsk + alpha * dir_cent.ztsk + elseif stepper.uncorr_only + alpha_m1 = 1 - alpha + @. cand.ztsk = point.ztsk + alpha * dir_pred.ztsk + alpha_m1 * dir_cent.ztsk + else + dir_centcorr = stepper.dir_centcorr + dir_predcorr = stepper.dir_predcorr + alpha_sqr = abs2(alpha) + alpha_m1 = 1 - alpha + alpha_m1sqr = abs2(alpha_m1) + @. cand.ztsk = point.ztsk + alpha * dir_pred.ztsk + alpha_sqr * dir_predcorr.ztsk + alpha_m1 * dir_cent.ztsk + alpha_m1sqr * dir_centcorr.ztsk + end + + return +end + +print_header_more(stepper::CombinedStepper, solver::Solver) = @printf("%5s %9s", "step", "alpha") + +function print_iteration_more(stepper::CombinedStepper, solver::Solver) + step = (stepper.cent_only ? "cent" : (stepper.uncorr_only ? "comb" : "corr")) + @printf("%5s %9.2e", step, stepper.prev_alpha) + return +end diff --git a/src/Solvers/steppers/common.jl b/src/Solvers/steppers/common.jl index 974d2ee24..09291e9a5 100644 --- a/src/Solvers/steppers/common.jl +++ b/src/Solvers/steppers/common.jl @@ -10,13 +10,13 @@ function update_rhs_pred( rhs.x .= solver.x_residual rhs.y .= solver.y_residual rhs.z .= solver.z_residual - rhs.tau[1] = solver.point.kap[1] + solver.primal_obj_t - solver.dual_obj_t + rhs.tau[] = solver.point.kap[] + solver.primal_obj_t - solver.dual_obj_t for (s_k, d_k) in zip(rhs.s_views, solver.point.dual_views) @. s_k = -d_k end - rhs.kap[1] = -solver.point.kap[1] + rhs.kap[] = -solver.point.kap[] return rhs end @@ -45,7 +45,7 @@ function update_rhs_predcorr( dot1 = dot(corr_k, cone_k.point) dot2 = irtrtmu * dot(prim_k_scal, H_prim_dir_k) corr_viol = abs(dot1 - dot2) / (rteps + abs(dot2)) - if corr_viol < T(1e-3) # TODO tune + if corr_viol < T(1e-4) # TODO tune @. rhs.s_views[k] = H_prim_dir_k + corr_k # else # @warn("pred corr viol: $corr_viol") @@ -53,7 +53,7 @@ function update_rhs_predcorr( end # TODO NT way: - rhs.kap[1] = dir.tau[1] * dir.kap[1] / solver.point.tau[1] + rhs.kap[] = dir.tau[] * dir.kap[] / solver.point.tau[] # TODO SY way: # tau_dir_tau = dir.tau / solver.point.tau # rhs[end] = tau_dir_tau * solver.mu / solver.point.tau * (1 + tau_dir_tau) @@ -69,7 +69,7 @@ function update_rhs_cent( rhs.x .= 0 rhs.y .= 0 rhs.z .= 0 - rhs.tau[1] = 0 + rhs.tau[] = 0 rtmu = sqrt(solver.mu) for (k, cone_k) in enumerate(solver.model.cones) @@ -79,7 +79,7 @@ function update_rhs_cent( end # kap - rhs.kap[1] = -solver.point.kap[1] + solver.mu / solver.point.tau[1] + rhs.kap[] = -solver.point.kap[] + solver.mu / solver.point.tau[] return rhs end @@ -108,7 +108,7 @@ function update_rhs_centcorr( dot1 = dot(corr_k, cone_k.point) dot2 = dot(prim_k_scal, H_prim_dir_k_scal) corr_viol = abs(dot1 - dot2) / (rteps + abs(dot2)) - if corr_viol < T(1e-3) # TODO tune + if corr_viol < T(1e-4) # TODO tune rhs.s_views[k] .= corr_k # else # @warn("cent corr viol: $corr_viol") @@ -118,11 +118,39 @@ function update_rhs_centcorr( # TODO NT way: # rhs.kap = dir.tau * dir.kap / solver.point.tau # TODO SY way: - tau_dir_tau = dir.tau[1] / solver.point.tau[1] - rhs.kap[1] = tau_dir_tau * solver.mu / solver.point.tau[1] * tau_dir_tau + tau_dir_tau = dir.tau[] / solver.point.tau[] + rhs.kap[] = tau_dir_tau * solver.mu / solver.point.tau[] * tau_dir_tau return rhs end -include("heurcomb.jl") +function print_header(stepper::Stepper, solver::Solver) + @printf("\n%5s %12s %12s %9s %9s ", "iter", "p_obj", "d_obj", "abs_gap", "x_feas") + if !iszero(solver.model.p) + @printf("%9s ", "y_feas") + end + @printf("%9s %9s %9s %9s ", "z_feas", "tau", "kap", "mu") + print_header_more(stepper, solver) + println() + return +end +print_header_more(stepper::Stepper, solver::Solver) = nothing + +function print_iteration(stepper::Stepper, solver::Solver) + @printf("%5d %12.4e %12.4e %9.2e %9.2e ", + solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, solver.x_feas + ) + if !iszero(solver.model.p) + @printf("%9.2e ", solver.y_feas) + end + @printf("%9.2e %9.2e %9.2e %9.2e ", + solver.z_feas, solver.point.tau[], solver.point.kap[], solver.mu + ) + print_iteration_more(stepper, solver) + println() + return +end +print_iteration_more(stepper::Stepper, solver::Solver) = nothing + include("predorcent.jl") +include("combined.jl") diff --git a/src/Solvers/steppers/heurcomb.jl b/src/Solvers/steppers/heurcomb.jl deleted file mode 100644 index 81b8a7e85..000000000 --- a/src/Solvers/steppers/heurcomb.jl +++ /dev/null @@ -1,168 +0,0 @@ -#= -combined directions stepper -=# - -mutable struct HeurCombStepper{T <: Real} <: Stepper{T} - gamma_fun::Function - prev_pred_alpha::T - prev_alpha::T - prev_gamma::T - - rhs::Point{T} - dir::Point{T} - res::Point{T} - dir_temp::Vector{T} - dir_cent::Vector{T} - dir_centcorr::Vector{T} - dir_pred::Vector{T} - dir_predcorr::Vector{T} - - line_searcher::LineSearcher{T} - - function HeurCombStepper{T}(; - gamma_fun::Function = (a::T -> (1 - a)), - # gamma_fun::Function = (a -> abs2(1 - a)), - ) where {T <: Real} - stepper = new{T}() - stepper.gamma_fun = gamma_fun - return stepper - end -end - -# create the stepper cache -function load(stepper::HeurCombStepper{T}, solver::Solver{T}) where {T <: Real} - stepper.prev_pred_alpha = one(T) - stepper.prev_gamma = one(T) - stepper.prev_alpha = one(T) - - model = solver.model - stepper.rhs = Point(model) - stepper.dir = Point(model) - stepper.res = Point(model) - dim = length(stepper.rhs.vec) - stepper.dir_temp = zeros(T, dim) - stepper.dir_cent = zeros(T, dim) - stepper.dir_centcorr = zeros(T, dim) - stepper.dir_pred = zeros(T, dim) - stepper.dir_predcorr = zeros(T, dim) - - stepper.line_searcher = LineSearcher{T}(model) - - return stepper -end - -# original combined stepper -function step(stepper::HeurCombStepper{T}, solver::Solver{T}) where {T <: Real} - point = solver.point - model = solver.model - rhs = stepper.rhs - dir = stepper.dir - dir_cent = stepper.dir_cent - dir_centcorr = stepper.dir_centcorr - dir_pred = stepper.dir_pred - dir_predcorr = stepper.dir_predcorr - - # update linear system solver factorization - update_lhs(solver.system_solver, solver) - - # calculate centering direction and correction - update_rhs_cent(solver, rhs) - get_directions(stepper, solver, false, iter_ref_steps = 3) - copyto!(dir_cent, dir.vec) - update_rhs_centcorr(solver, rhs, dir) - get_directions(stepper, solver, false, iter_ref_steps = 3) - copyto!(dir_centcorr, dir.vec) - - # calculate affine/prediction direction and correction - update_rhs_pred(solver, rhs) - get_directions(stepper, solver, true, iter_ref_steps = 3) - copyto!(dir_pred, dir.vec) - update_rhs_predcorr(solver, rhs, dir) - get_directions(stepper, solver, true, iter_ref_steps = 3) - copyto!(dir_predcorr, dir.vec) - - # calculate centering factor gamma by finding distance pred_alpha for stepping in pred direction - copyto!(dir.vec, dir_pred) - # TODO try max_nbhd = Inf, but careful of cones with no dual feas check - stepper.prev_pred_alpha = pred_alpha = find_max_alpha(point, dir, stepper.line_searcher, model, prev_alpha = stepper.prev_pred_alpha, min_alpha = T(1e-2)) - stepper.prev_gamma = gamma = stepper.gamma_fun(pred_alpha) - - # calculate combined direction and keep in dir - gamma_alpha = gamma * pred_alpha - gamma1 = 1 - gamma - gamma1_alpha = gamma1 * pred_alpha - @. dir.vec = gamma * dir_cent + gamma_alpha * dir_centcorr + gamma1 * dir_pred + gamma1_alpha * dir_predcorr - - # find distance alpha for stepping in combined direction - alpha = find_max_alpha(point, dir, stepper.line_searcher, model, prev_alpha = stepper.prev_alpha, min_alpha = T(1e-3)) - - if iszero(alpha) - # could not step far in combined direction, so attempt a pure centering step - solver.verbose && println("performing centering step") - @. dir.vec = dir_cent + dir_centcorr - - # find distance alpha for stepping in centering direction - alpha = find_max_alpha(point, dir, stepper.line_searcher, model, prev_alpha = one(T), min_alpha = T(1e-3)) - - if iszero(alpha) - copyto!(dir.vec, dir_cent) - alpha = find_max_alpha(point, dir, stepper.line_searcher, model, prev_alpha = one(T), min_alpha = T(1e-6)) - if iszero(alpha) - @warn("numerical failure: could not step in centering direction; terminating") - solver.status = NumericalFailure - return false - end - end - end - stepper.prev_alpha = alpha - - # step - @. point.vec += alpha * dir.vec - calc_mu(solver) - - return true -end - -expect_improvement(::HeurCombStepper) = true - -function print_iteration_stats(stepper::HeurCombStepper{T}, solver::Solver{T}) where {T <: Real} - if iszero(solver.num_iters) - if iszero(solver.model.p) - @printf("\n%5s %12s %12s %9s %9s %9s %9s %9s %9s %9s %9s\n", - "iter", "p_obj", "d_obj", "abs_gap", - "x_feas", "z_feas", "tau", "kap", "mu", - "gamma", "alpha", - ) - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu - ) - else - @printf("\n%5s %12s %12s %9s %9s %9s %9s %9s %9s %9s %9s %9s\n", - "iter", "p_obj", "d_obj", "abs_gap", - "x_feas", "y_feas", "z_feas", "tau", "kap", "mu", - "gamma", "alpha", - ) - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.y_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu - ) - end - else - if iszero(solver.model.p) - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu, - stepper.prev_gamma, stepper.prev_alpha, - ) - else - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.y_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu, - stepper.prev_gamma, stepper.prev_alpha, - ) - end - end - flush(stdout) - return -end diff --git a/src/Solvers/steppers/predorcent.jl b/src/Solvers/steppers/predorcent.jl index 8f4071094..72c7e689e 100644 --- a/src/Solvers/steppers/predorcent.jl +++ b/src/Solvers/steppers/predorcent.jl @@ -6,15 +6,14 @@ mutable struct PredOrCentStepper{T <: Real} <: Stepper{T} use_correction::Bool prev_alpha::T cent_count::Int - rhs::Point{T} dir::Point{T} - res::Point{T} + temp::Point{T} + dir_nocorr::Point{T} + dir_corr::Point{T} dir_temp::Vector{T} - dir_nocorr::Vector{T} - dir_corr::Vector{T} - - line_searcher::LineSearcher{T} + step_searcher::StepSearcher{T} + uncorr_only::Bool function PredOrCentStepper{T}(; use_correction::Bool = true, @@ -25,23 +24,25 @@ mutable struct PredOrCentStepper{T <: Real} <: Stepper{T} end end -# create the stepper cache function load(stepper::PredOrCentStepper{T}, solver::Solver{T}) where {T <: Real} + model = solver.model + if stepper.use_correction && !any(Cones.use_correction, model.cones) + # model has no cones that use correction + stepper.use_correction = false + end + stepper.prev_alpha = one(T) stepper.cent_count = 0 - - model = solver.model stepper.rhs = Point(model) stepper.dir = Point(model) - stepper.res = Point(model) - dim = length(stepper.rhs.vec) - stepper.dir_temp = zeros(T, dim) + stepper.temp = Point(model) + stepper.dir_nocorr = Point(model, ztsk_only = true) # TODO probably don't need this AND dir if stepper.use_correction - stepper.dir_nocorr = zeros(T, dim) - stepper.dir_corr = zeros(T, dim) + stepper.dir_corr = Point(model, ztsk_only = true) end - - stepper.line_searcher = LineSearcher{T}(model) + stepper.dir_temp = zeros(T, length(stepper.rhs.vec)) + stepper.step_searcher = StepSearcher{T}(model) + stepper.uncorr_only = false return stepper end @@ -51,67 +52,68 @@ function step(stepper::PredOrCentStepper{T}, solver::Solver{T}) where {T <: Real model = solver.model rhs = stepper.rhs dir = stepper.dir + dir_nocorr = stepper.dir_nocorr # update linear system solver factorization update_lhs(solver.system_solver, solver) # decide whether to predict or center - is_pred = (stepper.cent_count > 3) || all(Cones.in_neighborhood.(model.cones, sqrt(solver.mu), T(0.05))) # TODO tune + is_pred = (stepper.prev_alpha > T(0.1)) && ((stepper.cent_count > 3) || all(Cones.in_neighborhood.(model.cones, sqrt(solver.mu), T(0.05)))) # TODO tune, make option stepper.cent_count = (is_pred ? 0 : stepper.cent_count + 1) - (rhs_fun_nocorr, rhs_fun_corr) = (is_pred ? (update_rhs_pred, update_rhs_predcorr) : (update_rhs_cent, update_rhs_centcorr)) + rhs_fun_nocorr = (is_pred ? update_rhs_pred : update_rhs_cent) - # get direction + # get uncorrected direction rhs_fun_nocorr(solver, rhs) get_directions(stepper, solver, is_pred, iter_ref_steps = 3) - is_corrected = false + copyto!(dir_nocorr.vec, dir.vec) # TODO maybe instead of copying, pass in the dir point we want into the directions function + alpha = zero(T) + if stepper.use_correction + # get correction direction + rhs_fun_corr = (is_pred ? update_rhs_predcorr : update_rhs_centcorr) + dir_corr = stepper.dir_corr rhs_fun_corr(solver, rhs, dir) - if !iszero(rhs.vec) - dir_nocorr = stepper.dir_nocorr - dir_corr = stepper.dir_corr - copyto!(dir_nocorr, dir.vec) + get_directions(stepper, solver, is_pred, iter_ref_steps = 3) + copyto!(dir_corr.vec, dir.vec) + + # do curve search with correction + stepper.uncorr_only = false + alpha = search_alpha(point, model, stepper, min_alpha = T(1e-2)) # TODO tune min_alpha + if iszero(alpha) + # try not using correction + @warn("very small alpha in curve search; trying without correction") + else + # step + @. point.vec += alpha * dir_nocorr.vec + abs2(alpha) * dir_corr.vec + end + end + + if iszero(alpha) + # do line search in uncorrected direction + stepper.uncorr_only = true + alpha = search_alpha(point, model, stepper, min_alpha = T(1e-2)) + if iszero(alpha) && is_pred + # do centering step instead + @warn("very small alpha in line search; trying centering") + update_rhs_cent(solver, rhs) get_directions(stepper, solver, is_pred, iter_ref_steps = 3) - copyto!(dir_corr, dir.vec) + copyto!(dir_nocorr.vec, dir.vec) + stepper.cent_count = 1 - # get nocorr alpha step length - copyto!(dir.vec, dir_nocorr) - # TODO try max_nbhd = Inf, but careful of cones with no dual feas check - # TODO change prev_alpha? - alpha_nocorr = find_max_alpha(point, dir, stepper.line_searcher, model, prev_alpha = one(T), min_alpha = T(1e-3)) - if iszero(alpha_nocorr) - # @warn("very small alpha for uncorrected direction ($alpha_nocorr)") - alpha_nocorr = T(0.5) # attempt recovery TODO tune - end + alpha = search_alpha(point, model, stepper) + end - # get direction with correction - @. dir.vec = dir_nocorr + alpha_nocorr * dir_corr - is_corrected = true + if iszero(alpha) + @warn("very small alpha in line search; terminating") + solver.status = NumericalFailure + return false end - end - # get alpha step length - # TODO change prev_alpha? - alpha = find_max_alpha(point, dir, stepper.line_searcher, model, prev_alpha = one(T), min_alpha = T(1e-3), max_nbhd = T(0.99)) - # if is_corrected && alpha < 0.99 * alpha_nocorr - # @warn("alpha worse after correction ($alpha < $alpha_nocorr)") - # # # TODO test whether this helps: - # # # step alpha_nocorr in uncorrected direction - # # alpha = alpha_nocorr - # # copyto!(dir.vec, dir_nocorr) - # end - stepper.prev_alpha = alpha - # if !is_pred && alpha < 0.99 - # @warn("small alpha for centering step") - # end - if iszero(alpha) - # TODO attempt recovery - @warn("very small alpha") - solver.status = NumericalFailure - return false + # step + @. point.vec += alpha * dir_nocorr.vec end - # step - @. point.vec += alpha * dir.vec + stepper.prev_alpha = alpha calc_mu(solver) return true @@ -119,270 +121,29 @@ end expect_improvement(stepper::PredOrCentStepper) = iszero(stepper.cent_count) -function print_iteration_stats(stepper::PredOrCentStepper{T}, solver::Solver{T}) where {T <: Real} - if iszero(solver.num_iters) - if iszero(solver.model.p) - @printf("\n%5s %12s %12s %9s %9s %9s %9s %9s %9s %5s %9s\n", - "iter", "p_obj", "d_obj", "abs_gap", - "x_feas", "z_feas", "tau", "kap", "mu", - "step", "alpha", - ) - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu - ) - else - @printf("\n%5s %12s %12s %9s %9s %9s %9s %9s %9s %9s %5s %9s\n", - "iter", "p_obj", "d_obj", "abs_gap", - "x_feas", "y_feas", "z_feas", "tau", "kap", "mu", - "step", "alpha", - ) - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.y_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu - ) - end +function update_cone_points( + alpha::T, + point::Point{T}, + stepper::PredOrCentStepper{T} + ) where {T <: Real} + cand = stepper.temp + dir_nocorr = stepper.dir_nocorr + + if stepper.uncorr_only + @. cand.ztsk = point.ztsk + alpha * dir_nocorr.ztsk else - step = (iszero(stepper.cent_count) ? "pred" : "cent") - if iszero(solver.model.p) - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %5s %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu, - step, stepper.prev_alpha, - ) - else - @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %5s %9.2e %9.2e\n", - solver.num_iters, solver.primal_obj, solver.dual_obj, solver.gap, - solver.x_feas, solver.y_feas, solver.z_feas, solver.point.tau[1], solver.point.kap[1], solver.mu, - step, stepper.prev_alpha, - ) - end + dir_corr = stepper.dir_corr + alpha_sqr = abs2(alpha) + @. cand.ztsk = point.ztsk + alpha * dir_nocorr.ztsk + alpha_sqr * dir_corr.ztsk end - flush(stdout) + return end -# # stepper using line search between cent and pred points -# function step(stepper::PredOrCentStepper{T}, solver::Solver{T}) where {T <: Real} -# point = solver.point -# -# # # TODO remove the need for this updating here - should be done in line search (some instances failing without it though) -# # rtmu = sqrt(solver.mu) -# # irtmu = inv(rtmu) -# # cones = solver.model.cones -# # Cones.load_point.(cones, point.primal_views, irtmu) -# # Cones.load_dual_point.(cones, point.dual_views) -# # Cones.reset_data.(cones) -# # @assert all(Cones.is_feas.(cones)) -# # Cones.grad.(cones) -# # Cones.hess.(cones) -# # # # @assert all(Cones.in_neighborhood.(cones, rtmu, T(0.7))) -# -# # update linear system solver factorization and helpers -# Cones.grad.(solver.model.cones) -# update_lhs(solver.system_solver, solver) -# -# z_dir = stepper.dir.z -# s_dir = stepper.dir.s -# -# # calculate centering direction and keep in dir_cent -# update_rhs_cent(stepper, solver) -# get_directions(stepper, solver, false, iter_ref_steps = 3) -# dir_cent = copy(stepper.dir) # TODO -# tau_cent = stepper.dir[stepper.tau_row] -# kap_cent = stepper.dir[stepper.kap_row] -# z_cent = copy(z_dir) -# s_cent = copy(s_dir) -# -# update_rhs_centcorr(stepper, solver) -# get_directions(stepper, solver, false, iter_ref_steps = 3) -# dir_centcorr = copy(stepper.dir) # TODO -# tau_centcorr = stepper.dir[stepper.tau_row] -# kap_centcorr = stepper.dir[stepper.kap_row] -# z_centcorr = copy(z_dir) -# s_centcorr = copy(s_dir) -# -# # calculate affine/prediction direction and keep in dir -# update_rhs_pred(stepper, solver) -# get_directions(stepper, solver, true, iter_ref_steps = 3) -# dir_pred = copy(stepper.dir) # TODO -# tau_pred = stepper.dir[stepper.tau_row] -# kap_pred = stepper.dir[stepper.kap_row] -# z_pred = copy(z_dir) -# s_pred = copy(s_dir) -# -# update_rhs_predcorr(stepper, solver) -# get_directions(stepper, solver, true, iter_ref_steps = 3) -# dir_predcorr = copy(stepper.dir) # TODO -# tau_predcorr = stepper.dir[stepper.tau_row] -# kap_predcorr = stepper.dir[stepper.kap_row] -# z_predcorr = copy(z_dir) -# s_predcorr = copy(s_dir) -# -# # TODO check cent point (step 1) is acceptable -# @. stepper.dir = dir_cent + dir_centcorr -# alpha = find_max_alpha(stepper, solver, prev_alpha = one(T), min_alpha = T(0.1)) # TODO only check end point alpha = 1 maybe -# # TODO cleanup -# if alpha < 0.99 -# # @show alpha -# @warn("could not do full step in centering-correction direction ($alpha)") -# dir_centcorr .= 0 -# stepper.dir .= 0 -# tau_centcorr = stepper.dir[stepper.tau_row] -# kap_centcorr = stepper.dir[stepper.kap_row] -# z_centcorr = copy(z_dir) -# s_centcorr = copy(s_dir) -# -# @. stepper.dir = dir_cent + dir_centcorr -# alpha = find_max_alpha(stepper, solver, prev_alpha = one(T), min_alpha = T(0.1)) -# @show alpha -# end -# -# # TODO use a smarter line search, eg bisection -# # TODO start with beta = 0.9999 for pred factor, and decrease until point satisfies nbhd -# cones = solver.model.cones -# cone_times = stepper.cone_times -# cone_order = stepper.cone_order -# z = solver.point.z -# s = solver.point.s -# tau = solver.point.tau -# kap = solver.point.kap -# # z_dir = stepper.dir.z -# # s_dir = stepper.dir.s -# # tau_dir = stepper.dir[stepper.tau_row] -# # kap_dir = stepper.dir[stepper.kap_row] -# z_ls = stepper.z_ls -# s_ls = stepper.s_ls -# primals_ls = stepper.primal_views_ls -# duals_ls = stepper.dual_views_ls -# sz_ks = zeros(T, length(cone_order)) # TODO prealloc -# tau_ls = zero(T) -# kap_ls = zero(T) -# -# nup1 = solver.model.nu + 1 -# # max_nbhd = T(0.99) -# # max_nbhd = T(0.9) -# max_nbhd = one(T) -# min_nbhd = T(0.01) -# -# # beta_schedule = T[0.9999, 0.99, 0.97, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0] -# # beta = zero(T) -# beta = max(T(0.1), min(stepper.prev_gamma * T(1.4), one(T))) # TODO option for parameter -# beta_decrease = T(0.95) -# beta *= T(0.9999) -# beta /= beta_decrease -# -# iter_ls = 0 -# in_nbhd = false -# -# # while iter_ls < length(beta_schedule) -# while beta > 0 -# in_nbhd = false -# iter_ls += 1 -# -# # beta = beta_schedule[iter_ls] -# beta *= beta_decrease -# if beta < T(0.01) -# beta = zero(T) # pure centering -# end -# betam1 = 1 - beta -# -# # TODO shouldn't need to reduce corr on cent? -# tau_ls = tau + betam1 * (tau_cent + betam1 * tau_centcorr) + beta * (tau_pred + beta * tau_predcorr) -# kap_ls = kap + betam1 * (kap_cent + betam1 * kap_centcorr) + beta * (kap_pred + beta * kap_predcorr) -# taukap_ls = tau_ls * kap_ls -# (tau_ls < eps(T) || kap_ls < eps(T) || taukap_ls < eps(T)) && continue -# -# # order the cones by how long it takes to check neighborhood condition and iterate in that order, to improve efficiency -# sortperm!(cone_order, cone_times, initialized = true) # NOTE stochastic -# -# @. z_ls = z + betam1 * (z_cent + betam1 * z_centcorr) + beta * (z_pred + beta * z_predcorr) -# @. s_ls = s + betam1 * (s_cent + betam1 * s_centcorr) + beta * (s_pred + beta * s_predcorr) -# -# for k in cone_order -# sz_ks[k] = dot(primals_ls[k], duals_ls[k]) -# end -# any(<(eps(T)), sz_ks) && continue -# -# mu_ls = (sum(sz_ks) + taukap_ls) / nup1 -# (mu_ls < eps(T)) && continue -# -# # TODO experiment with SY nbhd for tau-kappa -# (abs(taukap_ls - mu_ls) > max_nbhd * mu_ls) && continue -# -# min_nbhd_mu = min_nbhd * mu_ls -# (taukap_ls < min_nbhd_mu) && continue -# any(sz_ks[k] < min_nbhd_mu * Cones.get_nu(cones[k]) for k in cone_order) && continue -# -# rtmu = sqrt(mu_ls) -# irtmu = inv(rtmu) -# in_nbhd = true -# for k in cone_order -# cone_k = cones[k] -# time_k = time_ns() -# -# Cones.load_point(cone_k, primals_ls[k], irtmu) -# Cones.load_dual_point(cone_k, duals_ls[k]) -# Cones.reset_data(cone_k) -# -# in_nbhd_k = (Cones.is_feas(cone_k) && Cones.is_dual_feas(cone_k) && Cones.in_neighborhood(cone_k, rtmu, max_nbhd)) -# # in_nbhd_k = (Cones.is_feas(cone_k) && Cones.is_dual_feas(cone_k) && (isinf(max_nbhd) || Cones.in_neighborhood(cone_k, rtmu, max_nbhd))) -# # TODO is_dual_feas function should fall back to a nbhd-like check (for ray maybe) if not using nbhd check -# # in_nbhd_k = (Cones.is_feas(cone_k) && Cones.is_dual_feas(cone_k)) -# -# cone_times[k] = time_ns() - time_k -# if !in_nbhd_k -# in_nbhd = false -# break -# end -# end -# in_nbhd && break -# iszero(beta) && break -# end -# # @show iter_ls -# -# # TODO if zero not feasible, do backwards line search -# if !in_nbhd -# @show beta -# -# copyto!(stepper.dir, dir_cent) -# -# alpha = find_max_alpha(stepper, solver, prev_alpha = one(T), min_alpha = T(1e-3)) -# if iszero(alpha) -# @warn("numerical failure: could not step in centering direction; terminating") -# solver.status = NumericalFailure -# return false -# end -# stepper.prev_alpha = alpha -# -# # step distance alpha in combined direction -# @. point.x += alpha * stepper.dir.x -# @. point.y += alpha * stepper.dir.y -# @. point.z += alpha * stepper.dir.z -# @. point.s += alpha * stepper.dir.s -# solver.point.tau += alpha * stepper.dir[stepper.tau_row] -# solver.point.kap += alpha * stepper.dir[stepper.kap_row] -# else -# stepper.prev_gamma = gamma = beta # TODO -# -# # step to combined point -# copyto!(point.z, z_ls) -# copyto!(point.s, s_ls) -# solver.point.tau = tau_ls -# solver.point.kap = kap_ls -# -# # TODO improve -# betam1 = 1 - beta -# @. stepper.dir = betam1 * (dir_cent + betam1 * dir_centcorr) # TODO shouldn't need to reduce corr on cent -# # @. stepper.dir = betam1 * (dir_cent + dir_centcorr) -# @. point.x += stepper.dir.x -# @. point.y += stepper.dir.y -# @. stepper.dir = beta * (dir_pred + beta * dir_predcorr) -# @. point.x += stepper.dir.x -# @. point.y += stepper.dir.y -# end -# -# calc_mu(solver) -# -# return true -# end +print_header_more(stepper::PredOrCentStepper, solver::Solver) = @printf("%5s %9s", "step", "alpha") + +function print_iteration_more(stepper::PredOrCentStepper, solver::Solver) + step = (iszero(stepper.cent_count) ? "pred" : "cent") + @printf("%5s %9.2e", step, stepper.prev_alpha) + return +end diff --git a/src/Solvers/systemsolvers/common.jl b/src/Solvers/systemsolvers/common.jl index adb06539c..7de57d111 100644 --- a/src/Solvers/systemsolvers/common.jl +++ b/src/Solvers/systemsolvers/common.jl @@ -21,11 +21,11 @@ function get_directions( rhs = stepper.rhs dir = stepper.dir dir_temp = stepper.dir_temp - res = stepper.res + res = stepper.temp system_solver = solver.system_solver - tau = solver.point.tau[1] - tau_scal = (use_nt ? solver.point.kap[1] : solver.mu / tau) / tau + tau = solver.point.tau[] + tau_scal = (use_nt ? solver.point.kap[] : solver.mu / tau) / tau solve_system(system_solver, solver, dir, rhs, tau_scal) @@ -73,9 +73,9 @@ function apply_lhs( ) where {T <: Real} model = solver.model dir = stepper.dir - res = stepper.res - tau_dir = dir.tau[1] - kap_dir = dir.kap[1] + res = stepper.temp + tau_dir = dir.tau[] + kap_dir = dir.kap[] # A'*y + G'*z + c*tau copyto!(res.x, model.c) @@ -84,7 +84,7 @@ function apply_lhs( @. res.z = model.h * tau_dir - dir.s mul!(res.z, model.G, dir.x, -1, true) # -c'*x - b'*y - h'*z - kap - res.tau[1] = -dot(model.c, dir.x) - dot(model.h, dir.z) - kap_dir + res.tau[] = -dot(model.c, dir.x) - dot(model.h, dir.z) - kap_dir # if p = 0, ignore A, b, y if !iszero(model.p) # A'*y + G'*z + c*tau @@ -93,7 +93,7 @@ function apply_lhs( copyto!(res.y, model.b) mul!(res.y, model.A, dir.x, -1, tau_dir) # -c'*x - b'*y - h'*z - kap - res.tau[1] -= dot(model.b, dir.y) + res.tau[] -= dot(model.b, dir.y) end # s @@ -105,9 +105,9 @@ function apply_lhs( @. s_res_k += dir.dual_views[k] end - res.kap[1] = tau_scal * tau_dir + kap_dir + res.kap[] = tau_scal * tau_dir + kap_dir - return stepper.res + return res end include("naive.jl") @@ -145,7 +145,7 @@ function solve_system( model = solver.model solve_subsystem4(system_solver, solver, sol, rhs, tau_scal) - tau = sol.tau[1] + tau = sol.tau[] # lift to get s and kap # s = -G*x + h*tau - zrhs @@ -153,7 +153,7 @@ function solve_system( mul!(sol.s, model.G, sol.x, -1, true) # kap = -kapbar/taubar*tau + kaprhs - sol.kap[1] = -tau_scal * tau + rhs.kap[1] + sol.kap[] = -tau_scal * tau + rhs.kap[] return sol end @@ -178,13 +178,13 @@ function solve_subsystem4( # lift to get tau sol_const = system_solver.sol_const - tau_num = rhs.tau[1] + rhs.kap[1] + dot_obj(model, sol_sub) + tau_num = rhs.tau[] + rhs.kap[] + dot_obj(model, sol_sub) tau_denom = tau_scal - dot_obj(model, sol_const) sol_tau = tau_num / tau_denom dim3 = length(sol_sub.vec) @. @views sol.vec[1:dim3] = sol_sub.vec + sol_tau * sol_const.vec - sol.tau[1] = sol_tau + sol.tau[] = sol_tau return sol end @@ -193,6 +193,7 @@ function setup_point_sub(system_solver::Union{QRCholSystemSolver{T}, SymIndefSys (n, p, q) = (model.n, model.p, model.q) dim_sub = n + p + q z_start = model.n + model.p + sol_sub = system_solver.sol_sub = Point{T}() rhs_sub = system_solver.rhs_sub = Point{T}() rhs_const = system_solver.rhs_const = Point{T}() @@ -207,6 +208,7 @@ function setup_point_sub(system_solver::Union{QRCholSystemSolver{T}, SymIndefSys @. rhs_const.x = -model.c @. rhs_const.y = model.b @. rhs_const.z = model.h + return nothing end diff --git a/src/Solvers/systemsolvers/naive.jl b/src/Solvers/systemsolvers/naive.jl index e5cd7e1bc..c4a20ff97 100644 --- a/src/Solvers/systemsolvers/naive.jl +++ b/src/Solvers/systemsolvers/naive.jl @@ -110,7 +110,7 @@ function update_lhs(system_solver::NaiveSparseSystemSolver, solver::Solver) @views copyto!(system_solver.lhs.nzval[system_solver.hess_idxs[k][j]], H_k[nz_rows, j]) end end - tau = solver.point.tau[1] + tau = solver.point.tau[] system_solver.lhs.nzval[system_solver.mtt_idx] = solver.mu / tau / tau # NOTE: mismatch when using NT for kaptau update_fact(system_solver.fact_cache, system_solver.lhs) @@ -177,7 +177,7 @@ function update_lhs(system_solver::NaiveDenseSystemSolver, solver::Solver) for (cone_k, lhs_H_k) in zip(solver.model.cones, system_solver.lhs_H_k) copyto!(lhs_H_k, Cones.hess(cone_k)) end - tau = solver.point.tau[1] + tau = solver.point.tau[] system_solver.lhs[end, system_solver.tau_row] = solver.mu / tau / tau # NOTE: mismatch when using NT for kaptau update_fact(system_solver.fact_cache, system_solver.lhs) diff --git a/src/Solvers/systemsolvers/naiveelim.jl b/src/Solvers/systemsolvers/naiveelim.jl index f24aa7ef8..77a7d389b 100644 --- a/src/Solvers/systemsolvers/naiveelim.jl +++ b/src/Solvers/systemsolvers/naiveelim.jl @@ -61,7 +61,7 @@ function solve_subsystem4( end end # -c'x - b'y - h'z + kapbar/taubar*tau = taurhs + kaprhs - rhs_sub.vec[end] += rhs.kap[1] + rhs_sub.vec[end] += rhs.kap[] sol_sub = system_solver.sol_sub solve_inner_system(system_solver, sol_sub, rhs_sub) @@ -187,7 +187,7 @@ function update_lhs(system_solver::NaiveElimSparseSystemSolver, solver::Solver) @views copyto!(system_solver.lhs_sub.nzval[system_solver.hess_idxs[k][j]], H_k[nz_rows, j]) end end - tau = solver.point.tau[1] + tau = solver.point.tau[] system_solver.lhs_sub.nzval[end] = solver.mu / tau / tau # NOTE: mismatch when using NT for kaptau update_fact(system_solver.fact_cache, system_solver.lhs_sub) @@ -254,7 +254,7 @@ function update_lhs(system_solver::NaiveElimDenseSystemSolver{T}, solver::Solver @views Cones.hess_prod!(lhs_sub[z_rows_k, end], model.h[idxs_k], cone_k) end end - tau = solver.point.tau[1] + tau = solver.point.tau[] lhs_sub[end, end] = solver.mu / tau / tau # NOTE: mismatch when using NT for kaptau update_fact(system_solver.fact_cache, system_solver.lhs_sub) diff --git a/src/Solvers/systemsolvers/qrchol.jl b/src/Solvers/systemsolvers/qrchol.jl index d317b2961..d1213afff 100644 --- a/src/Solvers/systemsolvers/qrchol.jl +++ b/src/Solvers/systemsolvers/qrchol.jl @@ -248,7 +248,7 @@ function update_lhs(system_solver::QRCholDenseSystemSolver{T}, solver::Solver{T} load_matrix(system_solver.fact_cache, system_solver.lhs1) else # attempt recovery - increase_diag!(system_solver.lhs1) + increase_diag!(system_solver.lhs1.data) end if !update_fact(system_solver.fact_cache, system_solver.lhs1) # attempt recovery # TODO make more efficient diff --git a/test/barrier.jl b/test/barrier.jl index 599e9af0f..d8f75b4c3 100644 --- a/test/barrier.jl +++ b/test/barrier.jl @@ -99,10 +99,12 @@ function test_grad_hess(cone::Cones.Cone{T}, point::Vector{T}, dual_point::Vecto @test Cones.hess_prod!(prod, point, cone) ≈ -grad atol=tol rtol=tol @test Cones.inv_hess_prod!(prod, grad, cone) ≈ -point atol=tol rtol=tol - prod_mat2 = Matrix(Cones.hess_sqrt_prod!(prod_mat, inv_hess, cone)') - @test Cones.hess_sqrt_prod!(prod_mat, prod_mat2, cone) ≈ I atol=tol rtol=tol - Cones.inv_hess_sqrt_prod!(prod_mat2, Matrix(one(T) * I, dim, dim), cone) - @test prod_mat2' * prod_mat2 ≈ inv_hess atol=tol rtol=tol + if Cones.use_sqrt_oracles(cone) + prod_mat2 = Matrix(Cones.hess_sqrt_prod!(prod_mat, inv_hess, cone)') + @test Cones.hess_sqrt_prod!(prod_mat, prod_mat2, cone) ≈ I atol=tol rtol=tol + Cones.inv_hess_sqrt_prod!(prod_mat2, Matrix(one(T) * I, dim, dim), cone) + @test prod_mat2' * prod_mat2 ≈ inv_hess atol=tol rtol=tol + end return end @@ -199,16 +201,16 @@ function test_hypoperlog_barrier(T::Type{<:Real}) return end -function test_episumperentropy_barrier(T::Type{<:Real}) +function test_epirelentropy_barrier(T::Type{<:Real}) function barrier(s) (u, v, w) = (s[1], s[2:2:(end - 1)], s[3:2:end]) return -log(u - sum(wi * log(wi / vi) for (vi, wi) in zip(v, w))) - sum(log(vi) + log(wi) for (vi, wi) in zip(v, w)) end for w_dim in [1, 2, 3] - test_barrier_oracles(Cones.EpiSumPerEntropy{T}(1 + 2 * w_dim), barrier, init_tol = 1e-5) + test_barrier_oracles(Cones.EpiRelEntropy{T}(1 + 2 * w_dim), barrier, init_tol = 1e-5) end for w_dim in [15, 65, 75, 100] - test_barrier_oracles(Cones.EpiSumPerEntropy{T}(1 + 2 * w_dim), barrier, init_tol = 1e-1, init_only = true) + test_barrier_oracles(Cones.EpiRelEntropy{T}(1 + 2 * w_dim), barrier, init_tol = 1e-1, init_only = true) end return end @@ -280,8 +282,8 @@ end function test_epitracerelentropytri_barrier(T::Type{<:Real}) rt2 = sqrt(T(2)) for side in [1, 2, 3, 8, 12] - @show side svec_dim = Cones.svec_length(side) + cone = Cones.EpiTraceRelEntropyTri{T}(2 * svec_dim + 1) function barrier(s) u = s[1] u = s[1] @@ -290,9 +292,9 @@ function test_epitracerelentropytri_barrier(T::Type{<:Real}) return -log(u - tr(W * logm(W) - W * logm(V))) - logdet(V) - logdet(W) end if side <= 3 - test_barrier_oracles(Cones.EpiTraceRelEntropyTri{T}(2 * svec_dim + 1), barrier, init_tol = 1e-5) + test_barrier_oracles(cone, barrier, init_tol = 1e-5) else - test_barrier_oracles(Cones.EpiTraceRelEntropyTri{T}(2 * svec_dim + 1), barrier, init_tol = 1e-1, init_only = true) + test_barrier_oracles(cone, barrier, init_tol = 1e-1, init_only = true) end end return diff --git a/test/moi.jl b/test/moi.jl index 7bb4acf33..a4455493c 100644 --- a/test/moi.jl +++ b/test/moi.jl @@ -47,7 +47,7 @@ conic_exclude = String[ function test_moi(T::Type{<:Real}; solver_options...) optimizer = MOIU.CachingOptimizer(MOIU.UniversalFallback(MOIU.Model{T}()), Hypatia.Optimizer{T}(; solver_options...)) - tol = sqrt(sqrt(eps(T))) # TODO remove Float64, waiting for MOI to be tagged after https://github.com/jump-dev/MathOptInterface.jl/pull/1176 + tol = 2sqrt(sqrt(eps(T))) config = MOIT.TestConfig{T}( atol = tol, rtol = tol, diff --git a/test/moicones.jl b/test/moicones.jl index a6535665c..939e5c464 100644 --- a/test/moicones.jl +++ b/test/moicones.jl @@ -93,7 +93,7 @@ function test_moi_cones(T::Type{<:Real}) @testset "RelativeEntropyCone" begin moi_cone = MOI.RelativeEntropyCone(3) hyp_cone = Hypatia.cone_from_moi(T, moi_cone) - @test hyp_cone isa Cones.EpiSumPerEntropy{T} + @test hyp_cone isa Cones.EpiRelEntropy{T} @test MOI.dimension(moi_cone) == Cones.dimension(hyp_cone) == 3 @test !Cones.use_dual_barrier(hyp_cone) end @@ -180,10 +180,10 @@ function test_moi_cones(T::Type{<:Real}) @test MOI.dimension(moi_cone) == Cones.dimension(hyp_cone) == 4 end - @testset "EpiSumPerEntropy" begin - moi_cone = Hypatia.EpiSumPerEntropyCone{T}(5) + @testset "EpiRelEntropy" begin + moi_cone = Hypatia.EpiRelEntropyCone{T}(5) hyp_cone = Hypatia.cone_from_moi(T, moi_cone) - @test hyp_cone isa Cones.EpiSumPerEntropy{T} + @test hyp_cone isa Cones.EpiRelEntropy{T} @test MOI.dimension(moi_cone) == Cones.dimension(hyp_cone) == 5 end diff --git a/test/nativeinstances.jl b/test/nativeinstances.jl index ae4ae85c0..420e6c3ba 100644 --- a/test/nativeinstances.jl +++ b/test/nativeinstances.jl @@ -480,7 +480,7 @@ function epipersquare4(T; options...) end # TODO add use_dual = true tests -function episumperentropy1(T; options...) +function epirelentropy1(T; options...) tol = test_tol(T) Random.seed!(1) for w_dim in [1, 2, 3] @@ -494,7 +494,7 @@ function episumperentropy1(T; options...) h[2:2:(end - 1)] .= 1 w = rand(T, w_dim) .+ 1 h[3:2:end] .= w - cones = Cone{T}[Cones.EpiSumPerEntropy{T}(dim)] + cones = Cone{T}[Cones.EpiRelEntropy{T}(dim)] r = build_solve_check(c, A, b, G, h, cones, tol; options...) @test r.status == Solvers.Optimal @@ -502,7 +502,7 @@ function episumperentropy1(T; options...) end end -function episumperentropy2(T; options...) +function epirelentropy2(T; options...) tol = test_tol(T) for w_dim in [1, 2, 4] dim = 1 + 2 * w_dim @@ -515,7 +515,7 @@ function episumperentropy2(T; options...) end h = zeros(T, dim) h[2:2:(dim - 1)] .= 1 - cones = Cone{T}[Cones.EpiSumPerEntropy{T}(dim)] + cones = Cone{T}[Cones.EpiRelEntropy{T}(dim)] r = build_solve_check(c, A, b, G, h, cones, tol; options...) @test r.status == Solvers.Optimal @@ -524,7 +524,7 @@ function episumperentropy2(T; options...) end end -function episumperentropy3(T; options...) +function epirelentropy3(T; options...) tol = test_tol(T) for w_dim in [2, 4] dim = 1 + 2 * w_dim @@ -537,7 +537,7 @@ function episumperentropy3(T; options...) end h = zeros(T, dim) h[3:2:end] .= 1 - cones = Cone{T}[Cones.EpiSumPerEntropy{T}(dim)] + cones = Cone{T}[Cones.EpiRelEntropy{T}(dim)] r = build_solve_check(c, A, b, G, h, cones, tol; options...) @test r.status == Solvers.Optimal @@ -546,14 +546,14 @@ function episumperentropy3(T; options...) end end -function episumperentropy4(T; options...) +function epirelentropy4(T; options...) tol = test_tol(T) c = T[1] A = zeros(T, 0, 1) b = zeros(T, 0) G = Matrix{T}(-I, 5, 1) h = T[0, 1, 2, 5, 3] - cones = Cone{T}[Cones.EpiSumPerEntropy{T}(5)] + cones = Cone{T}[Cones.EpiRelEntropy{T}(5)] r = build_solve_check(c, A, b, G, h, cones, tol; options...) @test r.status == Solvers.Optimal @@ -563,14 +563,14 @@ function episumperentropy4(T; options...) @test r.z ≈ [1, 2, log(inv(T(2))) - 1, 3 / T(5), log(5 / T(3)) - 1] atol=tol rtol=tol end -function episumperentropy5(T; options...) +function epirelentropy5(T; options...) tol = test_tol(T) c = T[0, -1] A = zeros(T, 0, 2) b = zeros(T, 0) G = vcat(zeros(T, 1, 2), repeat(T[0 0; -1 -1], 3), [-1, 0]') h = T[0, 1, 0, 1, 0, 1, 0, 0] - cones = Cone{T}[Cones.EpiSumPerEntropy{T}(7), Cones.Nonnegative{T}(1)] + cones = Cone{T}[Cones.EpiRelEntropy{T}(7), Cones.Nonnegative{T}(1)] r = build_solve_check(c, A, b, G, h, cones, tol; options...) @test r.status == Solvers.Optimal @@ -579,97 +579,6 @@ function episumperentropy5(T; options...) @test r.z ≈ inv(T(3)) * [1, 1, -1, 1, -1, 1, -1, 3] atol=tol rtol=tol end -function epitracerelentropytri1(T; options...) - tol = sqrt(sqrt(eps(T))) - Random.seed!(1) - rt2 = sqrt(T(2)) - side = 4 - svec_dim = Cones.svec_length(side) - cone_dim = 2 * svec_dim + 1 - c = T[1] - A = zeros(T, 0, 1) - b = T[] - W = rand(T, side, side) - W = W * W' - V = rand(T, side, side) - V = V * V' - h = vcat(zero(T), Cones.smat_to_svec!(zeros(T, svec_dim), V, rt2), Cones.smat_to_svec!(zeros(T, svec_dim), W, rt2)) - G = zeros(T, cone_dim, 1) - G[1, 1] = -1 - cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] - - r = build_solve_check(c, A, b, G, h, cones, tol; options...) - @test r.status == Solvers.Optimal - # TODO need https://github.com/JuliaLinearAlgebra/GenericLinearAlgebra.jl/issues/51 to use log with BF - (vals_V, vecs_V) = eigen(Hermitian(V, :U)) - (vals_W, vecs_W) = eigen(Hermitian(W, :U)) - log_V = vecs_V * Diagonal(log.(vals_V)) * vecs_V' - log_W = vecs_W * Diagonal(log.(vals_W)) * vecs_W' - @test r.primal_obj ≈ tr(W * log_W - W * log_V) atol=tol rtol=tol -end - -function epitracerelentropytri2(T; options...) - tol = sqrt(sqrt(eps(T))) - Random.seed!(1) - rt2 = sqrt(T(2)) - side = 3 - svec_dim = Cones.svec_length(side) - cone_dim = 2 * svec_dim + 1 - c = vcat(zeros(T, svec_dim), -ones(T, svec_dim)) - A = hcat(Matrix{T}(I, svec_dim, svec_dim), zeros(T, svec_dim, svec_dim)) - svec_I = Cones.smat_to_svec!(zeros(T, svec_dim), Matrix{T}(I, side, side), rt2) - b = svec_I - h = vcat(T(5), zeros(T, 2 * svec_dim)) - G = vcat(zeros(T, 1, 2 * svec_dim), ModelUtilities.vec_to_svec!(Diagonal(-one(T) * I, 2 * svec_dim))) - cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] - - r = build_solve_check(c, A, b, G, h, cones, tol; options...) - @test r.status == Solvers.Optimal - W = Hermitian(Cones.svec_to_smat!(zeros(T, side, side), r.s[(svec_dim + 2):end], rt2), :U) - @test tr(W * log(W)) ≈ T(5) atol=tol rtol=tol -end - -function epitracerelentropytri3(T; options...) - tol = sqrt(sqrt(eps(T))) - Random.seed!(1) - rt2 = sqrt(T(2)) - side = 3 - svec_dim = Cones.svec_length(side) - cone_dim = 2 * svec_dim + 1 - c = vcat(zeros(T, svec_dim + 1), ones(T, svec_dim)) - A = hcat(one(T), zeros(T, 1, 2 * svec_dim)) - b = [zero(T)] - h = zeros(T, cone_dim) - G = Diagonal(-one(T) * I, cone_dim) - cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] - - r = build_solve_check(c, A, b, G, h, cones, tol; options...) - @test r.status == Solvers.Optimal - @test r.primal_obj ≈ zero(T) atol=tol rtol=tol - @test r.s[1] ≈ zero(T) atol=tol rtol=tol - @test r.s[(svec_dim + 2):end] ≈ zeros(T, svec_dim) atol=tol rtol=tol -end - -function epitracerelentropytri4(T; options...) - tol = sqrt(sqrt(eps(T))) - Random.seed!(1) - rt2 = sqrt(T(2)) - side = 3 - svec_dim = Cones.svec_length(side) - cone_dim = 2 * svec_dim + 1 - c = vcat(zero(T), ones(T, svec_dim), zeros(T, svec_dim)) - A = hcat(one(T), zeros(T, 1, 2 * svec_dim)) - b = [zero(T)] - h = zeros(T, cone_dim) - G = Diagonal(-one(T) * I, cone_dim) - cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] - - r = build_solve_check(c, A, b, G, h, cones, tol; options...) - @test r.status == Solvers.Optimal - @test r.primal_obj ≈ zero(T) atol=tol rtol=tol - @test r.s ≈ zeros(T, cone_dim) atol=tol rtol=tol -end - function hypoperlog1(T; options...) tol = test_tol(T) Texph = exp(T(0.5)) @@ -1970,6 +1879,97 @@ function hyporootdettri4(T; options...) @test r.z ≈ [-1, 0.5, 0, 0.5, 0.5, 0.5] atol=tol rtol=tol end +function epitracerelentropytri1(T; options...) + tol = sqrt(sqrt(eps(T))) + Random.seed!(1) + rt2 = sqrt(T(2)) + side = 4 + svec_dim = Cones.svec_length(side) + cone_dim = 2 * svec_dim + 1 + c = T[1] + A = zeros(T, 0, 1) + b = T[] + W = rand(T, side, side) + W = W * W' + V = rand(T, side, side) + V = V * V' + h = vcat(zero(T), Cones.smat_to_svec!(zeros(T, svec_dim), V, rt2), Cones.smat_to_svec!(zeros(T, svec_dim), W, rt2)) + G = zeros(T, cone_dim, 1) + G[1, 1] = -1 + cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] + + r = build_solve_check(c, A, b, G, h, cones, tol; options...) + @test r.status == Solvers.Optimal + # TODO need https://github.com/JuliaLinearAlgebra/GenericLinearAlgebra.jl/issues/51 to use log with BF + (vals_V, vecs_V) = eigen(Hermitian(V, :U)) + (vals_W, vecs_W) = eigen(Hermitian(W, :U)) + log_V = vecs_V * Diagonal(log.(vals_V)) * vecs_V' + log_W = vecs_W * Diagonal(log.(vals_W)) * vecs_W' + @test r.primal_obj ≈ tr(W * log_W - W * log_V) atol=tol rtol=tol +end + +function epitracerelentropytri2(T; options...) + tol = sqrt(sqrt(eps(T))) + Random.seed!(1) + rt2 = sqrt(T(2)) + side = 3 + svec_dim = Cones.svec_length(side) + cone_dim = 2 * svec_dim + 1 + c = vcat(zeros(T, svec_dim), -ones(T, svec_dim)) + A = hcat(Matrix{T}(I, svec_dim, svec_dim), zeros(T, svec_dim, svec_dim)) + svec_I = Cones.smat_to_svec!(zeros(T, svec_dim), Matrix{T}(I, side, side), rt2) + b = svec_I + h = vcat(T(5), zeros(T, 2 * svec_dim)) + G = vcat(zeros(T, 1, 2 * svec_dim), ModelUtilities.vec_to_svec!(Diagonal(-one(T) * I, 2 * svec_dim))) + cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] + + r = build_solve_check(c, A, b, G, h, cones, tol; options...) + @test r.status == Solvers.Optimal + W = Hermitian(Cones.svec_to_smat!(zeros(T, side, side), r.s[(svec_dim + 2):end], rt2), :U) + @test tr(W * log(W)) ≈ T(5) atol=tol rtol=tol +end + +function epitracerelentropytri3(T; options...) + tol = sqrt(sqrt(eps(T))) + Random.seed!(1) + rt2 = sqrt(T(2)) + side = 3 + svec_dim = Cones.svec_length(side) + cone_dim = 2 * svec_dim + 1 + c = vcat(zeros(T, svec_dim + 1), ones(T, svec_dim)) + A = hcat(one(T), zeros(T, 1, 2 * svec_dim)) + b = [zero(T)] + h = zeros(T, cone_dim) + G = Diagonal(-one(T) * I, cone_dim) + cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] + + r = build_solve_check(c, A, b, G, h, cones, tol; options...) + @test r.status == Solvers.Optimal + @test r.primal_obj ≈ zero(T) atol=tol rtol=tol + @test r.s[1] ≈ zero(T) atol=tol rtol=tol + @test r.s[(svec_dim + 2):end] ≈ zeros(T, svec_dim) atol=tol rtol=tol +end + +function epitracerelentropytri4(T; options...) + tol = sqrt(sqrt(eps(T))) + Random.seed!(1) + rt2 = sqrt(T(2)) + side = 3 + svec_dim = Cones.svec_length(side) + cone_dim = 2 * svec_dim + 1 + c = vcat(zero(T), ones(T, svec_dim), zeros(T, svec_dim)) + A = hcat(one(T), zeros(T, 1, 2 * svec_dim)) + b = [zero(T)] + h = zeros(T, cone_dim) + G = Diagonal(-one(T) * I, cone_dim) + cones = Cone{T}[Cones.EpiTraceRelEntropyTri{T}(cone_dim)] + + r = build_solve_check(c, A, b, G, h, cones, tol; options...) + @test r.status == Solvers.Optimal + @test r.primal_obj ≈ zero(T) atol=tol rtol=tol + @test r.s ≈ zeros(T, cone_dim) atol=tol rtol=tol +end + function wsosinterpnonnegative1(T; options...) tol = test_tol(T) (U, pts, Ps) = ModelUtilities.interpolate(ModelUtilities.Box{T}(-zeros(T, 2), ones(T, 2)), 2) diff --git a/test/nativesets.jl b/test/nativesets.jl index 47c8452f5..eb09c9d02 100644 --- a/test/nativesets.jl +++ b/test/nativesets.jl @@ -26,7 +26,7 @@ inst_cones_few = [ "epinorminf6", "epinormeucl1", "epipersquare1", - "episumperentropy4", + "epirelentropy4", "hypoperlog1", "power1", "hypogeomean1", @@ -41,6 +41,7 @@ inst_cones_few = [ "doublynonnegativetri1", "hypoperlogdettri1", "hyporootdettri1", + "epitracerelentropytri1", "wsosinterpnonnegative1", "wsosinterppossemideftri1", "wsosinterpepinormeucl2", @@ -68,11 +69,11 @@ inst_cones_many = [ "epipersquare2", "epipersquare3", "epipersquare4", - "episumperentropy1", - "episumperentropy2", - "episumperentropy3", - "episumperentropy4", - "episumperentropy5", + "epirelentropy1", + "epirelentropy2", + "epirelentropy3", + "epirelentropy4", + "epirelentropy5", "hypoperlog1", "hypoperlog2", "hypoperlog3", @@ -123,10 +124,6 @@ inst_cones_many = [ "matrixepipersquare1", "matrixepipersquare2", "matrixepipersquare3", - "epitracerelentropytri1", - "epitracerelentropytri2", - "epitracerelentropytri3", - "epitracerelentropytri4", "hypoperlogdettri1", "hypoperlogdettri2", "hypoperlogdettri3", @@ -135,6 +132,10 @@ inst_cones_many = [ "hyporootdettri2", "hyporootdettri3", "hyporootdettri4", + "epitracerelentropytri1", + "epitracerelentropytri2", + "epitracerelentropytri3", + "epitracerelentropytri4", "wsosinterpnonnegative1", "wsosinterpnonnegative2", "wsosinterpnonnegative3", diff --git a/test/runbarriertests.jl b/test/runbarriertests.jl index 591927e58..e53b69ec1 100644 --- a/test/runbarriertests.jl +++ b/test/runbarriertests.jl @@ -11,8 +11,7 @@ barrier_test_names = [ "epinorminf", "epinormeucl", "epipersquare", - "episumperentropy", - "epitracerelentropytri", + "epirelentropy", "hypoperlog", "power", "hypopowermean", @@ -25,6 +24,7 @@ barrier_test_names = [ "matrixepipersquare", "hypoperlogdettri", "hyporootdettri", + "epitracerelentropytri", "wsosinterpnonnegative", "wsosinterpepinormone", "wsosinterpepinormeucl", diff --git a/test/runexamplestests.jl b/test/runexamplestests.jl index 5d527ee51..db7bd095a 100644 --- a/test/runexamplestests.jl +++ b/test/runexamplestests.jl @@ -19,8 +19,9 @@ default_options = ( verbose = false, # verbose = true, default_tol_relax = 10, - # stepper = Solvers.HeurCombStepper{Float64}() - # stepper = Solvers.PredOrCentStepper{Float64}() + stepper = Solvers.CombinedStepper{Float64}(), + # stepper = Solvers.PredOrCentStepper{Float64}(), + iter_limit = 250, ) # instance sets and real types to run and corresponding time limits (seconds) diff --git a/test/runmoitests.jl b/test/runmoitests.jl index 0a01a0016..c67157cb7 100644 --- a/test/runmoitests.jl +++ b/test/runmoitests.jl @@ -24,21 +24,21 @@ include(joinpath(@__DIR__, "moi.jl")) end end -default_options = ( - # verbose = true, - verbose = false, - default_tol_relax = 2, - ) - @testset "MOI.Test tests" begin println("\nstarting MOI.Test tests") options = [ - (Float64, false), - (Float64, true), - (Float32, true), - (BigFloat, true), + # (Float64, Solvers.PredOrCentStepper, false), # TODO + (Float64, Solvers.CombinedStepper, true), + (Float32, Solvers.CombinedStepper, true), + (BigFloat, Solvers.PredOrCentStepper, true), ] - for (T, use_dense_model) in options + for (T, stepper, use_dense_model) in options + default_options = ( + # verbose = true, + verbose = false, + default_tol_relax = 2, + stepper = stepper{T}(), + ) test_info = "$T, $use_dense_model" @testset "$test_info" begin println(test_info, " ...") diff --git a/test/runnativetests.jl b/test/runnativetests.jl index 7355162e8..92bb6b00e 100644 --- a/test/runnativetests.jl +++ b/test/runnativetests.jl @@ -61,56 +61,56 @@ perf = DataFrames.DataFrame( @testset "native tests" begin -@testset "default options tests" begin - println("starting default options tests") - inst_defaults = vcat( - inst_preproc, - inst_infeas, - inst_cones_many, - ) - for inst_name in inst_defaults - test_instance_solver(inst_name, Float64, default_options) - end -end - -@testset "no preprocess tests" begin - println("\nstarting no preprocess tests") - for inst_name in inst_cones_few, T in diff_reals - options = (; default_options..., preprocess = false, reduce = false, system_solver = Solvers.SymIndefDenseSystemSolver{T}()) - test_instance_solver(inst_name, T, options) - end -end - -@testset "indirect solvers tests" begin - println("\nstarting indirect solvers tests") - for inst_name in inst_indirect, T in diff_reals - options = (; default_options..., init_use_indirect = true, preprocess = false, reduce = false, system_solver = Solvers.SymIndefIndirectSystemSolver{T}(), tol_feas = 1e-4, tol_rel_opt = 1e-4, tol_abs_opt = 1e-4, tol_infeas = 1e-6) - test_instance_solver(inst_name, T, options) - end -end - -@testset "system solvers tests" begin - println("\nstarting system solvers tests") - system_solvers = [ - (Solvers.NaiveDenseSystemSolver, diff_reals), - (Solvers.NaiveSparseSystemSolver, [Float64,]), - (Solvers.NaiveElimDenseSystemSolver, diff_reals), - (Solvers.NaiveElimSparseSystemSolver, [Float64,]), - (Solvers.SymIndefDenseSystemSolver, all_reals), - (Solvers.SymIndefSparseSystemSolver, [Float64,]), - (Solvers.QRCholDenseSystemSolver, all_reals), - ] - for inst_name in inst_cones_few, (system_solver, real_types) in system_solvers, T in real_types - options = (; default_options..., system_solver = system_solver{T}(), reduce = false) - test_instance_solver(inst_name, T, options, string_nameof(system_solver)) - end -end +# @testset "default options tests" begin +# println("starting default options tests") +# inst_defaults = vcat( +# inst_preproc, +# inst_infeas, +# inst_cones_many, +# ) +# for inst_name in inst_defaults +# test_instance_solver(inst_name, Float64, default_options) +# end +# end +# +# @testset "no preprocess tests" begin +# println("\nstarting no preprocess tests") +# for inst_name in inst_cones_few, T in diff_reals +# options = (; default_options..., preprocess = false, reduce = false, system_solver = Solvers.SymIndefDenseSystemSolver{T}()) +# test_instance_solver(inst_name, T, options) +# end +# end +# +# @testset "indirect solvers tests" begin +# println("\nstarting indirect solvers tests") +# for inst_name in inst_indirect, T in diff_reals +# options = (; default_options..., init_use_indirect = true, preprocess = false, reduce = false, system_solver = Solvers.SymIndefIndirectSystemSolver{T}(), tol_feas = 1e-4, tol_rel_opt = 1e-4, tol_abs_opt = 1e-4, tol_infeas = 1e-6) +# test_instance_solver(inst_name, T, options) +# end +# end +# +# @testset "system solvers tests" begin +# println("\nstarting system solvers tests") +# system_solvers = [ +# (Solvers.NaiveDenseSystemSolver, diff_reals), +# (Solvers.NaiveSparseSystemSolver, [Float64,]), +# (Solvers.NaiveElimDenseSystemSolver, diff_reals), +# (Solvers.NaiveElimSparseSystemSolver, [Float64,]), +# (Solvers.SymIndefDenseSystemSolver, all_reals), +# (Solvers.SymIndefSparseSystemSolver, [Float64,]), +# (Solvers.QRCholDenseSystemSolver, all_reals), +# ] +# for inst_name in inst_cones_few, (system_solver, real_types) in system_solvers, T in real_types +# options = (; default_options..., system_solver = system_solver{T}(), reduce = false) +# test_instance_solver(inst_name, T, options, string_nameof(system_solver)) +# end +# end @testset "steppers tests" begin println("\nstarting steppers tests (with printing)") steppers = [ - (Solvers.HeurCombStepper, diff_reals), (Solvers.PredOrCentStepper, diff_reals), + (Solvers.CombinedStepper, diff_reals), ] for inst_name in inst_cones_few, (stepper, real_types) in steppers, T in diff_reals options = (; default_options..., verbose = true, stepper = stepper{T}())