diff --git a/lib/MadNLPGPU/src/MadNLPGPU.jl b/lib/MadNLPGPU/src/MadNLPGPU.jl
index a3a33308..75d23d85 100644
--- a/lib/MadNLPGPU/src/MadNLPGPU.jl
+++ b/lib/MadNLPGPU/src/MadNLPGPU.jl
@@ -2,19 +2,23 @@ module MadNLPGPU
import LinearAlgebra
# CUDA
-import CUDA: CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
+import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
+import .CUSOLVER:
+ libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
+import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT,
+ CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T
+
# Kernels
import KernelAbstractions: @kernel, @index, wait, Event
import CUDAKernels: CUDADevice
import MadNLP
-
import MadNLP:
@kwdef, Logger, @debug, @warn, @error,
AbstractOptions, AbstractLinearSolver, AbstractNLPModel, set_options!,
SymbolicException,FactorizationException,SolveException,InertiaException,
introduce, factorize!, solve!, improve!, is_inertia, inertia, tril_to_full!,
- LapackOptions, input_type
+ LapackOptions, input_type, is_supported
diff --git a/lib/MadNLPGPU/src/interface.jl b/lib/MadNLPGPU/src/interface.jl
index 004a8110..1aaed015 100644
--- a/lib/MadNLPGPU/src/interface.jl
+++ b/lib/MadNLPGPU/src/interface.jl
@@ -1,22 +1,22 @@
-function CuInteriorPointSolver(nlp::AbstractNLPModel;
+function CuInteriorPointSolver(nlp::AbstractNLPModel{T};
option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(), kwargs...
-)
+) where T
opt = MadNLP.Options(linear_solver=LapackGPUSolver)
MadNLP.set_options!(opt,option_dict,kwargs)
MadNLP.check_option_sanity(opt)
KKTSystem = if (opt.kkt_system == MadNLP.SPARSE_KKT_SYSTEM) || (opt.kkt_system == MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM)
error("Sparse KKT system are currently not supported on CUDA GPU.\n" *
- "Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
+ "Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
elseif opt.kkt_system == MadNLP.DENSE_KKT_SYSTEM
- MT = CuMatrix{Float64}
- VT = CuVector{Float64}
- MadNLP.DenseKKTSystem{Float64, VT, MT}
+ MT = CuMatrix{T}
+ VT = CuVector{T}
+ MadNLP.DenseKKTSystem{T, VT, MT}
elseif opt.kkt_system == MadNLP.DENSE_CONDENSED_KKT_SYSTEM
- MT = CuMatrix{Float64}
- VT = CuVector{Float64}
- MadNLP.DenseCondensedKKTSystem{Float64, VT, MT}
+ MT = CuMatrix{T}
+ VT = CuVector{T}
+ MadNLP.DenseCondensedKKTSystem{T, VT, MT}
end
- return MadNLP.InteriorPointSolver{KKTSystem}(nlp, opt; option_linear_solver=option_dict)
+ return MadNLP.InteriorPointSolver{T,KKTSystem}(nlp, opt; option_linear_solver=option_dict)
end
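
Net effect of this hunk: the element type `T` is taken from the `AbstractNLPModel{T}` and threaded through to the KKT system and the solver. A minimal usage sketch, mirroring the tests below (options passed through `option_dict`, as the tests do):

```julia
using MadNLP, MadNLPGPU, MadNLPTests

# Build a Float32 test QP and solve it on the GPU with a dense KKT system.
nlp = MadNLPTests.DenseDummyQP{Float32}(; n=100, m=10)
options = Dict{Symbol,Any}(
    :kkt_system => MadNLP.DENSE_KKT_SYSTEM,  # sparse KKT systems error out on GPU
    :tol => 1e-4,                            # loosened tolerance for single precision
)
ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=options)
MadNLP.optimize!(ips)
```
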
diff --git a/lib/MadNLPGPU/src/lapackgpu.jl b/lib/MadNLPGPU/src/lapackgpu.jl
index 615004e9..5d75b913 100644
--- a/lib/MadNLPGPU/src/lapackgpu.jl
+++ b/lib/MadNLPGPU/src/lapackgpu.jl
@@ -1,22 +1,10 @@
-
-import .CUSOLVER:
- cusolverDnDsytrf_bufferSize, cusolverDnDsytrf,
- cusolverDnDpotrf_bufferSize, cusolverDnDpotrf, cusolverDnDpotrs,
- cusolverDnDgetrf_bufferSize, cusolverDnDgetrf, cusolverDnDgetrs,
- cusolverDnDgeqrf_bufferSize, cusolverDnDgeqrf, cusolverDnDgeqrf_bufferSize,
- cusolverDnDormqr_bufferSize, cusolverDnDormqr,
- libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
-import .CUBLAS: cublasDtrsm_v2, handle, CUBLAS_DIAG_NON_UNIT,
- CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T
-
-
-mutable struct LapackGPUSolver{MT} <: AbstractLinearSolver
- dense::MT
- fact::CuMatrix{Float64}
- rhs::CuVector{Float64}
- work::CuVector{Float64}
+mutable struct LapackGPUSolver{T} <: AbstractLinearSolver{T}
+ dense::AbstractMatrix{T}
+ fact::CuMatrix{T}
+ rhs::CuVector{T}
+ work::CuVector{T}
lwork
- work_host::Vector{Float64}
+ work_host::Vector{T}
lwork_host
info::CuVector{Int32}
etc::Dict{Symbol,Any} # throw some algorithm-specific things here
@@ -24,22 +12,25 @@ mutable struct LapackGPUSolver{MT} <: AbstractLinearSolver
logger::Logger
end
-function LapackGPUSolver(dense::MT;
- option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
- opt=LapackOptions(),logger=Logger(),
- kwargs...) where {MT <: AbstractMatrix}
+
+function LapackGPUSolver(
+ dense::MT;
+ option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
+ opt=LapackOptions(),logger=Logger(),
+ kwargs...) where {T,MT <: AbstractMatrix{T}}
set_options!(opt,option_dict,kwargs...)
- fact = CuMatrix{Float64}(undef,size(dense))
- rhs = CuVector{Float64}(undef,size(dense,1))
- work = CuVector{Float64}(undef, 1)
+ fact = CuMatrix{T}(undef,size(dense))
+ rhs = CuVector{T}(undef,size(dense,1))
+ work = CuVector{T}(undef, 1)
lwork = Int32[1]
- work_host = Vector{Float64}(undef, 1)
+ work_host = Vector{T}(undef, 1)
lwork_host = Int32[1]
info = CuVector{Int32}(undef,1)
etc = Dict{Symbol,Any}()
- return LapackGPUSolver(dense,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger)
+
+ return LapackGPUSolver{T}(dense,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger)
end
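
All work buffers now inherit their element type from the input matrix. A quick construction sketch (assumes a CUDA-capable machine and that the `lapack_algorithm` keyword is forwarded to `LapackOptions` via `set_options!`):

```julia
using LinearAlgebra, MadNLP, MadNLPGPU

A = rand(Float32, 8, 8)
A = A * A' + 10I                        # symmetric positive definite input
M = LapackGPUSolver(A; lapack_algorithm=MadNLP.CHOLESKY)
MadNLP.factorize!(M)
x = MadNLP.solve!(M, rand(Float32, 8))  # fact/rhs/work are CuArray{Float32} buffers
```
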
function factorize!(M::LapackGPUSolver)
@@ -72,159 +63,144 @@ end
improve!(M::LapackGPUSolver) = false
introduce(M::LapackGPUSolver) = "Lapack-GPU ($(M.opt.lapack_algorithm))"
-if runtime_version() >= v"11.3.1"
-
- is_inertia(M::LapackGPUSolver) = M.opt.lapack_algorithm == MadNLP.CHOLESKY # TODO: implement inertia(M::Solver) for BUNCHKAUFMAN
-
- function factorize_bunchkaufman!(M::LapackGPUSolver)
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
- haskey(M.etc,:ipiv64) || (M.etc[:ipiv64] = CuVector{Int64}(undef,length(M.etc[:ipiv])))
-
- copyto!(M.fact,M.dense)
- cusolverDnDsytrf_bufferSize(
- dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- cusolverDnDsytrf(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
- M.etc[:ipiv],M.work,M.lwork[],M.info)
- return M
- end
-
- function solve_bunchkaufman!(M::LapackGPUSolver,x)
-
- copyto!(M.etc[:ipiv64],M.etc[:ipiv])
- copyto!(M.rhs,x)
- ccall((:cusolverDnXsytrs_bufferSize, libcusolver()), cusolverStatus_t,
- (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
- CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType,
- CuPtr{Cdouble}, Int64, Ptr{Int64}, Ptr{Int64}),
- dense_handle(), CUBLAS_FILL_MODE_LOWER,
- size(M.fact,1),1,R_64F,M.fact,size(M.fact,2),
- M.etc[:ipiv64],R_64F,M.rhs,length(M.rhs),M.lwork,M.lwork_host)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- length(M.work_host) < M.lwork_host[] && resize!(work_host,Int(M.lwork_host[]))
- ccall((:cusolverDnXsytrs, libcusolver()), cusolverStatus_t,
- (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
- CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType,
- CuPtr{Cdouble}, Int64, CuPtr{Cdouble}, Int64, Ptr{Cdouble}, Int64,
- CuPtr{Int64}),
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- size(M.fact,1),1,R_64F,M.fact,size(M.fact,2),
- M.etc[:ipiv64],R_64F,M.rhs,length(M.rhs),M.work,M.lwork[],M.work_host,M.lwork_host[],M.info)
- copyto!(x,M.rhs)
-
- return x
- end
-else
- is_inertia(M::LapackGPUSolver) =
- M.opt.lapack_algorithm == MadNLP.CHOLESKY
-
- function factorize_bunchkaufman!(M::LapackGPUSolver)
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
-
- copyto!(M.fact,M.dense)
- CUSOLVER.cusolverDnDsytrf_bufferSize(
- CUSOLVER.dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- CUSOLVER.cusolverDnDsytrf(
- CUSOLVER.dense_handle(),CUBLAS.CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
- M.etc[:ipiv],M.work,M.lwork[],M.info)
-
- # need to send the factorization back to cpu to call mkl sytrs --------------
- haskey(M.etc,:fact_cpu) || (M.etc[:fact_cpu] = Matrix{Float64}(undef,size(M.dense)))
- haskey(M.etc,:ipiv_cpu) || (M.etc[:ipiv_cpu] = Vector{Int}(undef,length(M.etc[:ipiv])))
- haskey(M.etc,:info_cpu) || (M.etc[:info_cpu] = Vector{Int}(undef,length(M.info)))
- copyto!(M.etc[:fact_cpu],M.fact)
- copyto!(M.etc[:ipiv_cpu],M.etc[:ipiv])
- copyto!(M.etc[:info_cpu],M.info)
- # ---------------------------------------------------------------------------
- return M
- end
-
- function solve_bunchkaufman!(M::LapackGPUSolver,x)
- ccall(
- (:dsytrs_64_,"libopenblas64_"), # MKL doesn't work for some reason...
- Cvoid,
- (Ref{Cchar},Ref{Int},Ref{Int},Ptr{Cdouble},Ref{Int},Ptr{Int},Ptr{Cdouble},Ref{Int},Ptr{Int}),
- 'L',size(M.fact,1),1,M.etc[:fact_cpu],size(M.fact,2),M.etc[:ipiv_cpu],x,length(x),[1])
-
- return x
+for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_buffer,trsm,potrf,potrf_buffer,potrs,typ,cutyp) in (
+ (
+ :cusolverDnDsytrf, :cusolverDnDsytrf_bufferSize,
+ :cusolverDnDgetrf, :cusolverDnDgetrf_bufferSize, :cusolverDnDgetrs,
+ :cusolverDnDgeqrf, :cusolverDnDgeqrf_bufferSize,
+ :cusolverDnDormqr, :cusolverDnDormqr_bufferSize,
+ :cublasDtrsm_v2,
+ :cusolverDnDpotrf, :cusolverDnDpotrf_bufferSize,
+ :cusolverDnDpotrs,
+ Float64, CUDA.R_64F
+ ),
+ (
+ :cusolverDnSsytrf, :cusolverDnSsytrf_bufferSize,
+ :cusolverDnSgetrf, :cusolverDnSgetrf_bufferSize, :cusolverDnSgetrs,
+ :cusolverDnSgeqrf, :cusolverDnSgeqrf_bufferSize,
+ :cusolverDnSormqr, :cusolverDnSormqr_bufferSize,
+ :cublasStrsm_v2,
+ :cusolverDnSpotrf, :cusolverDnSpotrf_bufferSize,
+ :cusolverDnSpotrs,
+ Float32, CUDA.R_32F
+ ),
+ )
+ @eval begin
+ function factorize_bunchkaufman!(M::LapackGPUSolver{$typ})
+ haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
+ haskey(M.etc,:ipiv64) || (M.etc[:ipiv64] = CuVector{Int64}(undef,length(M.etc[:ipiv])))
+
+ copyto!(M.fact,M.dense)
+ CUSOLVER.$sytrf_buffer(
+ dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
+ length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+ CUSOLVER.$sytrf(
+ dense_handle(),CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
+ M.etc[:ipiv],M.work,M.lwork[],M.info)
+ return M
+ end
+
+ function solve_bunchkaufman!(M::LapackGPUSolver{$typ},x)
+
+ copyto!(M.etc[:ipiv64],M.etc[:ipiv])
+ copyto!(M.rhs,x)
+ ccall((:cusolverDnXsytrs_bufferSize, libcusolver()), cusolverStatus_t,
+ (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
+              CuPtr{$typ}, Int64, CuPtr{Int64}, cudaDataType,
+              CuPtr{$typ}, Int64, Ptr{Int64}, Ptr{Int64}),
+ dense_handle(), CUBLAS_FILL_MODE_LOWER,
+ size(M.fact,1),1,$cutyp,M.fact,size(M.fact,2),
+ M.etc[:ipiv64],$cutyp,M.rhs,length(M.rhs),M.lwork,M.lwork_host)
+ length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+           length(M.work_host) < M.lwork_host[] && resize!(M.work_host,Int(M.lwork_host[]))
+ ccall((:cusolverDnXsytrs, libcusolver()), cusolverStatus_t,
+ (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
+              CuPtr{$typ}, Int64, CuPtr{Int64}, cudaDataType,
+              CuPtr{$typ}, Int64, CuPtr{$typ}, Int64, Ptr{$typ}, Int64,
+ CuPtr{Int64}),
+ dense_handle(),CUBLAS_FILL_MODE_LOWER,
+ size(M.fact,1),1,$cutyp,M.fact,size(M.fact,2),
+ M.etc[:ipiv64],$cutyp,M.rhs,length(M.rhs),M.work,M.lwork[],M.work_host,M.lwork_host[],M.info)
+ copyto!(x,M.rhs)
+
+ return x
+ end
+
+ function factorize_lu!(M::LapackGPUSolver{$typ})
+ haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
+ tril_to_full!(M.dense)
+ copyto!(M.fact,M.dense)
+ CUSOLVER.$getrf_buffer(
+ dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
+ M.fact,Int32(size(M.fact,2)),M.lwork)
+ length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+ CUSOLVER.$getrf(
+ dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
+ M.fact,Int32(size(M.fact,2)),M.work,M.etc[:ipiv],M.info)
+ return M
+ end
+
+ function solve_lu!(M::LapackGPUSolver{$typ},x)
+ copyto!(M.rhs,x)
+ CUSOLVER.$getrs(
+ dense_handle(),CUBLAS_OP_N,
+ Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)),
+ M.etc[:ipiv],M.rhs,Int32(length(M.rhs)),M.info)
+ copyto!(x,M.rhs)
+ return x
+ end
+
+ function factorize_qr!(M::LapackGPUSolver{$typ})
+ haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{$typ}(undef,size(M.dense,1)))
+ haskey(M.etc,:one) || (M.etc[:one] = ones($typ,1))
+ tril_to_full!(M.dense)
+ copyto!(M.fact,M.dense)
+ CUSOLVER.$geqrf_buffer(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.lwork)
+ length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+ CUSOLVER.$geqrf(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.work,M.lwork[],M.info)
+ return M
+ end
+
+ function solve_qr!(M::LapackGPUSolver{$typ},x)
+ copyto!(M.rhs,x)
+ CUSOLVER.$ormqr_buffer(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T,
+ Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.lwork)
+ length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+ CUSOLVER.$ormqr(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T,
+ Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.work,M.lwork[],M.info)
+ CUBLAS.$trsm(handle(),CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,
+ Int32(size(M.fact,1)),Int32(1),M.etc[:one],M.fact,Int32(size(M.fact,2)),M.rhs,Int32(length(M.rhs)))
+ copyto!(x,M.rhs)
+ return x
+ end
+
+ function factorize_cholesky!(M::LapackGPUSolver{$typ})
+ copyto!(M.fact,M.dense)
+ CUSOLVER.$potrf_buffer(
+ dense_handle(),CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
+ length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+ CUSOLVER.$potrf(
+ dense_handle(),CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
+ M.work,M.lwork[],M.info)
+ return M
+ end
+
+ function solve_cholesky!(M::LapackGPUSolver{$typ},x)
+ copyto!(M.rhs,x)
+ CUSOLVER.$potrs(
+ dense_handle(),CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)),
+ M.rhs,Int32(length(M.rhs)),M.info)
+ copyto!(x,M.rhs)
+ return x
+ end
end
end
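
The loop above replaces the hand-written `Float64`-only wrappers with one `@eval` pass over `(symbol, type)` tuples, generating a `Float64` and a `Float32` method for each factorization routine. A self-contained sketch of the same idiom (illustrative names, not MadNLP or CUDA API):

```julia
# Generate one method per precision from a table of (name, type) pairs.
for (fname, T) in ((:scale64!, Float64), (:scale32!, Float32))
    @eval $fname(x::Vector{$T}, a::$T) = (x .*= a; x)
end

scale32!(Float32[1, 2, 3], 2f0)  # dispatches to the generated Float32 method
```
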
-function factorize_lu!(M::LapackGPUSolver)
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
- tril_to_full!(M.dense)
- copyto!(M.fact,M.dense)
- cusolverDnDgetrf_bufferSize(
- dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
- M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- cusolverDnDgetrf(
- dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
- M.fact,Int32(size(M.fact,2)),M.work,M.etc[:ipiv],M.info)
- return M
-end
-
-function solve_lu!(M::LapackGPUSolver,x)
- copyto!(M.rhs,x)
- cusolverDnDgetrs(
- dense_handle(),CUBLAS_OP_N,
- Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)),
- M.etc[:ipiv],M.rhs,Int32(length(M.rhs)),M.info)
- copyto!(x,M.rhs)
- return x
-end
-
-function factorize_qr!(M::LapackGPUSolver)
- haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{Float64}(undef,size(M.dense,1)))
- haskey(M.etc,:one) || (M.etc[:one] = ones(1))
- tril_to_full!(M.dense)
- copyto!(M.fact,M.dense)
- cusolverDnDgeqrf_bufferSize(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- cusolverDnDgeqrf(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.work,M.lwork[],M.info)
- return M
-end
-
-function solve_qr!(M::LapackGPUSolver,x)
- copyto!(M.rhs,x)
- cusolverDnDormqr_bufferSize(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T,
- Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- cusolverDnDormqr(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T,
- Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.work,M.lwork[],M.info)
- cublasDtrsm_v2(handle(),CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,
- Int32(size(M.fact,1)),Int32(1),M.etc[:one],M.fact,Int32(size(M.fact,2)),M.rhs,Int32(length(M.rhs)))
- copyto!(x,M.rhs)
- return x
-end
-
-function factorize_cholesky!(M::LapackGPUSolver)
- copyto!(M.fact,M.dense)
- cusolverDnDpotrf_bufferSize(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- cusolverDnDpotrf(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
- M.work,M.lwork[],M.info)
- return M
-end
-
-function solve_cholesky!(M::LapackGPUSolver,x)
- copyto!(M.rhs,x)
- cusolverDnDpotrs(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)),
- M.rhs,Int32(length(M.rhs)),M.info)
- copyto!(x,M.rhs)
- return x
-end
-
+is_inertia(M::LapackGPUSolver) = M.opt.lapack_algorithm == MadNLP.CHOLESKY # TODO: implement inertia(M::LapackGPUSolver) for BUNCHKAUFMAN
function inertia(M::LapackGPUSolver)
if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
inertia(M.etc[:fact_cpu],M.etc[:ipiv_cpu],M.etc[:info_cpu][])
@@ -236,4 +212,6 @@ function inertia(M::LapackGPUSolver)
end
input_type(::Type{LapackGPUSolver}) = :dense
+is_supported(::Type{LapackGPUSolver},::Type{Float32}) = true
+is_supported(::Type{LapackGPUSolver},::Type{Float64}) = true
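
`is_supported` is the capability trait that `InteriorPointSolver` asserts on (see the `@assert is_supported(opt.linear_solver, T)` added in `src/IPM/IPM.jl` below). A sketch of the gating; the `false` fallback for other precisions is an assumption about the generic method in MadNLP:

```julia
MadNLP.is_supported(LapackGPUSolver, Float32)  # true, per the methods above
MadNLP.is_supported(LapackGPUSolver, Float64)  # true
# Any other precision presumably hits a generic `false` fallback, so the
# solver constructor rejects e.g. a Float16 model before factorization.
```
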
diff --git a/lib/MadNLPGPU/test/densekkt_gpu.jl b/lib/MadNLPGPU/test/densekkt_gpu.jl
index b52ea6c5..805cbc07 100644
--- a/lib/MadNLPGPU/test/densekkt_gpu.jl
+++ b/lib/MadNLPGPU/test/densekkt_gpu.jl
@@ -9,32 +9,32 @@ function _compare_gpu_with_cpu(KKTSystem, n, m, ind_fixed)
elseif (KKTSystem == MadNLP.DenseCondensedKKTSystem)
MadNLP.DENSE_CONDENSED_KKT_SYSTEM
end
- # Define options
- madnlp_options = Dict{Symbol, Any}(
- :kkt_system=>opt_kkt,
- :linear_solver=>LapackGPUSolver,
- :print_level=>MadNLP.ERROR,
- )
-
- nlp = MadNLPTests.DenseDummyQP(; n=n, m=m, fixed_variables=ind_fixed)
-
- # Solve on CPU
- h_ips = MadNLP.InteriorPointSolver(nlp; option_dict=copy(madnlp_options))
- MadNLP.optimize!(h_ips)
-
- # Solve on GPU
- d_ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=copy(madnlp_options))
- MadNLP.optimize!(d_ips)
-
- T = Float64
- VT = CuVector{T}
- MT = CuMatrix{T}
- @test isa(d_ips.kkt, KKTSystem{T, VT, MT})
- # # Check that both results match exactly
- @test h_ips.cnt.k == d_ips.cnt.k
- @test h_ips.obj_val ≈ d_ips.obj_val atol=1e-10
- @test h_ips.x ≈ d_ips.x atol=1e-10
- @test h_ips.l ≈ d_ips.l atol=1e-10
+
+ for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
+ madnlp_options = Dict{Symbol, Any}(
+ :kkt_system=>opt_kkt,
+ :linear_solver=>LapackGPUSolver,
+ :print_level=>MadNLP.ERROR,
+ :tol=>tol
+ )
+
+ nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed)
+
+ # Solve on CPU
+ h_ips = MadNLP.InteriorPointSolver(nlp; option_dict=copy(madnlp_options))
+ MadNLP.optimize!(h_ips)
+
+ # Solve on GPU
+ d_ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=copy(madnlp_options))
+ MadNLP.optimize!(d_ips)
+
+ @test isa(d_ips.kkt, KKTSystem{T, CuVector{T}, CuMatrix{T}})
+        # Check that both solves match within the precision-dependent tolerance
+ @test h_ips.cnt.k == d_ips.cnt.k
+ @test h_ips.obj_val ≈ d_ips.obj_val atol=atol
+ @test h_ips.x ≈ d_ips.x atol=atol
+ @test h_ips.l ≈ d_ips.l atol=atol
+ end
end
@testset "MadNLPGPU ($(kkt_system))" for kkt_system in [
diff --git a/lib/MadNLPGPU/test/runtests.jl b/lib/MadNLPGPU/test/runtests.jl
index b59224fe..07956b05 100644
--- a/lib/MadNLPGPU/test/runtests.jl
+++ b/lib/MadNLPGPU/test/runtests.jl
@@ -35,8 +35,12 @@ testset = [
],
]
-# Test LapackGPU wrapper
-@testset "LapackGPU test" begin
+@testset "MadNLPGPU test" begin
+
+ MadNLPTests.test_linear_solver(LapackGPUSolver,Float32)
+ MadNLPTests.test_linear_solver(LapackGPUSolver,Float64)
+
+ # Test LapackGPU wrapper
for (name,optimizer_constructor,exclude) in testset
test_madnlp(name,optimizer_constructor,exclude)
end
@@ -44,4 +48,3 @@ end
# Test DenseKKTSystem on GPU
include("densekkt_gpu.jl")
-
diff --git a/lib/MadNLPHSL/README.md b/lib/MadNLPHSL/README.md
index b811b0b6..2fb8cec1 100644
--- a/lib/MadNLPHSL/README.md
+++ b/lib/MadNLPHSL/README.md
@@ -7,11 +7,20 @@ pkg> add MadNLPHSL
To build MadNLP with HSL linear solvers (Ma27, Ma57, Ma77, Ma86, Ma97), the source code needs to be obtained by the user under Coin-HSL Full (Stable). The source code is distributed as a tarball file `coinhsl-*.tar.gz`. The absolute path to the extracted source code or to a compiled library should be provided by the user. If the user already has a compiled HSL solver library, one can simply provide the path to that shared library. In this case, the source code is not compiled and the provided shared library is used directly.
```julia
-# either one of the following should be given
+# at least one of the following should be given
julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl"
-julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
-julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
+julia> ENV["MADNLP_MA27_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
+julia> ENV["MADNLP_MA57_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
+julia> ENV["MADNLP_MA77_SOURCE_PATH"] = "/opt/hsl_ma77-6.3.0"
+julia> ENV["MADNLP_MA86_SOURCE_PATH"] = "/opt/hsl_ma86-1.7.2"
+julia> ENV["MADNLP_MA97_SOURCE_PATH"] = "/opt/hsl_ma97-2.7.1"
+
julia> ENV["MADNLP_HSL_LIBRARY_PATH"] = "/usr/lib/libcoinhsl.so"
+julia> ENV["MADNLP_MA27_LIBRARY_PATH"] = "/usr/lib/libma27.so"
+julia> ENV["MADNLP_MA57_LIBRARY_PATH"] = "/usr/lib/libma57.so"
+julia> ENV["MADNLP_MA77_LIBRARY_PATH"] = "/usr/lib/libma77.so"
+julia> ENV["MADNLP_MA86_LIBRARY_PATH"] = "/usr/lib/libma86.so"
+julia> ENV["MADNLP_MA97_LIBRARY_PATH"] = "/usr/lib/libma97.so"
# optionally, one can specify
julia> ENV["MADNLP_HSL_BLAS"] = "mkl" # default is "openblas"
```
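
For instance, to build only the Ma57 wrapper from its sources, setting the corresponding variable before rebuilding should suffice (hypothetical session):

```julia
julia> ENV["MADNLP_MA57_SOURCE_PATH"] = "/opt/ma57-3.11.0"

pkg> build MadNLPHSL
```
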
diff --git a/lib/MadNLPHSL/deps/build.jl b/lib/MadNLPHSL/deps/build.jl
index d158ed4f..2cde9269 100644
--- a/lib/MadNLPHSL/deps/build.jl
+++ b/lib/MadNLPHSL/deps/build.jl
@@ -13,8 +13,6 @@ const rpath = `-Wl,-rpath,`
const whole_archive= Sys.isapple() ? `-Wl,-all_load` : `-Wl,--whole-archive`
const no_whole_archive = Sys.isapple() ? `-Wl,-noall_load` : `-Wl,--no-whole-archive`
const libdir = mkpath(joinpath(@__DIR__, "lib"))
-const hsl_library_path = haskey(ENV,"MADNLP_HSL_LIBRARY_PATH") ? ENV["MADNLP_HSL_LIBRARY_PATH"] : ""
-const hsl_source_path = haskey(ENV,"MADNLP_HSL_SOURCE_PATH") ? ENV["MADNLP_HSL_SOURCE_PATH"] : ""
const FC = haskey(ENV,"MADNLP_FC") ? ENV["MADNLP_FC"] : `gfortran`
const libmetis_dir = joinpath(artifact"METIS", "lib")
const with_metis = `-L$libmetis_dir $rpath$libmetis_dir -lmetis`
@@ -26,25 +24,59 @@ else
const libblas_dir = joinpath(artifact"OpenBLAS32","lib")
const with_blas = `-L$libblas_dir $rpath$libblas_dir -lopenblas`
end
-
-const targets =[
- [
- "deps.f", "deps90.f90",
+
+const supported_library = [
+ (:libhsl, "MADNLP_HSL_LIBRARY_PATH", "MADNLP_HSL_SOURCE_PATH")
+ (:libma27, "MADNLP_MA27_LIBRARY_PATH", "MADNLP_MA27_SOURCE_PATH")
+ (:libma57, "MADNLP_MA57_LIBRARY_PATH", "MADNLP_MA57_SOURCE_PATH")
+ (:libma77, "MADNLP_MA77_LIBRARY_PATH", "MADNLP_MA77_SOURCE_PATH")
+ (:libma86, "MADNLP_MA86_LIBRARY_PATH", "MADNLP_MA86_SOURCE_PATH")
+ (:libma97, "MADNLP_MA97_LIBRARY_PATH", "MADNLP_MA97_SOURCE_PATH")
+]
+
+const targets_dict = Dict(
+ :libhsl=> [
+ "deps.f",
+ "deps90.f90",
+ "ma27d.f",
+ "ma57d.f",
+ "hsl_ma77d.f90",
+ "hsl_ma86d.f90",
+ "hsl_ma97d.f90",
+ "hsl_mc68i_ciface.f90",
+ "hsl_ma77d_ciface.f90",
+ "hsl_ma86d_ciface.f90",
+ "hsl_ma97d_ciface.f90",
],
- [
- "ma27d.f", "ma27s.f",
- "ma57d.f", "ma57s.f",
- "hsl_ma77d.f90", "hsl_ma77s.f90",
- "hsl_ma86d.f90", "hsl_ma86s.f90",
- "hsl_ma97d.f90", "hsl_ma97s.f90",
+ :libma27 => [
+ "deps.f",
+ "ma27d.f",
+ "ma27s.f",
],
- [
- "hsl_mc68i_ciface.f90",
+ :libma57 => [
+ "sdeps.f", "ddeps.f",
+ "ma57d.f", "ma57s.f",
+ ],
+ :libma77 => [
+ "common.f", "common90.f90",
+ "ddeps90.f90", "sdeps90.f90",
+ "hsl_ma77d.f90", "hsl_ma77s.f90",
"hsl_ma77d_ciface.f90", "hsl_ma77s_ciface.f90",
+ ],
+ :libma86 => [
+ "common.f", "common90.f90",
+ "sdeps90.f90",
+ "hsl_ma86d.f90", "hsl_ma86s.f90",
"hsl_ma86d_ciface.f90", "hsl_ma86s_ciface.f90",
- "hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
+ "hsl_mc68i_ciface.f90",
+ ],
+ :libma97 => [
+ "common.f", "common90.f90",
+ "sdeps90.f90", "ddeps90.f90",
+ "hsl_ma97d.f90", "hsl_ma97s.f90",
+ "hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
]
-]
+)
rm(libdir;recursive=true,force=true)
mkpath(libdir)
@@ -52,48 +84,60 @@ isvalid(cmd::Cmd)=(try run(cmd) catch e return false end; return true)
# HSL
-if hsl_source_path != ""
- if isvalid(`$FC --version`)
- OC = OutputCollector[]
- cd(hsl_source_path)
+attempted = Tuple{Symbol,Product}[]
- names_succeeded = []
- for i=1:3
- names = []
- for (root, dirs, files) in walkdir(hsl_source_path)
- for file in files;
- if file in targets[i];
- filter!(x->x != file,files)
- name = splitext(relpath(joinpath(root,file),hsl_source_path))
- push!(names, name)
- @info "$(name[1])$(name[2]) source code detected."
- end
+for (lib, envlib, envsrc) in supported_library
+ if haskey(ENV,envlib)
+ push!(attempted, (lib,FileProduct(ENV[envlib], lib)))
+ elseif haskey(ENV,envsrc) && isvalid(`$FC --version`)
+ @info "Compiling $lib"
+ source_path = ENV[envsrc]
+ targets = targets_dict[lib]
+
+ cd(source_path)
+
+ list = []
+ for (root, dir, files) in walkdir(source_path)
+ for file in files
+ if file in targets
+ @info "$file source code detected."
+ push!(list, (root, dir, file))
end
end
- succeeded = wait.(
- [OutputCollector(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`,verbose=verbose)
- for (name,ext) in names])
- append!(names_succeeded, names[succeeded])
end
- cmd = `$FC -o$(libdir)/libhsl.$so -shared -fPIC -O3 -fopenmp`
- append!(cmd.exec, ["$name.o" for (name,ext) in names_succeeded])
+ succeeded = []
+ for target in targets
+ for (root, dir, file) in list
+ if file == target
+ name, ext = splitext(relpath(joinpath(root,file),source_path))
+                    if isvalid(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`)
+                        push!(succeeded, (name, ext))
+                    end
+ end
+ end
+ end
+
+ cmd = `$FC -o$(libdir)/$lib.$so -shared -fPIC -O3 -fopenmp`
+ append!(cmd.exec, ["$name.o" for (name,ext) in succeeded])
append!(cmd.exec, with_metis.exec)
append!(cmd.exec, with_blas.exec)
run(cmd)
cd("$(@__DIR__)")
- product = FileProduct(prefix,joinpath(libdir,"libhsl.$so"), :libhsl)
+ push!(attempted, (lib,FileProduct(prefix,joinpath(libdir,"$lib.$so"), lib)))
end
-else
- product = FileProduct(hsl_library_path, :libhsl)
end
# write deps.jl
-if satisfied(product)
- @info "Building HSL succeeded."
- write_deps_file(joinpath(@__DIR__, "deps.jl"),Product[product], verbose=verbose)
-else
- @error "Building HSL failed."
- write_deps_file(joinpath(@__DIR__, "deps.jl"),Product[], verbose=verbose)
+succeeded = Product[]
+for (lib, product) in attempted
+ if satisfied(product)
+ @info "Building $lib succeeded."
+ push!(succeeded, product)
+ else
+ @error "Building $lib failed."
+ end
end
+
+write_deps_file(joinpath(@__DIR__, "deps.jl"), succeeded, verbose=verbose)
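
The build script now resolves each library independently: a prebuilt shared library (`*_LIBRARY_PATH`) takes precedence over compiling from source (`*_SOURCE_PATH`), and libraries with neither variable set are skipped. A self-contained sketch of that resolution order (illustrative helper, not part of the build script):

```julia
function resolve(envlib::String, envsrc::String)
    haskey(ENV, envlib) && return (:prebuilt, ENV[envlib])  # 1. user-provided library wins
    haskey(ENV, envsrc) && return (:compile, ENV[envsrc])   # 2. otherwise compile the sources
    return (:skip, "")                                      # 3. nothing to build for this lib
end

resolve("MADNLP_MA27_LIBRARY_PATH", "MADNLP_MA27_SOURCE_PATH")
```
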
diff --git a/lib/MadNLPHSL/src/MadNLPHSL.jl b/lib/MadNLPHSL/src/MadNLPHSL.jl
index 890fa855..c3a7e9de 100644
--- a/lib/MadNLPHSL/src/MadNLPHSL.jl
+++ b/lib/MadNLPHSL/src/MadNLPHSL.jl
@@ -5,25 +5,56 @@ import MadNLP: @kwdef, Logger, @debug, @warn, @error,
AbstractOptions, AbstractLinearSolver, set_options!, SparseMatrixCSC, SubVector,
SymbolicException,FactorizationException,SolveException,InertiaException,
introduce, factorize!, solve!, improve!, is_inertia, inertia, findIJ, nnz,
- get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap
+ get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap,
+ is_supported
include(joinpath("..","deps","deps.jl"))
+include("common.jl")
+include("mc68.jl")
+
if @isdefined(libhsl)
- include("common.jl")
- include("mc68.jl")
+    @isdefined(libma27) || @eval const libma27 = libhsl
+    @isdefined(libma57) || @eval const libma57 = libhsl
+    @isdefined(libma77) || @eval const libma77 = libhsl
+    @isdefined(libma86) || @eval const libma86 = libhsl
+    @isdefined(libma97) || @eval const libma97 = libhsl
+end
+
+if @isdefined(libma27)
include("ma27.jl")
+ export Ma27Solver
+end
+
+if @isdefined(libma57)
include("ma57.jl")
+ export Ma57Solver
+end
+
+if @isdefined(libma77)
include("ma77.jl")
+ export Ma77Solver
+end
+
+if @isdefined(libma86)
include("ma86.jl")
+ export Ma86Solver
+end
+
+if @isdefined(libma97)
include("ma97.jl")
- export Ma27Solver, Ma57Solver, Ma77Solver, Ma86Solver, Ma97Solver
+ export Ma97Solver
end
function __init__()
check_deps()
try
- @isdefined(libhsl) && dlopen(libhsl,RTLD_DEEPBIND)
+ @isdefined(libhsl) && dlopen(libhsl,RTLD_DEEPBIND)
+ @isdefined(libma27) && dlopen(libma27,RTLD_DEEPBIND)
+    @isdefined(libma57) && dlopen(libma57,RTLD_DEEPBIND)
+ @isdefined(libma77) && dlopen(libma77,RTLD_DEEPBIND)
+    @isdefined(libma86) && dlopen(libma86,RTLD_DEEPBIND)
+ @isdefined(libma97) && dlopen(libma97,RTLD_DEEPBIND)
catch e
println("HSL shared library cannot be loaded")
end
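
With this layout, each `Ma*Solver` is only defined and exported when its library handle exists, and a monolithic `libhsl` back-fills the per-solver handles. A standalone analog of the fallback idiom (plain strings in place of real library paths):

```julia
const libhsl = "libhsl.so"                        # pretend only the monolith was built
@isdefined(libma27) || @eval const libma27 = libhsl
libma27 == "libhsl.so"                            # the MA27 handle aliases the monolith
```
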
diff --git a/lib/MadNLPHSL/src/ma27.jl b/lib/MadNLPHSL/src/ma27.jl
index 418b98a2..133a345c 100644
--- a/lib/MadNLPHSL/src/ma27.jl
+++ b/lib/MadNLPHSL/src/ma27.jl
@@ -1,6 +1,6 @@
-const ma27_default_icntl = Int32[
+ma27_default_icntl() = Int32[
6,6,0,2139062143,1,32639,32639,32639,32639,14,9,8,8,9,10,32639,32639,32639,32689,24,11,9,8,9,10,0,0,0,0,0]
-const ma27_default_cntl = [.1,1.0,0.,0.,0.]
+ma27_default_cntl(T) = T[.1,1.0,0.,0.,0.]
@kwdef mutable struct Ma27Options <: AbstractOptions
ma27_pivtol::Float64 = 1e-8
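
Turning the default control arrays into functions gives every solver instance a fresh copy in the requested precision, instead of sharing (and possibly mutating) one global `Float64` array. For example:

```julia
cntl = ma27_default_cntl(Float32)  # fresh Vector{Float32} for this instance
cntl[1] = 1f-8                     # set the pivot tolerance without touching a shared global
```
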
@@ -10,18 +10,18 @@ const ma27_default_cntl = [.1,1.0,0.,0.,0.]
ma27_meminc_factor::Float64 =2.
end
-mutable struct Ma27Solver <: AbstractLinearSolver
- csc::SparseMatrixCSC{Float64,Int32}
+mutable struct Ma27Solver{T} <: AbstractLinearSolver{T}
+ csc::SparseMatrixCSC{T,Int32}
I::Vector{Int32}
J::Vector{Int32}
icntl::Vector{Int32}
- cntl::Vector{Float64}
+ cntl::Vector{T}
info::Vector{Int32}
- a::Vector{Float64}
- a_view::Vector{Float64}
+ a::Vector{T}
+ a_view::Vector{T}
la::Int32
ikeep::Vector{Int32}
@@ -29,51 +29,68 @@ mutable struct Ma27Solver <: AbstractLinearSolver
liw::Int32
iw1::Vector{Int32}
nsteps::Vector{Int32}
- w::Vector{Float64}
+ w::Vector{T}
maxfrt::Vector{Int32}
opt::Ma27Options
logger::Logger
end
-ma27ad!(n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint},
- iw::Vector{Cint},liw::Cint,ikeep::Vector{Cint},iw1::Vector{Cint},
- nsteps::Vector{Cint},iflag::Cint,icntl::Vector{Cint},cntl::Vector{Cdouble},
- info::Vector{Cint},ops::Cdouble) = ccall(
- (:ma27ad_,libhsl),
+
+for (fa, fb, fc, typ) in [
+ (:ma27ad_,:ma27bd_,:ma27cd_,Float64),
+ (:ma27a_,:ma27b_,:ma27c_,Float32)
+ ]
+ @eval begin
+ ma27a!(
+ n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint},
+ iw::Vector{Cint},liw::Cint,ikeep::Vector{Cint},iw1::Vector{Cint},
+ nsteps::Vector{Cint},iflag::Cint,icntl::Vector{Cint},cntl::Vector{$typ},
+ info::Vector{Cint},ops::$typ
+ ) = ccall(
+ ($(string(fa)),libma27),
Nothing,
(Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},
Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{Cdouble},
- Ptr{Cint},Ref{Cdouble}),
- n,nz,I,J,iw,liw,ikeep,iw1,nsteps,iflag,icntl,cntl,info,ops)
-
-ma27bd!(n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint},
- a::Vector{Cdouble},la::Cint,iw::Vector{Cint},liw::Cint,
- ikeep::Vector{Cint},nsteps::Vector{Cint},maxfrt::Vector{Cint},iw1::Vector{Cint},
- icntl::Vector{Cint},cntl::Vector{Cdouble},info::Vector{Cint}) = ccall(
- (:ma27bd_,libhsl),
+ Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{$typ},
+ Ptr{Cint},Ref{$typ}),
+ n,nz,I,J,iw,liw,ikeep,iw1,nsteps,iflag,icntl,cntl,info,ops
+ )
+
+ ma27b!(
+ n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint},
+ a::Vector{$typ},la::Cint,iw::Vector{Cint},liw::Cint,
+ ikeep::Vector{Cint},nsteps::Vector{Cint},maxfrt::Vector{Cint},iw1::Vector{Cint},
+ icntl::Vector{Cint},cntl::Vector{$typ},info::Vector{Cint}
+ ) = ccall(
+ ($(string(fb)),libma27),
Nothing,
(Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cdouble},Ref{Cint},Ptr{Cint},Ref{Cint},
+ Ptr{$typ},Ref{Cint},Ptr{Cint},Ref{Cint},
Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cint},Ptr{Cdouble},Ptr{Cint}),
- n,nz,I,J,a,la,iw,liw,ikeep,nsteps,maxfrt,iw1,icntl,cntl,info)
-
-ma27cd!(n::Cint,a::Vector{Cdouble},la::Cint,iw::Vector{Cint},
- liw::Cint,w::Vector{Cdouble},maxfrt::Vector{Cint},rhs::Vector{Cdouble},
- iw1::Vector{Cint},nsteps::Vector{Cint},icntl::Vector{Cint},
- info::Vector{Cint}) = ccall(
- (:ma27cd_,libhsl),
+ Ptr{Cint},Ptr{$typ},Ptr{Cint}),
+ n,nz,I,J,a,la,iw,liw,ikeep,nsteps,maxfrt,iw1,icntl,cntl,info
+ )
+
+ ma27c!(
+ n::Cint,a::Vector{$typ},la::Cint,iw::Vector{Cint},
+ liw::Cint,w::Vector{$typ},maxfrt::Vector{Cint},rhs::Vector{$typ},
+ iw1::Vector{Cint},nsteps::Vector{Cint},icntl::Vector{Cint},
+ info::Vector{Cint}
+ ) = ccall(
+ ($(string(fc)),libma27),
Nothing,
- (Ref{Cint},Ptr{Cdouble},Ref{Cint},Ptr{Cint},
- Ref{Cint},Ptr{Cdouble},Ptr{Cint},Ptr{Cdouble},
+ (Ref{Cint},Ptr{$typ},Ref{Cint},Ptr{Cint},
+ Ref{Cint},Ptr{$typ},Ptr{Cint},Ptr{$typ},
Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}),
- n,a,la,iw,liw,w,maxfrt,rhs,iw1,nsteps,icntl,info)
+ n,a,la,iw,liw,w,maxfrt,rhs,iw1,nsteps,icntl,info
+ )
+ end
+end
-function Ma27Solver(csc::SparseMatrixCSC;
+function Ma27Solver(csc::SparseMatrixCSC{T};
option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
- opt=Ma27Options(),logger=Logger(),kwargs...)
+ opt=Ma27Options(),logger=Logger(),kwargs...) where T
set_options!(opt,option_dict,kwargs)
@@ -87,31 +104,31 @@ function Ma27Solver(csc::SparseMatrixCSC;
nsteps=Int32[1]
iflag =Int32(0)
- icntl= copy(ma27_default_icntl)
+ icntl= ma27_default_icntl()
+ cntl = ma27_default_cntl(T)
icntl[1:2] .= 0
- cntl = copy(ma27_default_cntl)
cntl[1] = opt.ma27_pivtol
info = Vector{Int32}(undef,20)
- ma27ad!(Int32(csc.n),nz,I,J,iw,liw,ikeep,iw1,nsteps,Int32(0),icntl,cntl,info,0.)
+ ma27a!(Int32(csc.n),nz,I,J,iw,liw,ikeep,iw1,nsteps,Int32(0),icntl,cntl,info,zero(T))
info[1]<0 && throw(SymbolicException())
la = ceil(Int32,max(nz,opt.ma27_la_init_factor*info[5]))
- a = Vector{Float64}(undef,la)
+ a = Vector{T}(undef,la)
a_view = _madnlp_unsafe_wrap(a,nnz(csc))
liw= ceil(Int32,opt.ma27_liw_init_factor*info[6])
resize!(iw,liw)
maxfrt=Int32[1]
- return Ma27Solver(csc,I,J,icntl,cntl,info,a,a_view,la,ikeep,iw,liw,
- iw1,nsteps,Vector{Float64}(),maxfrt,opt,logger)
+ return Ma27Solver{T}(csc,I,J,icntl,cntl,info,a,a_view,la,ikeep,iw,liw,
+ iw1,nsteps,Vector{T}(),maxfrt,opt,logger)
end
function factorize!(M::Ma27Solver)
M.a_view.=M.csc.nzval
while true
- ma27bd!(Int32(M.csc.n),Int32(nnz(M.csc)),M.I,M.J,M.a,M.la,
+ ma27b!(Int32(M.csc.n),Int32(nnz(M.csc)),M.I,M.J,M.a,M.la,
M.iw,M.liw,M.ikeep,M.nsteps,M.maxfrt,
M.iw1,M.icntl,M.cntl,M.info)
if M.info[1] == -3
@@ -131,10 +148,10 @@ function factorize!(M::Ma27Solver)
return M
end
-function solve!(M::Ma27Solver,rhs::Vector{Float64})
+function solve!(M::Ma27Solver{T},rhs::Vector{T}) where T
     length(M.w)<M.maxfrt[1] && resize!(M.w,M.maxfrt[1])
     length(M.iw1)<M.nsteps[1] && resize!(M.iw1,M.nsteps[1])
-    ma27cd!(Int32(M.csc.n),M.a,M.la,M.iw,M.liw,M.w,M.maxfrt,rhs,M.iw1,M.nsteps,M.icntl,M.info)
+    ma27c!(Int32(M.csc.n),M.a,M.la,M.iw,M.liw,M.w,M.maxfrt,rhs,M.iw1,M.nsteps,M.icntl,M.info)
     M.info[1]<0 && throw(SolveException())
     return rhs
 end
diff --git a/lib/MadNLPTests/src/MadNLPTests.jl b/lib/MadNLPTests/src/MadNLPTests.jl
@@ -54,4 +49,4 @@
-function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
+function DenseDummyQP{T}(; n=100, m=10, fixed_variables=Int[], equality_cons=[]) where T
     if m >= n
         error("The number of constraints `m` should be less than the number of variable `n`.")
     end
@@ -66,29 +61,29 @@ function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
Random.seed!(1)
# Build QP problem 0.5 * x' * P * x + q' * x
- P = randn(n , n)
+ P = randn(T,n , n)
P += P' # P is symmetric
- P += 100.0 * I
+ P += T(100.0) * I
- q = randn(n)
+ q = randn(T,n)
# Build constraints gl <= Ax <= gu
- A = zeros(m, n)
+ A = zeros(T,m, n)
for j in 1:m
- A[j, j] = 1.0
- A[j, j+1] = -1.0
+ A[j, j] = one(T)
+ A[j, j+1] = -one(T)
end
- x0 = zeros(n)
- y0 = zeros(m)
+ x0 = zeros(T,n)
+ y0 = zeros(T,m)
# Bound constraints
- xu = fill(1.0, n)
- xl = fill(0.0, n)
- gl = fill(0.0, m)
- gu = fill(1.0, m)
+ xu = fill(one(T), n)
+ xl = fill(zero(T), n)
+ gl = fill(zero(T), m)
+ gu = fill(one(T), m)
# Update gu to load equality constraints
- gu[equality_cons] .= 0.0
+ gu[equality_cons] .= zero(T)
xl[fixed_variables] .= xu[fixed_variables]
@@ -100,7 +95,7 @@ function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
jcols = [i for i in 1:n for j in 1:m]
nnzj = n * m
- return DenseDummyQP(
+ return DenseDummyQP{T}(
NLPModels.NLPModelMeta(
n,
ncon = m,
@@ -119,3 +114,4 @@ function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
)
end
+DenseDummyQP(; kwargs...) = DenseDummyQP{Float64}(; kwargs...)
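
Usage sketch: precision is now a type parameter on the test problem, with `Float64` kept as the default for backward compatibility.

```julia
nlp32 = MadNLPTests.DenseDummyQP{Float32}(; n=10, m=2)
nlp64 = MadNLPTests.DenseDummyQP(; n=10, m=2)  # identical to DenseDummyQP{Float64}
```
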
diff --git a/lib/MadNLPTests/src/MadNLPTests.jl b/lib/MadNLPTests/src/MadNLPTests.jl
index 33c338e0..7eb34473 100644
--- a/lib/MadNLPTests/src/MadNLPTests.jl
+++ b/lib/MadNLPTests/src/MadNLPTests.jl
@@ -20,23 +20,25 @@ function solcmp(x,sol;atol=1e-4,rtol=1e-4)
return (aerr < atol || rerr < rtol)
end
-function test_linear_solver(solver)
+function test_linear_solver(solver,T; kwargs...)
+
m = 2
n = 2
row = Int32[1,2,2]
col = Int32[1,1,2]
- val = Float64[1.,.1,2.]
- b = [1.0,3.0]
+ val = T[1.,.1,2.]
+ b = T[1.0,3.0]
x = similar(b)
@testset "Linear solver $solver" begin
+
csc = sparse(row,col,val,m,n)
- dense = Array(csc)
sol= [0.8542713567839195, 1.4572864321608041]
if MadNLP.input_type(solver) == :csc
- M = solver(csc)
+ M = solver(csc;kwargs...)
elseif MadNLP.input_type(solver) == :dense
- M = solver(dense)
+ dense = Array(csc)
+ M = solver(dense;kwargs...)
end
MadNLP.introduce(M)
MadNLP.improve!(M)
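
`test_linear_solver` now takes the precision as an argument, so each wrapper can be exercised at every supported type, e.g. (assuming the CPU LAPACK wrapper accepts both precisions after the `lapack.jl` changes below):

```julia
MadNLPTests.test_linear_solver(MadNLP.LapackCPUSolver, Float32)
MadNLPTests.test_linear_solver(MadNLP.LapackCPUSolver, Float64)
```
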
diff --git a/src/IPM/IPM.jl b/src/IPM/IPM.jl
index e046f14a..16dd949b 100644
--- a/src/IPM/IPM.jl
+++ b/src/IPM/IPM.jl
@@ -1,11 +1,11 @@
# MadNLP.jl
# Created by Sungho Shin (sungho.shin@wisc.edu)
-abstract type AbstractInteriorPointSolver end
+abstract type AbstractInteriorPointSolver{T} end
include("restoration.jl")
-mutable struct InteriorPointSolver{KKTSystem} <: AbstractInteriorPointSolver
+mutable struct InteriorPointSolver{T,KKTSystem <: AbstractKKTSystem{T}} <: AbstractInteriorPointSolver{T}
nlp::AbstractNLPModel
kkt::KKTSystem
@@ -18,111 +18,113 @@ mutable struct InteriorPointSolver{KKTSystem} <: AbstractInteriorPointSolver
nlb::Int
nub::Int
- x::Vector{Float64} # primal (after reformulation)
- l::Vector{Float64} # dual
- zl::Vector{Float64} # dual (after reformulation)
- zu::Vector{Float64} # dual (after reformulation)
- xl::Vector{Float64} # primal lower bound (after reformulation)
- xu::Vector{Float64} # primal upper bound (after reformulation)
+ x::Vector{T} # primal (after reformulation)
+ l::Vector{T} # dual
+ zl::Vector{T} # dual (after reformulation)
+ zu::Vector{T} # dual (after reformulation)
+ xl::Vector{T} # primal lower bound (after reformulation)
+ xu::Vector{T} # primal upper bound (after reformulation)
- obj_val::Float64
- f::Vector{Float64}
- c::Vector{Float64}
+ obj_val::T
+ f::Vector{T}
+ c::Vector{T}
- jacl::Vector{Float64}
+ jacl::Vector{T}
- d::UnreducedKKTVector{Float64, Vector{Float64}}
- p::UnreducedKKTVector{Float64, Vector{Float64}}
+ d::UnreducedKKTVector{T, Vector{T}}
+ p::UnreducedKKTVector{T, Vector{T}}
- _w1::AbstractKKTVector{Float64, Vector{Float64}}
- _w2::AbstractKKTVector{Float64, Vector{Float64}}
+ _w1::AbstractKKTVector{T, Vector{T}}
+ _w2::AbstractKKTVector{T, Vector{T}}
- _w3::AbstractKKTVector{Float64, Vector{Float64}}
- _w4::AbstractKKTVector{Float64, Vector{Float64}}
+ _w3::AbstractKKTVector{T, Vector{T}}
+ _w4::AbstractKKTVector{T, Vector{T}}
- x_trial::Vector{Float64}
- c_trial::Vector{Float64}
- obj_val_trial::Float64
+ x_trial::Vector{T}
+ c_trial::Vector{T}
+ obj_val_trial::T
- x_slk::Vector{Float64}
- c_slk::SubVector{Float64}
- rhs::Vector{Float64}
+ x_slk::Vector{T}
+ c_slk::SubVector{T}
+ rhs::Vector{T}
ind_ineq::Vector{Int}
ind_fixed::Vector{Int}
ind_llb::Vector{Int}
ind_uub::Vector{Int}
- x_lr::SubVector{Float64}
- x_ur::SubVector{Float64}
- xl_r::SubVector{Float64}
- xu_r::SubVector{Float64}
- zl_r::SubVector{Float64}
- zu_r::SubVector{Float64}
-
- dx_lr::SubVector{Float64}
- dx_ur::SubVector{Float64}
- x_trial_lr::SubVector{Float64}
- x_trial_ur::SubVector{Float64}
-
- linear_solver::AbstractLinearSolver
- iterator::AbstractIterator
-
- obj_scale::Vector{Float64}
- con_scale::Vector{Float64}
- con_jac_scale::Vector{Float64}
- inf_pr::Float64
- inf_du::Float64
- inf_compl::Float64
-
- theta_min::Float64
- theta_max::Float64
- mu::Float64
- tau::Float64
-
- alpha::Float64
- alpha_z::Float64
+ x_lr::SubVector{T}
+ x_ur::SubVector{T}
+ xl_r::SubVector{T}
+ xu_r::SubVector{T}
+ zl_r::SubVector{T}
+ zu_r::SubVector{T}
+
+ dx_lr::SubVector{T}
+ dx_ur::SubVector{T}
+ x_trial_lr::SubVector{T}
+ x_trial_ur::SubVector{T}
+
+ linear_solver::AbstractLinearSolver{T}
+ iterator::AbstractIterator{T}
+
+ obj_scale::Vector{T}
+ con_scale::Vector{T}
+ con_jac_scale::Vector{T}
+ inf_pr::T
+ inf_du::T
+ inf_compl::T
+
+ theta_min::T
+ theta_max::T
+ mu::T
+ tau::T
+
+ alpha::T
+ alpha_z::T
ftype::String
- del_w::Float64
- del_c::Float64
- del_w_last::Float64
+ del_w::T
+ del_c::T
+ del_w_last::T
- filter::Vector{Tuple{Float64,Float64}}
+ filter::Vector{Tuple{T,T}}
- RR::Union{Nothing,RobustRestorer}
+ RR::Union{Nothing,RobustRestorer{T}}
status::Status
output::Dict
end
-function InteriorPointSolver(nlp::AbstractNLPModel;
+function InteriorPointSolver(nlp::AbstractNLPModel{T};
option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(), kwargs...
-)
+) where T
opt = Options(linear_solver=default_linear_solver())
set_options!(opt,option_dict,kwargs)
check_option_sanity(opt)
+
+ @assert is_supported(opt.linear_solver,T)
- VT = Vector{Float64}
+ VT = Vector{T}
KKTSystem = if opt.kkt_system == SPARSE_KKT_SYSTEM
- MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{Float64, Int32} : Matrix{Float64}
- SparseKKTSystem{Float64, VT, MT}
+ MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T}
+ SparseKKTSystem{T, VT, MT}
elseif opt.kkt_system == SPARSE_UNREDUCED_KKT_SYSTEM
- MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{Float64, Int32} : Matrix{Float64}
- SparseUnreducedKKTSystem{Float64, VT, MT}
+ MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T}
+ SparseUnreducedKKTSystem{T, VT, MT}
elseif opt.kkt_system == DENSE_KKT_SYSTEM
- MT = Matrix{Float64}
- DenseKKTSystem{Float64, VT, MT}
+ MT = Matrix{T}
+ DenseKKTSystem{T, VT, MT}
elseif opt.kkt_system == DENSE_CONDENSED_KKT_SYSTEM
- MT = Matrix{Float64}
- DenseCondensedKKTSystem{Float64, VT, MT}
+ MT = Matrix{T}
+ DenseCondensedKKTSystem{T, VT, MT}
end
- return InteriorPointSolver{KKTSystem}(nlp, opt; option_linear_solver=option_dict)
+ return InteriorPointSolver{T,KKTSystem}(nlp, opt; option_linear_solver=option_dict)
end
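
End to end, the precision now flows from the model into the solver, and the `@assert is_supported(opt.linear_solver, T)` rejects unsupported solver/precision pairs early. A hedged usage sketch (options passed as in the tests):

```julia
nlp = MadNLPTests.DenseDummyQP{Float32}(; n=10, m=2)
ips = MadNLP.InteriorPointSolver(nlp; option_dict=Dict{Symbol,Any}(:tol => 1e-4))
MadNLP.optimize!(ips)  # iterates, multipliers, and the filter are all Float32
```
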
# Inner constructor
-function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options;
+function InteriorPointSolver{T,KKTSystem}(nlp::AbstractNLPModel, opt::Options;
option_linear_solver::Dict{Symbol,Any}=Dict{Symbol,Any}(),
-) where {KKTSystem<:AbstractKKTSystem}
+) where {T, KKTSystem<:AbstractKKTSystem{T}}
cnt = Counters(start_time=time())
logger = Logger(print_level=opt.print_level,file_print_level=opt.file_print_level,
@@ -145,21 +147,21 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options;
xl = [get_lvar(nlp);view(get_lcon(nlp),ind_cons.ind_ineq)]
xu = [get_uvar(nlp);view(get_ucon(nlp),ind_cons.ind_ineq)]
- x = [get_x0(nlp);zeros(ns)]
+ x = [get_x0(nlp);zeros(T,ns)]
l = copy(get_y0(nlp))
- zl= zeros(get_nvar(nlp)+ns)
- zu= zeros(get_nvar(nlp)+ns)
+ zl= zeros(T,get_nvar(nlp)+ns)
+ zu= zeros(T,get_nvar(nlp)+ns)
- f = zeros(n) # not sure why, but seems necessary to initialize to 0 when used with Plasmo interface
- c = zeros(m)
+ f = zeros(T,n) # not sure why, but seems necessary to initialize to 0 when used with Plasmo interface
+ c = zeros(T,m)
n_jac = nnz_jacobian(kkt)
nlb = length(ind_cons.ind_lb)
nub = length(ind_cons.ind_ub)
- x_trial=Vector{Float64}(undef,n)
- c_trial=Vector{Float64}(undef,m)
+ x_trial=Vector{T}(undef,n)
+ c_trial=Vector{T}(undef,m)
x_slk= _madnlp_unsafe_wrap(x,ns, get_nvar(nlp)+1)
c_slk= view(c,ind_cons.ind_ineq)
@@ -174,24 +176,29 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options;
x_trial_lr = view(x_trial, ind_cons.ind_lb)
x_trial_ur = view(x_trial, ind_cons.ind_ub)
- aug_vec_length = is_reduced(kkt) ? n+m : n+m+nlb+nub
-
- _w1 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub)
- _w2 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub)
- _w3 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub)
- _w4 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub)
+ if is_reduced(kkt)
+ _w1 = ReducedKKTVector{T,typeof(x)}(n, m)
+ _w2 = ReducedKKTVector{T,typeof(x)}(n, m)
+ _w3 = ReducedKKTVector{T,typeof(x)}(n, m)
+ _w4 = ReducedKKTVector{T,typeof(x)}(n, m)
+ else
+ _w1 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+ _w2 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+ _w3 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+ _w4 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+ end
- jacl = zeros(n) # spblas may throw an error if not initialized to zero
+ jacl = zeros(T,n) # spblas may throw an error if not initialized to zero
- d = UnreducedKKTVector(n, m, nlb, nub)
+ d = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
dx_lr = view(d.xp, ind_cons.ind_lb) # TODO
dx_ur = view(d.xp, ind_cons.ind_ub) # TODO
- p = UnreducedKKTVector(n, m, nlb, nub)
+ p = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
- obj_scale = [1.0]
- con_scale = ones(m)
- con_jac_scale = ones(n_jac)
+ obj_scale = T[1.0]
+ con_scale = ones(T,m)
+ con_jac_scale = ones(T,n_jac)
@trace(logger,"Initializing linear solver.")
cnt.linear_solver_time =
@@ -210,7 +217,7 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options;
!isempty(option_linear_solver) && print_ignored_options(logger, option_linear_solver)
- return InteriorPointSolver{KKTSystem}(nlp,kkt,opt,cnt,logger,
+ return InteriorPointSolver{T,KKTSystem}(nlp,kkt,opt,cnt,logger,
n,m,nlb,nub,x,l,zl,zu,xl,xu,0.,f,c,
jacl,
d, p,
@@ -221,7 +228,7 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options;
linear_solver,iterator,
obj_scale,con_scale,con_jac_scale,
0.,0.,0.,0.,0.,0.,0.,0.,0.," ",0.,0.,0.,
- Vector{Float64}[],nothing,INITIAL,Dict(),
+ Vector{T}[],nothing,INITIAL,Dict(),
)
end
diff --git a/src/IPM/callbacks.jl b/src/IPM/callbacks.jl
index f742388d..fd98d7fb 100644
--- a/src/IPM/callbacks.jl
+++ b/src/IPM/callbacks.jl
@@ -1,4 +1,4 @@
-function eval_f_wrapper(ips::InteriorPointSolver, x::Vector{Float64})
+function eval_f_wrapper(ips::InteriorPointSolver, x::Vector{T}) where T
nlp = ips.nlp
cnt = ips.cnt
@trace(ips.logger,"Evaluating objective.")
@@ -9,7 +9,7 @@ function eval_f_wrapper(ips::InteriorPointSolver, x::Vector{Float64})
return obj_val*ips.obj_scale[]
end
-function eval_grad_f_wrapper!(ips::InteriorPointSolver, f::Vector{Float64},x::Vector{Float64})
+function eval_grad_f_wrapper!(ips::InteriorPointSolver, f::Vector{T},x::Vector{T}) where T
nlp = ips.nlp
cnt = ips.cnt
@trace(ips.logger,"Evaluating objective gradient.")
@@ -26,7 +26,7 @@ function eval_grad_f_wrapper!(ips::InteriorPointSolver, f::Vector{Float64},x::Ve
return f
end
-function eval_cons_wrapper!(ips::InteriorPointSolver, c::Vector{Float64},x::Vector{Float64})
+function eval_cons_wrapper!(ips::InteriorPointSolver, c::Vector{T},x::Vector{T}) where T
nlp = ips.nlp
cnt = ips.cnt
@trace(ips.logger, "Evaluating constraints.")
@@ -45,7 +45,7 @@ function eval_cons_wrapper!(ips::InteriorPointSolver, c::Vector{Float64},x::Vect
return c
end
-function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{Float64})
+function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{T}) where T
nlp = ipp.nlp
cnt = ipp.cnt
ns = length(ipp.ind_ineq)
@@ -65,7 +65,7 @@ function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::
return jac
end
-function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{Float64},l::Vector{Float64};is_resto=false)
+function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{T},l::Vector{T};is_resto=false) where T
nlp = ipp.nlp
cnt = ipp.cnt
@trace(ipp.logger,"Evaluating Lagrangian Hessian.")
@@ -86,7 +86,7 @@ function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem
return hess
end
-function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{Float64})
+function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{T}) where T
nlp = ipp.nlp
cnt = ipp.cnt
ns = length(ipp.ind_ineq)
@@ -105,7 +105,7 @@ function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem
return jac
end
-function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{Float64},l::Vector{Float64};is_resto=false)
+function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{T},l::Vector{T};is_resto=false) where T
nlp = ipp.nlp
cnt = ipp.cnt
@trace(ipp.logger,"Evaluating Lagrangian Hessian.")
diff --git a/src/IPM/factorization.jl b/src/IPM/factorization.jl
index ae6742eb..66edab72 100644
--- a/src/IPM/factorization.jl
+++ b/src/IPM/factorization.jl
@@ -36,10 +36,10 @@ function solve_refine_wrapper!(
end
function solve_refine_wrapper!(
- ips::InteriorPointSolver{<:DenseCondensedKKTSystem},
+ ips::InteriorPointSolver{T,<:DenseCondensedKKTSystem},
x::AbstractKKTVector,
b::AbstractKKTVector,
-)
+) where T
cnt = ips.cnt
@trace(ips.logger,"Iterative solution started.")
fixed_variable_treatment_vec!(full(b), ips.ind_fixed)
diff --git a/src/IPM/kernels.jl b/src/IPM/kernels.jl
index 945168cd..9082d30c 100644
--- a/src/IPM/kernels.jl
+++ b/src/IPM/kernels.jl
@@ -311,10 +311,10 @@ function initialize_variables!(x,xl,xu,bound_push,bound_fac)
end
end
-function adjust_boundary!(x_lr,xl_r,x_ur,xu_r,mu)
+function adjust_boundary!(x_lr::VT,xl_r,x_ur,xu_r,mu) where {T, VT <: AbstractVector{T}}
adjusted = 0
- c1 = eps(Float64)*mu
- c2= eps(Float64)^(3/4)
+ c1 = eps(T)*mu
+ c2= eps(T)^(3/4)
@simd for i=1:length(xl_r)
@inbounds x_lr[i]-xl_r[i] < c1 && (xl_r[i] -= c2*max(1,abs(x_lr[i]));adjusted+=1)
end
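
Replacing the hard-coded `eps(Float64)` with `eps(T)` matters because the perturbation scale differs by about six orders of magnitude between precisions; with `eps(Float64)`, a `Float32` run would apply bound relaxations far below the representable resolution. Roughly:

```julia
eps(Float32)^(3/4)  # ≈ 6.4e-6
eps(Float64)^(3/4)  # ≈ 1.8e-12
```
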
@@ -355,9 +355,9 @@ function get_alpha_min(theta,varphi_d,theta_min,gamma_theta,gamma_phi,alpha_min_
end
is_switching(varphi_d,alpha,s_phi,del,theta,s_theta) = varphi_d < 0 && alpha*(-varphi_d)^s_phi > del*theta^s_theta
is_armijo(varphi_trial,varphi,eta_phi,alpha,varphi_d) = varphi_trial <= varphi + eta_phi*alpha*varphi_d
-is_sufficient_progress(theta_trial,theta,gamma_theta,varphi_trial,varphi,gamma_phi,has_constraints) =
- (has_constraints && ((theta_trial<=(1-gamma_theta)*theta+10*eps(Float64)*abs(theta))) ||
- ((varphi_trial<=varphi-gamma_phi*theta +10*eps(Float64)*abs(varphi))))
+is_sufficient_progress(theta_trial::T,theta,gamma_theta,varphi_trial,varphi,gamma_phi,has_constraints) where T =
+ (has_constraints && ((theta_trial<=(1-gamma_theta)*theta+10*eps(T)*abs(theta))) ||
+ ((varphi_trial<=varphi-gamma_phi*theta +10*eps(T)*abs(varphi))))
augment_filter!(filter,theta,varphi,gamma_theta) = push!(filter,((1-gamma_theta)*theta,varphi-gamma_theta*theta))
function is_filter_acceptable(filter,theta,varphi)
!isnan(theta) || return false
diff --git a/src/IPM/restoration.jl b/src/IPM/restoration.jl
index ca349450..fca35020 100644
--- a/src/IPM/restoration.jl
+++ b/src/IPM/restoration.jl
@@ -1,49 +1,49 @@
-mutable struct RobustRestorer
- obj_val_R::Float64
- f_R::Vector{Float64}
- x_ref::Vector{Float64}
+mutable struct RobustRestorer{T}
+ obj_val_R::T
+ f_R::Vector{T}
+ x_ref::Vector{T}
- theta_ref::Float64
- D_R::Vector{Float64}
- obj_val_R_trial::Float64
+ theta_ref::T
+ D_R::Vector{T}
+ obj_val_R_trial::T
- pp::Vector{Float64}
- nn::Vector{Float64}
- zp::Vector{Float64}
- zn::Vector{Float64}
+ pp::Vector{T}
+ nn::Vector{T}
+ zp::Vector{T}
+ zn::Vector{T}
- dpp::Vector{Float64}
- dnn::Vector{Float64}
- dzp::Vector{Float64}
- dzn::Vector{Float64}
+ dpp::Vector{T}
+ dnn::Vector{T}
+ dzp::Vector{T}
+ dzn::Vector{T}
- pp_trial::Vector{Float64}
- nn_trial::Vector{Float64}
+ pp_trial::Vector{T}
+ nn_trial::Vector{T}
- inf_pr_R::Float64
- inf_du_R::Float64
- inf_compl_R::Float64
+ inf_pr_R::T
+ inf_du_R::T
+ inf_compl_R::T
- mu_R::Float64
- tau_R::Float64
- zeta::Float64
+ mu_R::T
+ tau_R::T
+ zeta::T
- filter::Vector{Tuple{Float64,Float64}}
+ filter::Vector{Tuple{T,T}}
end
-function RobustRestorer(ips::AbstractInteriorPointSolver)
+function RobustRestorer(ips::AbstractInteriorPointSolver{T}) where T
- nn = Vector{Float64}(undef,ips.m)
- zp = Vector{Float64}(undef,ips.m)
- zn = Vector{Float64}(undef,ips.m)
- dpp= Vector{Float64}(undef,ips.m)
- dnn= Vector{Float64}(undef,ips.m)
- dzp= Vector{Float64}(undef,ips.m)
- dzn= Vector{Float64}(undef,ips.m)
- pp_trial = Vector{Float64}(undef,ips.m)
- nn_trial = Vector{Float64}(undef,ips.m)
+ nn = Vector{T}(undef,ips.m)
+ zp = Vector{T}(undef,ips.m)
+ zn = Vector{T}(undef,ips.m)
+ dpp= Vector{T}(undef,ips.m)
+ dnn= Vector{T}(undef,ips.m)
+ dzp= Vector{T}(undef,ips.m)
+ dzn= Vector{T}(undef,ips.m)
+ pp_trial = Vector{T}(undef,ips.m)
+ nn_trial = Vector{T}(undef,ips.m)
- return RobustRestorer(
+ return RobustRestorer{T}(
0.,
primal(ips._w2),
primal(ips._w1),
@@ -57,7 +57,7 @@ function RobustRestorer(ips::AbstractInteriorPointSolver)
dual(ips._w2),
dual(ips._w1),
0.,0.,0.,0.,0.,0.,
- Tuple{Float64,Float64}[],
+ Tuple{T,T}[],
)
end
diff --git a/src/IPM/solver.jl b/src/IPM/solver.jl
index fc205659..0eb6f666 100644
--- a/src/IPM/solver.jl
+++ b/src/IPM/solver.jl
@@ -213,7 +213,7 @@ function regular!(ips::AbstractInteriorPointSolver)
ips.alpha = alpha_max
varphi_trial= 0.
theta_trial = 0.
- small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(Float64)
+ small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(eltype(ips.x))
switching_condition = is_switching(varphi_d,ips.alpha,ips.opt.s_phi,ips.opt.delta,2.,ips.opt.s_theta)
armijo_condition = false
while true
@@ -247,7 +247,7 @@ function regular!(ips::AbstractInteriorPointSolver)
return RESTORE
else
@trace(ips.logger,"Step rejected; proceed with the next trial step.")
- ips.alpha * norm(primal(ips.d)) < eps(Float64)*10 &&
+ ips.alpha * norm(primal(ips.d)) < eps(eltype(ips.x))*10 &&
return ips.cnt.acceptable_cnt >0 ?
SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL
end
@@ -434,7 +434,7 @@ function robust!(ips::InteriorPointSolver)
ips.cnt.l = 1
theta_R_trial = 0.
varphi_R_trial = 0.
- small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(Float64)
+ small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(eltype(ips.x))
switching_condition = is_switching(varphi_d_R,ips.alpha,ips.opt.s_phi,ips.opt.delta,theta_R,ips.opt.s_theta)
armijo_condition = false
@@ -470,7 +470,7 @@ function robust!(ips::InteriorPointSolver)
return RESTORATION_FAILED
else
@trace(ips.logger,"Step rejected; proceed with the next trial step.")
- ips.alpha < eps(Float64)*10 && return ips.cnt.acceptable_cnt >0 ?
+ ips.alpha < eps(eltype(ips.x))*10 && return ips.cnt.acceptable_cnt >0 ?
SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL
end
end
@@ -632,8 +632,8 @@ end
curv_test(t,n,g,wx,inertia_free_tol) = dot(wx,t) + max(dot(wx,n)-dot(g,n),0) - inertia_free_tol*dot(t,t) >=0
-function second_order_correction(ips::AbstractInteriorPointSolver,alpha_max::Float64,theta::Float64,varphi::Float64,
- theta_trial::Float64,varphi_d::Float64,switching_condition::Bool)
+function second_order_correction(ips::AbstractInteriorPointSolver,alpha_max,theta,varphi,
+ theta_trial,varphi_d,switching_condition::Bool)
@trace(ips.logger,"Second-order correction started.")
dual(ips._w1) .= alpha_max .* ips.c .+ ips.c_trial
diff --git a/src/KKT/rhs.jl b/src/KKT/rhs.jl
index 95a12c2e..f5080d46 100644
--- a/src/KKT/rhs.jl
+++ b/src/KKT/rhs.jl
@@ -95,14 +95,14 @@ struct ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
xl::VT # unsafe view
end
-ReducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int) = ReducedKKTVector(n, m)
-function ReducedKKTVector(n::Int, m::Int)
- x = Vector{Float64}(undef, n + m)
+ReducedKKTVector{T,VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} = ReducedKKTVector{T,VT}(n, m)
+function ReducedKKTVector{T,VT}(n::Int, m::Int) where {T, VT <: AbstractVector{T}}
+ x = VT(undef, n + m)
fill!(x, 0.0)
# Wrap directly array x to avoid dealing with views
xp = _madnlp_unsafe_wrap(x, n)
xl = _madnlp_unsafe_wrap(x, m, n+1)
- return ReducedKKTVector{Float64, Vector{Float64}}(x, xp, xl)
+ return ReducedKKTVector{T, VT}(x, xp, xl)
end
function ReducedKKTVector(rhs::AbstractKKTVector)
return ReducedKKTVector(number_primal(rhs), number_dual(rhs))
@@ -133,8 +133,8 @@ struct UnreducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
xzu::VT # unsafe view
end
-function UnreducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int)
- values = Vector{Float64}(undef, n + m + nlb + nub)
+function UnreducedKKTVector{T, VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}}
+ values = VT(undef, n + m + nlb + nub)
fill!(values, 0.0)
# Wrap array x directly to avoid dealing with views
x = _madnlp_unsafe_wrap(values, n + m) # Primal-Dual
@@ -142,7 +142,7 @@ function UnreducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int)
xl = _madnlp_unsafe_wrap(values, m, n+1) # Dual
xzl = _madnlp_unsafe_wrap(values, nlb, n + m + 1) # Lower bound
xzu = _madnlp_unsafe_wrap(values, nub, n + m + nlb + 1) # Upper bound
- return UnreducedKKTVector{Float64, Vector{Float64}}(values, x, xp, xl, xzl, xzu)
+ return UnreducedKKTVector{T, VT}(values, x, xp, xl, xzl, xzu)
end
full(rhs::UnreducedKKTVector) = rhs.values
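
With the constructors now parameterized on `{T, VT}`, the caller chooses both the precision and the storage backend. A minimal sketch with plain `Vector` storage (a GPU build would pass `CuVector{Float32}` instead); `UnreducedKKTVector` and `full` are internal names, accessed as in the file above:

```julia
using MadNLP: UnreducedKKTVector, full

# n primal variables, m duals, nlb/nub lower/upper bound multipliers.
n, m, nlb, nub = 5, 3, 2, 2
w = UnreducedKKTVector{Float32, Vector{Float32}}(n, m, nlb, nub)

# A single contiguous buffer; xp/xl/xzl/xzu are unsafe wraps into it.
@assert length(full(w)) == n + m + nlb + nub
@assert eltype(full(w)) == Float32
```
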
diff --git a/src/LinearSolvers/backsolve.jl b/src/LinearSolvers/backsolve.jl
index 0a32078c..22cb3b07 100644
--- a/src/LinearSolvers/backsolve.jl
+++ b/src/LinearSolvers/backsolve.jl
@@ -1,7 +1,7 @@
# MadNLP.jl
# Created by Sungho Shin (sungho.shin@wisc.edu)
-struct RichardsonIterator{T, VT, KKT, LinSolver} <: AbstractIterator
+struct RichardsonIterator{T, VT, KKT, LinSolver <: AbstractLinearSolver{T}} <: AbstractIterator{T}
linear_solver::LinSolver
kkt::KKT
residual::VT
@@ -11,11 +11,11 @@ struct RichardsonIterator{T, VT, KKT, LinSolver} <: AbstractIterator
logger::Logger
end
function RichardsonIterator(
- linear_solver::AbstractLinearSolver,
+ linear_solver::AbstractLinearSolver{T},
kkt::AbstractKKTSystem,
res::AbstractVector;
- max_iter=10, tol=1e-10, acceptable_tol=1e-5, logger=Logger(),
-)
+ max_iter=10, tol=T(1e-10), acceptable_tol=T(1e-5), logger=Logger(),
+) where T
return RichardsonIterator(
linear_solver, kkt, res, max_iter, tol, acceptable_tol, logger,
)
@@ -24,18 +24,18 @@ end
# Solve the reduced KKT system. Requires only the primal/dual values.
function solve_refine!(
x::AbstractKKTVector{T, VT},
- solver::RichardsonIterator{T, VT, KKT},
+ solver::RichardsonIterator{T, VT, KKT, LinSolver},
b::AbstractKKTVector{T, VT},
-) where {T, VT, KKT<:AbstractReducedKKTSystem}
+) where {T, VT, KKT<:AbstractReducedKKTSystem, LinSolver}
solve_refine!(primal_dual(x), solver, primal_dual(b))
end
# Solve the unreduced KKT system. Requires UnreducedKKTVector inputs.
function solve_refine!(
x::UnreducedKKTVector{T, VT},
- solver::RichardsonIterator{T, VT, KKT},
+ solver::RichardsonIterator{T, VT, KKT, LinSolver},
b::UnreducedKKTVector{T, VT},
-) where {T, VT, KKT<:AbstractUnreducedKKTSystem}
+) where {T, VT, KKT<:AbstractUnreducedKKTSystem, LinSolver}
solve_refine!(full(x), solver, full(b))
end
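
One detail worth calling out in the constructor above: writing the defaults as `T(1e-10)` converts the literal into the solver's precision, so every residual comparison inside the refinement loop stays in `T`. The pattern in isolation (`make_iterator` is a hypothetical stand-in, not MadNLP API):

```julia
# Hypothetical sketch of the keyword-default pattern: the tolerance
# literals are converted to T, so a Float32 solver gets Float32
# tolerances instead of promoting every comparison to Float64.
make_iterator(::Type{T}; max_iter=10, tol=T(1e-10), acceptable_tol=T(1e-5)) where {T} =
    (max_iter=max_iter, tol=tol, acceptable_tol=acceptable_tol)

make_iterator(Float32).tol   # 1.0f-10
make_iterator(Float64).tol   # 1.0e-10
```
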
diff --git a/src/LinearSolvers/lapack.jl b/src/LinearSolvers/lapack.jl
index ab526337..60f1c628 100644
--- a/src/LinearSolvers/lapack.jl
+++ b/src/LinearSolvers/lapack.jl
@@ -2,10 +2,10 @@
lapack_algorithm::LinearFactorization = BUNCHKAUFMAN
end
-mutable struct LapackCPUSolver <: AbstractLinearSolver
- dense::Matrix{Float64}
- fact::Matrix{Float64}
- work::Vector{Float64}
+mutable struct LapackCPUSolver{T} <: AbstractLinearSolver{T}
+ dense::Matrix{T}
+ fact::Matrix{T}
+ work::Vector{T}
lwork::BlasInt
info::Ref{BlasInt}
etc::Dict{Symbol,Any}
@@ -13,65 +13,79 @@ mutable struct LapackCPUSolver <: AbstractLinearSolver
logger::Logger
end
-sytrf(uplo,n,a,lda,ipiv,work,lwork,info)=ccall(
- (@blasfunc(dsytrf_),libblas),
- Cvoid,
- (Ref{Cchar},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
- uplo,n,a,lda,ipiv,work,lwork,info)
-sytrs(uplo,n,nrhs,a,lda,ipiv,b,ldb,info)=ccall(
- (@blasfunc(dsytrs_),libblas),
- Cvoid,
- (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
- uplo,n,nrhs,a,lda,ipiv,b,ldb,info)
-getrf(m,n,a,lda,ipiv,info)=ccall(
- (@blasfunc(dgetrf_),libblas),
- Cvoid,
- (Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{BlasInt}),
- m,n,a,lda,ipiv,info)
-getrs(trans,n,nrhs,a,lda,ipiv,b,ldb,info)=ccall(
- (@blasfunc(dgetrs_),libblas),
- Cvoid,
- (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
- trans,n,nrhs,a,lda,ipiv,b,ldb,info)
-geqrf(m,n,a,lda,tau,work,lwork,info)=ccall(
- (@blasfunc(dgeqrf_),libblas),
- Cvoid,
- (Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
- m,n,a,lda,tau,work,lwork,info)
-ormqr(side,trans,m,n,k,a,lda,tau,c,ldc,work,lwork,info)=ccall(
- (@blasfunc(dormqr_),libblas),
- Cvoid,
- (Ref{Cchar}, Ref{Cchar}, Ref{BlasInt}, Ref{BlasInt},Ref{BlasInt}, Ptr{Cdouble}, Ref{BlasInt}, Ptr{Cdouble},Ptr{Cdouble}, Ref{BlasInt}, Ptr{Cdouble}, Ref{BlasInt},Ptr{BlasInt}),
- side,trans,m,n,k,a,lda,tau,c,ldc,work,lwork,info)
-trsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)=ccall(
- (@blasfunc(dtrsm_),libblas),
- Cvoid,
- (Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ref{Cdouble},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt}),
- side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)
-potrf(uplo,n,a,lda,info)=ccall(
- (@blasfunc(dpotrf_),libblas),
- Cvoid,
- (Ref{Cchar},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
- uplo,n,a,lda,info)
-potrs(uplo,n,nrhs,a,lda,b,ldb,info)=ccall(
- (@blasfunc(dpotrs_),libblas),
- Cvoid,
- (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
- uplo,n,nrhs,a,lda,b,ldb,info)
-
-function LapackCPUSolver(dense::Matrix{Float64};
- option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
- opt=LapackOptions(),logger=Logger(),
- kwargs...)
+
+for (sytrf,sytrs,getrf,getrs,geqrf,ormqr,trsm,potrf,potrs,typ) in (
+ (@blasfunc(dsytrf_), @blasfunc(dsytrs_), @blasfunc(dgetrf_), @blasfunc(dgetrs_),
+ @blasfunc(dgeqrf_), @blasfunc(dormqr_), @blasfunc(dtrsm_), @blasfunc(dpotrf_), @blasfunc(dpotrs_),
+ Float64),
+ (@blasfunc(ssytrf_), @blasfunc(ssytrs_), @blasfunc(sgetrf_), @blasfunc(sgetrs_),
+ @blasfunc(sgeqrf_), @blasfunc(sormqr_), @blasfunc(strsm_), @blasfunc(spotrf_), @blasfunc(spotrs_),
+ Float32)
+ )
+ @eval begin
+ sytrf(uplo,n,a::Matrix{$typ},lda,ipiv,work,lwork,info)=ccall(
+ ($(string(sytrf)),libblas),
+ Cvoid,
+ (Ref{Cchar},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+ uplo,n,a,lda,ipiv,work,lwork,info)
+ sytrs(uplo,n,nrhs,a::Matrix{$typ},lda,ipiv,b,ldb,info)=ccall(
+ ($(string(sytrs)),libblas),
+ Cvoid,
+ (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+ uplo,n,nrhs,a,lda,ipiv,b,ldb,info)
+ getrf(m,n,a::Matrix{$typ},lda,ipiv,info)=ccall(
+ ($(string(getrf)),libblas),
+ Cvoid,
+ (Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{BlasInt}),
+ m,n,a,lda,ipiv,info)
+ getrs(trans,n,nrhs,a::Matrix{$typ},lda,ipiv,b,ldb,info)=ccall(
+ ($(string(getrs)),libblas),
+ Cvoid,
+ (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+ trans,n,nrhs,a,lda,ipiv,b,ldb,info)
+ geqrf(m,n,a::Matrix{$typ},lda,tau,work,lwork,info)=ccall(
+ ($(string(geqrf)),libblas),
+ Cvoid,
+ (Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{$typ},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+ m,n,a,lda,tau,work,lwork,info)
+ ormqr(side,trans,m,n,k,a::Matrix{$typ},lda,tau,c,ldc,work,lwork,info)=ccall(
+ ($(string(ormqr)),libblas),
+ Cvoid,
+ (Ref{Cchar}, Ref{Cchar}, Ref{BlasInt}, Ref{BlasInt},Ref{BlasInt}, Ptr{$typ}, Ref{BlasInt}, Ptr{$typ},Ptr{$typ}, Ref{BlasInt}, Ptr{$typ}, Ref{BlasInt},Ptr{BlasInt}),
+ side,trans,m,n,k,a,lda,tau,c,ldc,work,lwork,info)
+ trsm(side,uplo,transa,diag,m,n,alpha,a::Matrix{$typ},lda,b,ldb)=ccall(
+ ($(string(trsm)),libblas),
+ Cvoid,
+ (Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ref{$typ},Ptr{$typ},Ref{BlasInt},Ptr{$typ},Ref{BlasInt}),
+ side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)
+ potrf(uplo,n,a::Matrix{$typ},lda,info)=ccall(
+ ($(string(potrf)),libblas),
+ Cvoid,
+ (Ref{Cchar},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+ uplo,n,a,lda,info)
+ potrs(uplo,n,nrhs,a::Matrix{$typ},lda,b,ldb,info)=ccall(
+ ($(string(potrs)),libblas),
+ Cvoid,
+ (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+ uplo,n,nrhs,a,lda,b,ldb,info)
+ end
+end
+
+function LapackCPUSolver(
+ dense::Matrix{T};
+ option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
+ opt=LapackOptions(),
+ logger=Logger(),
+ kwargs...) where T
set_options!(opt,option_dict,kwargs...)
fact = copy(dense)
etc = Dict{Symbol,Any}()
- work = Vector{Float64}(undef, 1)
+ work = Vector{T}(undef, 1)
info=0
- return LapackCPUSolver(dense,fact,work,-1,info,etc,opt,logger)
+ return LapackCPUSolver{T}(dense,fact,work,-1,info,etc,opt,logger)
end
function factorize!(M::LapackCPUSolver)
@@ -87,7 +101,7 @@ function factorize!(M::LapackCPUSolver)
error(LOGGER,"Invalid lapack_algorithm")
end
end
-function solve!(M::LapackCPUSolver, x::Vector{Float64})
+function solve!(M::LapackCPUSolver{T}, x::Vector{T}) where T
if M.opt.lapack_algorithm == BUNCHKAUFMAN
solve_bunchkaufman!(M,x)
elseif M.opt.lapack_algorithm == LU
@@ -133,9 +147,10 @@ function solve_lu!(M::LapackCPUSolver,x)
return x
end
-function factorize_qr!(M::LapackCPUSolver)
+
+function factorize_qr!(M::LapackCPUSolver{T}) where T
size(M.fact,1) == 0 && return M
- haskey(M.etc,:tau) || (M.etc[:tau] = Vector{Float64}(undef,size(M.dense,1)))
+ haskey(M.etc,:tau) || (M.etc[:tau] = Vector{T}(undef,size(M.dense,1)))
tril_to_full!(M.dense)
M.lwork = -1
M.fact .= M.dense
@@ -218,3 +233,5 @@ function num_neg_ev(n,D,ipiv)
return numneg
end
+is_supported(::Type{LapackCPUSolver},::Type{Float32}) = true
+is_supported(::Type{LapackCPUSolver},::Type{Float64}) = true
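
The `@eval` loop above replaces nine hand-written `Float64` wrappers with one code-generation pass per precision: each iteration stamps out methods annotated `Matrix{$typ}`, so dispatch selects the `d`- or `s`-prefixed LAPACK symbol with no runtime branching. The same pattern stripped of the `ccall` plumbing:

```julia
# Minimal reproduction of the code-generation pattern used above:
# one method per (symbol, element type) pair, selected by dispatch.
for (sym, T) in ((:dsytrf_, Float64), (:ssytrf_, Float32))
    @eval blas_symbol(::Matrix{$T}) = $(string(sym))
end

blas_symbol(zeros(Float64, 2, 2))  # "dsytrf_"
blas_symbol(zeros(Float32, 2, 2))  # "ssytrf_"
```
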
diff --git a/src/LinearSolvers/linearsolvers.jl b/src/LinearSolvers/linearsolvers.jl
index 6b2212ea..23aaedd4 100644
--- a/src/LinearSolvers/linearsolvers.jl
+++ b/src/LinearSolvers/linearsolvers.jl
@@ -10,7 +10,7 @@
Abstract type for linear solvers targeting
the solution of the linear system ``Ax=b``.
"""
-abstract type AbstractLinearSolver end
+abstract type AbstractLinearSolver{T} end
"""
introduce(::AbstractLinearSolver)
@@ -37,6 +37,25 @@ factorized previously with [`factorize!`](@ref).
"""
function solve! end
+"""
+ is_supported(solver,T)
+
+Return `true` if the linear solver `solver` supports the
+floating-point type `T`.
+
+# Examples
+```julia-repl
+julia> is_supported(UmfpackSolver,Float64)
+true
+
+julia> is_supported(UmfpackSolver,Float32)
+false
+```
+"""
+function is_supported(::Type{LS},::Type{T}) where {LS <: AbstractLinearSolver, T <: AbstractFloat}
+ return false
+end
+
"""
is_inertia(::AbstractLinearSolver)
@@ -60,6 +79,7 @@ with
"""
function inertia end
+
function improve! end
# Default function for AbstractKKTVector
@@ -70,7 +90,7 @@ end
#=
Iterator's interface
=#
-abstract type AbstractIterator end
+abstract type AbstractIterator{T} end
"""
solve_refine!(x, ::AbstractIterator, b)
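
How a backend opts into the new trait (a sketch; `MySolver` is hypothetical): the generic fallback above answers `false` for every pair, so each solver declares only the precisions it actually implements.

```julia
import MadNLP

# Hypothetical backend supporting Float64 only; any other precision
# falls through to the generic `false` fallback defined above.
struct MySolver{T} <: MadNLP.AbstractLinearSolver{T} end

MadNLP.is_supported(::Type{MySolver}, ::Type{Float64}) = true

MadNLP.is_supported(MySolver, Float64)  # true
MadNLP.is_supported(MySolver, Float32)  # false
```
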
diff --git a/src/LinearSolvers/umfpack.jl b/src/LinearSolvers/umfpack.jl
index f1d533de..8bc46255 100644
--- a/src/LinearSolvers/umfpack.jl
+++ b/src/LinearSolvers/umfpack.jl
@@ -9,48 +9,61 @@ const umfpack_default_info = copy(UMFPACK.umf_info)
umfpack_strategy::Float64 = 2.
end
-mutable struct UmfpackSolver <: AbstractLinearSolver
+mutable struct UmfpackSolver{T} <: AbstractLinearSolver{T}
inner::UMFPACK.UmfpackLU
- tril::SparseMatrixCSC{Float64}
- full::SparseMatrixCSC{Float64}
- tril_to_full_view::SubVector{Float64}
- p::Vector{Float64}
+ tril::SparseMatrixCSC{T}
+ full::SparseMatrixCSC{T}
+ tril_to_full_view::SubVector{T}
+ p::Vector{T}
tmp::Vector{Ptr{Cvoid}}
- ctrl::Vector{Float64}
- info::Vector{Float64}
+ ctrl::Vector{T}
+ info::Vector{T}
opt::UmfpackOptions
logger::Logger
end
-umfpack_di_numeric(colptr::Vector{Int32},rowval::Vector{Int32},
- nzval::Vector{Float64},symbolic::Ptr{Nothing},
- tmp::Vector{Ptr{Nothing}},ctrl::Vector{Float64},
- info::Vector{Float64}) = ccall(
- (:umfpack_di_numeric,:libumfpack),
- Int32,
- (Ptr{Int32},Ptr{Int32},Ptr{Float64},Ptr{Cvoid},Ptr{Cvoid},
- Ptr{Float64},Ptr{Float64}),
- colptr,rowval,nzval,symbolic,tmp,ctrl,info)
-umfpack_di_solve(typ,colptr,rowval,nzval,x,b,numeric,ctrl,info) = ccall(
- (:umfpack_di_solve,:libumfpack),
- Int32,
- (Int32, Ptr{Int32}, Ptr{Int32}, Ptr{Float64},Ptr{Float64},
- Ptr{Float64}, Ptr{Cvoid}, Ptr{Float64},Ptr{Float64}),
- typ,colptr,rowval,nzval,x,b,numeric,ctrl,info)
-
-
-
-function UmfpackSolver(csc::SparseMatrixCSC;
+
+for (numeric,solve,T) in (
+ (:umfpack_di_numeric, :umfpack_di_solve, Float64),
+ (:umfpack_si_numeric, :umfpack_si_solve, Float32),
+ )
+ @eval begin
+ umfpack_numeric(
+ colptr::Vector{Int32},rowval::Vector{Int32},
+ nzval::Vector{$T},symbolic::Ptr{Nothing},
+ tmp::Vector{Ptr{Nothing}},ctrl::Vector{$T},
+ info::Vector{$T}) = ccall(
+ ($(string(numeric)),:libumfpack),
+ Int32,
+ (Ptr{Int32},Ptr{Int32},Ptr{$T},Ptr{Cvoid},Ptr{Cvoid},
+ Ptr{$T},Ptr{$T}),
+ colptr,rowval,nzval,symbolic,tmp,ctrl,info)
+ umfpack_solve(
+ typ,colptr::Vector{Int32},rowval::Vector{Int32},
+ nzval::Vector{$T},x::Vector{$T},b::Vector{$T},
+ numeric,ctrl::Vector{$T},info::Vector{$T}) = ccall(
+ ($(string(solve)),:libumfpack),
+ Int32,
+ (Int32, Ptr{Int32}, Ptr{Int32}, Ptr{$T},Ptr{$T},
+ Ptr{$T}, Ptr{Cvoid}, Ptr{$T},Ptr{$T}),
+ typ,colptr,rowval,nzval,x,b,numeric,ctrl,info)
+ end
+end
+
+function UmfpackSolver(
+ csc::SparseMatrixCSC{T};
option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
opt=UmfpackOptions(),logger=Logger(),
- kwargs...)
+ kwargs...) where T
set_options!(opt,option_dict,kwargs)
- p = Vector{Float64}(undef,csc.n)
+ p = Vector{T}(undef,csc.n)
full,tril_to_full_view = get_tril_to_full(csc)
full.colptr.-=1; full.rowval.-=1
@@ -73,14 +86,14 @@ end
function factorize!(M::UmfpackSolver)
UMFPACK.umfpack_free_numeric(M.inner)
M.full.nzval.=M.tril_to_full_view
- status = umfpack_di_numeric(M.inner.colptr,M.inner.rowval,M.inner.nzval,M.inner.symbolic,M.tmp,M.ctrl,M.info)
+ status = umfpack_numeric(M.inner.colptr,M.inner.rowval,M.inner.nzval,M.inner.symbolic,M.tmp,M.ctrl,M.info)
M.inner.numeric = M.tmp[]
M.inner.status = status
return M
end
-function solve!(M::UmfpackSolver,rhs::Vector{Float64})
- status = umfpack_di_solve(1,M.inner.colptr,M.inner.rowval,M.inner.nzval,M.p,rhs,M.inner.numeric,M.ctrl,M.info)
+function solve!(M::UmfpackSolver{T},rhs::Vector{T}) where T
+ status = umfpack_solve(1,M.inner.colptr,M.inner.rowval,M.inner.nzval,M.p,rhs,M.inner.numeric,M.ctrl,M.info)
rhs .= M.p
return rhs
end
@@ -100,4 +113,4 @@ function improve!(M::UmfpackSolver)
return false
end
introduce(::UmfpackSolver)="umfpack"
-
+is_supported(::Type{UmfpackSolver},::Type{Float64}) = true
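
Note the asymmetry at the end of this file: the `@eval` loop declares `umfpack_si_*` wrappers, but `is_supported` advertises only `Float64`. UMFPACK ships double-precision (and complex) kernels only, so the trait is what keeps a `Float32` problem from ever reaching a `ccall` whose symbol would not resolve:

```julia
import MadNLP

# The trait gate in practice: single precision is rejected up front
# rather than failing inside libumfpack at solve time.
MadNLP.is_supported(MadNLP.UmfpackSolver, Float64)  # true
MadNLP.is_supported(MadNLP.UmfpackSolver, Float32)  # false (fallback)
```
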
diff --git a/src/MadNLP.jl b/src/MadNLP.jl
index 2051168a..af31ddc8 100644
--- a/src/MadNLP.jl
+++ b/src/MadNLP.jl
@@ -19,7 +19,7 @@ const MOI = MathOptInterface
const MOIU = MathOptInterface.Utilities
const NLPModelsCounters = _Counters
-export madnlp
+export madnlp, UmfpackSolver, LapackCPUSolver
# Version info
version() = parsefile(joinpath(@__DIR__,"..","Project.toml"))["version"]
diff --git a/src/matrixtools.jl b/src/matrixtools.jl
index df12d459..da7d9838 100644
--- a/src/matrixtools.jl
+++ b/src/matrixtools.jl
@@ -11,7 +11,7 @@ mutable struct SparseMatrixCOO{Tv,Ti<:Integer, VTv<:AbstractVector{Tv}} <: Abstr
V::VTv
end
size(A::SparseMatrixCOO) = (A.m,A.n)
-getindex(A::SparseMatrixCOO{Float64,Ti},i::Int,j::Int) where Ti <: Integer = sum(A.V[(A.I.==i) .* (A.J.==j)])
+getindex(A::SparseMatrixCOO{Tv,Ti},i::Int,j::Int) where {Tv, Ti <: Integer} = sum(A.V[(A.I.==i) .* (A.J.==j)])
nnz(A::SparseMatrixCOO) = length(A.I)
function findIJ(S::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
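
The widened `getindex` sums every stored entry at `(i, j)`, the usual COO duplicate convention, and now does so for any `Tv`. A quick check, assuming the constructor takes the fields in the order `(m, n, I, J, V)` shown in the struct:

```julia
import MadNLP

# Two stored entries at (1,1) are accumulated; the element type is
# now free to be Float32. Field order (m, n, I, J, V) assumed.
A = MadNLP.SparseMatrixCOO(2, 2, Int32[1, 1, 2], Int32[1, 1, 2], Float32[0.5, 0.25, 1.0])
A[1, 1]  # 0.75f0
A[2, 2]  # 1.0f0
```
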
diff --git a/test/madnlp_dense.jl b/test/madnlp_dense.jl
index 43c0a49a..cfae62a3 100644
--- a/test/madnlp_dense.jl
+++ b/test/madnlp_dense.jl
@@ -7,33 +7,39 @@ using SparseArrays
using Random
function _compare_dense_with_sparse(kkt_system, n, m, ind_fixed, ind_eq)
- sparse_options = Dict{Symbol, Any}(
- :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM,
- :linear_solver=>MadNLP.LapackCPUSolver,
- :print_level=>MadNLP.ERROR,
- )
- dense_options = Dict{Symbol, Any}(
- :kkt_system=>kkt_system,
- :linear_solver=>MadNLP.LapackCPUSolver,
- :print_level=>MadNLP.ERROR,
- )
- nlp = MadNLPTests.DenseDummyQP(; n=n, m=m, fixed_variables=ind_fixed, equality_cons=ind_eq)
+ for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
+
+ sparse_options = Dict{Symbol, Any}(
+ :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM,
+ :linear_solver=>MadNLP.LapackCPUSolver,
+ :print_level=>MadNLP.ERROR,
+ :tol=>tol
+ )
+ dense_options = Dict{Symbol, Any}(
+ :kkt_system=>kkt_system,
+ :linear_solver=>MadNLP.LapackCPUSolver,
+ :print_level=>MadNLP.ERROR,
+ :tol=>tol
+ )
+
+ nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed, equality_cons=ind_eq)
- ips = MadNLP.InteriorPointSolver(nlp, option_dict=sparse_options)
- ipd = MadNLP.InteriorPointSolver(nlp, option_dict=dense_options)
+ ips = MadNLP.InteriorPointSolver(nlp, option_dict=sparse_options)
+ ipd = MadNLP.InteriorPointSolver(nlp, option_dict=dense_options)
- MadNLP.optimize!(ips)
- MadNLP.optimize!(ipd)
+ MadNLP.optimize!(ips)
+ MadNLP.optimize!(ipd)
- # Check that dense formulation matches exactly sparse formulation
- @test ips.cnt.k == ipd.cnt.k
- @test ips.obj_val ≈ ipd.obj_val atol=1e-10
- @test ips.x ≈ ipd.x atol=1e-10
- @test ips.l ≈ ipd.l atol=1e-10
- @test ips.kkt.jac_com[:, 1:n] == ipd.kkt.jac
- if isa(ipd.kkt, MadNLP.AbstractReducedKKTSystem)
- @test Symmetric(ips.kkt.aug_com, :L) ≈ ipd.kkt.aug_com atol=1e-10
+ # Check that the dense formulation matches the sparse formulation
+ @test ips.cnt.k == ipd.cnt.k
+ @test ips.obj_val ≈ ipd.obj_val atol=atol
+ @test ips.x ≈ ipd.x atol=atol
+ @test ips.l ≈ ipd.l atol=atol
+ @test ips.kkt.jac_com[:, 1:n] == ipd.kkt.jac
+ if isa(ipd.kkt, MadNLP.AbstractReducedKKTSystem)
+ @test Symmetric(ips.kkt.aug_com, :L) ≈ ipd.kkt.aug_com atol=atol
+ end
end
end
@@ -120,4 +126,3 @@ end
res = MadNLP.optimize!(ips)
@test ips.status == MadNLP.SOLVE_SUCCEEDED
end
-
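
The `(T, tol, atol)` loop doubles as a template for user code: solve in the model's precision and request tolerances that precision can meet. A sketch of a single-precision solve with the same test helper (keyword defaults of `DenseDummyQP` assumed):

```julia
using MadNLP, MadNLPTests

# Single-precision solve mirroring the loop above; tol=1e-3 is about
# what Float32 arithmetic can deliver (the value the tests use).
nlp = MadNLPTests.DenseDummyQP{Float32}(; n=10, m=5)
ips = MadNLP.InteriorPointSolver(nlp, option_dict=Dict{Symbol,Any}(
    :linear_solver => LapackCPUSolver,
    :tol => 1e-3,
    :print_level => MadNLP.ERROR,
))
MadNLP.optimize!(ips)
```
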
diff --git a/test/madnlp_test.jl b/test/madnlp_test.jl
index 4571b3f5..eca19e41 100644
--- a/test/madnlp_test.jl
+++ b/test/madnlp_test.jl
@@ -70,6 +70,7 @@ testset = [
],
]
+
for (name,optimizer_constructor,exclude) in testset
test_madnlp(name,optimizer_constructor,exclude)
end
diff --git a/test/matrix_test.jl b/test/matrix_test.jl
index 07fa60ac..75f1568b 100644
--- a/test/matrix_test.jl
+++ b/test/matrix_test.jl
@@ -30,14 +30,6 @@ end
end
-MadNLPTests.test_linear_solver(MadNLP.LapackCPUSolver)
-MadNLPTests.test_linear_solver(MadNLP.UmfpackSolver)
-# @test_linear_solver ma27
-# @test_linear_solver ma57
-# @test_linear_solver ma77
-# @test_linear_solver ma86
-# @test_linear_solver ma97
-# @test_linear_solver pardiso
-# @test_linear_solver pardisomkl
-# @test_linear_solver umfpack
-# @test_linear_solver mumps
+MadNLPTests.test_linear_solver(UmfpackSolver,Float64)
+MadNLPTests.test_linear_solver(LapackCPUSolver,Float32)
+MadNLPTests.test_linear_solver(LapackCPUSolver,Float64)
diff --git a/test/nlpmodels_test.jl b/test/nlpmodels_test.jl
deleted file mode 100644
index a5d55fce..00000000
--- a/test/nlpmodels_test.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-hs33 = AmplModel(joinpath(@__DIR__, "hs033.nl"))
-result = madnlp(hs33;print_level=MadNLP.ERROR)
-
-@test result.status == :first_order
-@test solcmp(result.solution,[0.0,1.4142135570650927,1.4142135585382265])
-@test solcmp(result.multipliers,[0.17677669589725922,-0.17677669527079812])
-@test solcmp(result.multipliers_L,[11.000000117266442,1.7719330023793877e-9,1.7753439380861844e-9])
-@test solcmp(result.multipliers_U,[0.,0.,0.])
-@test solcmp([result.objective],[-4.585786548956206])
-
-finalize(hs33)
diff --git a/test/runtests.jl b/test/runtests.jl
index d8e9a5c0..52fac3ed 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,5 @@
using Test, MadNLP, MadNLPTests, MINLPTests
import MathOptInterface
-# import AmplNLReader: AmplModel
import SparseArrays: sparse
@testset "MadNLP test" begin
@@ -12,11 +11,6 @@ import SparseArrays: sparse
include("MOI_interface_test.jl")
end
- # this is temporarily commented out due to the incompatibility between NLPModels v0.17.2 and AmplNLReader v0.11.0
- # @testset "NLPModels interface" begin
- # include("nlpmodels_test.jl")
- # end
-
@testset "MadNLP test" begin
include("madnlp_test.jl")
include("madnlp_dense.jl")