Reviewer notes (text before the first `diff --git` header is not part of the patch):
  * ext/ADNLPModelsZygoteExt.jl: every method now extends the `ADNLPModels.*`
    function instead of defining an unrelated function local to the extension;
    `dot` is imported from LinearAlgebra; the `Zygote.Buffer` wrappers
    (`get_immutable_c`, `get_immutable_F`, `get_c`, `get_F`) live here because
    Zygote is only loaded together with the extension.
  * src/zygote.jl: keeps the backend types and constructors only, plus
    `function ... end` stubs for the two wrappers; `ZygoteADJprod` takes `f`
    again like its siblings; `nnzh` uses integer division.
  * Project.toml: the `Requires` compat entry is kept because `[deps]` is not
    touched by this patch.
  * `index` lines are omitted for the three files whose content changed.

diff --git a/Project.toml b/Project.toml
--- a/Project.toml
+++ b/Project.toml
@@ -13,12 +13,24 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5"
 SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35"
 
+[weakdeps]
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[extensions]
+ADNLPModelsEnzymeExt = "Enzyme"
+ADNLPModelsZygoteExt = "Zygote"
+
 [compat]
 ADTypes = "1.2.1"
-ForwardDiff = "0.9.0, 0.10.0"
-NLPModels = "0.18, 0.19, 0.20, 0.21"
+ForwardDiff = "0.10.0"
+NLPModels = "0.21.3"
 Requires = "1"
 ReverseDiff = "1"
 SparseConnectivityTracer = "0.6.1"
 SparseMatrixColorings = "0.4.0"
-julia = "^1.6"
+Enzyme = "0.13"
+Zygote = "0.6"
+LinearAlgebra = "1.10"
+SparseArrays = "1.10"
+julia = "1.10"
diff --git a/ext/ADNLPModelsEnzymeExt.jl b/ext/ADNLPModelsEnzymeExt.jl
new file mode 100644
index 00000000..60c1deb3
--- /dev/null
+++ b/ext/ADNLPModelsEnzymeExt.jl
@@ -0,0 +1,10 @@
+module ADNLPModelsEnzymeExt
+
+using Enzyme, ADNLPModels
+
+function ADNLPModels.gradient!(::ADNLPModels.EnzymeADGradient, g, f, x)
+  Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Duplicated(x, g)) # gradient!(Reverse, g, f, x)
+  return g
+end
+
+end
diff --git a/ext/ADNLPModelsZygoteExt.jl b/ext/ADNLPModelsZygoteExt.jl
new file mode 100644
--- /dev/null
+++ b/ext/ADNLPModelsZygoteExt.jl
@@ -0,0 +1,73 @@
+module ADNLPModelsZygoteExt
+
+using Zygote, ADNLPModels
+using LinearAlgebra: dot
+
+# Zygote cannot differentiate through array mutation, so the in-place callbacks
+# `c!` and `F!` are wrapped to write into a `Zygote.Buffer`.
+# See https://fluxml.ai/Zygote.jl/latest/limitations/
+function ADNLPModels.get_immutable_c(nlp::ADNLPModels.ADModel)
+  function c(x; nnln = nlp.meta.nnln)
+    cx = Zygote.Buffer(x, nnln)
+    nlp.c!(cx, x)
+    return copy(cx)
+  end
+  return c
+end
+function ADNLPModels.get_c(nlp::ADNLPModels.ADModel, ::ADNLPModels.ImmutableADbackend)
+  return ADNLPModels.get_immutable_c(nlp)
+end
+
+function ADNLPModels.get_immutable_F(nls::ADNLPModels.AbstractADNLSModel)
+  function F(x; nequ = nls.nls_meta.nequ)
+    Fx = Zygote.Buffer(x, nequ)
+    nls.F!(Fx, x)
+    return copy(Fx)
+  end
+  return F
+end
+function ADNLPModels.get_F(
+  nls::ADNLPModels.AbstractADNLSModel,
+  ::ADNLPModels.ImmutableADbackend,
+)
+  return ADNLPModels.get_immutable_F(nls)
+end
+
+function ADNLPModels.gradient(::ADNLPModels.ZygoteADGradient, f, x)
+  g = Zygote.gradient(f, x)[1]
+  return g === nothing ? zero(x) : g
+end
+function ADNLPModels.gradient!(::ADNLPModels.ZygoteADGradient, g, f, x)
+  _g = Zygote.gradient(f, x)[1]
+  g .= _g === nothing ? 0 : _g
+  return g
+end
+
+function ADNLPModels.Jprod!(::ADNLPModels.ZygoteADJprod, Jv, f, x, v, ::Val)
+  Jv .= vec(Zygote.jacobian(t -> f(x + t * v), 0)[1])
+  return Jv
+end
+
+function ADNLPModels.Jtprod!(::ADNLPModels.ZygoteADJtprod, Jtv, f, x, v, ::Val)
+  g = Zygote.gradient(x -> dot(f(x), v), x)[1]
+  if g === nothing
+    Jtv .= zero(x)
+  else
+    Jtv .= g
+  end
+  return Jtv
+end
+
+function ADNLPModels.jacobian(::ADNLPModels.ZygoteADJacobian, f, x)
+  return Zygote.jacobian(f, x)[1]
+end
+
+function ADNLPModels.hessian(b::ADNLPModels.ZygoteADHessian, f, x)
+  return ADNLPModels.jacobian(
+    ADNLPModels.ForwardDiffADJacobian(length(x), f, x0 = x),
+    x -> ADNLPModels.gradient(ADNLPModels.ZygoteADGradient(), f, x),
+    x,
+  )
+end
+
+end
diff --git a/src/ADNLPModels.jl b/src/ADNLPModels.jl
index a50d1005..0b6e26ad 100644
--- a/src/ADNLPModels.jl
+++ b/src/ADNLPModels.jl
@@ -27,11 +27,13 @@ include("sparse_hessian.jl")
 
 include("forward.jl")
 include("reverse.jl")
-include("enzyme.jl")
-include("zygote.jl")
 include("predefined_backend.jl")
 include("nlp.jl")
 
+# Extensions
+include("enzyme.jl")
+include("zygote.jl")
+
 function ADNLPModel!(model::AbstractNLPModel; kwargs...)
   return if model.meta.nlin > 0
     ADNLPModel!(
diff --git a/src/enzyme.jl b/src/enzyme.jl
index db5133fe..b57b1cae 100644
--- a/src/enzyme.jl
+++ b/src/enzyme.jl
@@ -10,12 +10,3 @@ function EnzymeADGradient(
 )
   return EnzymeADGradient()
 end
-
-@init begin
-  @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" begin
-    function ADNLPModels.gradient!(::EnzymeADGradient, g, f, x)
-      Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Duplicated(x, g)) # gradient!(Reverse, g, f, x)
-      return g
-    end
-  end
-end
diff --git a/src/zygote.jl b/src/zygote.jl
--- a/src/zygote.jl
+++ b/src/zygote.jl
@@ -1,119 +1,68 @@
 struct ZygoteADGradient <: ADBackend end
+struct ZygoteADJprod <: ImmutableADbackend end
+struct ZygoteADJtprod <: ImmutableADbackend end
 struct ZygoteADJacobian <: ImmutableADbackend
   nnzj::Int
 end
 struct ZygoteADHessian <: ImmutableADbackend
   nnzh::Int
 end
-struct ZygoteADJprod <: ImmutableADbackend end
-struct ZygoteADJtprod <: ImmutableADbackend end
 
-@init begin
-  @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin
-    # See https://fluxml.ai/Zygote.jl/latest/limitations/
-    function get_immutable_c(nlp::ADModel)
-      function c(x; nnln = nlp.meta.nnln)
-        c = Zygote.Buffer(x, nnln)
-        nlp.c!(c, x)
-        return copy(c)
-      end
-      return c
-    end
-    get_c(nlp::ADModel, ::ImmutableADbackend) = get_immutable_c(nlp)
+# Methods are added by ext/ADNLPModelsZygoteExt.jl: they need `Zygote.Buffer`,
+# which is only available once Zygote is loaded.
+function get_immutable_c end
+function get_immutable_F end
 
-    function get_immutable_F(nls::AbstractADNLSModel)
-      function F(x; nequ = nls.nls_meta.nequ)
-        Fx = Zygote.Buffer(x, nequ)
-        nls.F!(Fx, x)
-        return copy(Fx)
-      end
-      return F
-    end
-    get_F(nls::AbstractADNLSModel, ::ImmutableADbackend) = get_immutable_F(nls)
-
-    function ZygoteADGradient(
-      nvar::Integer,
-      f,
-      ncon::Integer = 0,
-      c::Function = (args...) -> [];
-      kwargs...,
-    )
-      return ZygoteADGradient()
-    end
-    function gradient(::ZygoteADGradient, f, x)
-      g = Zygote.gradient(f, x)[1]
-      return g === nothing ? zero(x) : g
-    end
-    function gradient!(::ZygoteADGradient, g, f, x)
-      _g = Zygote.gradient(f, x)[1]
-      g .= _g === nothing ? 0 : _g
-    end
+function ZygoteADGradient(
+  nvar::Integer,
+  f,
+  ncon::Integer = 0,
+  c::Function = (args...) -> [];
+  kwargs...,
+)
+  return ZygoteADGradient()
+end
 
-    function ZygoteADJacobian(
-      nvar::Integer,
-      f,
-      ncon::Integer = 0,
-      c::Function = (args...) -> [];
-      kwargs...,
-    )
-      @assert nvar > 0
-      nnzj = nvar * ncon
-      return ZygoteADJacobian(nnzj)
-    end
-    function jacobian(::ZygoteADJacobian, f, x)
-      return Zygote.jacobian(f, x)[1]
-    end
+function ZygoteADJprod(
+  nvar::Integer,
+  f,
+  ncon::Integer = 0,
+  c::Function = (args...) -> [];
+  kwargs...,
+)
+  return ZygoteADJprod()
+end
 
-    function ZygoteADHessian(
-      nvar::Integer,
-      f,
-      ncon::Integer = 0,
-      c::Function = (args...) -> [];
-      kwargs...,
-    )
-      @assert nvar > 0
-      nnzh = nvar * (nvar + 1) / 2
-      return ZygoteADHessian(nnzh)
-    end
-    function hessian(b::ZygoteADHessian, f, x)
-      return jacobian(
-        ForwardDiffADJacobian(length(x), f, x0 = x),
-        x -> gradient(ZygoteADGradient(), f, x),
-        x,
-      )
-    end
+function ZygoteADJtprod(
+  nvar::Integer,
+  f,
+  ncon::Integer = 0,
+  c::Function = (args...) -> [];
+  kwargs...,
+)
+  return ZygoteADJtprod()
+end
 
-    function ZygoteADJprod(
-      nvar::Integer,
-      f,
-      ncon::Integer = 0,
-      c::Function = (args...) -> [];
-      kwargs...,
-    )
-      return ZygoteADJprod()
-    end
-    function Jprod!(::ZygoteADJprod, Jv, f, x, v, ::Val)
-      Jv .= vec(Zygote.jacobian(t -> f(x + t * v), 0)[1])
-      return Jv
-    end
+function ZygoteADJacobian(
+  nvar::Integer,
+  f,
+  ncon::Integer = 0,
+  c::Function = (args...) -> [];
+  kwargs...,
+)
+  @assert nvar > 0
+  nnzj = nvar * ncon
+  return ZygoteADJacobian(nnzj)
+end
 
-    function ZygoteADJtprod(
-      nvar::Integer,
-      f,
-      ncon::Integer = 0,
-      c::Function = (args...) -> [];
-      kwargs...,
-    )
-      return ZygoteADJtprod()
-    end
-    function Jtprod!(::ZygoteADJtprod, Jtv, f, x, v, ::Val)
-      g = Zygote.gradient(x -> dot(f(x), v), x)[1]
-      if g === nothing
-        Jtv .= zero(x)
-      else
-        Jtv .= g
-      end
-      return Jtv
-    end
-  end
+function ZygoteADHessian(
+  nvar::Integer,
+  f,
+  ncon::Integer = 0,
+  c::Function = (args...) -> [];
+  kwargs...,
+)
+  @assert nvar > 0
+  nnzh = div(nvar * (nvar + 1), 2)
+  return ZygoteADHessian(nnzh)
 end