From 68c1b4a429f0b838600b5620565daea8b64aa8f5 Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 3 Sep 2024 14:30:53 +1200 Subject: [PATCH] Add Hessian support to GrayBox --- ext/MathOptAIFluxExt.jl | 28 ++++++++--- ext/MathOptAIPythonCallExt.jl | 33 +++++++++---- src/predictors/GrayBox.jl | 49 +++++++++++++------ test/test_Flux.jl | 43 ++++++++++++++++ test/test_PythonCall.jl | 92 +++++++++++++++++++++++++++++++++++ 5 files changed, 213 insertions(+), 32 deletions(-) diff --git a/ext/MathOptAIFluxExt.jl b/ext/MathOptAIFluxExt.jl index 4c26831..da81836 100644 --- a/ext/MathOptAIFluxExt.jl +++ b/ext/MathOptAIFluxExt.jl @@ -71,9 +71,9 @@ function MathOptAI.add_predictor( x::Vector; config::Dict = Dict{Any,Any}(), reduced_space::Bool = false, - gray_box::Bool = false, + kwargs..., ) - inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box) + inner_predictor = MathOptAI.build_predictor(predictor; config, kwargs...) if reduced_space inner_predictor = MathOptAI.ReducedSpace(inner_predictor) end @@ -85,6 +85,7 @@ end predictor::Flux.Chain; config::Dict = Dict{Any,Any}(), gray_box::Bool = false, + gray_box_hessian::Bool = false, ) Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref). @@ -110,6 +111,8 @@ Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref). `Flux.relu => MathOptAI.QuadraticReLU()`. * `gray_box`: if `true`, the neural network is added as a user-defined nonlinear operator, with gradients provided by `Flux.withjacobian`. + * `gray_box_hessian`: if `true`, the gray box additionally computes the Hessian + of the output using `Flux.hessian`. ## Example @@ -141,12 +144,13 @@ function MathOptAI.build_predictor( predictor::Flux.Chain; config::Dict = Dict{Any,Any}(), gray_box::Bool = false, + gray_box_hessian::Bool = false, ) if gray_box if !isempty(config) error("cannot specify the `config` kwarg if `gray_box = true`") end - return MathOptAI.GrayBox(predictor) + return MathOptAI.GrayBox(predictor; hessian = gray_box_hessian) end inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[]) for layer in predictor.layers @@ -155,15 +159,23 @@ function MathOptAI.build_predictor( return inner_predictor end -function MathOptAI.GrayBox(predictor::Flux.Chain) +function MathOptAI.GrayBox(predictor::Flux.Chain; hessian::Bool = false) function output_size(x) return only(Flux.outputsize(predictor, (length(x),))) end - function with_jacobian(x) - ret = Flux.withjacobian(x -> predictor(Float32.(x)), collect(x)) - return (value = ret.val, jacobian = only(ret.grad)) + function callback(x) + x32 = collect(Float32.(x)) + ret = Flux.withjacobian(predictor, x32) + if !hessian + return (value = ret.val, jacobian = only(ret.grad)) + end + Hs = map(1:length(ret.val)) do i + return Flux.hessian(x -> predictor(x)[i], x32) + end + H = cat(Hs...; dims = 3) + return (value = ret.val, jacobian = only(ret.grad), hessian = H) end - return MathOptAI.GrayBox(output_size, with_jacobian) + return MathOptAI.GrayBox(output_size, callback; has_hessian = hessian) end function _add_predictor(::MathOptAI.Pipeline, layer::Any, ::Dict) diff --git a/ext/MathOptAIPythonCallExt.jl b/ext/MathOptAIPythonCallExt.jl index 5977e87..1f2b8e9 100644 --- a/ext/MathOptAIPythonCallExt.jl +++ b/ext/MathOptAIPythonCallExt.jl @@ -38,6 +38,8 @@ Add a trained neural network from Pytorch via PythonCall.jl to `model`. The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`. * `gray_box`: if `true`, the neural network is added as a user-defined nonlinear operator, with gradients provided by `torch.func.jacrev`. + * `gray_box_hessian`: if `true`, the gray box additionally computes the Hessian + of the output using `torch.func.hessian`. """ function MathOptAI.add_predictor( model::JuMP.AbstractModel, @@ -45,9 +47,9 @@ function MathOptAI.add_predictor( x::Vector; config::Dict = Dict{Any,Any}(), reduced_space::Bool = false, - gray_box::Bool = false, + kwargs..., ) - inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box) + inner_predictor = MathOptAI.build_predictor(predictor; config, kwargs...) if reduced_space inner_predictor = MathOptAI.ReducedSpace(inner_predictor) end @@ -59,6 +61,7 @@ end predictor::MathOptAI.PytorchModel; config::Dict = Dict{Any,Any}(), gray_box::Bool = false, + gray_box_hessian::Bool = false, ) Convert a trained neural network from Pytorch via PythonCall.jl to a @@ -80,17 +83,20 @@ Convert a trained neural network from Pytorch via PythonCall.jl to a The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`. * `gray_box`: if `true`, the neural network is added as a user-defined nonlinear operator, with gradients provided by `torch.func.jacrev`. + * `gray_box_hessian`: if `true`, the gray box additionally computes the Hessian + of the output using `torch.func.hessian`. """ function MathOptAI.build_predictor( predictor::MathOptAI.PytorchModel; config::Dict = Dict{Any,Any}(), gray_box::Bool = false, + gray_box_hessian::Bool = false, ) if gray_box if !isempty(config) error("cannot specify the `config` kwarg if `gray_box = true`") end - return MathOptAI.GrayBox(predictor) + return MathOptAI.GrayBox(predictor; hessian = gray_box_hessian) end torch = PythonCall.pyimport("torch") nn = PythonCall.pyimport("torch.nn") @@ -118,23 +124,30 @@ function _predictor(nn, layer, config) return error("unsupported layer: $layer") end -function MathOptAI.GrayBox(predictor::MathOptAI.PytorchModel) +function MathOptAI.GrayBox( + predictor::MathOptAI.PytorchModel; + hessian::Bool = false, +) torch = PythonCall.pyimport("torch") torch_model = torch.load(predictor.filename) J = torch.func.jacrev(torch_model) + H = torch.func.hessian(torch_model) # TODO(odow): I'm not sure if there is a better way to get the output # dimension of a torch model object? output_size(::Any) = PythonCall.pyconvert(Int, torch_model[-1].out_features) - function with_jacobian(x) + function callback(x) py_x = torch.tensor(collect(x)) py_value = torch_model(py_x).detach().numpy() + value = PythonCall.pyconvert(Vector, py_value) py_jacobian = J(py_x).detach().numpy() - return (; - value = PythonCall.pyconvert(Vector, py_value), - jacobian = PythonCall.pyconvert(Matrix, py_jacobian), - ) + jacobian = PythonCall.pyconvert(Matrix, py_jacobian) + if !hessian + return (; value, jacobian) + end + hessians = PythonCall.pyconvert(Array, H(py_x).detach().numpy()) + return (; value, jacobian, hessian = permutedims(hessians, (2, 3, 1))) end - return MathOptAI.GrayBox(output_size, with_jacobian) + return MathOptAI.GrayBox(output_size, callback; has_hessian = hessian) end end # module diff --git a/src/predictors/GrayBox.jl b/src/predictors/GrayBox.jl index 8b631fb..f22b0b3 100644 --- a/src/predictors/GrayBox.jl +++ b/src/predictors/GrayBox.jl @@ -7,7 +7,8 @@ """ GrayBox( output_size::Function, - with_jacobian::Function, + callback::Function; + has_hessian::Bool = false, ) <: AbstractPredictor An [`AbstractPredictor`](@ref) that represents the function ``f(x)`` as a @@ -17,10 +18,13 @@ user-defined nonlinear operator. * `output_size(x::Vector):Int`: given an input vector `x`, return the dimension of the output vector - * `with_jacobian(x::Vector)::NamedTuple -> (;value, jacobian)`: given an input - vector `x`, return a `NamedTuple` that computes the primal value and Jacobian - of the output value with respect to the input. `jacobian[j, i]` is the - partial derivative of `value[j]` with respect to `x[i]`. + * `callback(x::Vector)::NamedTuple -> (;value, jacobian[, hessian])`: given an + input vector `x`, return a `NamedTuple` that computes the primal value and + Jacobian of the output value with respect to the input. `jacobian[j, i]` is + the partial derivative of `value[j]` with respect to `x[i]`. + * `has_hessian`: if `true`, the `callback` additionally contains a field + `hessian`, which is an `N × N × M` matrix, where `hessian[i, j, k]` is the + partial derivative of `value[k]` with respect to `x[i]` and `x[j]`. ## Example @@ -55,7 +59,16 @@ julia> y = MathOptAI.add_predictor(model, MathOptAI.ReducedSpace(f), x) """ struct GrayBox{F<:Function,G<:Function} <: AbstractPredictor output_size::F - with_jacobian::G + callback::G + has_hessian::Bool + + function GrayBox( + output_size::F, + callback::G; + has_hessian::Bool = false, + ) where {F<:Function,G<:Function} + return new{F,G}(output_size, callback, has_hessian) + end end function add_predictor(model::JuMP.AbstractModel, predictor::GrayBox, x::Vector) @@ -73,7 +86,7 @@ function add_predictor( last_x, cache = nothing, nothing function update(x) if x != last_x - cache = predictor.predictor.with_jacobian(x) + cache = predictor.predictor.callback(x) last_x = x end return @@ -87,14 +100,22 @@ function add_predictor( g .= cache.jacobian[i, :] return end + function ∇²f(H::AbstractMatrix{Float64}, k::Int, x...) + update(x) + for j in 1:length(x), i in j:length(x) + H[i, j] = cache.hessian[i, j, k] + end + return + end return map(1:predictor.predictor.output_size(x)) do i - op_i = JuMP.add_nonlinear_operator( - model, - length(x), - (x...) -> f(i, x...), - (g, x...) -> ∇f(g, i, x...); - name = Symbol("op_$(gensym())"), - ) + callbacks = if predictor.predictor.has_hessian + ∇²fi = (H, x...) -> ∇²f(H, i, x...) + ((x...) -> f(i, x...), (g, x...) -> ∇f(g, i, x...), ∇²fi) + else + ((x...) -> f(i, x...), (g, x...) -> ∇f(g, i, x...)) + end + name = Symbol("op_$(gensym())") + op_i = JuMP.add_nonlinear_operator(model, length(x), callbacks...; name) return op_i(x...) end end diff --git a/test/test_Flux.jl b/test/test_Flux.jl index fb0605d..103fef1 100644 --- a/test/test_Flux.jl +++ b/test/test_Flux.jl @@ -230,6 +230,27 @@ function test_gray_box_scalar_output() return end +function test_gray_box_scalar_output_hessian() + chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1)) + model = Model(Ipopt.Optimizer) + set_silent(model) + set_attribute(model, "max_iter", 5) + @variable(model, 0 <= x[1:2] <= 1) + y = MathOptAI.add_predictor( + model, + chain, + x; + gray_box = true, + gray_box_hessian = true, + reduced_space = true, + ) + @objective(model, Max, only(y)) + optimize!(model) + @test termination_status(model) == ITERATION_LIMIT + @test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2) + return +end + function test_gray_box_vector_output() chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2)) model = Model(Ipopt.Optimizer) @@ -251,6 +272,28 @@ function test_gray_box_vector_output() return end +function test_gray_box_vector_output_hessian() + chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2)) + model = Model(Ipopt.Optimizer) + set_silent(model) + set_attribute(model, "max_iter", 5) + @variable(model, 0 <= x[1:3] <= 1) + y = MathOptAI.add_predictor( + model, + chain, + x; + gray_box = true, + gray_box_hessian = true, + reduced_space = true, + ) + @test length(y) == 2 + @objective(model, Max, sum(y)) + optimize!(model) + @test termination_status(model) == ITERATION_LIMIT + @test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2) + return +end + end # module TestFluxExt.runtests() diff --git a/test/test_PythonCall.jl b/test/test_PythonCall.jl index ecad1e8..f94abe6 100644 --- a/test/test_PythonCall.jl +++ b/test/test_PythonCall.jl @@ -190,6 +190,43 @@ function test_model_Tanh_scalar_GrayBox() return end +function test_model_Tanh_scalar_GrayBox_hessian() + dir = mktempdir() + filename = joinpath(dir, "model_Tanh_GrayBox.pt") + PythonCall.pyexec( + """ + import torch + + model = torch.nn.Sequential( + torch.nn.Linear(2, 16), + torch.nn.Tanh(), + torch.nn.Linear(16, 1), + ) + + torch.save(model, filename) + """, + @__MODULE__, + (; filename = filename), + ) + model = Model(Ipopt.Optimizer) + set_silent(model) + @variable(model, x[1:2]) + ml_model = MathOptAI.PytorchModel(filename) + y = MathOptAI.add_predictor( + model, + ml_model, + x; + gray_box = true, + gray_box_hessian = true, + ) + @test num_variables(model) == 3 + @test num_constraints(model; count_variable_in_set_constraints = true) == 1 + optimize!(model) + @test is_solved_and_feasible(model) + @test ≈(_evaluate_model(filename, value.(x)), value.(y); atol = 1e-5) + return +end + function test_model_Tanh_vector_GrayBox() dir = mktempdir() filename = joinpath(dir, "model_Tanh_vector_GrayBox.pt") @@ -239,6 +276,61 @@ function test_model_Tanh_vector_GrayBox() return end +function test_model_Tanh_vector_GrayBox_hessian() + dir = mktempdir() + filename = joinpath(dir, "model_Tanh_vector_GrayBox.pt") + PythonCall.pyexec( + """ + import torch + + model = torch.nn.Sequential( + torch.nn.Linear(3, 16), + torch.nn.Tanh(), + torch.nn.Linear(16, 2), + ) + + torch.save(model, filename) + """, + @__MODULE__, + (; filename = filename), + ) + # Full-space + model = Model(Ipopt.Optimizer) + set_silent(model) + @variable(model, x[1:3]) + ml_model = MathOptAI.PytorchModel(filename) + y = MathOptAI.add_predictor( + model, + ml_model, + x; + gray_box = true, + gray_box_hessian = true, + ) + @test num_variables(model) == 5 + @test num_constraints(model; count_variable_in_set_constraints = true) == 2 + optimize!(model) + @test is_solved_and_feasible(model) + @test ≈(_evaluate_model(filename, value.(x)), value.(y); atol = 1e-5) + # Reduced-space + model = Model(Ipopt.Optimizer) + set_silent(model) + @variable(model, x[1:3]) + ml_model = MathOptAI.PytorchModel(filename) + y = MathOptAI.add_predictor( + model, + ml_model, + x; + gray_box = true, + reduced_space = true, + ) + @test num_variables(model) == 3 + @test num_constraints(model; count_variable_in_set_constraints = true) == 0 + optimize!(model) + @test is_solved_and_feasible(model) + @test ≈(_evaluate_model(filename, value.(x)), value.(y); atol = 1e-5) + return +end + end # module TestPythonCallExt.runtests()