Add GrayBox

lanl-ansi · Aug 29, 2024 · e0a6959 · e0a6959
1 parent 2ccb9a8
commit e0a6959
Show file tree

Hide file tree

Showing 3 changed files with 140 additions and 40 deletions.
diff --git a/ext/MathOptAIFluxExt.jl b/ext/MathOptAIFluxExt.jl
@@ -17,6 +17,7 @@ import MathOptAI
         x::Vector;
         config::Dict = Dict{Any,Any}(),
         reduced_space::Bool = false,
+        gray_box::Bool = false,
     )
 
 Add a trained neural network from Flux.jl to `model`.
@@ -40,6 +41,8 @@ Add a trained neural network from Flux.jl to `model`.
    [`AbstractPredictor`](@ref)s that control how the activation functions are
    reformulated. For example, `Flux.sigmoid => MathOptAI.Sigmoid()` or
    `Flux.relu => MathOptAI.QuadraticReLU()`.
+ * `gray_box`: if `true`, the neural network is added as a user-defined
+   nonlinear operator, with gradients provided by `Flux.withjacobian`.
 
 ## Example
 
@@ -70,54 +73,18 @@ function MathOptAI.add_predictor(
     reduced_space::Bool = false,
     gray_box::Bool = false,
 )
-    if gray_box
-        return _add_gray_box_predictor(model, predictor, x)
-    end
-    inner_predictor = MathOptAI.build_predictor(predictor; config)
+    inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
     if reduced_space
         inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
     end
     return MathOptAI.add_predictor(model, inner_predictor, x)
 end
 
-function _add_gray_box_predictor(
-    model::JuMP.AbstractModel,
-    predictor::Flux.Chain,
-    x::Vector,
-)
-    input_size = length(x)
-    output_size = only(Flux.outputsize(predictor, (input_size,)))
-    if output_size != 1
-        error("Unable to add vector-valued gray-box")
-    end
-    last_x, last_f, last_∇f = nothing, nothing, nothing
-    function update(x)
-        if x != last_x
-            ret = Flux.withgradient(collect(x)) do x
-                return only(predictor(Float32.(x)))
-            end
-            last_x = x
-            last_f, last_∇f = Float64(ret.val), Float64.(only(ret.grad))
-        end
-        return
-    end
-    function f(x...)
-        update(x)
-        return last_f
-    end
-    function ∇f(g, x...)
-        update(x)
-        g .= last_∇f
-        return
-    end
-    op = JuMP.add_nonlinear_operator(model, input_size, f, ∇f; name = :op_flux)
-    return [op(x...)]
-end
-
 """
     MathOptAI.build_predictor(
         predictor::Flux.Chain;
         config::Dict = Dict{Any,Any}(),
+        gray_box::Bool = false,
     )
 
 Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
@@ -141,6 +108,8 @@ Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
    [`AbstractPredictor`](@ref)s that control how the activation functions are
    reformulated. For example, `Flux.sigmoid => MathOptAI.Sigmoid()` or
    `Flux.relu => MathOptAI.QuadraticReLU()`.
+ * `gray_box`: if `true`, the neural network is added as a user-defined
+   nonlinear operator, with gradients provided by `Flux.withjacobian`.
 
 ## Example
 
@@ -171,14 +140,29 @@ Pipeline with layers:
 function MathOptAI.build_predictor(
     predictor::Flux.Chain;
     config::Dict = Dict{Any,Any}(),
+    gray_box::Bool = false,
 )
+    if gray_box
+        return _build_gray_box(predictor)
+    end
     inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
     for layer in predictor.layers
         _add_predictor(inner_predictor, layer, config)
     end
     return inner_predictor
 end
 
+function _build_gray_box(predictor::Flux.Chain)
+    function output_size(x)
+        return only(Flux.outputsize(predictor, (length(x),)))
+    end
+    function with_jacobian(x)
+        ret = Flux.withjacobian(x -> predictor(Float32.(x)), collect(x))
+        return (value = ret.val, jacobian = only(ret.rad))
+    end
+    return MathOptAI.GrayBox(output_size, with_jacobian)
+end
+
 function _add_predictor(::MathOptAI.Pipeline, layer::Any, ::Dict)
     return error("Unsupported layer: $layer")
 end

diff --git a/src/predictors/GrayBox.jl b/src/predictors/GrayBox.jl
@@ -0,0 +1,100 @@
+# Copyright (c) 2024: Oscar Dowson and contributors
+# Copyright (c) 2024: Triad National Security, LLC
+#
+# Use of this source code is governed by a BSD-style license that can be found
+# in the LICENSE.md file.
+
+"""
+    GrayBox(
+        output_size::Function,
+        with_jacobian::Function,
+    ) <: AbstractPredictor
+
+An [`AbstractPredictor`](@ref) that represents the function ``f(x)`` as a
+user-defined nonlinear operator.
+
+## arguments
+
+ * `output_size(x::Vector):Int`: given an input vector `x`, return the dimension
+   of the output vector
+ * `with_jacobian(x::Vector)::NamedTuple -> (;value, jacobian)`: given an input
+   vector `x`, return a `NamedTuple` that computes the primal value and Jacobian
+   of the output value with respect to the input. `jacobian[j, i]` is the
+   partial derivative of `value[j]` with respect to `x[i]`.
+
+## Example
+
+```jldoctest; filter=r"##[0-9]+"
+julia> using JuMP, MathOptAI
+
+julia> model = Model();
+
+julia> @variable(model, x[1:2]);
+
+julia> f = MathOptAI.GrayBox(
+           x -> 2,
+           x -> (value = x.^2, jacobian = [2 * x[1] 0.0; 0.0 2 * x[2]]),
+       );
+
+julia> y = MathOptAI.add_predictor(model, f, x)
+1-element Vector{VariableRef}:
+ moai_GrayBox[1]
+ moai_GrayBox[2]
+
+julia> print(model)
+Feasibility
+Subject to
+ op_##238(x[1], x[2]) - moai_GrayBox[1] = 0
+ op_##239(x[1], x[2]) - moai_GrayBox[2] = 0
+
+julia> y = MathOptAI.add_predictor(model, MathOptAI.ReducedSpace(f), x)
+2-element Vector{NonlinearExpr}:
+ op_##240(x[1], x[2])
+ op_##241(x[1], x[2])
+```
+"""
+struct GrayBox{F<:Function,G<:Function} <: AbstractPredictor
+    output_size::F
+    with_jacobian::G
+end
+
+function add_predictor(model::JuMP.AbstractModel, predictor::GrayBox, x::Vector)
+    op = add_predictor(model, ReducedSpace(predictor), x)
+    y = JuMP.@variable(model, [1:length(op)], base_name = "moai_GrayBox")
+    JuMP.@constraint(model, op .== y)
+    return y
+end
+
+function add_predictor(
+    model::JuMP.AbstractModel,
+    predictor::ReducedSpace{<:GrayBox},
+    x::Vector,
+)
+    last_x, cache = nothing, nothing
+    function update(x)
+        if x != last_x
+            cache = predictor.predictor.with_jacobian(x)
+            last_x = x
+        end
+        return
+    end
+    function f(i::Int, x...)::Float64
+        update(x)
+        return cache.value[i]
+    end
+    function ∇f(g::AbstractVector{Float64}, i::Int, x...)
+        update(x)
+        g .= cache.jacobian[i, :]
+        return
+    end
+    return map(1:predictor.predictor.output_size(x)) do i
+        op_i = JuMP.add_nonlinear_operator(
+            model,
+            length(x),
+            (x...) -> f(i, x...),
+            (g, x...) -> ∇f(g, i, x...);
+            name = Symbol("op_$(gensym())"),
+        )
+        return op_i(x...)
+    end
+end
diff --git a/test/test_Flux.jl b/test/test_Flux.jl
@@ -201,15 +201,31 @@ function test_unsupported_layer()
     return
 end
 
-function test_gray_box()
+function test_gray_box_scalar_output()
     chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1))
     model = Model(Ipopt.Optimizer)
     set_silent(model)
+    set_attribute(model, "max_iter", 5)
     @variable(model, 0 <= x[1:2] <= 1)
     y = MathOptAI.add_predictor(model, chain, x; gray_box = true)
     @objective(model, Max, only(y))
     optimize!(model)
-    @test is_solved_and_feasible(model)
+    @test termination_status(model) == ITERATION_LIMIT
+    @test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
+    return
+end
+
+function test_gray_box_vector_output()
+    chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2))
+    model = Model(Ipopt.Optimizer)
+    set_silent(model)
+    set_attribute(model, "max_iter", 5)
+    @variable(model, 0 <= x[1:3] <= 1)
+    y = MathOptAI.add_predictor(model, chain, x; gray_box = true)
+    @test length(y) == 2
+    @objective(model, Max, sum(y))
+    optimize!(model)
+    @test termination_status(model) == ITERATION_LIMIT
     @test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
     return
 end