From 758fdf7f0cab1919818d8652c8409cecf3684db9 Mon Sep 17 00:00:00 2001
From: Oscar Dowson <odow@users.noreply.github.com>
Date: Thu, 15 Aug 2024 19:36:45 +1200
Subject: [PATCH] Add build_predictor (#78)

---
 docs/src/api.md                      | 27 ++++++----
 ext/MathOptAIDecisionTreeExt.jl      | 36 ++++++++++++-
 ext/MathOptAIFluxExt.jl              | 68 +++++++++++++++++++++++--
 ext/MathOptAIGLMExt.jl               | 66 ++++++++++++++++++++++--
 ext/MathOptAILuxExt.jl               | 76 ++++++++++++++++++++++++++--
 ext/MathOptAIPythonCallExt.jl        | 37 ++++++++++++--
 src/MathOptAI.jl                     | 10 ++++
 src/predictors/BinaryDecisionTree.jl | 12 +++--
 8 files changed, 302 insertions(+), 30 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 877cba8..ffc7401 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -31,16 +31,9 @@ AbstractPredictor
 add_predictor
 ```
 
-```@autodocs
-Modules = [
-    Base.get_extension(MathOptAI, :MathOptAIAbstractGPsExt),
-    Base.get_extension(MathOptAI, :MathOptAIDecisionTreeExt),
-    Base.get_extension(MathOptAI, :MathOptAIFluxExt),
-    Base.get_extension(MathOptAI, :MathOptAIGLMExt),
-    Base.get_extension(MathOptAI, :MathOptAILuxExt),
-    Base.get_extension(MathOptAI, :MathOptAIPythonCallExt),
-    Base.get_extension(MathOptAI, :MathOptAIStatsModelsExt),
-]
+## `build_predictor`
+```@docs
+build_predictor
 ```
 
 ## `Affine`
@@ -112,3 +105,17 @@ SoftPlus
 ```@docs
 Tanh
 ```
+
+## Extensions
+
+```@autodocs
+Modules = [
+    Base.get_extension(MathOptAI, :MathOptAIAbstractGPsExt),
+    Base.get_extension(MathOptAI, :MathOptAIDecisionTreeExt),
+    Base.get_extension(MathOptAI, :MathOptAIFluxExt),
+    Base.get_extension(MathOptAI, :MathOptAIGLMExt),
+    Base.get_extension(MathOptAI, :MathOptAILuxExt),
+    Base.get_extension(MathOptAI, :MathOptAIPythonCallExt),
+    Base.get_extension(MathOptAI, :MathOptAIStatsModelsExt),
+]
+```
diff --git a/ext/MathOptAIDecisionTreeExt.jl b/ext/MathOptAIDecisionTreeExt.jl
index 6501e20..6ab0afd 100644
--- a/ext/MathOptAIDecisionTreeExt.jl
+++ b/ext/MathOptAIDecisionTreeExt.jl
@@ -53,10 +53,44 @@ function MathOptAI.add_predictor(
     predictor::DecisionTree.Root,
     x::Vector,
 )
-    inner_predictor = _tree_or_leaf(predictor.node)
+    inner_predictor = MathOptAI.build_predictor(predictor)
     return MathOptAI.add_predictor(model, inner_predictor, x)
 end
 
+"""
+    MathOptAI.build_predictor(predictor::DecisionTree.Root)
+
+Convert a binary decision tree from DecisionTree.jl to a
+[`BinaryDecisionTree`](@ref).
+
+## Example
+
+```jldoctest
+julia> using MathOptAI, DecisionTree
+
+julia> truth(x::Vector) = x[1] <= 0.5 ? -2 : (x[2] <= 0.3 ? 3 : 4)
+truth (generic function with 1 method)
+
+julia> features = abs.(sin.((1:10) .* (3:4)'));
+
+julia> size(features)
+(10, 2)
+
+julia> labels = truth.(Vector.(eachrow(features)));
+
+julia> ml_model = DecisionTree.build_tree(labels, features)
+Decision Tree
+Leaves: 3
+Depth:  2
+
+julia> MathOptAI.build_predictor(ml_model)
+BinaryDecisionTree{Float64,Int64} [leaves=3, depth=2]
+```
+"""
+function MathOptAI.build_predictor(predictor::DecisionTree.Root)
+    return predictor.node |> _tree_or_leaf
+end
+
 function _tree_or_leaf(node::DecisionTree.Node{K,V}) where {K,V}
     return MathOptAI.BinaryDecisionTree{K,V}(
         node.featid,
diff --git a/ext/MathOptAIFluxExt.jl b/ext/MathOptAIFluxExt.jl
index b934117..dcb875b 100644
--- a/ext/MathOptAIFluxExt.jl
+++ b/ext/MathOptAIFluxExt.jl
@@ -67,16 +67,76 @@ function MathOptAI.add_predictor(
     config::Dict = Dict{Any,Any}(),
     reduced_space::Bool = false,
 )
-    inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
-    for layer in predictor.layers
-        _add_predictor(inner_predictor, layer, config)
-    end
+    inner_predictor = MathOptAI.build_predictor(predictor; config)
     if reduced_space
         inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
     end
     return MathOptAI.add_predictor(model, inner_predictor, x)
 end
 
+"""
+    MathOptAI.build_predictor(
+        predictor::Flux.Chain;
+        config::Dict = Dict{Any,Any}(),
+    )
+
+Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
+
+## Supported layers
+
+ * `Flux.Dense`
+ * `Flux.softmax`
+
+## Supported activation functions
+
+ * `Flux.relu`
+ * `Flux.sigmoid`
+ * `Flux.softplus`
+ * `Flux.tanh`
+
+## Keyword arguments
+
+ * `config`: a dictionary that maps `Flux` activation functions to an
+   [`AbstractPredictor`](@ref) to control how the activation functions are
+   reformulated.
+
+## Example
+
+```jldoctest
+julia> using Flux, MathOptAI
+
+julia> chain = Flux.Chain(Flux.Dense(1 => 16, Flux.relu), Flux.Dense(16 => 1));
+
+julia> MathOptAI.build_predictor(
+           chain;
+           config = Dict(Flux.relu => MathOptAI.ReLU()),
+       )
+Pipeline with layers:
+ * Affine(A, b) [input: 1, output: 16]
+ * ReLU()
+ * Affine(A, b) [input: 16, output: 1]
+
+julia> MathOptAI.build_predictor(
+           chain;
+           config = Dict(Flux.relu => MathOptAI.ReLUQuadratic()),
+       )
+Pipeline with layers:
+ * Affine(A, b) [input: 1, output: 16]
+ * ReLUQuadratic()
+ * Affine(A, b) [input: 16, output: 1]
+```
+"""
+function MathOptAI.build_predictor(
+    predictor::Flux.Chain;
+    config::Dict = Dict{Any,Any}(),
+)
+    # Start from an empty pipeline; `_add_predictor` is then called once per
+    # layer, in chain order, with the pipeline being built and `config`.
+    pipeline = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
+    foreach(layer -> _add_predictor(pipeline, layer, config), predictor.layers)
+    return pipeline
+end
+
 _default(::typeof(identity)) = nothing
 _default(::Any) = missing
 _default(::typeof(Flux.relu)) = MathOptAI.ReLU()
diff --git a/ext/MathOptAIGLMExt.jl b/ext/MathOptAIGLMExt.jl
index e83f493..9c576e7 100644
--- a/ext/MathOptAIGLMExt.jl
+++ b/ext/MathOptAIGLMExt.jl
@@ -44,13 +44,35 @@ function MathOptAI.add_predictor(
     x::Vector;
     reduced_space::Bool = false,
 )
-    inner_predictor = MathOptAI.Affine(GLM.coef(predictor))
+    inner_predictor = MathOptAI.build_predictor(predictor)
     if reduced_space
         inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
     end
     return MathOptAI.add_predictor(model, inner_predictor, x)
 end
 
+"""
+    MathOptAI.build_predictor(predictor::GLM.LinearModel)
+
+Convert a trained linear model from GLM.jl to an [`Affine`](@ref) layer.
+
+## Example
+
+```jldoctest
+julia> using GLM, MathOptAI
+
+julia> X, Y = rand(10, 2), rand(10);
+
+julia> model_glm = GLM.lm(X, Y);
+
+julia> MathOptAI.build_predictor(model_glm)
+Affine(A, b) [input: 2, output: 1]
+```
+"""
+function MathOptAI.build_predictor(predictor::GLM.LinearModel)
+    return GLM.coef(predictor) |> MathOptAI.Affine
+end
+
 """
     MathOptAI.add_predictor(
         model::JuMP.Model,
@@ -100,12 +122,50 @@ function MathOptAI.add_predictor(
     sigmoid::MathOptAI.AbstractPredictor = MathOptAI.Sigmoid(),
     reduced_space::Bool = false,
 )
-    affine = MathOptAI.Affine(GLM.coef(predictor))
-    inner_predictor = MathOptAI.Pipeline(affine, sigmoid)
+    inner_predictor = MathOptAI.build_predictor(predictor; sigmoid)
     if reduced_space
         inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
     end
     return MathOptAI.add_predictor(model, inner_predictor, x)
 end
 
+"""
+    MathOptAI.build_predictor(
+        predictor::GLM.GeneralizedLinearModel{
+            GLM.GlmResp{Vector{Float64},GLM.Bernoulli{Float64},GLM.LogitLink},
+        };
+        sigmoid::MathOptAI.AbstractPredictor = MathOptAI.Sigmoid(),
+    )
+
+Convert a trained logistic model from GLM.jl to a [`Pipeline`](@ref).
+
+## Keyword arguments
+
+ * `sigmoid`: the predictor to use for the sigmoid layer.
+
+## Example
+
+```jldoctest
+julia> using GLM, MathOptAI
+
+julia> X, Y = rand(10, 2), rand(Bool, 10);
+
+julia> model_glm = GLM.glm(X, Y, GLM.Bernoulli());
+
+julia> MathOptAI.build_predictor(model_glm)
+Pipeline with layers:
+ * Affine(A, b) [input: 2, output: 1]
+ * Sigmoid()
+```
+"""
+function MathOptAI.build_predictor(
+    predictor::GLM.GeneralizedLinearModel{
+        GLM.GlmResp{Vector{Float64},GLM.Bernoulli{Float64},GLM.LogitLink},
+    };
+    sigmoid::MathOptAI.AbstractPredictor = MathOptAI.Sigmoid(),
+)
+    # Affine map from the fitted coefficients, then the `sigmoid` predictor.
+    return MathOptAI.Pipeline(MathOptAI.Affine(GLM.coef(predictor)), sigmoid)
+end
+
 end  # module
diff --git a/ext/MathOptAILuxExt.jl b/ext/MathOptAILuxExt.jl
index da06e6a..234724c 100644
--- a/ext/MathOptAILuxExt.jl
+++ b/ext/MathOptAILuxExt.jl
@@ -76,16 +76,84 @@ function MathOptAI.add_predictor(
     x::Vector;
     config::Dict = Dict{Any,Any}(),
     reduced_space::Bool = false,
+)
+    inner_predictor = MathOptAI.build_predictor(predictor; config)
+    if reduced_space
+        inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
+    end
+    return MathOptAI.add_predictor(model, inner_predictor, x)
+end
+
+"""
+    MathOptAI.build_predictor(
+        predictor::Tuple{<:Lux.Chain,<:NamedTuple,<:NamedTuple};
+        config::Dict = Dict{Any,Any}(),
+    )
+
+Convert a trained neural network from Lux.jl to a [`Pipeline`](@ref).
+
+## Supported layers
+
+ * `Lux.Dense`
+
+## Supported activation functions
+
+ * `Lux.relu`
+ * `Lux.sigmoid`
+ * `Lux.softplus`
+ * `Lux.tanh`
+
+## Keyword arguments
+
+ * `config`: a dictionary that maps `Lux` activation functions to an
+   [`AbstractPredictor`](@ref) to control how the activation functions are
+   reformulated.
+
+## Example
+
+```jldoctest; filter=r"[┌|└].+"
+julia> using Lux, MathOptAI, Random
+
+julia> rng = Random.MersenneTwister();
+
+julia> chain = Lux.Chain(Lux.Dense(1 => 16, Lux.relu), Lux.Dense(16 => 1))
+Chain(
+    layer_1 = Dense(1 => 16, relu),     # 32 parameters
+    layer_2 = Dense(16 => 1),           # 17 parameters
+)         # Total: 49 parameters,
+          #        plus 0 states.
+
+julia> parameters, state = Lux.setup(rng, chain);
+
+julia> predictor = MathOptAI.build_predictor(
+           (chain, parameters, state);
+           config = Dict(Lux.relu => MathOptAI.ReLU()),
+       )
+Pipeline with layers:
+ * Affine(A, b) [input: 1, output: 16]
+ * ReLU()
+ * Affine(A, b) [input: 16, output: 1]
+
+julia> MathOptAI.build_predictor(
+           (chain, parameters, state);
+           config = Dict(Lux.relu => MathOptAI.ReLUQuadratic()),
+       )
+Pipeline with layers:
+ * Affine(A, b) [input: 1, output: 16]
+ * ReLUQuadratic()
+ * Affine(A, b) [input: 16, output: 1]
+```
+"""
+function MathOptAI.build_predictor(
+    predictor::Tuple{<:Lux.Chain,<:NamedTuple,<:NamedTuple};
+    config::Dict = Dict{Any,Any}(),
 )
     chain, parameters, _ = predictor
     inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
     for (layer, parameter) in zip(chain.layers, parameters)
         _add_predictor(inner_predictor, layer, parameter, config)
     end
-    if reduced_space
-        inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
-    end
-    return MathOptAI.add_predictor(model, inner_predictor, x)
+    return inner_predictor
 end
 
 _default(::typeof(identity)) = nothing
diff --git a/ext/MathOptAIPythonCallExt.jl b/ext/MathOptAIPythonCallExt.jl
index 55ef34f..29e40de 100644
--- a/ext/MathOptAIPythonCallExt.jl
+++ b/ext/MathOptAIPythonCallExt.jl
@@ -41,10 +41,7 @@ function MathOptAI.add_predictor(
     config::Dict = Dict{Any,Any}(),
     reduced_space::Bool = false,
 )
-    torch = PythonCall.pyimport("torch")
-    nn = PythonCall.pyimport("torch.nn")
-    torch_model = torch.load(predictor.filename)
-    inner_predictor = _predictor(nn, torch_model, config)
+    inner_predictor = MathOptAI.build_predictor(predictor; config)
     if reduced_space
         # If config maps to a ReducedSpace predictor, we'll get a MethodError
         # when trying to add the nested redcued space predictors.
@@ -54,6 +51,38 @@ function MathOptAI.add_predictor(
     return MathOptAI.add_predictor(model, inner_predictor, x)
 end
 
+"""
+    MathOptAI.build_predictor(
+        predictor::MathOptAI.PytorchModel;
+        config::Dict = Dict{Any,Any}(),
+    )
+
+Convert a trained neural network from PyTorch via PythonCall.jl to a
+[`Pipeline`](@ref).
+
+## Supported layers
+
+ * `nn.Linear`
+ * `nn.ReLU`
+ * `nn.Sequential`
+ * `nn.Sigmoid`
+ * `nn.Tanh`
+
+## Keyword arguments
+
+ * `config`: a dictionary that maps symbols to an [`AbstractPredictor`](@ref)
+   to control how the activation functions are reformulated.
+"""
+function MathOptAI.build_predictor(
+    predictor::MathOptAI.PytorchModel;
+    config::Dict = Dict{Any,Any}(),
+)
+    torch = PythonCall.pyimport("torch")
+    nn = PythonCall.pyimport("torch.nn")
+    torch_model = torch.load(predictor.filename)  # pickle: trusted files only
+    return _predictor(nn, torch_model, config)
+end
+
 function _predictor(nn, layer, config)
     if Bool(PythonCall.pybuiltins.isinstance(layer, nn.Linear))
         weight = mapreduce(vcat, layer.weight.tolist()) do w
diff --git a/src/MathOptAI.jl b/src/MathOptAI.jl
index 07ddb96..b1b715c 100644
--- a/src/MathOptAI.jl
+++ b/src/MathOptAI.jl
@@ -104,6 +104,16 @@ function add_predictor(
     return reduce(hcat, y)
 end
 
+"""
+    build_predictor(extension; kwargs...)::AbstractPredictor
+
+A uniform interface to convert various extension types to an
+[`AbstractPredictor`](@ref).
+
+Methods are defined in the package extensions; see their docstrings for details.
+"""
+function build_predictor end
+
 """
     ReducedSpace(predictor::AbstractPredictor)
 
diff --git a/src/predictors/BinaryDecisionTree.jl b/src/predictors/BinaryDecisionTree.jl
index b4a1d9e..2d24655 100644
--- a/src/predictors/BinaryDecisionTree.jl
+++ b/src/predictors/BinaryDecisionTree.jl
@@ -22,9 +22,7 @@ An [`AbstractPredictor`](@ref) that represents a binary decision tree.
 To represent the tree `x[1] <= 0.0 ? -1 : (x[1] <= 1.0 ? 0 : 1)`, do:
 
 ```jldoctest doc_decision_tree
-julia> using JuMP
-
-julia> import MathOptAI
+julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -36,7 +34,7 @@ julia> f = MathOptAI.BinaryDecisionTree{Float64,Int}(
            -1,
            MathOptAI.BinaryDecisionTree{Float64,Int}(1, 1.0, 0, 1),
        )
-MathOptAI.BinaryDecisionTree{Float64, Int64}(1, 0.0, -1, MathOptAI.BinaryDecisionTree{Float64, Int64}(1, 1.0, 0, 1))
+BinaryDecisionTree{Float64,Int64} [leaves=3, depth=2]
 
 julia> y = MathOptAI.add_predictor(model, f, x)
 1-element Vector{VariableRef}:
@@ -64,6 +62,12 @@ struct BinaryDecisionTree{K,V}
     rhs::Union{V,BinaryDecisionTree{K,V}}
 end
 
+function Base.show(io::IO, predictor::BinaryDecisionTree{K,V}) where {K,V}
+    paths = _tree_to_paths(predictor)  # assumed to hold `leaf => path` pairs
+    leaves, depth = length(paths), maximum(p -> length(last(p)), paths)
+    return print(io, "BinaryDecisionTree{$K,$V} [leaves=$leaves, depth=$depth]")
+end
+
 function add_predictor(
     model::JuMP.Model,
     predictor::BinaryDecisionTree,