From e60f33e7e001884b6a065a86de25dc4a88998d16 Mon Sep 17 00:00:00 2001
From: "Hiro Vuong (Nam)" <22463238+vuonghoainam@users.noreply.github.com>
Date: Fri, 24 Feb 2023 23:32:55 +0700
Subject: [PATCH] Remove tensorflow from h1st dependencies (#161)

---
 .github/workflows/test.yml                |  1 -
 h1st/model/model.py                       |  2 +-
 h1st/model/repository/model_repository.py | 10 +---
 pyproject.toml                            |  1 -
 tests/model/test_model.py                 | 59 -----------------------
 5 files changed, 2 insertions(+), 71 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7f0fd8e9..5d464d4f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -92,7 +92,6 @@ jobs:
       - name: Install dependencies
         run: |
           poetry install --no-interaction
-          poetry run python -m pip install tensorflow
 
       - name: Run tests
         run: poetry run pytest
diff --git a/h1st/model/model.py b/h1st/model/model.py
index c15f5a1f..8fe2d587 100644
--- a/h1st/model/model.py
+++ b/h1st/model/model.py
@@ -47,7 +47,7 @@ def persist(self, version=None) -> str:
         Persist this model's properties to the ModelRepository. Currently, only `stats`, `metrics`, `model` properties are supported.
 
         `model` property could be single model, list or dict of models
-        Currently, only sklearn and tensorflow-keras are supported.
+        Currently, only sklearn is supported, but you can extend this method to support any framework.
 
         :param version: model version, leave blank for autogeneration
         :returns: model version
diff --git a/h1st/model/repository/model_repository.py b/h1st/model/repository/model_repository.py
index ea08ba26..b582e65a 100644
--- a/h1st/model/repository/model_repository.py
+++ b/h1st/model/repository/model_repository.py
@@ -9,7 +9,6 @@
 import ulid
 import joblib
 import skfuzzy
-# import tensorflow
 import sklearn
 
 from h1st.model.repository.storage.s3 import S3Storage
@@ -54,8 +53,7 @@ def _get_rule_engine_type(self, rules):
     def _get_model_type(self, model):
         if isinstance(model, sklearn.base.BaseEstimator):
             return "sklearn"
-        # if isinstance(model, tensorflow.keras.Model):
-        #     return "tensorflow-keras"
+
         if model is None:
             return "custom"
 
@@ -87,10 +85,6 @@ def _serialize_single_model(self, model, path, model_name="model"):
         if model_type == "sklearn":
             model_path = "%s.joblib" % model_name
             joblib.dump(model, path + "/%s" % model_path)
-        # elif model_type == "tensorflow-keras":
-        #     model_path = model_name
-        #     os.makedirs(path + "/%s" % model_path, exist_ok=True)
-        #     model.save_weights(path + "/%s/weights" % model_path)
         elif model_type == "custom":
             model_path = model_name  # XXX
         else:
@@ -103,8 +97,6 @@ def _deserialize_single_model(self, model, path, model_type, model_path):
             # This is a sklearn model
             model = joblib.load(path + "/%s" % model_path)
             # print(str(type(model)))
-        # elif model_type == "tensorflow-keras":
-        #     model.load_weights(path + "/%s/weights" % model_path).expect_partial()
         elif model_type == "custom":
             model = None
 
diff --git a/pyproject.toml b/pyproject.toml
index 9fc952f8..bf099c2e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,6 @@ pyarrow = ">=9.0.0"
 
 # Machine Learning / Deep Learning
 scikit-learn = ">=1.1.2"
-xgboost = ">=1.7.2"
 
 # Trustworthy AI
 graphviz = ">=0.20.1"
diff --git a/tests/model/test_model.py b/tests/model/test_model.py
index cb65fd05..736741e6 100644
--- a/tests/model/test_model.py
+++ b/tests/model/test_model.py
@@ -1,6 +1,5 @@
 import tempfile
 
-import tensorflow as tf
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.datasets import load_iris
 from sklearn.linear_model import LogisticRegression
@@ -59,9 +58,6 @@ def assert_models(self, modeler_class, model_class, model_type, model_path, coll
             if model_type == 'sklearn':
                 import joblib
                 assert model_type == model_serde._get_model_type(joblib.load('%s/%s' % (path, model_path)))
-            elif model_type == 'tensorflow-keras':
-                # Currently, we save/load weights => nothing do assert here 
-                pass
 
             model_serde.deserialize(model_2, path)
             
@@ -140,61 +136,6 @@ class MyModel(MLModel):
 
         self.assert_models(MyModeler, MyModel, 'sklearn', 'model_Iris.joblib', 'dict')
 
-    def test_serialize_tensorflow_model(self):
-        class MyModeler(MLModeler):
-            def load_data(self) -> dict:
-                data = load_iris()
-                return {'X': data.data, 'y': data.target}
-
-            def train_base_model(self, prepared_data):
-                X, y = prepared_data['X'], prepared_data['y']
-                model = self.model_class.get_model_arch()
-                model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
-                model.fit(X, y, verbose=2, batch_size=5, epochs=20)
-                return model
-        
-        class MyModel(MLModel):
-            def __init__(self):
-                self.base_model = self.get_model_arch()
-
-            @staticmethod
-            def get_model_arch():
-                model = tf.keras.Sequential([
-                                                tf.keras.layers.Dense(8, input_dim=4, activation='relu'),
-                                                tf.keras.layers.Dense(1, activation='softmax')
-                                            ])
-                return model
-
-        self.assert_models(MyModeler, MyModel, 'tensorflow-keras', 'model')
-
-    def test_serialize_dict_tensorflow_model(self):
-        class MyModeler(MLModeler):
-            def load_data(self) -> dict:
-                data = load_iris()
-                return {'X': data.data, 'y': data.target}
-
-            def train_base_model(self, prepared_data):
-                X, y = prepared_data['X'], prepared_data['y']
-                model = self.model_class.get_model_arch()['Iris']
-                model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
-                model.fit(X, y, verbose=2, batch_size=5, epochs=20)
-                return {'Iris': model}
-        
-        class MyModel(MLModel):
-            def __init__(self):
-                self.base_model = self.get_model_arch()
-
-            @staticmethod
-            def get_model_arch():
-                model = tf.keras.Sequential([
-                                                tf.keras.layers.Dense(8, input_dim=4, activation='relu'),
-                                                tf.keras.layers.Dense(1, activation='softmax')
-                                            ])
-                return {'Iris': model}
-
-        self.assert_models(MyModeler, MyModel, 'tensorflow-keras', 'model_Iris', 'dict')
-
-
 class TestModelStatsSerDe:
     def test_serialize_dict(self):
         class MyModeler(MLModeler):