Merge pull request #11 from jvachier/jv/improvement_models

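Improvement: add a GitHub Actions workflow, Makefile and requirements.txt for Pylint checks (the lint step itself is still commented out); rename model methods to snake_case; use context managers for pickle file I/O; remove a duplicated method and unused variables.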
jvachier · Jan 28, 2024 · 2862660 · 2862660
2 parents d7d2156 + 7e2e25a
commit 2862660
Show file tree

Hide file tree

Showing 7 changed files with 80 additions and 68 deletions.
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -0,0 +1,27 @@
+name: Pylint
+on: 
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+jobs:
+  pylint:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.11"]
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        make install
+    # - name: Lint
+    #   run: |
+    #     make lint
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+# Allow requirements.txt to be tracked (a later pattern that matches it would re-ignore it)
+!requirements.txt
+
 # Ignore Mac system files
 .DS_store
 .dSYM
@@ -35,4 +38,5 @@ test.mp4
 *.ipynb
 
 # Ignore all json file
-*.json
+*.json
+
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+install: 
+	python -m pip install --upgrade pip &&\
+		pip install -r requirements.txt
+lint: 
+	pylint --disable=R,C,E0401,E0611,W0612 src/
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,3 +21,6 @@ scikit-optimize = "^0.9.0"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pylint.main]
+load-plugins = "pylint_actions"
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,9 @@
+numpy == 1.26.2
+pandas == 2.1.4
+matplotlib == 3.8.2
+scikit-learn == 1.3.2
+tensorflow == 2.15.0
+datetime == 5.4
+statsmodels == 0.14.1
+pylint == 3.0.3
+scikit-optimize == 0.9.0
diff --git a/src/main.py b/src/main.py
@@ -1,12 +1,10 @@
 import os.path
-
+from argparse import ArgumentParser
 from keras.models import load_model
 
-from modules import data_preparation
-from modules import models
-from modules import loading
-
-from argparse import ArgumentParser
+from src.modules import data_preparation
+from src.modules import models
+from src.modules import loading
 
 
 def main() -> None:
@@ -65,11 +63,11 @@ def main() -> None:
         save_model = models.Save_Load_models()
 
         x_train, x_test, y_train, y_test = model.train_split()
-        clf_rfc = model.model_RF(25, 10, "sqrt", 4)
+        clf_rfc = model.model_rf(25, 10, "sqrt", 4)
         pipe_lr = model.model_lr("lbfgs", 10000)
 
         if os.path.isfile("./pickle_files/models/lr") is False:
-            clf_rfc_fit = model.optimize_model_hyper_RF(clf_rfc, x_train, y_train)
+            clf_rfc_fit = model.optimize_model_hyper_rf(clf_rfc, x_train, y_train)
             pipe_lr_fit = model.fit_model(pipe_lr, x_train, y_train)
 
             save_model.save_model_sklearn(
@@ -115,13 +113,13 @@ def main() -> None:
         anomaly_isolation.visulaization_isolationforest()
         x_train = anomaly_autoencoder.data_to_feed_autoencoder()
         if os.path.isfile("./pickle_files/models/autoencoder.keras") is False:
-            autoencoder = anomaly_autoencoder.AutoEncoder(x_train)
+            autoencoder = anomaly_autoencoder.autoencoder(x_train)
             mse = anomaly_autoencoder.result_autocendoer(autoencoder, x_train)
-            anomaly_autoencoder.Anomaly(mse)
+            anomaly_autoencoder.anomaly(mse)
         else:
             autoencoder = load_model("./pickle_files/models/autoencoder.keras")
             mse = anomaly_autoencoder.result_autocendoer(autoencoder, x_train)
-            anomaly_autoencoder.Anomaly(mse)
+            anomaly_autoencoder.anomaly(mse)
 
 
 if __name__ == "__main__":

diff --git a/src/modules/models.py b/src/modules/models.py
@@ -43,7 +43,7 @@ def train_split(self) -> Tuple[np.array, np.array, list, list]:
         )
         return x_train, x_test, y_train, y_test
 
-    def model_RF(
+    def model_rf(
         self,
         estimateurs: int,
         depth: int,
@@ -68,7 +68,7 @@ def model_lr(
             LogisticRegression(random_state=1, solver=solv, max_iter=iteration),
         )
 
-    def optimize_model_hyper_RF(
+    def optimize_model_hyper_rf(
         self,
         model,
         x_train: np.array,
@@ -92,30 +92,6 @@ def optimize_model_hyper_RF(
         np.int = int  # to solve the issue with np.int and BayesSearchCV
         return search.fit(x_train, y_train)
 
-    def optimize_model_hyper_RF(
-        self,
-        model,
-        x_train: np.array,
-        y_train: list,
-    ) -> object:
-        params = {
-            "n_estimators": [10, 25, 50, 75],
-            "max_depth": np.arange(1, 9),
-            "criterion": ["gini", "entropy", "log_loss"],
-            "max_features": ["sqrt", "log2"],
-        }
-        search = BayesSearchCV(
-            estimator=model,
-            search_spaces=params,
-            n_jobs=4,
-            cv=3,
-            n_iter=50,
-            scoring="accuracy",
-            random_state=43,
-        )
-        np.int = int  # to solve the issue with np.int and BayesSearchCV
-        return search.fit(x_train, y_train)
-
     def fit_model(
         self,
         model,
@@ -141,7 +117,7 @@ def roc_curve(
 
         for i, j in enumerate(models):
             ax = plt.gca()
-            clf_disp = RocCurveDisplay.from_estimator(
+            RocCurveDisplay.from_estimator(
                 j, x_tests[i], y_tests[i], ax=ax, name=names[i], alpha=0.8
             )
         plt.savefig("./figures/roc_curves.png")
@@ -225,34 +201,29 @@ class Save_Load_models:
     def save_model_sklearn(
         self, name: str, model: object, prediction: np.array, prediction_proba: np.array
     ) -> None:
-        dbfile_model = open("./pickle_files/models/" + str(name), "ab")
-        dbfile_prediction = open(
+        with open("./pickle_files/models/" + str(name), "ab") as dbfile_model:
+            pickle.dump(model, dbfile_model)
+        with open(
             "./pickle_files/models/" + str(name) + "_predictions", "ab"
-        )
-        dbfile_prediction_proba = open(
+        ) as dbfile_prediction:
+            pickle.dump(prediction, dbfile_prediction)
+
+        with open(
             "./pickle_files/models/" + str(name) + "_predictions_proba", "ab"
-        )
-        pickle.dump(model, dbfile_model)
-        pickle.dump(prediction, dbfile_prediction)
-        pickle.dump(prediction_proba, dbfile_prediction_proba)
-        dbfile_model.close()
-        dbfile_prediction.close()
-        dbfile_prediction_proba.close()
+        ) as dbfile_prediction_proba:
+            pickle.dump(prediction_proba, dbfile_prediction_proba)
 
     def load_model_sklearn(self, name: str) -> Tuple[object, np.array, np.array]:
-        dbfile_model = open("./pickle_files/models/" + str(name), "rb")
-        dbfile_prediction = open(
+        with open("./pickle_files/models/" + str(name), "rb") as dbfile_model:
+            model_loaded = pickle.load(dbfile_model)
+        with open(
             "./pickle_files/models/" + str(name) + "_predictions", "rb"
-        )
-        dbfile_prediction_proba = open(
+        ) as dbfile_prediction:
+            predictions_loaded = pickle.load(dbfile_prediction)
+        with open(
             "./pickle_files/models/" + str(name) + "_predictions_proba", "rb"
-        )
-        model_loaded = pickle.load(dbfile_model)
-        predictions_loaded = pickle.load(dbfile_prediction)
-        prediction_proba_loaded = pickle.load(dbfile_prediction_proba)
-        dbfile_model.close()
-        dbfile_prediction.close()
-        dbfile_prediction_proba.close()
+        ) as dbfile_prediction_proba:
+            prediction_proba_loaded = pickle.load(dbfile_prediction_proba)
         return model_loaded, predictions_loaded, prediction_proba_loaded
 
 
@@ -318,7 +289,7 @@ def result_autocendoer(self, model: object, x_train: np.array) -> tf.Tensor:
         mse = tf.reduce_mean(tf.square(x_train - reconstructions_deep), axis=[1, 2])
         return mse
 
-    def AutoEncoder(self, x_train: np.array):
+    def autoencoder(self, x_train: np.array):
         input_layer = Input(shape=(x_train.shape[1], x_train.shape[2]))
         encoded = Dense(128, activation="relu")(input_layer)
         encoded = Dense(64, activation="relu")(encoded)
@@ -341,19 +312,14 @@ def AutoEncoder(self, x_train: np.array):
             autoencoder_deep.save("./pickle_files/models/autoencoder.keras")
         return autoencoder_deep
 
-    def Anomaly(self, mse: tf.Tensor) -> None:
+    def anomaly(self, mse: tf.Tensor) -> None:
         anomaly_deep_scores = pd.Series(mse.numpy(), name="anomaly_scores")
         anomaly_deep_scores.index = self.data[(self.time_step - 1) :].index
         anomaly_deep_scores = pd.Series(mse.numpy(), name="anomaly_scores")
         anomaly_deep_scores.index = self.data[(self.time_step - 1) :].index
 
         threshold_deep = anomaly_deep_scores.quantile(0.95)
         anomalous_deep = anomaly_deep_scores > threshold_deep
-        binary_labels_deep = anomalous_deep.astype(int)
-        precision, recall, f1_score, _ = precision_recall_fscore_support(
-            binary_labels_deep,
-            anomalous_deep,
-        )
 
         plt.figure(figsize=(16, 8))
         plt.plot(