Skip to content

Commit

Permalink
Merge pull request #11 from jvachier/jv/improvement_models
Browse files Browse the repository at this point in the history
Improvement.
  • Loading branch information
jvachier authored Jan 28, 2024
2 parents d7d2156 + 7e2e25a commit 2862660
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 68 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# CI workflow: on every push or pull request targeting `main`, check out the
# repository, set up the Python version from the matrix and install the
# project dependencies through `make install`.
name: Pylint
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  pylint:
    # Take the runner from the matrix so that adding an OS below actually
    # changes where the job runs (it was hard-coded before, leaving
    # `matrix.os` unused).
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          make install
      # NOTE(review): the lint step is currently disabled, so this workflow
      # only verifies that the dependencies install. Re-enable it once
      # `make lint` passes on CI.
      # - name: Lint
      #   run: |
      #     make lint
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Allow
!requirements.txt

# Ignore Mac system files
.DS_store
.dSYM
Expand Down Expand Up @@ -35,4 +38,5 @@ test.mp4
*.ipynb

# Ignore all json file
*.json
*.json

5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Developer entry points. Both targets are commands rather than files, so
# declare them phony: a stray file named `install` or `lint` must not make
# the target look up to date.
.PHONY: install lint

# Upgrade pip, then install the pinned dependencies. `python -m pip` is used
# for both steps so they run against the same interpreter.
install:
	python -m pip install --upgrade pip &&\
	python -m pip install -r requirements.txt

# Run pylint over src/ with the refactor (R) and convention (C) categories
# and the E0401, E0611 and W0612 checks disabled.
lint:
	pylint --disable=R,C,E0401,E0611,W0612 src/
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ scikit-optimize = "^0.9.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

# Pylint configuration: load the `pylint_actions` plugin on every run.
# NOTE(review): the plugin does not appear among the pinned requirements
# installed by `make install` - confirm it is available wherever
# `make lint` is expected to run.
[tool.pylint.main]
load-plugins = "pylint_actions"
9 changes: 9 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Pinned runtime and tooling dependencies, installed by `make install`.
numpy == 1.26.2
pandas == 2.1.4
matplotlib == 3.8.2
scikit-learn == 1.3.2
tensorflow == 2.15.0
# NOTE(review): this pin presumably resolves to the third-party `DateTime`
# distribution on PyPI, not the standard-library `datetime` module - confirm
# it is really needed.
datetime == 5.4
statsmodels == 0.14.1
pylint == 3.0.3
scikit-optimize == 0.9.0
20 changes: 9 additions & 11 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import os.path

from argparse import ArgumentParser
from keras.models import load_model

from modules import data_preparation
from modules import models
from modules import loading

from argparse import ArgumentParser
from src.modules import data_preparation
from src.modules import models
from src.modules import loading


def main() -> None:
Expand Down Expand Up @@ -65,11 +63,11 @@ def main() -> None:
save_model = models.Save_Load_models()

x_train, x_test, y_train, y_test = model.train_split()
clf_rfc = model.model_RF(25, 10, "sqrt", 4)
clf_rfc = model.model_rf(25, 10, "sqrt", 4)
pipe_lr = model.model_lr("lbfgs", 10000)

if os.path.isfile("./pickle_files/models/lr") is False:
clf_rfc_fit = model.optimize_model_hyper_RF(clf_rfc, x_train, y_train)
clf_rfc_fit = model.optimize_model_hyper_rf(clf_rfc, x_train, y_train)
pipe_lr_fit = model.fit_model(pipe_lr, x_train, y_train)

save_model.save_model_sklearn(
Expand Down Expand Up @@ -115,13 +113,13 @@ def main() -> None:
anomaly_isolation.visulaization_isolationforest()
x_train = anomaly_autoencoder.data_to_feed_autoencoder()
if os.path.isfile("./pickle_files/models/autoencoder.keras") is False:
autoencoder = anomaly_autoencoder.AutoEncoder(x_train)
autoencoder = anomaly_autoencoder.autoencoder(x_train)
mse = anomaly_autoencoder.result_autocendoer(autoencoder, x_train)
anomaly_autoencoder.Anomaly(mse)
anomaly_autoencoder.anomaly(mse)
else:
autoencoder = load_model("./pickle_files/models/autoencoder.keras")
mse = anomaly_autoencoder.result_autocendoer(autoencoder, x_train)
anomaly_autoencoder.Anomaly(mse)
anomaly_autoencoder.anomaly(mse)


if __name__ == "__main__":
Expand Down
78 changes: 22 additions & 56 deletions src/modules/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def train_split(self) -> Tuple[np.array, np.array, list, list]:
)
return x_train, x_test, y_train, y_test

def model_RF(
def model_rf(
self,
estimateurs: int,
depth: int,
Expand All @@ -68,7 +68,7 @@ def model_lr(
LogisticRegression(random_state=1, solver=solv, max_iter=iteration),
)

def optimize_model_hyper_RF(
def optimize_model_hyper_rf(
self,
model,
x_train: np.array,
Expand All @@ -92,30 +92,6 @@ def optimize_model_hyper_RF(
np.int = int # to solve the issue with np.int and BayesSearchCV
return search.fit(x_train, y_train)

def optimize_model_hyper_RF(
    self,
    model,
    x_train: np.array,
    y_train: list,
) -> object:
    """Tune *model* with a Bayesian hyper-parameter search.

    The search explores ``n_estimators``, ``max_depth``, ``criterion`` and
    ``max_features`` for 50 iterations, scoring candidates by accuracy with
    3-fold cross-validation on 4 parallel jobs.

    Args:
        model: Estimator handed to ``BayesSearchCV``.
        x_train: Training features.
        y_train: Training labels.

    Returns:
        The object returned by ``BayesSearchCV.fit``.
    """
    search_space = {
        "n_estimators": [10, 25, 50, 75],
        "max_depth": np.arange(1, 9),
        "criterion": ["gini", "entropy", "log_loss"],
        "max_features": ["sqrt", "log2"],
    }
    search_settings = {
        "n_jobs": 4,
        "cv": 3,
        "n_iter": 50,
        "scoring": "accuracy",
        "random_state": 43,
    }
    bayes_search = BayesSearchCV(
        estimator=model,
        search_spaces=search_space,
        **search_settings,
    )
    # Restore the `np.int` alias before fitting; the original author added
    # this to work around an issue between np.int and BayesSearchCV.
    np.int = int
    fitted_search = bayes_search.fit(x_train, y_train)
    return fitted_search

def fit_model(
self,
model,
Expand All @@ -141,7 +117,7 @@ def roc_curve(

for i, j in enumerate(models):
ax = plt.gca()
clf_disp = RocCurveDisplay.from_estimator(
RocCurveDisplay.from_estimator(
j, x_tests[i], y_tests[i], ax=ax, name=names[i], alpha=0.8
)
plt.savefig("./figures/roc_curves.png")
Expand Down Expand Up @@ -225,34 +201,29 @@ class Save_Load_models:
def save_model_sklearn(
    self, name: str, model: object, prediction: np.array, prediction_proba: np.array
) -> None:
    """Pickle a fitted model and its predictions under ``./pickle_files/models/``.

    Three files are written: ``<name>`` (the model), ``<name>_predictions``
    and ``<name>_predictions_proba``.

    Args:
        name: Base file name; converted with ``str()`` before use.
        model: Object to pickle as the model.
        prediction: Predictions to pickle alongside the model.
        prediction_proba: Prediction probabilities to pickle alongside the model.

    Raises:
        OSError: If a target file cannot be opened, e.g. because the
            ``./pickle_files/models/`` directory does not exist.
    """
    base_path = "./pickle_files/models/" + str(name)
    artefacts = (
        ("", model),
        ("_predictions", prediction),
        ("_predictions_proba", prediction_proba),
    )
    for suffix, payload in artefacts:
        # Open with "wb" rather than "ab": appending would stack a second
        # pickle behind the first, and a single pickle.load() only ever
        # returns the first object, i.e. the stale one.
        with open(base_path + suffix, "wb") as handle:
            pickle.dump(payload, handle)

def load_model_sklearn(self, name: str) -> Tuple[object, np.array, np.array]:
    """Load a pickled model and its predictions from ``./pickle_files/models/``.

    Reads ``<name>``, ``<name>_predictions`` and ``<name>_predictions_proba``
    and unpickles one object from each.

    Args:
        name: Base file name; converted with ``str()`` before use.

    Returns:
        A ``(model, predictions, prediction_proba)`` tuple.

    Raises:
        OSError: If any of the three files cannot be opened.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use this
    # on files this project wrote itself.
    base_path = "./pickle_files/models/" + str(name)
    loaded = []
    for suffix in ("", "_predictions", "_predictions_proba"):
        # The context manager closes the handle even if unpickling fails.
        with open(base_path + suffix, "rb") as handle:
            loaded.append(pickle.load(handle))
    model_loaded, predictions_loaded, prediction_proba_loaded = loaded
    return model_loaded, predictions_loaded, prediction_proba_loaded


Expand Down Expand Up @@ -318,7 +289,7 @@ def result_autocendoer(self, model: object, x_train: np.array) -> tf.Tensor:
mse = tf.reduce_mean(tf.square(x_train - reconstructions_deep), axis=[1, 2])
return mse

def AutoEncoder(self, x_train: np.array):
def autoencoder(self, x_train: np.array):
input_layer = Input(shape=(x_train.shape[1], x_train.shape[2]))
encoded = Dense(128, activation="relu")(input_layer)
encoded = Dense(64, activation="relu")(encoded)
Expand All @@ -341,19 +312,14 @@ def AutoEncoder(self, x_train: np.array):
autoencoder_deep.save("./pickle_files/models/autoencoder.keras")
return autoencoder_deep

def Anomaly(self, mse: tf.Tensor) -> None:
def anomaly(self, mse: tf.Tensor) -> None:
anomaly_deep_scores = pd.Series(mse.numpy(), name="anomaly_scores")
anomaly_deep_scores.index = self.data[(self.time_step - 1) :].index
anomaly_deep_scores = pd.Series(mse.numpy(), name="anomaly_scores")
anomaly_deep_scores.index = self.data[(self.time_step - 1) :].index

threshold_deep = anomaly_deep_scores.quantile(0.95)
anomalous_deep = anomaly_deep_scores > threshold_deep
binary_labels_deep = anomalous_deep.astype(int)
precision, recall, f1_score, _ = precision_recall_fscore_support(
binary_labels_deep,
anomalous_deep,
)

plt.figure(figsize=(16, 8))
plt.plot(
Expand Down

0 comments on commit 2862660

Please sign in to comment.