Add files via upload

NadavIs56 · May 11, 2023 · d1e2777 · d1e2777
1 parent 9bf26ed
commit d1e2777
Show file tree

Hide file tree

Showing 9 changed files with 605 additions and 0 deletions.
diff --git a/augmentation.py b/augmentation.py
@@ -0,0 +1,81 @@
+"""
+    This file is used to add augmented images to our dataset for classes that lack images, according to the output of "sets_visualization".
+"""
+import pickle
+import numpy as np
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+from skimage.transform import rotate
+
+def _load_pickle(path):
+    """Return the object unpickled from ``path``; the file is closed afterwards."""
+    # NOTE: unpickling can execute arbitrary code - only load trusted files.
+    with open(path, 'rb') as fh:
+        return pickle.load(fh)
+
+
+# NOTE(review): the training set is read from the "after_aug" sub-folder while
+# validation/test come from the parent folder - confirm this is the intended
+# (not yet augmented) training set.
+x_train = _load_pickle('train_val_test_sets6/after_aug/x_train')
+y_train = _load_pickle('train_val_test_sets6/after_aug/y_train')
+x_val = _load_pickle('train_val_test_sets6/x_val')
+y_val = _load_pickle('train_val_test_sets6/y_val')
+x_test = _load_pickle('train_val_test_sets6/x_test')
+y_test = _load_pickle('train_val_test_sets6/y_test')
+
+def _rot45(img):
+    """Rotate ``img`` by 45 degrees, filling the empty corners by wrapping."""
+    return rotate(img, angle=45, mode='wrap')
+
+
+def _mirror_rot270(img):
+    """Mirror ``img`` left-right, then rotate it by 270 degrees (wrap fill)."""
+    return rotate(np.fliplr(img), angle=270, mode='wrap')
+
+
+# label -> (augment every k-th image of that class, transforms applied to it).
+# Labels that are not listed (e.g. 1) are left untouched.
+AUGMENT_RULES = {
+    0: (1, (_rot45,)),
+    2: (3, (_rot45, np.flipud)),
+    3: (6, (_rot45, np.flipud)),
+    4: (1, (_rot45, np.flipud, np.fliplr, _mirror_rot270)),
+    5: (9, (_rot45, np.flipud)),
+}
+
+# Number of images seen so far per augmented class (drives the "every k-th" rule).
+seen = dict.fromkeys(AUGMENT_RULES, 0)
+
+# Collect the new samples in plain lists and append them once at the end:
+# calling np.append inside the loop re-copies the whole image array for every
+# added image, which is quadratic in the dataset size.
+extra_x, extra_y = [], []
+for img, label in tqdm(zip(x_train, y_train), total=len(x_train)):
+    rule = AUGMENT_RULES.get(label)
+    if rule is None:
+        continue
+    period, transforms = rule
+    if seen[label] % period == 0:
+        extra_x.extend(transform(img) for transform in transforms)
+        extra_y.extend([int(label)] * len(transforms))
+    seen[label] += 1
+
+# Skip the append when nothing was generated so dtypes stay untouched.
+if extra_x:
+    x_train = np.append(x_train, extra_x, axis=0)
+    y_train = np.append(y_train, extra_y)
+
+# Shuffle images and labels with the same permutation so they stay aligned.
+indices = np.random.permutation(len(x_train))
+x_train = x_train[indices]
+y_train = y_train[indices]
+
+# Persist all six arrays into the "after_aug" folder. The context manager
+# guarantees every file is flushed and closed even if pickling fails.
+for name, data in (
+    ("x_train", x_train),
+    ("y_train", y_train),
+    ("x_val", x_val),
+    ("y_val", y_val),
+    ("x_test", x_test),
+    ("y_test", y_test),
+):
+    with open(f"dir../after_aug/{name}", "wb") as f:
+        pickle.dump(data, f)
diff --git a/evaluate.py b/evaluate.py
@@ -0,0 +1,71 @@
+"""
+    This file is used to evaluate our model for fine-tuning and better understanding.
+    It shows the confusion matrix, accuracy & loss history plots and the classification report.
+"""
+
+import numpy as np
+import tensorflow.keras as K
+import pickle
+from sklearn.metrics import (ConfusionMatrixDisplay, confusion_matrix, classification_report)
+import matplotlib.pyplot as plt
+
+
+# Load the held-out test split; "with" closes each file once it has been read.
+# NOTE: unpickling can execute arbitrary code - only load trusted files.
+with open('train_val_test_sets6/x_test', 'rb') as fh:
+    x_test = pickle.load(fh)
+with open('train_val_test_sets6/y_test', 'rb') as fh:
+    y_test = pickle.load(fh)
+
+# Training history (dict of per-epoch metric lists) pickled by the training script.
+with open('hist_dir..', "rb") as file_pi:
+    hist = pickle.load(file_pi)
+
+model = K.models.load_model("model_dir..")
+
+# One row of class probabilities per image; the arg-max is the predicted label.
+predictions = model.predict(x_test)
+test_pred = np.argmax(predictions, axis=1)
+
+# Class names indexed by integer label.
+# NOTE(review): the fifth class is spelled 'mila' here but 'millia' in other
+# scripts of this project - confirm the intended display name.
+types = ['acne', 'carcinoma', 'eczema', 'keratosis', 'mila', 'rosacea']
+
+# Pin the label set so the matrix always has one row/column per class, even
+# when a class is missing from y_test or from the predictions; otherwise the
+# matrix would shrink and no longer match the six display labels.
+cm = confusion_matrix(y_test, test_pred, labels=np.arange(len(types)))
+print("Confusion Matrix\n", cm)
+disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=types)
+fig, ax = plt.subplots(figsize=(15, 15))
+disp.plot(ax=ax, cmap=plt.cm.Blues)
+plt.show()
+
+def _plot_history(train_values, val_values, title, ylabel):
+    """Plot a training metric and its validation counterpart against the epoch.
+
+    ``train_values`` / ``val_values`` are the per-epoch lists stored in the
+    training history; ``title`` and ``ylabel`` label the figure.
+    """
+    # A leading 0.0 is prepended so both curves start at the origin (epoch 0).
+    train_curve = [0.0, *train_values]
+    val_curve = [0.0, *val_values]
+    plt.plot(train_curve)
+    plt.plot(val_curve)
+    plt.title(title)
+    plt.ylabel(ylabel)
+    plt.xlabel('Epoch')
+    # The second curve comes from the "val_*" history keys, i.e. validation data.
+    plt.legend(['Train', 'Validation'], loc='upper left')
+    plt.ylim(bottom=0)
+    plt.xlim(left=0)
+    # Derive the ticks from the number of recorded epochs instead of a fixed 25.
+    plt.xticks(np.arange(0, len(train_curve) + 1, 2))
+    plt.grid()
+    plt.show()
+
+
+_plot_history(hist['accuracy'], hist['val_accuracy'], 'Model accuracy', 'Accuracy')
+_plot_history(hist['loss'], hist['val_loss'], 'Model loss', 'Loss')
+
+# Per-class accuracy (recall): diagonal entry divided by the row total.
+for class_name, row, hits in zip(types, cm, cm.diagonal()):
+    total = row.sum()
+    if total:
+        print(f'{class_name} - {((hits / total) * 100):.2f}%')
+    else:
+        # Avoid a 0/0 division when the class has no samples in y_test.
+        print(f'{class_name} - n/a (no samples of this class in y_test)')
+
+print("\nclassification_report: \n" + str(classification_report(y_test, test_pred)))
+
diff --git a/model.py b/model.py
@@ -0,0 +1,74 @@
+"""
+  This file is used to build and train our model.
+"""
+import pickle
+from keras import Model
+import tensorflow.keras as K
+from keras.optimizers import Adam
+from keras.callbacks import ModelCheckpoint
+from keras.layers import GlobalAveragePooling2D, BatchNormalization, Dense, Dropout
+
+def _load_pickle(path):
+    """Return the object unpickled from ``path``; the file is closed afterwards."""
+    # NOTE: unpickling can execute arbitrary code - only load trusted files.
+    with open(path, 'rb') as fh:
+        return pickle.load(fh)
+
+
+x_train = _load_pickle('dir../after_aug/x_train')
+y_train = _load_pickle('dir../after_aug/y_train')
+x_val = _load_pickle('dir../after_aug/x_val')
+y_val = _load_pickle('dir../after_aug/y_val')
+x_test = _load_pickle('dir../after_aug/x_test')
+y_test = _load_pickle('dir../after_aug/y_test')
+
+# ImageNet-pretrained Xception without its classification head, frozen for the
+# first training stage.
+base_model = K.applications.Xception(
+    weights='imagenet',
+    include_top=False,
+    input_shape=(299, 299, 3),
+    input_tensor=None,
+    pooling=None,
+    classifier_activation="softmax",
+)
+base_model.trainable = False
+
+
+def _norm_and_drop(tensor):
+    """Apply batch normalisation followed by 30% dropout to ``tensor``."""
+    tensor = BatchNormalization()(tensor)
+    return Dropout(0.3)(tensor)
+
+
+inputs = K.Input(shape=(299, 299, 3))
+
+# The backbone is called with training=False, then its feature maps are
+# averaged per channel before the dense head.
+x = base_model(inputs, training=False)
+x = GlobalAveragePooling2D()(x)
+x = _norm_and_drop(x)
+
+# Two fully connected blocks: Dense -> BatchNorm -> Dropout.
+for units in (256, 128):
+    x = Dense(units, activation='relu')(x)
+    x = _norm_and_drop(x)
+
+# Final layer: one softmax probability per class (6 classes).
+outputs = Dense(6, activation='softmax')(x)
+
+model = Model(inputs, outputs)
+
+def _train_stage(learning_rate, epochs, acc_path, loss_path, hist_path):
+    """Compile ``model``, fit it and pickle the resulting history.
+
+    Two checkpoints are kept per stage: the weights with the best validation
+    accuracy (``acc_path``) and those with the lowest validation loss
+    (``loss_path``). The Keras history dict is written to ``hist_path``.
+    Returns the History object produced by ``model.fit``.
+    """
+    opt = Adam(learning_rate=learning_rate)
+    model.compile(loss='sparse_categorical_crossentropy',
+                  optimizer=opt,
+                  metrics=['accuracy'])
+    callbacks_list = [
+        ModelCheckpoint(acc_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'),
+        ModelCheckpoint(loss_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min'),
+    ]
+    history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_val, y_val),
+                        batch_size=32, callbacks=callbacks_list)
+    with open(hist_path, 'wb') as file_pi:
+        pickle.dump(history.history, file_pi)
+    return history
+
+
+# Stage 1: train only the new head while the Xception backbone is frozen.
+hist = _train_stage(0.001, 15,
+                    "dir../first_train/max_acc",
+                    "dir../first_train/min_loss",
+                    'dir../first/hist')
+
+# Stage 2: unfreeze the backbone and fine-tune everything with a much smaller
+# learning rate (the model is re-compiled inside the helper).
+base_model.trainable = True
+hist = _train_stage(0.00001, 10,
+                    "dir../second_train/max_acc",
+                    "dir../second_train/min_loss",
+                    'dir../second/hist')
diff --git a/model/fingerprint.pb b/model/fingerprint.pb
diff --git a/model/keras_metadata.pb b/model/keras_metadata.pb
diff --git a/model/saved_model.pb b/model/saved_model.pb
diff --git a/predict.py b/predict.py
@@ -0,0 +1,33 @@
+"""
+    This file is used to load our model and predict a batch of images from a directory.
+"""
+import os
+import numpy as np
+from tqdm import tqdm
+import tensorflow.keras as K
+from tensorflow.keras.preprocessing import image
+from tensorflow.keras.preprocessing.image import img_to_array
+
+model = K.models.load_model("model_dir..")
+# Class names indexed by the model's output position.
+types = ['acne', 'carcinoma', 'eczema', 'keratosis', 'millia', 'rosacea']
+
+# Single source of truth for the input folder: the files that are listed are
+# loaded from this same directory (previously a different, hard-coded folder
+# name was used to build the file path).
+images_dir = 'images_to_predict_dir..'
+img_path = os.listdir(images_dir)
+for i in tqdm(img_path):
+    fname = os.path.join(images_dir, i)
+    # Same preprocessing as at training time: 299x299 input, Xception scaling.
+    img = image.load_img(fname, target_size=(299, 299))
+    x = img_to_array(img)
+    x = K.applications.xception.preprocess_input(x)
+
+    # Predict on a batch of one image and take its probability vector.
+    prediction = model.predict(np.array([x]))[0]
+
+    # Pair every class with its probability (in percent), most likely first.
+    result = [(class_name, float(prob) * 100.0) for class_name, prob in zip(types, prediction)]
+    result.sort(reverse=True, key=lambda pair: pair[1])
+
+    print(f'Image name: {i}')
+    for rank, (class_name, prob) in enumerate(result, start=1):
+        print("Top %d ====================" % rank)
+        print(class_name + ": %.2f%%" % (prob))
+
+    print("\n")
diff --git a/preprocessing.py b/preprocessing.py
@@ -0,0 +1,80 @@
+"""
+    This file is used to load the entire dataset, perform the required preprocessing and split the data into train, validation and test sets.
+"""
+import os
+import pickle
+import numpy as np
+import seaborn as sns
+from tqdm import tqdm
+import tensorflow.keras as K
+import matplotlib.pyplot as plt
+from tensorflow.keras.preprocessing import image
+from sklearn.model_selection import StratifiedShuffleSplit
+from tensorflow.keras.preprocessing.image import img_to_array
+
+
+dataset_dir = 'dataset_dir'
+img_path = os.listdir(dataset_dir)
+
+features = []
+labels = []
+# Class name (the part of the file name before the first '.') -> integer label.
+# Deliberately not called `dict`, which would shadow the builtin type.
+label_ids = {'acne': 0, 'carcinoma': 1, 'eczema': 2, 'keratosis': 3, 'mila': 4, 'rosacea': 5}
+for i in tqdm(img_path):
+    fname = os.path.join(dataset_dir, i)
+    # Resize to 299x299 and apply the Xception input preprocessing.
+    img = image.load_img(fname, target_size=(299, 299))
+    x = img_to_array(img)
+    x = K.applications.xception.preprocess_input(x)
+    features.append(x)
+    # A file whose prefix is not a known class raises KeyError here.
+    y = i.split(".")[0]
+    labels.append(label_ids[y])
+
+features = np.array(features)
+labels = np.array(labels)
+
+# Bar plot showing how many images each class has before augmentation.
+lbl, count = np.unique(labels, return_counts=True)
+temp = {0: 'acne', 1: 'carcinoma', 2: 'eczema', 3: 'keratosis', 4: 'millia', 5: 'rosacea'}
+# Display name -> number of images, in ascending label order.
+count_lbl = {temp[class_id]: n_images for class_id, n_images in zip(lbl, count)}
+sns.set_theme(style="whitegrid")
+ax = sns.barplot(x=list(count_lbl), y=list(count_lbl.values()))
+# Write the exact count on top of every bar.
+for container in ax.containers:
+    ax.bar_label(container)
+plt.title('Before augmentation')
+plt.show()
+
+# Shuffle images and labels with one shared permutation so they stay aligned.
+# NOTE(review): this permutation is unseeded, so the splits differ between
+# runs even though the splitters below use random_state=0.
+indices = np.random.permutation(len(features))
+features, labels = features[indices], labels[indices]
+
+# First stratified split: 80% training data, 20% held out.
+split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
+train_index, r_index = next(split.split(features, labels))
+x_train, y_train = features[train_index], labels[train_index]
+rx_test, ry_test = features[r_index], labels[r_index]
+
+# Second stratified split, applied to the held-out part only:
+# 85% of it becomes the validation set, the remaining 15% the test set.
+split = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=0)
+val_index, test_index = next(split.split(rx_test, ry_test))
+x_val, y_val = rx_test[val_index], ry_test[val_index]
+x_test, y_test = rx_test[test_index], ry_test[test_index]
+
+# Persist the three splits. The context manager guarantees every file is
+# flushed and closed even if pickling fails.
+for name, data in (
+    ("x_train", x_train),
+    ("y_train", y_train),
+    ("x_val", x_val),
+    ("y_val", y_val),
+    ("x_test", x_test),
+    ("y_test", y_test),
+):
+    with open(f"dir../{name}", "wb") as f:
+        pickle.dump(data, f)