Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
NadavIs56 authored May 11, 2023
1 parent 9bf26ed commit d1e2777
Show file tree
Hide file tree
Showing 9 changed files with 605 additions and 0 deletions.
81 changes: 81 additions & 0 deletions augmentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
This file is used to add augmented images to our dataset for classes that lack images, according to the output of "sets_visualization".
"""
import pickle
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from skimage.transform import rotate

def _load_pickle(path):
    """Return the object unpickled from ``path``; the file is closed afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE(review): the training set is read from the "after_aug" sub-directory
# while validation/test are read from its parent - confirm this is intended and
# that the script is not re-augmenting an already augmented training set.
x_train = _load_pickle('train_val_test_sets6/after_aug/x_train')
y_train = _load_pickle('train_val_test_sets6/after_aug/y_train')
x_val = _load_pickle('train_val_test_sets6/x_val')
y_val = _load_pickle('train_val_test_sets6/y_val')
x_test = _load_pickle('train_val_test_sets6/x_test')
y_test = _load_pickle('train_val_test_sets6/y_test')

def _rotate_45(img):
    """Return ``img`` rotated by 45 degrees, filling the corners by wrapping."""
    return rotate(img, angle=45, mode='wrap')


def _mirror_and_rotate_270(img):
    """Return ``img`` flipped left-right and then rotated by 270 degrees."""
    return rotate(np.fliplr(img), angle=270, mode='wrap')


# class label -> (stride, transforms).
# Every ``stride``-th training image of the class (counting from the first one
# encountered) receives one augmented copy per transform. Labels that are not
# listed here (e.g. 1) are not augmented.
_AUGMENTATION_PLAN = {
    0: (1, (_rotate_45,)),
    2: (3, (_rotate_45, np.flipud)),
    3: (6, (_rotate_45, np.flipud)),
    4: (1, (_rotate_45, np.flipud, np.fliplr, _mirror_and_rotate_270)),
    5: (9, (_rotate_45, np.flipud)),
}

x_train = np.asarray(x_train)
y_train = np.asarray(y_train)

# Augmented samples are collected in plain lists and attached to the training
# set in a single concatenation; growing the arrays with np.append inside the
# loop would re-allocate the complete image array for every added image.
seen_per_class = dict.fromkeys(_AUGMENTATION_PLAN, 0)
new_images, new_labels = [], []
for i in tqdm(range(len(x_train))):
    label = y_train[i].item()
    if label not in _AUGMENTATION_PLAN:
        continue
    stride, transforms = _AUGMENTATION_PLAN[label]
    if seen_per_class[label] % stride == 0:
        new_images.extend(transform(x_train[i]) for transform in transforms)
        new_labels.extend([label] * len(transforms))
    # The counter advances for every sample of the class, augmented or not.
    seen_per_class[label] += 1

if new_images:
    x_train = np.concatenate(
        [x_train] + [img[np.newaxis] for img in new_images], axis=0
    )
    y_train = np.concatenate(
        [y_train, np.asarray(new_labels, dtype=y_train.dtype)]
    )

# Shuffle so the augmented copies are not all grouped at the end of the set.
indices = np.random.permutation(len(x_train))
x_train = x_train[indices]
y_train = y_train[indices]

# Persist the augmented training set together with the untouched validation and
# test sets. ``with`` guarantees every file is closed even if pickling fails.
_AFTER_AUG_DIR = "dir../after_aug"
_DATASETS_TO_SAVE = (
    ("x_train", x_train),
    ("y_train", y_train),
    ("x_val", x_val),
    ("y_val", y_val),
    ("x_test", x_test),
    ("y_test", y_test),
)
for _name, _data in _DATASETS_TO_SAVE:
    with open(_AFTER_AUG_DIR + "/" + _name, "wb") as f:
        pickle.dump(_data, f)
71 changes: 71 additions & 0 deletions evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
This file is used to evaluate our model for fine-tuning and better understanding.
It shows the confusion matrix, accuracy & loss history plots and classification report.
"""

import numpy as np
import tensorflow.keras as K
import pickle
from sklearn.metrics import (ConfusionMatrixDisplay, confusion_matrix, classification_report)
import matplotlib.pyplot as plt


def _load_pickle(path):
    """Return the object unpickled from ``path``; the file is closed afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Test data, the pickled training history and the trained model.
x_test = _load_pickle('train_val_test_sets6/x_test')
y_test = _load_pickle('train_val_test_sets6/y_test')

hist = _load_pickle('hist_dir..')

model = K.models.load_model("model_dir..")

# Class names, indexed by the integer label the model predicts.
types = ['acne', 'carcinoma', 'eczema', 'keratosis', 'mila', 'rosacea']

# The predicted class of each test image is the index of its highest score.
predictions = model.predict(x_test)
test_pred = predictions.argmax(axis=1)

# Confusion matrix: printed as raw numbers and drawn as a labelled heat map.
cm = confusion_matrix(y_test, test_pred)
print("Confusion Matrix\n", cm)
_, matrix_axes = plt.subplots(figsize=(15, 15))
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=types).plot(
    ax=matrix_axes,
    cmap=plt.cm.Blues,
)
plt.show()

# Accuracy per epoch. A leading 0.0 is prepended to both curves so that the
# value of the first epoch is drawn at x=1.
acc_arr = [0.0, *hist['accuracy']]
val_acc_arr = [0.0, *hist['val_accuracy']]
plt.plot(acc_arr)
plt.plot(val_acc_arr)
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# The second curve is 'val_accuracy', i.e. the validation set, not the test set.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.ylim(bottom=0)
plt.xlim(left=0)
plt.xticks(np.arange(0, 25, 2))
plt.grid()
plt.show()

# Loss per epoch. A leading 0.0 is prepended to both curves so that the value
# of the first epoch is drawn at x=1.
loss_arr = [0.0, *hist['loss']]
val_loss_arr = [0.0, *hist['val_loss']]
plt.plot(loss_arr)
plt.plot(val_loss_arr)
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# The second curve is 'val_loss', i.e. the validation set, not the test set.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.ylim(bottom=0)
plt.xlim(left=0)
plt.xticks(np.arange(0, 25, 2))
plt.grid()
plt.show()

# Per-class hit rate: the diagonal entry of a confusion-matrix row divided by
# the row total, printed as a percentage.
for class_index, class_name in enumerate(types):
    class_row = cm[class_index]
    hit_rate = (class_row[class_index] / sum(class_row)) * 100
    print(f'{class_name} - {hit_rate:.2f}%')

report = classification_report(y_test, test_pred)
print("\nclassification_report: \n" + str(report))

74 changes: 74 additions & 0 deletions model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
This file is used to build and train our model.
"""
import pickle
from keras import Model
import tensorflow.keras as K
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.layers import GlobalAveragePooling2D, BatchNormalization, Dense, Dropout

def _load_pickle(path):
    """Return the object unpickled from ``path``; the file is closed afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Train / validation / test sets written by the augmentation step.
x_train = _load_pickle('dir../after_aug/x_train')
y_train = _load_pickle('dir../after_aug/y_train')
x_val = _load_pickle('dir../after_aug/x_val')
y_val = _load_pickle('dir../after_aug/y_val')
x_test = _load_pickle('dir../after_aug/x_test')
y_test = _load_pickle('dir../after_aug/y_test')

# Xception backbone with ImageNet weights and without its classification top.
xception_options = {
    'include_top': False,
    'weights': 'imagenet',
    'input_tensor': None,
    'input_shape': (299, 299, 3),
    'pooling': None,
    'classifier_activation': "softmax",
}
base_model = K.applications.Xception(**xception_options)

# Freeze the backbone for the first training run; only the new head is trained.
base_model.trainable = False

def _batch_norm_then_dropout(tensor):
    """Apply a fresh BatchNormalization layer followed by Dropout(0.3)."""
    tensor = BatchNormalization()(tensor)
    return Dropout(0.3)(tensor)


inputs = K.Input(shape=(299, 299, 3))

# The backbone is called with training=False, then its feature maps are pooled.
head = base_model(inputs, training=False)
head = GlobalAveragePooling2D()(head)
head = _batch_norm_then_dropout(head)

# Two fully connected stages, each followed by batch-norm and dropout.
for units in (256, 128):
    head = Dense(units, activation='relu')(head)
    head = _batch_norm_then_dropout(head)

outputs = Dense(6, activation='softmax')(head)  # final layer

model = Model(inputs, outputs)

# First training run: compile with Adam at learning rate 0.001 and fit for 15
# epochs.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Keep the best model seen so far by validation accuracy and by validation loss.
callbacks_list = [
    ModelCheckpoint(
        "dir../first_train/max_acc",
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1,
    ),
    ModelCheckpoint(
        "dir../first_train/min_loss",
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=1,
    ),
]

hist = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=15,
    batch_size=32,
    callbacks=callbacks_list,
)

# Store the per-epoch metrics of this run.
with open('dir../first/hist', 'wb') as history_file:
    pickle.dump(hist.history, history_file)


# Second training run: make the backbone trainable and re-compile with a much
# smaller learning rate (0.00001) before fitting for 10 more epochs.
base_model.trainable = True

model.compile(
    optimizer=Adam(learning_rate=0.00001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Keep the best model seen so far by validation accuracy and by validation loss.
callbacks_list = [
    ModelCheckpoint(
        "dir../second_train/max_acc",
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1,
    ),
    ModelCheckpoint(
        "dir../second_train/min_loss",
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=1,
    ),
]

hist = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=32,
    callbacks=callbacks_list,
)

# Store the per-epoch metrics of this run.
with open('dir../second/hist', 'wb') as history_file:
    pickle.dump(hist.history, history_file)
Binary file added model/fingerprint.pb
Binary file not shown.
206 changes: 206 additions & 0 deletions model/keras_metadata.pb

Large diffs are not rendered by default.

Binary file added model/saved_model.pb
Binary file not shown.
33 changes: 33 additions & 0 deletions predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
This file is used to load our model and predict a batch of images from a directory.
"""
import os
import numpy as np
from tqdm import tqdm
import tensorflow.keras as K
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array

# Directory holding the images to classify. The same directory is used both for
# listing the files and for loading them, so the two can no longer disagree.
IMAGES_DIR = 'images_to_predict_dir..'

model = K.models.load_model("model_dir..")
# Class names, indexed by the position of the score in the model output.
types = ['acne', 'carcinoma', 'eczema', 'keratosis', 'millia', 'rosacea']

img_path = os.listdir(IMAGES_DIR)
for image_name in tqdm(img_path):
    # Load and preprocess the image the same way as the training data.
    fname = os.path.join(IMAGES_DIR, image_name)
    img = image.load_img(fname, target_size=(299, 299))
    pixels = K.applications.xception.preprocess_input(img_to_array(img))

    # The model receives a batch of one image; [0] takes that image's scores.
    prediction = model.predict(np.array([pixels]))[0]

    # (class name, probability in percent), most likely class first.
    result = sorted(
        ((name, float(score) * 100.0) for name, score in zip(types, prediction)),
        key=lambda pair: pair[1],
        reverse=True,
    )

    print(f'Image name: {image_name}')
    for rank, (class_name, prob) in enumerate(result, start=1):
        print("Top %d ====================" % rank)
        print(class_name + ": %.2f%%" % (prob))

    print("\n")
80 changes: 80 additions & 0 deletions preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
This file is used to load the entire dataset, perform the required preprocessing and split the data into train, validation and test sets.
"""
import os
import pickle
import numpy as np
import seaborn as sns
from tqdm import tqdm
import tensorflow.keras as K
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.preprocessing.image import img_to_array


DATASET_DIR = 'dataset_dir'

# Class name -> integer label. The class name is the part of the file name
# before the first "." (a file whose prefix is not listed raises KeyError).
label_by_class = {'acne': 0, 'carcinoma': 1, 'eczema': 2, 'keratosis': 3, 'mila': 4, 'rosacea': 5}

img_path = os.listdir(DATASET_DIR)

features = []
labels = []
for file_name in tqdm(img_path):
    # Resize to 299x299 and apply the Xception input preprocessing.
    img = image.load_img(os.path.join(DATASET_DIR, file_name), target_size=(299, 299))
    pixels = K.applications.xception.preprocess_input(img_to_array(img))
    features.append(pixels)
    labels.append(label_by_class[file_name.split(".")[0]])

features = np.array(features)
labels = np.array(labels)

# Bar plot showing how many images each class has before augmentation.
temp = {0: 'acne', 1: 'carcinoma', 2: 'eczema', 3: 'keratosis', 4: 'millia', 5: 'rosacea'}
lbl, count = np.unique(labels, return_counts=True)
count_lbl = {temp[class_id]: n_images for class_id, n_images in zip(lbl, count)}

sns.set_theme(style="whitegrid")
ax = sns.barplot(x=list(count_lbl), y=list(count_lbl.values()))
# Write the count on top of every bar.
for bar_group in ax.containers:
    ax.bar_label(bar_group)
plt.title('Before augmentation')
plt.show()

# Shuffle images and labels with the same random order.
shuffled_order = np.random.permutation(len(features))
features, labels = features[shuffled_order], labels[shuffled_order]

# First stratified split: 80% training data, 20% held out.
holdout_splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
train_index, r_index = next(holdout_splitter.split(features, labels))
x_train, y_train = features[train_index], labels[train_index]
rx_test, ry_test = features[r_index], labels[r_index]

# Second stratified split: the held-out part is divided into a validation set
# (85% of it) and a test set (15% of it).
val_test_splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=0)
val_index, test_index = next(val_test_splitter.split(rx_test, ry_test))
x_val, y_val = rx_test[val_index], ry_test[val_index]
x_test, y_test = rx_test[test_index], ry_test[test_index]

# Persist the three splits. ``with`` guarantees every file is closed even if
# pickling fails.
_SPLITS_DIR = "dir.."
_SPLITS_TO_SAVE = (
    ("x_train", x_train),
    ("y_train", y_train),
    ("x_val", x_val),
    ("y_val", y_val),
    ("x_test", x_test),
    ("y_test", y_test),
)
for _name, _data in _SPLITS_TO_SAVE:
    with open(_SPLITS_DIR + "/" + _name, "wb") as f:
        pickle.dump(_data, f)
Loading

0 comments on commit d1e2777

Please sign in to comment.