diff --git a/workshop/places365_train.ipynb b/workshop/places365_train.ipynb new file mode 100644 index 0000000..4a4e8d0 --- /dev/null +++ b/workshop/places365_train.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "mLjoHocGsXa_" + }, + "source": [ + "## Preparing the environment" + ] + }, + { + "cell_type": "code", + "source": [ + "!gdown 1y-LdQ_4dbOip6sBgZ-Ub1FI6Hh5kl3h1" + ], + "metadata": { + "id": "x9DwxHaAj3V4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SdcVz796SPRJ" + }, + "outputs": [], + "source": [ + "!pip install plot_keras_history" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUByrvr7sbj0" + }, + "source": [ + "## Preparing the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OvyDyYG2ybOv" + }, + "outputs": [], + "source": [ + "dataset_name = \"places365_300\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UFM1GfuOyD7v" + }, + "outputs": [], + "source": [ + "!tar -xf {dataset_name}.tar" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m-KvXhGgseu5" + }, + "source": [ + "## Importing the necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AxR7wv3AzmKp" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "\n", + "from plot_keras_history import show_history\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "import seaborn as sns\n", + "from mpl_toolkits.axes_grid1 import ImageGrid\n", + "import math" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MegEbOfYskRE" + }, + "source": [ + "## Preprocessing the dataset" + ] + }, + { + "cell_type": "markdown", + "source": [ + "The following code loads images and builds a preprocessing pipeline within Datasets (Tensorflow-specific structures providing input data).\n", + "\n", + "Documentation: https://www.tensorflow.org/guide/data" + ], + "metadata": { + "id": "YlN_UCYM4oH9" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yuAsr9Aq1ZdG" + }, + "outputs": [], + "source": [ + "class_names = [\"alley\", \"entrance_hall\", \"park\"]\n", + "\n", + "\n", + "def get_label(file_path):\n", + " parts = tf.strings.split(file_path, os.path.sep)\n", + " one_hot = parts[-2] == class_names\n", + " return tf.argmax(one_hot)\n", + "\n", + "\n", + "def process_path(file_path, img_size=(224, 224)):\n", + " label = get_label(file_path)\n", + " img = tf.io.read_file(file_path)\n", + " img = tf.io.decode_jpeg(img, channels=3)\n", + " img = tf.image.resize(img, [img_size[0], img_size[1]])\n", + " return img, label\n", + "\n", + "\n", + "def build_dataset(path, sub_path):\n", + " ds = tf.data.Dataset.list_files(str(f\"{path}/{sub_path}/*/*\"))\n", + " ds = ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)\n", + " ds = ds.cache()\n", + " ds = ds.batch(64)\n", + " ds = ds.prefetch(tf.data.AUTOTUNE)\n", + " return ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NYDLidr11Sri" + }, + "outputs": [], + "source": [ + "train_ds = build_dataset(dataset_name, \"train\")\n", + "val_ds = build_dataset(dataset_name, \"val\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LZIBzMs1soYP" + }, + "source": [ + "## Building the model" + ] + }, + { + "cell_type": "markdown", + "source": [ + "This code builds a model that will be trained in the following cells. The field `base_model` is initialized with a pre-defined architecture loaded from Keras.\n", + "\n", + "You can find the list of possible architectures at https://keras.io/api/applications/\n", + "\n", + "On top of `base_model`, we add a fully-connected layer with 3 neurons, and a softmax activation function. This settings allows scaling the output in a way that it can be interpreted as a probability distribution where all of the probabilities sum up to 1.\n" + ], + "metadata": { + "id": "ws_I8t8f5QeB" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "l7UrpLIa9QR3" + }, + "outputs": [], + "source": [ + "class Places365Model(tf.keras.Model):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.base_model = tf.keras.applications.MobileNet(\n", + " input_shape=(224, 224, 3),\n", + " include_top=False,\n", + " weights=None,\n", + " classes=3,\n", + " pooling=\"avg\"\n", + " )\n", + " self.fc = tf.keras.layers.Dense(3, activation='softmax')\n", + "\n", + " def call(self, x):\n", + " x = self.base_model(x)\n", + " return self.fc(x)\n", + "\n", + "\n", + "model = Places365Model()\n", + "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W8rJpL9Ss8Wt" + }, + "source": [ + "## Training the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jzNnDGkKzVDc" + }, + "outputs": [], + "source": [ + "history = model.fit(\n", + " train_ds,\n", + " epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "__kwjFK9sKSK" + }, + "source": [ + "## Plotting the metrics" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's see the results of our training!" + ], + "metadata": { + "id": "Lhdx7LkU_vxx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6pFRBNs6SJjm" + }, + "outputs": [], + "source": [ + "show_history(history)\n", + "plt.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mmQHHkyCr8Cg" + }, + "source": [ + "## Confusion matrix" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Confusion matrix is a tool that helps us to interpret the results and draw conclusions about the characteristics of the model.\n", + "\n", + "It is a table presenting the number of correct predictions, and showing us how many mistakes it made, specifically focusing on the confusions between classes.\n", + "\n", + "A row represents the true class, and column represents a predicted one." + ], + "metadata": { + "id": "KNtWihJPAfes" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hByY7yt81t9o" + }, + "outputs": [], + "source": [ + "results = model.predict(val_ds)\n", + "ds = list(val_ds.unbatch().as_numpy_iterator())\n", + "\n", + "y_true = [class_names[entry[1]] for entry in ds]\n", + "y_pred = [class_names[np.argmax(result)] for result in results]\n", + "\n", + "matrix_confusion = confusion_matrix(y_true, y_pred)\n", + "f, ax = plt.subplots(figsize=(10, 10))\n", + "ax = sns.heatmap(matrix_confusion, square=True, annot=True, cmap='Blues', fmt='d', cbar=False,\n", + " xticklabels=class_names, yticklabels=class_names, annot_kws={\"fontsize\": 20})\n", + "ax.xaxis.tick_top()\n", + "ax.xaxis.set_label_position('top')\n", + "plt.tick_params(axis='both', which='major', labelsize=25, labelbottom=False, left=False, bottom=False, top=False, labeltop=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRDeeXIr3HNo" + }, + "source": [ + "## Classification report" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's see the classifiation report!\n", + "\n", + "We can observe the following metrics:\n", + "- **precision** - this a frequentist probability that the predicted class is true. It is calculated with the equation TP / (TP + FP).\n", + "With just one positive and correct prediction, precision will have a value of 1, even if all the other ones were False Negatives.\n", + "- **recall** - this is a frequentist probability that the true class is not missed. It is calculated with the equation TP / (TP + FN). If all the predictions are positive, recall will have a value of 1.\n", + "\n", + "The rule is that increasing precision is done at the cost of recall, and vice-versa increasing recall decreases precision. This is why, we have another metric:\n", + "- **f1** - a harmonic mean between precision and recall: 2 * precision * recall / (precision + recall).\n", + "\n", + "On the other hand, we have:\n", + "- **accuracy** - it tells us what is the ratio of correct predictions (in terms of both True Positives and True Negatives). It is calculated with the equation (TP + TN) / (TP + FP + TN + FN)\n", + "\n", + "In general, f1 and accuracy provide better information about the quality of the model, whiile precision and recall can describe its additional characteristics.\n", + "\n", + "When the dataset is unbalanced, f1 is much more reliable than accuracy.\n", + "\n", + "\n", + "The following article explains all the above quite well:\n", + "\n", + "https://medium.com/analytics-vidhya/confusion-matrix-accuracy-precision-recall-f1-score-ade299cf63cd" + ], + "metadata": { + "id": "VjeQNCVsBScm" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aS5LhPp13Let" + }, + "outputs": [], + "source": [ + "print(classification_report(y_true, y_pred, target_names=class_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rtHJu6P31uIK" + }, + "source": [ + "## Misclassified images" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's plot the misclassified images and see how our model confused their classes!" + ], + "metadata": { + "id": "iU16Qm19Goai" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LYtKyhSTtd1f" + }, + "outputs": [], + "source": [ + "images_to_plot = []\n", + "\n", + "for result, gt in zip(results, ds):\n", + " img, label = gt\n", + " predicted_class = np.argmax(result)\n", + " if predicted_class != label:\n", + " images_to_plot.append((img, label, predicted_class))\n", + "\n", + "fig = plt.figure(figsize=(128., 128.))\n", + "grid = ImageGrid(fig, 111, nrows_ncols=(math.ceil(len(images_to_plot) / 4), 4), axes_pad=0.6,)\n", + "\n", + "for ax, im in zip(grid, images_to_plot):\n", + " ax.set_title(f\"True: {class_names[im[1]]}, predicted: {class_names[im[2]]}\", fontdict=None, loc='center', color = \"k\", fontsize=15)\n", + " ax.imshow(im[0] / 255)\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "places365 - train.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file