diff --git a/02_lab/lab1_regression_faces.ipynb b/02_lab/lab1_regression_faces.ipynb
new file mode 100644
index 0000000..22ccf3b
--- /dev/null
+++ b/02_lab/lab1_regression_faces.ipynb
@@ -0,0 +1,382 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Today's data\n",
+    "\n",
+    "400 photos of human faces. Each face is a 2d array [64x64] of pixel brightness."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import fetch_olivetti_faces\n",
+    "data = fetch_olivetti_faces().images"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# this code showcases matplotlib subplots. The syntax is: plt.subplot(height, width, index_starting_from_1)\n",
+    "plt.subplot(2,2,1)\n",
+    "plt.imshow(data[0],cmap='gray')\n",
+    "plt.subplot(2,2,2)\n",
+    "plt.imshow(data[1],cmap='gray')\n",
+    "plt.subplot(2,2,3)\n",
+    "plt.imshow(data[2],cmap='gray')\n",
+    "plt.subplot(2,2,4)\n",
+    "plt.imshow(data[3],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Face reconstruction problem\n",
+    "\n",
+    "Let's solve the face reconstruction problem: given left halves of faces __(X)__, our algorithm shall predict the right halves __(y)__. Our first step is to slice the photos into X and y using array slicing.\n",
+    "\n",
+    "* In regular python, a slice looks roughly like this: `a[2:5]` _(select elements 2, 3 and 4)_\n",
+    "* Numpy allows you to slice N-dimensional arrays along each dimension: [image_index, height, width]\n",
+    "  * `data[:10]` - Select the first 10 images\n",
+    "  * `data[:, :10]` - For all images, select a horizontal stripe 10 pixels high at the top of the image\n",
+    "  * `data[10:20, :, -25:-15]` - Take images [10, 11, ..., 19]; for each image select a _vertical stripe_ of width 10 pixels, 15 pixels away from the _right_ side.\n",
+    "\n",
+    "\n",
+    "Let's use slices to select all __left image halves as X__ and all __right halves as y__."
+   ]
+  },
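+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional sanity check: before writing X and y, you can print the shapes produced by the example slices above to make sure the indexing order [image_index, height, width] is clear (no answers given away here)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# shapes produced by the example slices above; data has shape (400, 64, 64)\n",
+    "print(data.shape)\n",
+    "print(data[:10].shape)                # first 10 images\n",
+    "print(data[:, :10].shape)             # top 10-pixel stripe of every image\n",
+    "print(data[10:20, :, -25:-15].shape)  # 10-pixel-wide vertical stripe of images 10..19"
+   ]
+  },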
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# select left half of each face as X, right half as Y\n",
+    "X = \n",
+    "y = "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you did everything right, you should see the left half-image and the right half-image drawn separately, in natural order\n",
+    "plt.subplot(1,2,1)\n",
+    "plt.imshow(X[0],cmap='gray')\n",
+    "plt.subplot(1,2,2)\n",
+    "plt.imshow(y[0],cmap='gray')\n",
+    "\n",
+    "assert X.shape == y.shape == (len(data), 64, 32), \"Please slice exactly the left half-face to X and the right half-face to Y\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def glue(left_half,right_half):\n",
+    "    # merge photos back together\n",
+    "    left_half = left_half.reshape([-1,64,32])\n",
+    "    right_half = right_half.reshape([-1,64,32])\n",
+    "    return np.concatenate([left_half,right_half],axis=-1)\n",
+    "\n",
+    "\n",
+    "# if you did everything right, you should see a valid face\n",
+    "plt.imshow(glue(X,y)[99],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Machine learning stuff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train,X_test,Y_train,Y_test = train_test_split(X.reshape([len(X),-1]),\n",
+    "                                                 y.reshape([len(y),-1]),\n",
+    "                                                 test_size=0.05,random_state=42)\n",
+    "\n",
+    "print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "model = LinearRegression()\n",
+    "model.fit(X_train,Y_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Measure the mean squared error"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "print(\"Train MSE:\", mean_squared_error(Y_train,model.predict(X_train)))\n",
+    "print(\"Test MSE:\", mean_squared_error(Y_test,model.predict(X_test)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train predictions\n",
+    "pics = glue(X_train,model.predict(X_train))\n",
+    "plt.figure(figsize=[16,12])\n",
+    "for i in range(20):\n",
+    "    plt.subplot(4,5,i+1)\n",
+    "    plt.imshow(pics[i],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test predictions\n",
+    "pics = glue(X_test,model.predict(X_test))\n",
+    "plt.figure(figsize=[16,12])\n",
+    "for i in range(20):\n",
+    "    plt.subplot(4,5,i+1)\n",
+    "    plt.imshow(pics[i],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "\n",
+    "# Ridge regression\n",
+    "Ridge regression is just linear regression with L2 regularization: the loss is additionally penalized by $ \\alpha \\cdot \\sum _i w_i^2$.\n",
+    "\n",
+    "Let's train such a model with alpha=0.5"
+   ]
+  },
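+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below is a minimal sketch of what fitting and scoring such a model could look like (one possible way, reusing X_train/Y_train/X_test/Y_test from the split above; the `ridge_demo` name is only for this illustration). The actual exercise cells follow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sketch: fit Ridge with alpha=0.5 and report train/test MSE\n",
+    "from sklearn.linear_model import Ridge\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "ridge_demo = Ridge(alpha=0.5)   # demo-only name, so it doesn't clash with your own `ridge` below\n",
+    "ridge_demo.fit(X_train, Y_train)\n",
+    "print(\"Train MSE:\", mean_squared_error(Y_train, ridge_demo.predict(X_train)))\n",
+    "print(\"Test MSE:\", mean_squared_error(Y_test, ridge_demo.predict(X_test)))"
+   ]
+  },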
"execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import Ridge\n", + "\n", + "ridge = Ridge(alpha=0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Test predictions\n", + "pics = glue(X_test,ridge.predict(X_test))\n", + "plt.figure(figsize=[16,12])\n", + "for i in range(20):\n", + " plt.subplot(4,5,i+1)\n", + " plt.imshow(pics[i],cmap='gray')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "# Grid search\n", + "\n", + "Train model with diferent $\\alpha$ and find one that has minimal test MSE. It's okay to use loops or any other python stuff here." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Test predictions\n", + "pics = glue(X_test,)\n", + "plt.figure(figsize=[16,12])\n", + "for i in range(20):\n", + " plt.subplot(4,5,i+1)\n", + " plt.imshow(pics[i],cmap='gray')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/02_lab/lab2_classification.ipynb b/02_lab/lab2_classification.ipynb new file mode 100644 index 0000000..da81961 --- /dev/null +++ b/02_lab/lab2_classification.ipynb @@ -0,0 +1,484 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logistic regression\n", + "\n", + "In this seminar you will implement a logistic regression and train it using stochastic gradient descent modiffications, numpy and your brain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#load our dakka\n", + "import numpy as np\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Two-dimensional classification problem\n", + "\n", + "To make things more intuitive, let's solve a 2D classification problem with syntetic data." 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    ""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Test predictions\n",
+    "pics = glue(X_test,)\n",
+    "plt.figure(figsize=[16,12])\n",
+    "for i in range(20):\n",
+    "    plt.subplot(4,5,i+1)\n",
+    "    plt.imshow(pics[i],cmap='gray')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_lab/lab2_classification.ipynb b/02_lab/lab2_classification.ipynb
new file mode 100644
index 0000000..da81961
--- /dev/null
+++ b/02_lab/lab2_classification.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Logistic regression\n",
+    "\n",
+    "In this seminar you will implement logistic regression and train it using stochastic gradient descent modifications, numpy and your brain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#load our dakka\n",
+    "import numpy as np\n",
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Two-dimensional classification problem\n",
+    "\n",
+    "To make things more intuitive, let's solve a 2D classification problem with synthetic data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn import datasets, preprocessing\n",
+    "\n",
+    "(X, y) = datasets.make_circles(n_samples=1024, shuffle=True, noise=0.2, factor=0.4)\n",
+    "ind = np.logical_or(y==1, X[:,1] > X[:,0] - 0.5)\n",
+    "X = X[ind,:]\n",
+    "m = np.array([[1, 1], [-2, 1]])\n",
+    "X = preprocessing.scale(X)\n",
+    "y = y[ind]\n",
+    "\n",
+    "\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "print(\"X:\\n{}\\ny:\\n{}\".format(X[:3],y[:3]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Your task starts here**\n",
+    "\n",
+    "Since the problem above isn't linearly separable, we add quadratic features to the classifier.\n",
+    "\n",
+    "Implement this transformation in the __expand__ function __[1 point]__. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def expand(X):\n",
+    "    \"\"\"\n",
+    "    Adds quadratic features. \n",
+    "    This function allows your linear model to make non-linear separation.\n",
+    "    \n",
+    "    For each sample (row in matrix), compute an expanded row:\n",
+    "    [feature0, feature1, feature0^2, feature1^2, feature0*feature1, 1]\n",
+    "    \n",
+    "    :param X: matrix of features, shape [n_samples,2]\n",
+    "    :returns: expanded features of shape [n_samples,6]\n",
+    "    \"\"\"\n",
+    "    X_expanded = np.zeros((X.shape[0], 6))\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    return X_expanded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#simple test on random numbers\n",
+    "#[all 8 random numbers are 100% random :P]\n",
+    "dummy_X = np.array([\n",
+    "    [0,0],\n",
+    "    [1,0],\n",
+    "    [2.61,-1.28],\n",
+    "    [-0.59,2.1]\n",
+    "    ])\n",
+    "\n",
+    "#call your expand function\n",
+    "dummy_expanded = expand(dummy_X)\n",
+    "\n",
+    "#what it should have returned:   x0       x1       x0^2     x1^2     x0*x1    1\n",
+    "dummy_expanded_ans = np.array([[ 0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ],\n",
+    "                               [ 1.    ,  0.    ,  1.    ,  0.    ,  0.    ,  1.    ],\n",
+    "                               [ 2.61  , -1.28  ,  6.8121,  1.6384, -3.3408,  1.    ],\n",
+    "                               [-0.59  ,  2.1   ,  0.3481,  4.41  , -1.239 ,  1.    ]])\n",
+    "\n",
+    "#tests\n",
+    "assert isinstance(dummy_expanded,np.ndarray), \"please make sure you return numpy array\"\n",
+    "assert dummy_expanded.shape==dummy_expanded_ans.shape, \"please make sure your shape is correct\"\n",
+    "assert np.allclose(dummy_expanded,dummy_expanded_ans,1e-3), \"Something's out of order with features\"\n",
+    "\n",
+    "print(\"Seems legit!\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Logistic regression\n",
+    "Now, let's write a function that predicts the class probability given X, as in logistic regression.\n",
+    "\n",
+    "The math should look like this:\n",
+    "\n",
+    "$$ P(y| \\vec x, \\vec w) = \\sigma(\\vec x \\cdot \\vec w )$$\n",
+    "\n",
+    "where x represents features, w are weights and $$\\sigma(a) = {1 \\over {1+e^{-a}}}$$\n",
+    "\n",
+    "We shall omit $ \\vec {arrows} $ in further formulae for simplicity."
+   ]
+  },
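+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, here is what $\\sigma$ looks like in code: a minimal, numerically-stable sketch (whether you reuse it inside `classify` is up to you)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sigmoid(a):\n",
+    "    # clip the argument so np.exp never overflows for very negative a\n",
+    "    return 1.0 / (1.0 + np.exp(-np.clip(a, -500, 500)))\n",
+    "\n",
+    "print(sigmoid(np.array([-5.0, 0.0, 5.0])))  # roughly [0.0067, 0.5, 0.9933]"
+   ]
+  },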
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def classify(X, w):\n",
+    "    \"\"\"\n",
+    "    Given input features and weights,\n",
+    "    return predicted probabilities of y==1 given x, P(y=1|x), see description above\n",
+    "    \n",
+    "    __don't forget to expand X inside classify and other functions__\n",
+    "    \n",
+    "    :param X: feature matrix X of shape [n_samples,2] (non-expanded)\n",
+    "    :param w: weight vector w of shape [6] for each of the expanded features\n",
+    "    :returns: an array of predicted probabilities in [0,1] interval.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    \n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#sample usage / test just as the previous one\n",
+    "dummy_weights = np.linspace(-1,1,6)\n",
+    "\n",
+    "dummy_probs = classify(dummy_X,dummy_weights)\n",
+    "\n",
+    "dummy_answers = np.array([ 0.73105858,  0.450166  ,  0.02020883,  0.59844257])\n",
+    "\n",
+    "assert isinstance(dummy_probs,np.ndarray), \"please return np.array\"\n",
+    "assert dummy_probs.shape == dummy_answers.shape, \"please return a 1-d vector with answers for each object\"\n",
+    "assert np.allclose(dummy_probs,dummy_answers,1e-3), \"There's something non-canonical about how probabilities are computed\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The loss you should try to minimize is the Logistic Loss aka crossentropy aka negative log-likelihood:\n",
+    "\n",
+    "$$ L = - {1 \\over N} \\sum_i \\left[ y_i \\cdot \\log P(y_i|x_i,w) + (1-y_i) \\cdot \\log (1-P(y_i|x_i,w)) \\right]$$\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def compute_loss(X, y, w):\n",
+    "    \"\"\"\n",
+    "    Given feature matrix X [n_samples,2], target vector [n_samples] of 0/1,\n",
+    "    and weight vector w [6], compute the scalar loss function using the formula above.\n",
+    "    \"\"\"\n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "dummy_y = np.array([0,1,0,1])\n",
+    "dummy_loss = compute_loss(dummy_X,dummy_y,dummy_weights)\n",
+    "\n",
+    "assert np.allclose(dummy_loss,0.66131), \"something wrong with loss\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since we train our model with gradient descent, we need to compute gradients.\n",
+    "\n",
+    "To be specific, we need the derivative of the loss function with respect to each weight [6 of them].\n",
+    "\n",
+    "$$ \\nabla L = {\\partial L \\over \\partial w} = ...$$\n",
+    "\n",
+    "No, we won't be giving you the exact formula this time. Instead, try figuring out the derivative with pen and paper. \n",
+    "\n",
+    "As usual, we've made a small test for you, but if you need more, feel free to check your math against finite differences (estimate how L changes if you shift w by $10^{-5}$ or so)."
+   ]
+  },
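+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A sketch of such a finite-difference check is below (the eps value is arbitrary). Once `compute_grad` is implemented, the analytic and numeric gradients should roughly agree."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def numeric_grad(X, y, w, eps=1e-5):\n",
+    "    # central finite differences: perturb each weight by +-eps and see how the loss changes\n",
+    "    grad = np.zeros_like(w, dtype=float)\n",
+    "    for j in range(len(w)):\n",
+    "        w_plus, w_minus = w.astype(float), w.astype(float)\n",
+    "        w_plus[j] += eps\n",
+    "        w_minus[j] -= eps\n",
+    "        grad[j] = (compute_loss(X, y, w_plus) - compute_loss(X, y, w_minus)) / (2 * eps)\n",
+    "    return grad\n",
+    "\n",
+    "# after implementing compute_grad below, you can compare:\n",
+    "# assert np.allclose(compute_grad(dummy_X, dummy_y, dummy_weights),\n",
+    "#                    numeric_grad(dummy_X, dummy_y, dummy_weights), atol=1e-4)"
+   ]
+  },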
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def compute_grad(X, y, w):\n",
+    "    \"\"\"\n",
+    "    Given feature matrix X [n_samples,2], target vector [n_samples] of 0/1,\n",
+    "    and weight vector w [6], compute the vector [6] of derivatives of L with respect to each weight.\n",
+    "    \"\"\"\n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#tests\n",
+    "dummy_grads = compute_grad(dummy_X,dummy_y,dummy_weights)\n",
+    "\n",
+    "#correct answers in canonic form\n",
+    "dummy_grads_ans = np.array([-0.06504252, -0.21728448, -0.1379879 , -0.43443953,  0.107504  , -0.05003101])\n",
+    "\n",
+    "assert isinstance(dummy_grads,np.ndarray)\n",
+    "assert dummy_grads.shape == (6,), \"must return a vector of gradients for each weight\"\n",
+    "assert len(set(np.round(dummy_grads/dummy_grads_ans,3))) == 1, \"gradients are wrong\"\n",
+    "assert np.allclose(dummy_grads,dummy_grads_ans,1e-3), \"gradients are off by a coefficient\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here's an auxiliary function that visualizes the predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from IPython import display\n",
+    "\n",
+    "h = 0.01\n",
+    "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
+    "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
+    "xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n",
+    "def visualize(X, y, w, history):\n",
+    "    \"\"\"draws classifier prediction with matplotlib magic\"\"\"\n",
+    "    Z = classify(np.c_[xx.ravel(), yy.ravel()], w)\n",
+    "    Z = Z.reshape(xx.shape)\n",
+    "    plt.subplot(1,2,1)\n",
+    "    plt.contourf(xx, yy, Z, alpha=0.8)\n",
+    "    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)\n",
+    "    plt.xlim(xx.min(), xx.max())\n",
+    "    plt.ylim(yy.min(), yy.max())\n",
+    "    \n",
+    "    plt.subplot(1,2,2)\n",
+    "    plt.plot(history)\n",
+    "    plt.grid()\n",
+    "    ymin, ymax = plt.ylim()\n",
+    "    plt.ylim(0, ymax)\n",
+    "    display.clear_output(wait=True)\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "visualize(X,y,dummy_weights,[1,0.5,0.25])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Training\n",
+    "In this section, we'll use the functions you wrote to train our classifier using stochastic gradient descent.\n",
+    "\n",
+    "Try to find an optimal learning rate for gradient descent for the given batch size.\n",
+    "\n",
+    "**Don't change the batch size!**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "w = np.array([0,0,0,0,0,1])\n",
+    "\n",
+    "\n",
+    "alpha = \n",
+    "\n",
+    "n_iter = 50\n",
+    "batch_size = 4\n",
+    "loss = np.zeros(n_iter)\n",
+    "plt.figure(figsize=(12,5))\n",
+    "for i in range(n_iter):\n",
+    "    ind = np.random.choice(X.shape[0], batch_size)\n",
+    "    loss[i] = compute_loss(X, y, w)\n",
+    "    visualize(X[ind,:], y[ind], w, loss)\n",
+    "    \n",
+    "    w = w - alpha * compute_grad(X[ind,:], y[ind], w)\n",
+    "\n",
+    "visualize(X, y, w, loss)\n",
+    "plt.clf()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "# Bonus quest \n",
+    "\n",
+    "If you're done and there's still time left, try implementing __momentum SGD__ as described [here](https://distill.pub/2017/momentum/).\n",
+    "\n",
+    "Find alpha & beta that result in the fastest convergence."
+   ]
+  },
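+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The update rule itself is short; below is a minimal sketch of the loop shape. The alpha/beta values there are arbitrary placeholders - finding good ones is the actual quest - and it reuses n_iter and batch_size from the training cell above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sketch of momentum SGD: z accumulates an exponential moving average of gradients\n",
+    "w_demo = np.array([0, 0, 0, 0, 0, 1], dtype=float)\n",
+    "z_demo = np.zeros(6)\n",
+    "alpha_demo, beta_demo = 0.05, 0.9   # placeholders, not tuned answers\n",
+    "\n",
+    "for i in range(n_iter):\n",
+    "    ind = np.random.choice(X.shape[0], batch_size)\n",
+    "    z_demo = beta_demo * z_demo + compute_grad(X[ind, :], y[ind], w_demo)\n",
+    "    w_demo = w_demo - alpha_demo * z_demo"
+   ]
+  },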
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "w = np.array([0,0,0,0,0,1])\n",
+    "z = np.array([0,0,0,0,0,0])\n",
+    "\n",
+    "alpha = ???\n",
+    "beta = ???\n",
+    "\n",
+    ""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}