diff --git a/02_lab/lab1_regression_faces.ipynb b/02_lab/lab1_regression_faces.ipynb
new file mode 100644
index 0000000..22ccf3b
--- /dev/null
+++ b/02_lab/lab1_regression_faces.ipynb
@@ -0,0 +1,382 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Today's data\n",
+    "\n",
+    "400 photos of human faces. Each face is a 2d array [64x64] of pixel brightness."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import fetch_olivetti_faces\n",
+    "data = fetch_olivetti_faces().images"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# this code showcases matplotlib subplots. The syntax is: plt.subplot(height, width, index_starting_from_1)\n",
+    "plt.subplot(2,2,1)\n",
+    "plt.imshow(data[0],cmap='gray')\n",
+    "plt.subplot(2,2,2)\n",
+    "plt.imshow(data[1],cmap='gray')\n",
+    "plt.subplot(2,2,3)\n",
+    "plt.imshow(data[2],cmap='gray')\n",
+    "plt.subplot(2,2,4)\n",
+    "plt.imshow(data[3],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Face reconstruction problem\n",
+    "\n",
+    "Let's solve the face reconstruction problem: given left halves of faces __(X)__, our algorithm shall predict the right halves __(y)__. Our first step is to slice the photos into X and y using array slicing.\n",
+    "\n",
+    "* In regular python, a slice looks roughly like this: `a[2:5]` _(select elements 2, 3 and 4)_\n",
+    "* Numpy allows you to slice N-dimensional arrays along each dimension: [image_index, height, width]\n",
+    "  * `data[:10]` - Select the first 10 images\n",
+    "  * `data[:, :10]` - For all images, select a horizontal stripe 10 pixels high at the top of the image\n",
+    "  * `data[10:20, :, -25:-15]` - Take images [10, 11, ..., 19]; for each image select a _vertical stripe_ of width 10 pixels, 15 pixels away from the _right_ side.\n",
+    "\n",
+    "\n",
+    "Let's use slices to select all __left image halves as X__ and all __right halves as y__."
+   ]
+  },
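+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional sanity check: before writing X and y, you can print the shapes produced by the example slices above to make sure the indexing order [image_index, height, width] is clear (no answers given away here)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# shapes produced by the example slices above; data has shape (400, 64, 64)\n",
+    "print(data.shape)\n",
+    "print(data[:10].shape)                # first 10 images\n",
+    "print(data[:, :10].shape)             # top 10-pixel stripe of every image\n",
+    "print(data[10:20, :, -25:-15].shape)  # 10-pixel-wide vertical stripe of images 10..19"
+   ]
+  },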
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# select left half of each face as X, right half as Y\n",
+    "X = \n",
+    "y = "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you did everything right, you should see the left half-image and the right half-image drawn separately, in natural order\n",
+    "plt.subplot(1,2,1)\n",
+    "plt.imshow(X[0],cmap='gray')\n",
+    "plt.subplot(1,2,2)\n",
+    "plt.imshow(y[0],cmap='gray')\n",
+    "\n",
+    "assert X.shape == y.shape == (len(data), 64, 32), \"Please slice exactly the left half-face to X and the right half-face to Y\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def glue(left_half,right_half):\n",
+    "    # merge photos back together\n",
+    "    left_half = left_half.reshape([-1,64,32])\n",
+    "    right_half = right_half.reshape([-1,64,32])\n",
+    "    return np.concatenate([left_half,right_half],axis=-1)\n",
+    "\n",
+    "\n",
+    "# if you did everything right, you should see a valid face\n",
+    "plt.imshow(glue(X,y)[99],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Machine learning stuff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train,X_test,Y_train,Y_test = train_test_split(X.reshape([len(X),-1]),\n",
+    "                                                 y.reshape([len(y),-1]),\n",
+    "                                                 test_size=0.05,random_state=42)\n",
+    "\n",
+    "print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "model = LinearRegression()\n",
+    "model.fit(X_train,Y_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Measure the mean squared error"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "print(\"Train MSE:\", mean_squared_error(Y_train,model.predict(X_train)))\n",
+    "print(\"Test MSE:\", mean_squared_error(Y_test,model.predict(X_test)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train predictions\n",
+    "pics = glue(X_train,model.predict(X_train))\n",
+    "plt.figure(figsize=[16,12])\n",
+    "for i in range(20):\n",
+    "    plt.subplot(4,5,i+1)\n",
+    "    plt.imshow(pics[i],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test predictions\n",
+    "pics = glue(X_test,model.predict(X_test))\n",
+    "plt.figure(figsize=[16,12])\n",
+    "for i in range(20):\n",
+    "    plt.subplot(4,5,i+1)\n",
+    "    plt.imshow(pics[i],cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "\n",
+    "# Ridge regression\n",
+    "Ridge regression is just linear regression with L2 regularization: the loss is additionally penalized by $ \\alpha \\cdot \\sum _i w_i^2$.\n",
+    "\n",
+    "Let's train such a model with alpha=0.5"
+   ]
+  },
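+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below is a minimal sketch of what fitting and scoring such a model could look like (one possible way, reusing X_train/Y_train/X_test/Y_test from the split above; the `ridge_demo` name is only for this illustration). The actual exercise cells follow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sketch: fit Ridge with alpha=0.5 and report train/test MSE\n",
+    "from sklearn.linear_model import Ridge\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "ridge_demo = Ridge(alpha=0.5)   # demo-only name, so it doesn't clash with your own `ridge` below\n",
+    "ridge_demo.fit(X_train, Y_train)\n",
+    "print(\"Train MSE:\", mean_squared_error(Y_train, ridge_demo.predict(X_train)))\n",
+    "print(\"Test MSE:\", mean_squared_error(Y_test, ridge_demo.predict(X_test)))"
+   ]
+  },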
"execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import Ridge\n", + "\n", + "ridge = Ridge(alpha=0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Test predictions\n", + "pics = glue(X_test,ridge.predict(X_test))\n", + "plt.figure(figsize=[16,12])\n", + "for i in range(20):\n", + " plt.subplot(4,5,i+1)\n", + " plt.imshow(pics[i],cmap='gray')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "# Grid search\n", + "\n", + "Train model with diferent $\\alpha$ and find one that has minimal test MSE. It's okay to use loops or any other python stuff here." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Test predictions\n", + "pics = glue(X_test,)\n", + "plt.figure(figsize=[16,12])\n", + "for i in range(20):\n", + " plt.subplot(4,5,i+1)\n", + " plt.imshow(pics[i],cmap='gray')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/02_lab/lab2_classification.ipynb b/02_lab/lab2_classification.ipynb new file mode 100644 index 0000000..da81961 --- /dev/null +++ b/02_lab/lab2_classification.ipynb @@ -0,0 +1,484 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logistic regression\n", + "\n", + "In this seminar you will implement a logistic regression and train it using stochastic gradient descent modiffications, numpy and your brain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#load our dakka\n", + "import numpy as np\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Two-dimensional classification problem\n", + "\n", + "To make things more intuitive, let's solve a 2D classification problem with syntetic data." 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    ""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Test predictions\n",
+    "pics = glue(X_test,)\n",
+    "plt.figure(figsize=[16,12])\n",
+    "for i in range(20):\n",
+    "    plt.subplot(4,5,i+1)\n",
+    "    plt.imshow(pics[i],cmap='gray')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_lab/lab2_classification.ipynb b/02_lab/lab2_classification.ipynb
new file mode 100644
index 0000000..da81961
--- /dev/null
+++ b/02_lab/lab2_classification.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Logistic regression\n",
+    "\n",
+    "In this seminar you will implement logistic regression and train it using stochastic gradient descent modifications, numpy and your brain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#load our dakka\n",
+    "import numpy as np\n",
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Two-dimensional classification problem\n",
+    "\n",
+    "To make things more intuitive, let's solve a 2D classification problem with synthetic data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn import datasets, preprocessing\n",
+    "\n",
+    "(X, y) = datasets.make_circles(n_samples=1024, shuffle=True, noise=0.2, factor=0.4)\n",
+    "ind = np.logical_or(y==1, X[:,1] > X[:,0] - 0.5)\n",
+    "X = X[ind,:]\n",
+    "m = np.array([[1, 1], [-2, 1]])\n",
+    "X = preprocessing.scale(X)\n",
+    "y = y[ind]\n",
+    "\n",
+    "\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "print(\"X:\\n{}\\ny:\\n{}\".format(X[:3],y[:3]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Your task starts here**\n",
+    "\n",
+    "Since the problem above isn't linearly separable, we add quadratic features to the classifier.\n",
+    "\n",
+    "Implement this transformation in the __expand__ function __[1 point]__. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def expand(X):\n",
+    "    \"\"\"\n",
+    "    Adds quadratic features. \n",
+    "    This function allows your linear model to make non-linear separation.\n",
+    "    \n",
+    "    For each sample (row in matrix), compute an expanded row:\n",
+    "    [feature0, feature1, feature0^2, feature1^2, feature0*feature1, 1]\n",
+    "    \n",
+    "    :param X: matrix of features, shape [n_samples,2]\n",
+    "    :returns: expanded features of shape [n_samples,6]\n",
+    "    \"\"\"\n",
+    "    X_expanded = np.zeros((X.shape[0], 6))\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    return X_expanded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#simple test on random numbers\n",
+    "#[all 8 random numbers are 100% random :P]\n",
+    "dummy_X = np.array([\n",
+    "    [0,0],\n",
+    "    [1,0],\n",
+    "    [2.61,-1.28],\n",
+    "    [-0.59,2.1]\n",
+    "    ])\n",
+    "\n",
+    "#call your expand function\n",
+    "dummy_expanded = expand(dummy_X)\n",
+    "\n",
+    "#what it should have returned:   x0       x1       x0^2     x1^2     x0*x1    1\n",
+    "dummy_expanded_ans = np.array([[ 0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ],\n",
+    "                               [ 1.    ,  0.    ,  1.    ,  0.    ,  0.    ,  1.    ],\n",
+    "                               [ 2.61  , -1.28  ,  6.8121,  1.6384, -3.3408,  1.    ],\n",
+    "                               [-0.59  ,  2.1   ,  0.3481,  4.41  , -1.239 ,  1.    ]])\n",
+    "\n",
+    "#tests\n",
+    "assert isinstance(dummy_expanded,np.ndarray), \"please make sure you return numpy array\"\n",
+    "assert dummy_expanded.shape==dummy_expanded_ans.shape, \"please make sure your shape is correct\"\n",
+    "assert np.allclose(dummy_expanded,dummy_expanded_ans,1e-3), \"Something's out of order with features\"\n",
+    "\n",
+    "print(\"Seems legit!\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Logistic regression\n",
+    "Now, let's write a function that predicts the class probability given X, as in logistic regression.\n",
+    "\n",
+    "The math should look like this:\n",
+    "\n",
+    "$$ P(y| \\vec x, \\vec w) = \\sigma(\\vec x \\cdot \\vec w )$$\n",
+    "\n",
+    "where x represents features, w are weights and $$\\sigma(a) = {1 \\over {1+e^{-a}}}$$\n",
+    "\n",
+    "We shall omit $ \\vec {arrows} $ in further formulae for simplicity."
+   ]
+  },
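+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, here is what $\\sigma$ looks like in code: a minimal, numerically-stable sketch (whether you reuse it inside `classify` is up to you)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sigmoid(a):\n",
+    "    # clip the argument so np.exp never overflows for very negative a\n",
+    "    return 1.0 / (1.0 + np.exp(-np.clip(a, -500, 500)))\n",
+    "\n",
+    "print(sigmoid(np.array([-5.0, 0.0, 5.0])))  # roughly [0.0067, 0.5, 0.9933]"
+   ]
+  },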
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def classify(X, w):\n",
+    "    \"\"\"\n",
+    "    Given input features and weights,\n",
+    "    return predicted probabilities of y==1 given x, P(y=1|x), see description above\n",
+    "    \n",
+    "    __don't forget to expand X inside classify and other functions__\n",
+    "    \n",
+    "    :param X: feature matrix X of shape [n_samples,2] (non-expanded)\n",
+    "    :param w: weight vector w of shape [6] for each of the expanded features\n",
+    "    :returns: an array of predicted probabilities in [0,1] interval.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    \n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#sample usage / test just as the previous one\n",
+    "dummy_weights = np.linspace(-1,1,6)\n",
+    "\n",
+    "dummy_probs = classify(dummy_X,dummy_weights)\n",
+    "\n",
+    "dummy_answers = np.array([ 0.73105858,  0.450166  ,  0.02020883,  0.59844257])\n",
+    "\n",
+    "assert isinstance(dummy_probs,np.ndarray), \"please return np.array\"\n",
+    "assert dummy_probs.shape == dummy_answers.shape, \"please return a 1-d vector with answers for each object\"\n",
+    "assert np.allclose(dummy_probs,dummy_answers,1e-3), \"There's something non-canonical about how probabilities are computed\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The loss you should try to minimize is the Logistic Loss aka crossentropy aka negative log-likelihood:\n",
+    "\n",
+    "$$ L = - {1 \\over N} \\sum_i \\left[ y_i \\cdot \\log P(y_i|x_i,w) + (1-y_i) \\cdot \\log (1-P(y_i|x_i,w)) \\right]$$\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def compute_loss(X, y, w):\n",
+    "    \"\"\"\n",
+    "    Given feature matrix X [n_samples,2], target vector [n_samples] of 0/1,\n",
+    "    and weight vector w [6], compute the scalar loss function using the formula above.\n",
+    "    \"\"\"\n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "dummy_y = np.array([0,1,0,1])\n",
+    "dummy_loss = compute_loss(dummy_X,dummy_y,dummy_weights)\n",
+    "\n",
+    "assert np.allclose(dummy_loss,0.66131), \"something wrong with loss\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since we train our model with gradient descent, we need to compute gradients.\n",
+    "\n",
+    "To be specific, we need the derivative of the loss function with respect to each weight [6 of them].\n",
+    "\n",
+    "$$ \\nabla L = {\\partial L \\over \\partial w} = ...$$\n",
+    "\n",
+    "No, we won't be giving you the exact formula this time. Instead, try figuring out the derivative with pen and paper. \n",
+    "\n",
+    "As usual, we've made a small test for you, but if you need more, feel free to check your math against finite differences (estimate how L changes if you shift w by $10^{-5}$ or so)."
+   ]
+  },
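+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A sketch of such a finite-difference check is below (the eps value is arbitrary). Once `compute_grad` is implemented, the analytic and numeric gradients should roughly agree."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def numeric_grad(X, y, w, eps=1e-5):\n",
+    "    # central finite differences: perturb each weight by +-eps and see how the loss changes\n",
+    "    grad = np.zeros_like(w, dtype=float)\n",
+    "    for j in range(len(w)):\n",
+    "        w_plus, w_minus = w.astype(float), w.astype(float)\n",
+    "        w_plus[j] += eps\n",
+    "        w_minus[j] -= eps\n",
+    "        grad[j] = (compute_loss(X, y, w_plus) - compute_loss(X, y, w_minus)) / (2 * eps)\n",
+    "    return grad\n",
+    "\n",
+    "# after implementing compute_grad below, you can compare:\n",
+    "# assert np.allclose(compute_grad(dummy_X, dummy_y, dummy_weights),\n",
+    "#                    numeric_grad(dummy_X, dummy_y, dummy_weights), atol=1e-4)"
+   ]
+  },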
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def compute_grad(X, y, w):\n",
+    "    \"\"\"\n",
+    "    Given feature matrix X [n_samples,2], target vector [n_samples] of 0/1,\n",
+    "    and weight vector w [6], compute the vector [6] of derivatives of L with respect to each weight.\n",
+    "    \"\"\"\n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#tests\n",
+    "dummy_grads = compute_grad(dummy_X,dummy_y,dummy_weights)\n",
+    "\n",
+    "#correct answers in canonic form\n",
+    "dummy_grads_ans = np.array([-0.06504252, -0.21728448, -0.1379879 , -0.43443953,  0.107504  , -0.05003101])\n",
+    "\n",
+    "assert isinstance(dummy_grads,np.ndarray)\n",
+    "assert dummy_grads.shape == (6,), \"must return a vector of gradients for each weight\"\n",
+    "assert len(set(np.round(dummy_grads/dummy_grads_ans,3))) == 1, \"gradients are wrong\"\n",
+    "assert np.allclose(dummy_grads,dummy_grads_ans,1e-3), \"gradients are off by a coefficient\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here's an auxiliary function that visualizes the predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from IPython import display\n",
+    "\n",
+    "h = 0.01\n",
+    "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
+    "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
+    "xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n",
+    "def visualize(X, y, w, history):\n",
+    "    \"\"\"draws classifier prediction with matplotlib magic\"\"\"\n",
+    "    Z = classify(np.c_[xx.ravel(), yy.ravel()], w)\n",
+    "    Z = Z.reshape(xx.shape)\n",
+    "    plt.subplot(1,2,1)\n",
+    "    plt.contourf(xx, yy, Z, alpha=0.8)\n",
+    "    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)\n",
+    "    plt.xlim(xx.min(), xx.max())\n",
+    "    plt.ylim(yy.min(), yy.max())\n",
+    "    \n",
+    "    plt.subplot(1,2,2)\n",
+    "    plt.plot(history)\n",
+    "    plt.grid()\n",
+    "    ymin, ymax = plt.ylim()\n",
+    "    plt.ylim(0, ymax)\n",
+    "    display.clear_output(wait=True)\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "visualize(X,y,dummy_weights,[1,0.5,0.25])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Training\n",
+    "In this section, we'll use the functions you wrote to train our classifier using stochastic gradient descent.\n",
+    "\n",
+    "Try to find an optimal learning rate for gradient descent for the given batch size.\n",
+    "\n",
+    "**Don't change the batch size!**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "w = np.array([0,0,0,0,0,1])\n",
+    "\n",
+    "\n",
+    "alpha = \n",
+    "\n",
+    "n_iter = 50\n",
+    "batch_size = 4\n",
+    "loss = np.zeros(n_iter)\n",
+    "plt.figure(figsize=(12,5))\n",
+    "for i in range(n_iter):\n",
+    "    ind = np.random.choice(X.shape[0], batch_size)\n",
+    "    loss[i] = compute_loss(X, y, w)\n",
+    "    visualize(X[ind,:], y[ind], w, loss)\n",
+    "    \n",
+    "    w = w - alpha * compute_grad(X[ind,:], y[ind], w)\n",
+    "\n",
+    "visualize(X, y, w, loss)\n",
+    "plt.clf()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "# Bonus quest \n",
+    "\n",
+    "If you're done and there's still time left, try implementing __momentum SGD__ as described [here](https://distill.pub/2017/momentum/).\n",
+    "\n",
+    "Find alpha & beta that result in the fastest convergence."
+   ]
+  },
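+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The update rule itself is short; below is a minimal sketch of the loop shape. The alpha/beta values there are arbitrary placeholders - finding good ones is the actual quest - and it reuses n_iter and batch_size from the training cell above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sketch of momentum SGD: z accumulates an exponential moving average of gradients\n",
+    "w_demo = np.array([0, 0, 0, 0, 0, 1], dtype=float)\n",
+    "z_demo = np.zeros(6)\n",
+    "alpha_demo, beta_demo = 0.05, 0.9   # placeholders, not tuned answers\n",
+    "\n",
+    "for i in range(n_iter):\n",
+    "    ind = np.random.choice(X.shape[0], batch_size)\n",
+    "    z_demo = beta_demo * z_demo + compute_grad(X[ind, :], y[ind], w_demo)\n",
+    "    w_demo = w_demo - alpha_demo * z_demo"
+   ]
+  },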
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "w = np.array([0,0,0,0,0,1])\n",
+    "z = np.array([0,0,0,0,0,0])\n",
+    "\n",
+    "alpha = ???\n",
+    "beta = ???\n",
+    "\n",
+    ""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}