diff --git a/doc/pub/week48/html/week48-bs.html b/doc/pub/week48/html/week48-bs.html
index 1c2786548..0a899735c 100644
--- a/doc/pub/week48/html/week48-bs.html
+++ b/doc/pub/week48/html/week48-bs.html
@@ -38,6 +38,18 @@
{'highest level': 2,
'sections': [('Overview of week 48', 2, None, 'overview-of-week-48'),
('Plan for week 47', 2, None, 'plan-for-week-47'),
+ ('Random Forest Algorithm, reminder from last week',
+ 2,
+ None,
+ 'random-forest-algorithm-reminder-from-last-week'),
+ ('Random Forests Compared with other Methods on the Cancer Data',
+ 2,
+ None,
+ 'random-forests-compared-with-other-methods-on-the-cancer-data'),
+ ('Compare Bagging on Trees with Random Forests',
+ 2,
+ None,
+ 'compare-bagging-on-trees-with-random-forests'),
("Boosting, a Bird's Eye View",
2,
None,
@@ -267,74 +279,77 @@
@@ -386,7 +401,7 @@
+Compare Bagging on Trees with Random Forests
+
+
+
+
+
+
+
+
+
Boosting, a Bird's Eye View
diff --git a/doc/pub/week48/html/week48-solarized.html b/doc/pub/week48/html/week48-solarized.html
index 98b14445d..6a80bf332 100644
--- a/doc/pub/week48/html/week48-solarized.html
+++ b/doc/pub/week48/html/week48-solarized.html
@@ -65,6 +65,18 @@
{'highest level': 2,
'sections': [('Overview of week 48', 2, None, 'overview-of-week-48'),
('Plan for week 47', 2, None, 'plan-for-week-47'),
+ ('Random Forest Algorithm, reminder from last week',
+ 2,
+ None,
+ 'random-forest-algorithm-reminder-from-last-week'),
+ ('Random Forests Compared with other Methods on the Cancer Data',
+ 2,
+ None,
+ 'random-forests-compared-with-other-methods-on-the-cancer-data'),
+ ('Compare Bagging on Trees with Random Forests',
+ 2,
+ None,
+ 'compare-bagging-on-trees-with-random-forests'),
("Boosting, a Bird's Eye View",
2,
None,
@@ -309,7 +321,7 @@ Plan for week 47
Work and Discussion of project 3
Last weekly exercise
Lab sessions at usual times.
- For the week of December 2-6, lab sessions atart at 10am and end 4pm, room FØ434, Tuesday and Wednesday
+ For the week of December 2-6, lab sessions start at 10am and end at 4pm, room FØ434, Tuesday and Wednesday
@@ -322,8 +334,8 @@ Plan for week 47
Summary of course
Readings and Videos:
- These lecture notes at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week48.ipynb
- See also lecture notes from week 47 at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week46/ipynb/week47.ipynb. The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples
+ These lecture notes at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week48/ipynb/week48.ipynb
+ See also lecture notes from week 47 at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week47.ipynb. The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples
Video on Decision trees https://www.youtube.com/watch?v=RmajweUFKvM&ab_channel=Simplilearn
@@ -336,6 +348,175 @@ Plan for week 47
+
+Random Forest Algorithm, reminder from last week
+
+The algorithm described here can be applied to both classification and regression problems.
+
+We will grow a forest of, say, \( B \) trees.
+
+- For \( b=1:B \)
+
+ - Draw a bootstrap sample from the training data organized in our \( \boldsymbol{X} \) matrix.
+ - We then grow a random forest tree \( T_b \) based on the bootstrapped data by repeating the steps outlined below until the minimum node size is reached
+
+ - we select \( m \le p \) variables at random from the \( p \) predictors/features
+ - pick the best split point among the \( m \) features using for example the CART algorithm and create a new node
+ - split the node into daughter nodes
+
+
+ Output then the ensemble of trees \( \{T_b\}_1^{B} \) and make predictions for either a regression type of problem or a classification type of problem.
+
+
+Random Forests Compared with other Methods on the Cancer Data
+
+
+
+
+
+Recall that the cumulative gains curve shows the percentage of the
+overall number of cases in a given category gained by targeting a
+percentage of the total number of cases.
+
+
+Similarly, the receiver operating characteristic curve, or ROC curve,
+displays the diagnostic ability of a binary classifier system as its
+discrimination threshold is varied. It plots the true positive rate against the false positive rate.
+
+
+
+Compare Bagging on Trees with Random Forests
+
+
+
+
+
+
+
+
+
Boosting, a Bird's Eye View
diff --git a/doc/pub/week48/html/week48.html b/doc/pub/week48/html/week48.html
index 93cdc9f13..ebd73a6cc 100644
--- a/doc/pub/week48/html/week48.html
+++ b/doc/pub/week48/html/week48.html
@@ -142,6 +142,18 @@
{'highest level': 2,
'sections': [('Overview of week 48', 2, None, 'overview-of-week-48'),
('Plan for week 47', 2, None, 'plan-for-week-47'),
+ ('Random Forest Algorithm, reminder from last week',
+ 2,
+ None,
+ 'random-forest-algorithm-reminder-from-last-week'),
+ ('Random Forests Compared with other Methods on the Cancer Data',
+ 2,
+ None,
+ 'random-forests-compared-with-other-methods-on-the-cancer-data'),
+ ('Compare Bagging on Trees with Random Forests',
+ 2,
+ None,
+ 'compare-bagging-on-trees-with-random-forests'),
("Boosting, a Bird's Eye View",
2,
None,
@@ -386,7 +398,7 @@
Plan for week 47
Work and Discussion of project 3
Last weekly exercise
Lab sessions at usual times.
-
For the week of December 2-6, lab sessions atart at 10am and end 4pm, room FØ434, Tuesday and Wednesday
+
For the week of December 2-6, lab sessions start at 10am and end at 4pm, room FØ434, Tuesday and Wednesday
@@ -399,8 +411,8 @@ Plan for week 47
Summary of course
Readings and Videos:
- These lecture notes at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week48.ipynb
- See also lecture notes from week 47 at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week46/ipynb/week47.ipynb. The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples
+ These lecture notes at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week48/ipynb/week48.ipynb
+ See also lecture notes from week 47 at https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week47.ipynb. The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples
Video on Decision trees https://www.youtube.com/watch?v=RmajweUFKvM&ab_channel=Simplilearn
@@ -413,6 +425,175 @@ Plan for week 47
+
+Random Forest Algorithm, reminder from last week
+
+The algorithm described here can be applied to both classification and regression problems.
+
+We will grow a forest of, say, \( B \) trees.
+
+- For \( b=1:B \)
+
+ - Draw a bootstrap sample from the training data organized in our \( \boldsymbol{X} \) matrix.
+ - We then grow a random forest tree \( T_b \) based on the bootstrapped data by repeating the steps outlined below until the minimum node size is reached
+
+ - we select \( m \le p \) variables at random from the \( p \) predictors/features
+ - pick the best split point among the \( m \) features using for example the CART algorithm and create a new node
+ - split the node into daughter nodes
+
+
+ Output then the ensemble of trees \( \{T_b\}_1^{B} \) and make predictions for either a regression type of problem or a classification type of problem.
+
+
+Random Forests Compared with other Methods on the Cancer Data
+
+
+
+
+
+Recall that the cumulative gains curve shows the percentage of the
+overall number of cases in a given category gained by targeting a
+percentage of the total number of cases.
+
+
+Similarly, the receiver operating characteristic curve, or ROC curve,
+displays the diagnostic ability of a binary classifier system as its
+discrimination threshold is varied. It plots the true positive rate against the false positive rate.
+
+
+
+Compare Bagging on Trees with Random Forests
+
+
+
+
+
+
+
+
+
Boosting, a Bird's Eye View
diff --git a/doc/pub/week48/ipynb/ipynb-week48-src.tar.gz b/doc/pub/week48/ipynb/ipynb-week48-src.tar.gz
index 581bc0935..8526beceb 100644
Binary files a/doc/pub/week48/ipynb/ipynb-week48-src.tar.gz and b/doc/pub/week48/ipynb/ipynb-week48-src.tar.gz differ
diff --git a/doc/pub/week48/ipynb/week48.ipynb b/doc/pub/week48/ipynb/week48.ipynb
index 5b6adc95a..baa902728 100644
--- a/doc/pub/week48/ipynb/week48.ipynb
+++ b/doc/pub/week48/ipynb/week48.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "a39ebc71",
+ "id": "b8e98ac0",
"metadata": {
"editable": true
},
@@ -14,7 +14,7 @@
},
{
"cell_type": "markdown",
- "id": "1e7972ed",
+ "id": "12cb369c",
"metadata": {
"editable": true
},
@@ -29,7 +29,7 @@
},
{
"cell_type": "markdown",
- "id": "4e7d57ca",
+ "id": "25b1691e",
"metadata": {
"editable": true
},
@@ -39,7 +39,7 @@
},
{
"cell_type": "markdown",
- "id": "208cc80b",
+ "id": "c213ed56",
"metadata": {
"editable": true
},
@@ -54,7 +54,7 @@
"\n",
" * Lab sessions at usual times.\n",
"\n",
- " * For the week of December 2-6, lab sessions atart at 10am and end 4pm, room FØ434, Tuesday and Wednesday\n",
+ " * For the week of December 2-6, lab sessions start at 10am and end at 4pm, room FØ434, Tuesday and Wednesday\n",
"\n",
" \n",
"\n",
@@ -66,9 +66,9 @@
"\n",
"3. Readings and Videos:\n",
"\n",
- "a. These lecture notes at <https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week48.ipynb>\n",
+ "a. These lecture notes at <https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week48/ipynb/week48.ipynb>\n",
"\n",
- "b. See also lecture notes from week 47 at <https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week46/ipynb/week47.ipynb>. The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples\n",
+ "b. See also lecture notes from week 47 at <https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week47.ipynb>. The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples\n",
"\n",
"\n",
"\n",
@@ -85,7 +85,179 @@
},
{
"cell_type": "markdown",
- "id": "26deb357",
+ "id": "c3891e8a",
+ "metadata": {
+ "editable": true
+ },
+ "source": [
+ "## Random Forest Algorithm, reminder from last week\n",
+ "\n",
+ "The algorithm described here can be applied to both classification and regression problems.\n",
+ "\n",
+ "We will grow a forest of, say, $B$ trees.\n",
+ "1. For $b=1:B$\n",
+ "\n",
+ "a. Draw a bootstrap sample from the training data organized in our $\\boldsymbol{X}$ matrix.\n",
+ "\n",
+ "b. We then grow a random forest tree $T_b$ based on the bootstrapped data by repeating the steps outlined below until the minimum node size is reached\n",
+ "\n",
+ "1. we select $m \\le p$ variables at random from the $p$ predictors/features\n",
+ "\n",
+ "2. pick the best split point among the $m$ features using for example the CART algorithm and create a new node\n",
+ "\n",
+ "3. split the node into daughter nodes\n",
+ "\n",
+ "4. Output then the ensemble of trees $\\{T_b\\}_1^{B}$ and make predictions for either a regression type of problem or a classification type of problem."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dbe2476d",
+ "metadata": {
+ "editable": true
+ },
+ "source": [
+ "## Random Forests Compared with other Methods on the Cancer Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "d03f9475",
+ "metadata": {
+ "collapsed": false,
+ "editable": true
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "from sklearn.model_selection import train_test_split \n",
+ "from sklearn.datasets import load_breast_cancer\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.ensemble import BaggingClassifier\n",
+ "\n",
+ "# Load the data\n",
+ "cancer = load_breast_cancer()\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(cancer.data,cancer.target,random_state=0)\n",
+ "print(X_train.shape)\n",
+ "print(X_test.shape)\n",
+ "#define methods\n",
+ "# Logistic Regression\n",
+ "logreg = LogisticRegression(solver='lbfgs')\n",
+ "# Support vector machine\n",
+ "svm = SVC(gamma='auto', C=100)\n",
+ "# Decision Trees\n",
+ "deep_tree_clf = DecisionTreeClassifier(max_depth=None)\n",
+ "#Scale the data\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "scaler = StandardScaler()\n",
+ "scaler.fit(X_train)\n",
+ "X_train_scaled = scaler.transform(X_train)\n",
+ "X_test_scaled = scaler.transform(X_test)\n",
+ "# Logistic Regression\n",
+ "logreg.fit(X_train_scaled, y_train)\n",
+ "print(\"Test set accuracy Logistic Regression with scaled data: {:.2f}\".format(logreg.score(X_test_scaled,y_test)))\n",
+ "# Support Vector Machine\n",
+ "svm.fit(X_train_scaled, y_train)\n",
+ "print(\"Test set accuracy SVM with scaled data: {:.2f}\".format(svm.score(X_test_scaled,y_test)))\n",
+ "# Decision Trees\n",
+ "deep_tree_clf.fit(X_train_scaled, y_train)\n",
+ "print(\"Test set accuracy with Decision Trees and scaled data: {:.2f}\".format(deep_tree_clf.score(X_test_scaled,y_test)))\n",
+ "\n",
+ "\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.model_selection import cross_validate\n",
+ "# Data set not specified\n",
+ "#Instantiate the model with 500 trees and entropy as splitting criteria\n",
+ "Random_Forest_model = RandomForestClassifier(n_estimators=500,criterion=\"entropy\")\n",
+ "Random_Forest_model.fit(X_train_scaled, y_train)\n",
+ "#Cross validation\n",
+ "accuracy = cross_validate(Random_Forest_model,X_test_scaled,y_test,cv=10)['test_score']\n",
+ "print(accuracy)\n",
+ "print(\"Test set accuracy with Random Forests and scaled data: {:.2f}\".format(Random_Forest_model.score(X_test_scaled,y_test)))\n",
+ "\n",
+ "\n",
+ "import scikitplot as skplt\n",
+ "y_pred = Random_Forest_model.predict(X_test_scaled)\n",
+ "skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True)\n",
+ "plt.show()\n",
+ "y_probas = Random_Forest_model.predict_proba(X_test_scaled)\n",
+ "skplt.metrics.plot_roc(y_test, y_probas)\n",
+ "plt.show()\n",
+ "skplt.metrics.plot_cumulative_gain(y_test, y_probas)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a5a918f",
+ "metadata": {
+ "editable": true
+ },
+ "source": [
+ "Recall that the cumulative gains curve shows the percentage of the\n",
+ "overall number of cases in a given category *gained* by targeting a\n",
+ "percentage of the total number of cases.\n",
+ "\n",
+ "Similarly, the receiver operating characteristic curve, or ROC curve,\n",
+ "displays the diagnostic ability of a binary classifier system as its\n",
+ "discrimination threshold is varied. It plots the true positive rate against the false positive rate."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fef4d0d0",
+ "metadata": {
+ "editable": true
+ },
+ "source": [
+ "## Compare Bagging on Trees with Random Forests"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "38b4f201",
+ "metadata": {
+ "collapsed": false,
+ "editable": true
+ },
+ "outputs": [],
+ "source": [
+ "bag_clf = BaggingClassifier(\n",
+ " DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n",
+ " n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8c186195",
+ "metadata": {
+ "collapsed": false,
+ "editable": true
+ },
+ "outputs": [],
+ "source": [
+ "bag_clf.fit(X_train, y_train)\n",
+ "y_pred = bag_clf.predict(X_test)\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)\n",
+ "rnd_clf.fit(X_train, y_train)\n",
+ "y_pred_rf = rnd_clf.predict(X_test)\n",
+ "np.sum(y_pred == y_pred_rf) / len(y_pred)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a6f19a9",
"metadata": {
"editable": true
},
@@ -105,7 +277,7 @@
},
{
"cell_type": "markdown",
- "id": "7a2f8226",
+ "id": "ea3a24e9",
"metadata": {
"editable": true
},
@@ -119,7 +291,7 @@
},
{
"cell_type": "markdown",
- "id": "dc77215d",
+ "id": "1a3c318c",
"metadata": {
"editable": true
},
@@ -131,7 +303,7 @@
},
{
"cell_type": "markdown",
- "id": "f85044fd",
+ "id": "e16cdd0d",
"metadata": {
"editable": true
},
@@ -148,7 +320,7 @@
},
{
"cell_type": "markdown",
- "id": "0203e8af",
+ "id": "8d3ad320",
"metadata": {
"editable": true
},
@@ -160,7 +332,7 @@
},
{
"cell_type": "markdown",
- "id": "cf1ed563",
+ "id": "fd25b1e3",
"metadata": {
"editable": true
},
@@ -174,7 +346,7 @@
},
{
"cell_type": "markdown",
- "id": "e6bf0fad",
+ "id": "10404a9e",
"metadata": {
"editable": true
},
@@ -186,7 +358,7 @@
},
{
"cell_type": "markdown",
- "id": "8130f888",
+ "id": "124bd4cf",
"metadata": {
"editable": true
},
@@ -199,7 +371,7 @@
},
{
"cell_type": "markdown",
- "id": "6c2724ee",
+ "id": "8fb8c197",
"metadata": {
"editable": true
},
@@ -211,7 +383,7 @@
},
{
"cell_type": "markdown",
- "id": "02ee12e9",
+ "id": "ced4e7f4",
"metadata": {
"editable": true
},
@@ -221,7 +393,7 @@
},
{
"cell_type": "markdown",
- "id": "b79767ef",
+ "id": "161eec23",
"metadata": {
"editable": true
},
@@ -249,7 +421,7 @@
},
{
"cell_type": "markdown",
- "id": "9af520b1",
+ "id": "a1964067",
"metadata": {
"editable": true
},
@@ -265,7 +437,7 @@
},
{
"cell_type": "markdown",
- "id": "bde4df04",
+ "id": "1933dc63",
"metadata": {
"editable": true
},
@@ -277,7 +449,7 @@
},
{
"cell_type": "markdown",
- "id": "fbd18e93",
+ "id": "d932cfbd",
"metadata": {
"editable": true
},
@@ -288,7 +460,7 @@
},
{
"cell_type": "markdown",
- "id": "aa97ee02",
+ "id": "24f7a02a",
"metadata": {
"editable": true
},
@@ -300,7 +472,7 @@
},
{
"cell_type": "markdown",
- "id": "d2aa39e9",
+ "id": "6eb9e426",
"metadata": {
"editable": true
},
@@ -310,7 +482,7 @@
},
{
"cell_type": "markdown",
- "id": "1e3955c2",
+ "id": "0a48ddf7",
"metadata": {
"editable": true
},
@@ -322,7 +494,7 @@
},
{
"cell_type": "markdown",
- "id": "372e44b9",
+ "id": "d0087ffb",
"metadata": {
"editable": true
},
@@ -332,7 +504,7 @@
},
{
"cell_type": "markdown",
- "id": "19c28d10",
+ "id": "b455ce5e",
"metadata": {
"editable": true
},
@@ -344,7 +516,7 @@
},
{
"cell_type": "markdown",
- "id": "27059383",
+ "id": "5aeac1a5",
"metadata": {
"editable": true
},
@@ -354,7 +526,7 @@
},
{
"cell_type": "markdown",
- "id": "6fb75d7d",
+ "id": "86dfaddf",
"metadata": {
"editable": true
},
@@ -366,7 +538,7 @@
},
{
"cell_type": "markdown",
- "id": "db81a8d2",
+ "id": "4826590f",
"metadata": {
"editable": true
},
@@ -380,7 +552,7 @@
},
{
"cell_type": "markdown",
- "id": "c005776e",
+ "id": "3de0828a",
"metadata": {
"editable": true
},
@@ -396,7 +568,7 @@
},
{
"cell_type": "markdown",
- "id": "41b295d1",
+ "id": "961265b1",
"metadata": {
"editable": true
},
@@ -408,7 +580,7 @@
},
{
"cell_type": "markdown",
- "id": "4e153aad",
+ "id": "a74db275",
"metadata": {
"editable": true
},
@@ -424,7 +596,7 @@
},
{
"cell_type": "markdown",
- "id": "b507d423",
+ "id": "81d27911",
"metadata": {
"editable": true
},
@@ -436,7 +608,7 @@
},
{
"cell_type": "markdown",
- "id": "c1293602",
+ "id": "3b7dbe21",
"metadata": {
"editable": true
},
@@ -446,7 +618,7 @@
},
{
"cell_type": "markdown",
- "id": "ee91ca92",
+ "id": "e664eec9",
"metadata": {
"editable": true
},
@@ -458,7 +630,7 @@
},
{
"cell_type": "markdown",
- "id": "18cda998",
+ "id": "50fe52a0",
"metadata": {
"editable": true
},
@@ -470,7 +642,7 @@
},
{
"cell_type": "markdown",
- "id": "f4882409",
+ "id": "a4cd6cb0",
"metadata": {
"editable": true
},
@@ -482,7 +654,7 @@
},
{
"cell_type": "markdown",
- "id": "76865a9d",
+ "id": "116d1038",
"metadata": {
"editable": true
},
@@ -493,7 +665,7 @@
},
{
"cell_type": "markdown",
- "id": "00f40cd2",
+ "id": "690f6882",
"metadata": {
"editable": true
},
@@ -505,7 +677,7 @@
},
{
"cell_type": "markdown",
- "id": "8e2ecc8f",
+ "id": "436d0a30",
"metadata": {
"editable": true
},
@@ -516,7 +688,7 @@
},
{
"cell_type": "markdown",
- "id": "6b366af8",
+ "id": "779a3214",
"metadata": {
"editable": true
},
@@ -528,7 +700,7 @@
},
{
"cell_type": "markdown",
- "id": "3a1a1034",
+ "id": "baf4ef97",
"metadata": {
"editable": true
},
@@ -538,7 +710,7 @@
},
{
"cell_type": "markdown",
- "id": "58fb6fa7",
+ "id": "819d9a45",
"metadata": {
"editable": true
},
@@ -550,7 +722,7 @@
},
{
"cell_type": "markdown",
- "id": "eaee6ea7",
+ "id": "9de7ba47",
"metadata": {
"editable": true
},
@@ -562,7 +734,7 @@
},
{
"cell_type": "markdown",
- "id": "6de435fd",
+ "id": "3130e2cc",
"metadata": {
"editable": true
},
@@ -574,7 +746,7 @@
},
{
"cell_type": "markdown",
- "id": "1139bd2b",
+ "id": "88bee939",
"metadata": {
"editable": true
},
@@ -586,7 +758,7 @@
},
{
"cell_type": "markdown",
- "id": "bfe4569a",
+ "id": "81de3757",
"metadata": {
"editable": true
},
@@ -596,7 +768,7 @@
},
{
"cell_type": "markdown",
- "id": "13d0b962",
+ "id": "b1897c2e",
"metadata": {
"editable": true
},
@@ -608,7 +780,7 @@
},
{
"cell_type": "markdown",
- "id": "c46749a8",
+ "id": "eee8697d",
"metadata": {
"editable": true
},
@@ -618,7 +790,7 @@
},
{
"cell_type": "markdown",
- "id": "fb9707b0",
+ "id": "56355f2a",
"metadata": {
"editable": true
},
@@ -630,7 +802,7 @@
},
{
"cell_type": "markdown",
- "id": "7599d5c4",
+ "id": "88a20b27",
"metadata": {
"editable": true
},
@@ -640,7 +812,7 @@
},
{
"cell_type": "markdown",
- "id": "6be2f8e6",
+ "id": "2a02a6d7",
"metadata": {
"editable": true
},
@@ -652,7 +824,7 @@
},
{
"cell_type": "markdown",
- "id": "f7519116",
+ "id": "816d8d8f",
"metadata": {
"editable": true
},
@@ -662,7 +834,7 @@
},
{
"cell_type": "markdown",
- "id": "154c4a3a",
+ "id": "a5a820fc",
"metadata": {
"editable": true
},
@@ -674,7 +846,7 @@
},
{
"cell_type": "markdown",
- "id": "39a20802",
+ "id": "6517fb85",
"metadata": {
"editable": true
},
@@ -684,7 +856,7 @@
},
{
"cell_type": "markdown",
- "id": "efb002ef",
+ "id": "4a3c7a45",
"metadata": {
"editable": true
},
@@ -696,7 +868,7 @@
},
{
"cell_type": "markdown",
- "id": "9b9cf35a",
+ "id": "d295d135",
"metadata": {
"editable": true
},
@@ -716,7 +888,7 @@
},
{
"cell_type": "markdown",
- "id": "c72455a5",
+ "id": "710b2ab1",
"metadata": {
"editable": true
},
@@ -728,7 +900,7 @@
},
{
"cell_type": "markdown",
- "id": "a76d98a0",
+ "id": "1e153c11",
"metadata": {
"editable": true
},
@@ -738,7 +910,7 @@
},
{
"cell_type": "markdown",
- "id": "2d094d5e",
+ "id": "7ca4618e",
"metadata": {
"editable": true
},
@@ -754,7 +926,7 @@
},
{
"cell_type": "markdown",
- "id": "1c4386fd",
+ "id": "ce93cd0f",
"metadata": {
"editable": true
},
@@ -766,7 +938,7 @@
},
{
"cell_type": "markdown",
- "id": "9d052a68",
+ "id": "3405f033",
"metadata": {
"editable": true
},
@@ -794,7 +966,7 @@
},
{
"cell_type": "markdown",
- "id": "ea5f4148",
+ "id": "4e254d44",
"metadata": {
"editable": true
},
@@ -806,8 +978,8 @@
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "9808c49a",
+ "execution_count": 4,
+ "id": "a2372724",
"metadata": {
"collapsed": false,
"editable": true
@@ -832,7 +1004,7 @@
},
{
"cell_type": "markdown",
- "id": "7fce1041",
+ "id": "42c1e9ee",
"metadata": {
"editable": true
},
@@ -842,8 +1014,8 @@
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "6f645036",
+ "execution_count": 5,
+ "id": "bad891e4",
"metadata": {
"collapsed": false,
"editable": true
@@ -932,7 +1104,7 @@
},
{
"cell_type": "markdown",
- "id": "7ba94acc",
+ "id": "87c499dc",
"metadata": {
"editable": true
},
@@ -950,7 +1122,7 @@
},
{
"cell_type": "markdown",
- "id": "01cc8969",
+ "id": "d78bf458",
"metadata": {
"editable": true
},
@@ -963,7 +1135,7 @@
},
{
"cell_type": "markdown",
- "id": "792b01b0",
+ "id": "12318d85",
"metadata": {
"editable": true
},
@@ -975,7 +1147,7 @@
},
{
"cell_type": "markdown",
- "id": "7b248aa2",
+ "id": "2d96597d",
"metadata": {
"editable": true
},
@@ -985,7 +1157,7 @@
},
{
"cell_type": "markdown",
- "id": "2e9c37b8",
+ "id": "ca649b47",
"metadata": {
"editable": true
},
@@ -997,7 +1169,7 @@
},
{
"cell_type": "markdown",
- "id": "2e4ce635",
+ "id": "f3e57f43",
"metadata": {
"editable": true
},
@@ -1007,7 +1179,7 @@
},
{
"cell_type": "markdown",
- "id": "9b82b90d",
+ "id": "d8cc18d6",
"metadata": {
"editable": true
},
@@ -1019,7 +1191,7 @@
},
{
"cell_type": "markdown",
- "id": "00cdf85c",
+ "id": "43d98194",
"metadata": {
"editable": true
},
@@ -1032,7 +1204,7 @@
},
{
"cell_type": "markdown",
- "id": "75879888",
+ "id": "b35a4881",
"metadata": {
"editable": true
},
@@ -1044,7 +1216,7 @@
},
{
"cell_type": "markdown",
- "id": "7df5cbe2",
+ "id": "b058af39",
"metadata": {
"editable": true
},
@@ -1056,7 +1228,7 @@
},
{
"cell_type": "markdown",
- "id": "ed39df0e",
+ "id": "3ad897cf",
"metadata": {
"editable": true
},
@@ -1068,7 +1240,7 @@
},
{
"cell_type": "markdown",
- "id": "17a6c806",
+ "id": "bd31fcbe",
"metadata": {
"editable": true
},
@@ -1078,7 +1250,7 @@
},
{
"cell_type": "markdown",
- "id": "4853b938",
+ "id": "f8376f29",
"metadata": {
"editable": true
},
@@ -1090,7 +1262,7 @@
},
{
"cell_type": "markdown",
- "id": "e48f6eff",
+ "id": "be665473",
"metadata": {
"editable": true
},
@@ -1100,7 +1272,7 @@
},
{
"cell_type": "markdown",
- "id": "8e16d35d",
+ "id": "e1091a41",
"metadata": {
"editable": true
},
@@ -1116,7 +1288,7 @@
},
{
"cell_type": "markdown",
- "id": "8582402a",
+ "id": "e84e0298",
"metadata": {
"editable": true
},
@@ -1128,7 +1300,7 @@
},
{
"cell_type": "markdown",
- "id": "f3affd6c",
+ "id": "c21417da",
"metadata": {
"editable": true
},
@@ -1149,7 +1321,7 @@
},
{
"cell_type": "markdown",
- "id": "0e7145f7",
+ "id": "5525cf60",
"metadata": {
"editable": true
},
@@ -1159,16 +1331,14 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "94ef427d",
+ "execution_count": 6,
+ "id": "a06c6902",
"metadata": {
"collapsed": false,
"editable": true
},
"outputs": [],
"source": [
- "%matplotlib inline\n",
- "\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
@@ -1214,7 +1384,7 @@
},
{
"cell_type": "markdown",
- "id": "962c417a",
+ "id": "b515314f",
"metadata": {
"editable": true
},
@@ -1224,8 +1394,8 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "595aada5",
+ "execution_count": 7,
+ "id": "4231d351",
"metadata": {
"collapsed": false,
"editable": true
@@ -1276,7 +1446,7 @@
},
{
"cell_type": "markdown",
- "id": "e83be453",
+ "id": "121a1d8a",
"metadata": {
"editable": true
},
@@ -1299,7 +1469,7 @@
},
{
"cell_type": "markdown",
- "id": "8d3d5790",
+ "id": "a9c5f373",
"metadata": {
"editable": true
},
@@ -1309,8 +1479,8 @@
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "34e94a90",
+ "execution_count": 8,
+ "id": "16f56e04",
"metadata": {
"collapsed": false,
"editable": true
@@ -1362,7 +1532,7 @@
},
{
"cell_type": "markdown",
- "id": "21b9da8e",
+ "id": "af8e732d",
"metadata": {
"editable": true
},
@@ -1374,8 +1544,8 @@
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "249fe2dc",
+ "execution_count": 9,
+ "id": "b63afee6",
"metadata": {
"collapsed": false,
"editable": true
@@ -1438,7 +1608,7 @@
},
{
"cell_type": "markdown",
- "id": "7f908d19",
+ "id": "84c9ab3e",
"metadata": {
"editable": true
},
@@ -1448,8 +1618,8 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "a03ed5b7",
+ "execution_count": 10,
+ "id": "01952655",
"metadata": {
"collapsed": false,
"editable": true
@@ -1536,7 +1706,7 @@
},
{
"cell_type": "markdown",
- "id": "05b8f3c7",
+ "id": "9a762674",
"metadata": {
"editable": true
},
@@ -1546,7 +1716,7 @@
},
{
"cell_type": "markdown",
- "id": "9c4496bd",
+ "id": "1b17200a",
"metadata": {
"editable": true
},
@@ -1561,7 +1731,7 @@
},
{
"cell_type": "markdown",
- "id": "e81bb354",
+ "id": "4acb7811",
"metadata": {
"editable": true
},
@@ -1577,7 +1747,7 @@
},
{
"cell_type": "markdown",
- "id": "969bddcc",
+ "id": "6b2f2ccd",
"metadata": {
"editable": true
},
@@ -1602,7 +1772,7 @@
},
{
"cell_type": "markdown",
- "id": "d4949b85",
+ "id": "e6bdbe75",
"metadata": {
"editable": true
},
@@ -1649,7 +1819,7 @@
},
{
"cell_type": "markdown",
- "id": "45cb9fdd",
+ "id": "5900b979",
"metadata": {
"editable": true
},
@@ -1684,7 +1854,7 @@
},
{
"cell_type": "markdown",
- "id": "922f087a",
+ "id": "97bf13a4",
"metadata": {
"editable": true
},
@@ -1707,7 +1877,7 @@
},
{
"cell_type": "markdown",
- "id": "5215e2cc",
+ "id": "1e152734",
"metadata": {
"editable": true
},
@@ -1730,7 +1900,7 @@
},
{
"cell_type": "markdown",
- "id": "b47badea",
+ "id": "1ed0e449",
"metadata": {
"editable": true
},
@@ -1750,7 +1920,7 @@
},
{
"cell_type": "markdown",
- "id": "db0ba96d",
+ "id": "b24640ed",
"metadata": {
"editable": true
},
@@ -1766,7 +1936,7 @@
},
{
"cell_type": "markdown",
- "id": "3ce90bd6",
+ "id": "33957222",
"metadata": {
"editable": true
},
@@ -1794,7 +1964,7 @@
},
{
"cell_type": "markdown",
- "id": "3580f610",
+ "id": "56ba1280",
"metadata": {
"editable": true
},
@@ -1816,7 +1986,7 @@
},
{
"cell_type": "markdown",
- "id": "3996d3be",
+ "id": "fb062869",
"metadata": {
"editable": true
},
@@ -1841,7 +2011,7 @@
},
{
"cell_type": "markdown",
- "id": "97989551",
+ "id": "808e4541",
"metadata": {
"editable": true
},
@@ -1859,7 +2029,7 @@
},
{
"cell_type": "markdown",
- "id": "c347f2fe",
+ "id": "1ac3860d",
"metadata": {
"editable": true
},
@@ -1889,7 +2059,7 @@
},
{
"cell_type": "markdown",
- "id": "44e9af30",
+ "id": "c9bead32",
"metadata": {
"editable": true
},
@@ -1903,7 +2073,7 @@
},
{
"cell_type": "markdown",
- "id": "8077d8fa",
+ "id": "982d4930",
"metadata": {
"editable": true
},
@@ -1929,7 +2099,7 @@
},
{
"cell_type": "markdown",
- "id": "07aa4c82",
+ "id": "369bc8e7",
"metadata": {
"editable": true
},
@@ -1958,7 +2128,7 @@
},
{
"cell_type": "markdown",
- "id": "78c3e014",
+ "id": "539550ba",
"metadata": {
"editable": true
},
@@ -1975,7 +2145,7 @@
},
{
"cell_type": "markdown",
- "id": "9323bb0e",
+ "id": "45a3026e",
"metadata": {
"editable": true
},
@@ -1997,7 +2167,7 @@
},
{
"cell_type": "markdown",
- "id": "75509bf3",
+ "id": "3ba33ca6",
"metadata": {
"editable": true
},
@@ -2015,7 +2185,7 @@
},
{
"cell_type": "markdown",
- "id": "35674c96",
+ "id": "512af4a3",
"metadata": {
"editable": true
},
@@ -2038,7 +2208,7 @@
},
{
"cell_type": "markdown",
- "id": "29f53333",
+ "id": "bafc70a8",
"metadata": {
"editable": true
},
@@ -2057,7 +2227,7 @@
},
{
"cell_type": "markdown",
- "id": "5dbba68a",
+ "id": "c8db3bb3",
"metadata": {
"editable": true
},
@@ -2073,7 +2243,7 @@
},
{
"cell_type": "markdown",
- "id": "6828d447",
+ "id": "19a62799",
"metadata": {
"editable": true
},
@@ -2088,7 +2258,7 @@
},
{
"cell_type": "markdown",
- "id": "cdf3f161",
+ "id": "dee23d4a",
"metadata": {
"editable": true
},
@@ -2112,7 +2282,7 @@
},
{
"cell_type": "markdown",
- "id": "72c3fb89",
+ "id": "27b6fe1b",
"metadata": {
"editable": true
},
@@ -2124,7 +2294,7 @@
},
{
"cell_type": "markdown",
- "id": "37629ea0",
+ "id": "671ebf4b",
"metadata": {
"editable": true
},
@@ -2142,7 +2312,7 @@
},
{
"cell_type": "markdown",
- "id": "98aab4d7",
+ "id": "1fe52803",
"metadata": {
"editable": true
},
@@ -2152,7 +2322,7 @@
},
{
"cell_type": "markdown",
- "id": "87b99c69",
+ "id": "5a2a9f51",
"metadata": {
"editable": true
},
@@ -2170,7 +2340,7 @@
},
{
"cell_type": "markdown",
- "id": "104bf3d9",
+ "id": "bad1a098",
"metadata": {
"editable": true
},
@@ -2180,7 +2350,7 @@
},
{
"cell_type": "markdown",
- "id": "dd8e8532",
+ "id": "942d899e",
"metadata": {
"editable": true
},
@@ -2199,7 +2369,7 @@
},
{
"cell_type": "markdown",
- "id": "0fcf899e",
+ "id": "78dc7403",
"metadata": {
"editable": true
},
@@ -2211,7 +2381,7 @@
},
{
"cell_type": "markdown",
- "id": "73aab9a5",
+ "id": "7c2bfde9",
"metadata": {
"editable": true
},
@@ -2225,7 +2395,7 @@
},
{
"cell_type": "markdown",
- "id": "c60f69c7",
+ "id": "2f152023",
"metadata": {
"editable": true
},
@@ -2240,7 +2410,7 @@
},
{
"cell_type": "markdown",
- "id": "4e511fae",
+ "id": "70f4a4a4",
"metadata": {
"editable": true
},
@@ -2258,7 +2428,7 @@
},
{
"cell_type": "markdown",
- "id": "fa972f2d",
+ "id": "0dc78848",
"metadata": {
"editable": true
},
@@ -2272,7 +2442,7 @@
},
{
"cell_type": "markdown",
- "id": "8a994f1a",
+ "id": "831b279e",
"metadata": {
"editable": true
},
@@ -2290,7 +2460,7 @@
},
{
"cell_type": "markdown",
- "id": "03b5397c",
+ "id": "8b2e5067",
"metadata": {
"editable": true
},
@@ -2314,7 +2484,7 @@
},
{
"cell_type": "markdown",
- "id": "45018dc5",
+ "id": "5a7fd9c4",
"metadata": {
"editable": true
},
@@ -2352,7 +2522,7 @@
},
{
"cell_type": "markdown",
- "id": "b2cb45a9",
+ "id": "3fff9e22",
"metadata": {
"editable": true
},
@@ -2375,7 +2545,7 @@
},
{
"cell_type": "markdown",
- "id": "3865af47",
+ "id": "06e08aa9",
"metadata": {
"editable": true
},
@@ -2411,7 +2581,7 @@
},
{
"cell_type": "markdown",
- "id": "6f19b509",
+ "id": "564bb5de",
"metadata": {
"editable": true
},
@@ -2432,7 +2602,7 @@
},
{
"cell_type": "markdown",
- "id": "9343ee7f",
+ "id": "a50184f4",
"metadata": {
"editable": true
},
@@ -2454,7 +2624,7 @@
},
{
"cell_type": "markdown",
- "id": "b2dd9738",
+ "id": "0abcc34b",
"metadata": {
"editable": true
},
@@ -2474,7 +2644,7 @@
},
{
"cell_type": "markdown",
- "id": "b9be3dbc",
+ "id": "8d2c9af4",
"metadata": {
"editable": true
},
@@ -2489,7 +2659,7 @@
},
{
"cell_type": "markdown",
- "id": "8cbdca7e",
+ "id": "f477a76f",
"metadata": {
"editable": true
},
@@ -2507,7 +2677,7 @@
},
{
"cell_type": "markdown",
- "id": "537082f4",
+ "id": "6155600c",
"metadata": {
"editable": true
},
@@ -2537,7 +2707,7 @@
},
{
"cell_type": "markdown",
- "id": "db559f68",
+ "id": "83336d35",
"metadata": {
"editable": true
},
@@ -2566,7 +2736,7 @@
},
{
"cell_type": "markdown",
- "id": "ac53848b",
+ "id": "99eb3e53",
"metadata": {
"editable": true
},
@@ -2597,7 +2767,7 @@
},
{
"cell_type": "markdown",
- "id": "d8d03d63",
+ "id": "283b9930",
"metadata": {
"editable": true
},
@@ -2620,7 +2790,7 @@
},
{
"cell_type": "markdown",
- "id": "e3f5660d",
+ "id": "defb65a0",
"metadata": {
"editable": true
},
@@ -2638,7 +2808,7 @@
},
{
"cell_type": "markdown",
- "id": "b8d80de2",
+ "id": "027093b9",
"metadata": {
"editable": true
},
@@ -2661,7 +2831,7 @@
},
{
"cell_type": "markdown",
- "id": "de54e1e2",
+ "id": "78a77435",
"metadata": {
"editable": true
},
@@ -2682,7 +2852,7 @@
},
{
"cell_type": "markdown",
- "id": "be906a03",
+ "id": "500291ea",
"metadata": {
"editable": true
},
@@ -2698,7 +2868,7 @@
},
{
"cell_type": "markdown",
- "id": "d2090cb2",
+ "id": "62d8538b",
"metadata": {
"editable": true
},
diff --git a/doc/src/week48/week48.do.txt b/doc/src/week48/week48.do.txt
index 7608590eb..02de1f006 100644
--- a/doc/src/week48/week48.do.txt
+++ b/doc/src/week48/week48.do.txt
@@ -12,15 +12,15 @@ DATE: today
* Work and Discussion of project 3
* Last weekly exercise
* Lab sessions at usual times.
- * For the week of December 2-6, lab sessions atart at 10am and end 4pm, room FØ434, Tuesday and Wednesday
+ * For the week of December 2-6, lab sessions start at 10am and end at 4pm, room FØ434, Tuesday and Wednesday
!eblock
!bblock Plans for the lecture Monday 25 November, with video suggestions etc
o Boosting and gradient boosting and ensemble models
o Summary of course
o Readings and Videos:
- o These lecture notes at URL:"https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week48.ipynb"
- o See also lecture notes from week 47 at URL:"https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week46/ipynb/week47.ipynb". The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples
+ o These lecture notes at URL:"https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week48/ipynb/week48.ipynb"
+ o See also lecture notes from week 47 at URL:"https://github.com/CompPhysics/MachineLearning/blob/master/doc/pub/week47/ipynb/week47.ipynb". The lecture on Monday starts with a repetition on AdaBoost before we move over to gradient boosting with examples
# o Video of lecture at URL:"https://youtu.be/RIHzmLv05DA"
# o Whiteboard notes at URL:"https://github.com/CompPhysics/MachineLearning/blob/master/doc/HandWrittenNotes/2024/NotesNovember25.pdf"
o Video on Decision trees URL:"https://www.youtube.com/watch?v=RmajweUFKvM&ab_channel=Simplilearn"
@@ -31,6 +31,128 @@ o Readings and Videos:
!eblock
+!split
+===== Random Forest Algorithm, reminder from last week =====
+
+
+The algorithm described here can be applied to both classification and regression problems.
+
+We will grow a forest of, say, $B$ trees.
+o For $b=1:B$
+ o Draw a bootstrap sample from the training data organized in our $\bm{X}$ matrix.
+ o We then grow a random forest tree $T_b$ based on the bootstrapped data by repeating the steps outlined below until the minimum node size is reached
+ o we select $m \le p$ variables at random from the $p$ predictors/features
+ o pick the best split point among the $m$ features using for example the CART algorithm and create a new node
+ o split the node into daughter nodes
+o Output then the ensemble of trees $\{T_b\}_1^{B}$ and make predictions for either a regression type of problem or a classification type of problem.
+
+
+
+!split
+===== Random Forests Compared with other Methods on the Cancer Data =====
+
+
+!bc pycod
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.datasets import load_breast_cancer
+from sklearn.svm import SVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import BaggingClassifier
+
+# Load the breast cancer data set shipped with scikit-learn
+cancer = load_breast_cancer()
+
+X_train, X_test, y_train, y_test = train_test_split(cancer.data,cancer.target,random_state=0)  # fixed random_state for a reproducible split
+print(X_train.shape)
+print(X_test.shape)
+# Define the classifiers to compare
+# Logistic Regression
+logreg = LogisticRegression(solver='lbfgs')
+# Support vector machine
+svm = SVC(gamma='auto', C=100)
+# Decision Trees
+deep_tree_clf = DecisionTreeClassifier(max_depth=None)  # max_depth=None: no explicit depth limit is set
+# Scale the data: fit the scaler on the training split only, then apply it to both splits
+from sklearn.preprocessing import StandardScaler
+scaler = StandardScaler()
+scaler.fit(X_train)
+X_train_scaled = scaler.transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+# Logistic Regression
+logreg.fit(X_train_scaled, y_train)
+print("Test set accuracy Logistic Regression with scaled data: {:.2f}".format(logreg.score(X_test_scaled,y_test)))
+# Support Vector Machine
+svm.fit(X_train_scaled, y_train)
+print("Test set accuracy SVM with scaled data: {:.2f}".format(logreg.score(X_test_scaled,y_test)))
+# Decision Trees
+deep_tree_clf.fit(X_train_scaled, y_train)
+print("Test set accuracy with Decision Trees and scaled data: {:.2f}".format(deep_tree_clf.score(X_test_scaled,y_test)))
+
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.preprocessing import LabelEncoder
+from sklearn.model_selection import cross_validate
+# Reuses the scaled training/test arrays (X_train_scaled, X_test_scaled) computed earlier in this code block
+# Instantiate the model with 500 trees and entropy as splitting criterion
+Random_Forest_model = RandomForestClassifier(n_estimators=500,criterion="entropy")
+Random_Forest_model.fit(X_train_scaled, y_train)
+# 10-fold cross validation. NOTE(review): this runs on the test split (X_test_scaled, y_test) - confirm whether the training split was intended
+accuracy = cross_validate(Random_Forest_model,X_test_scaled,y_test,cv=10)['test_score']
+print(accuracy)
+print("Test set accuracy with Random Forests and scaled data: {:.2f}".format(Random_Forest_model.score(X_test_scaled,y_test)))
+
+
+import scikitplot as skplt
+y_pred = Random_Forest_model.predict(X_test_scaled)  # predicted class labels for the test split
+skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True)  # normalized confusion matrix
+plt.show()
+y_probas = Random_Forest_model.predict_proba(X_test_scaled)  # class probabilities, used by the ROC and cumulative-gain plots below
+skplt.metrics.plot_roc(y_test, y_probas)
+plt.show()
+skplt.metrics.plot_cumulative_gain(y_test, y_probas)
+plt.show()
+
+!ec
+
+
+Recall that the cumulative gains curve shows the percentage of the
+overall number of cases in a given category *gained* by targeting a
+percentage of the total number of cases.
+
+Similarly, the receiver operating characteristic curve, or ROC curve,
+displays the diagnostic ability of a binary classifier system as its
+discrimination threshold is varied. It plots the true positive rate against the false positive rate.
+
+
+!split
+===== Compare Bagging on Trees with Random Forests =====
+!bc pycod
+bag_clf = BaggingClassifier(  # bagging ensemble built on decision trees
+ DecisionTreeClassifier(splitter="random", max_leaf_nodes=16, random_state=42),  # base estimator: random splitter, at most 16 leaf nodes
+ n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1, random_state=42)  # 500 trees on bootstrap samples; n_jobs=-1 uses all available cores
+!ec
+
+
+
+!bc pycod
+bag_clf.fit(X_train, y_train)  # fitted on the unscaled split (X_train, not X_train_scaled)
+y_pred = bag_clf.predict(X_test)  # bagging predictions on the test split
+from sklearn.ensemble import RandomForestClassifier
+rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)  # same tree count, leaf limit and seed as bag_clf
+rnd_clf.fit(X_train, y_train)
+y_pred_rf = rnd_clf.predict(X_test)  # random forest predictions on the same test split
+np.sum(y_pred == y_pred_rf) / len(y_pred)  # fraction of test samples on which the two ensembles predict the same class
+!ec
+
+
+
+
+
+
+
!split
@@ -1539,3 +1661,4 @@ FIGURE: [figures/Nebbdyr2.png, width=500 frac=0.6]
+