From 1c143f339923d1183cb75779ba429722c9ff2e32 Mon Sep 17 00:00:00 2001
From: Doubleking-1 <71910936+Doubleking-1@users.noreply.github.com>
Date: Wed, 3 Jan 2024 23:17:02 +0800
Subject: [PATCH] Wangzun
second homework of ML
---
...ork_credit_scoring_finetune_ensemble.ipynb | 2073 +++++++++++++++++
1 file changed, 2073 insertions(+)
create mode 100644 2023/homework/Zun_Wang/homework_credit_scoring_finetune_ensemble.ipynb
diff --git a/2023/homework/Zun_Wang/homework_credit_scoring_finetune_ensemble.ipynb b/2023/homework/Zun_Wang/homework_credit_scoring_finetune_ensemble.ipynb
new file mode 100644
index 00000000..87a7e7a5
--- /dev/null
+++ b/2023/homework/Zun_Wang/homework_credit_scoring_finetune_ensemble.ipynb
@@ -0,0 +1,2073 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 一起来打怪之 Credit Scoring 练习"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------\n",
+ "## >>>说明:\n",
+ "### 1. 答题步骤:\n",
+ "- 回答问题**请保留每一步**操作过程,请不要仅仅给出最后答案\n",
+ "- 请养成代码注释的好习惯\n",
+ "\n",
+ "### 2. 解题思路:\n",
+ "- 为方便大家准确理解题目,在习题实战中有所收获,本文档提供了解题思路提示\n",
+ "- 解题思路**仅供参考**,鼓励原创解题方法\n",
+ "- 为督促同学们自己思考,解题思路内容设置为**注释**,请注意查看\n",
+ "\n",
+ "### 3. 所用数据:\n",
+ "- 问题使用了多个数据库,请注意导入每个数据库后都先**查看和了解数据的基本性质**,后面的问题不再一一提醒"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "--------\n",
+ "## 操作题"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 信用卡欺诈项目"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " #### 前期数据导入,预览及处理(此部分勿修改,涉及的数据文件无需复制移动)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " SeriousDlqin2yrs | \n",
+ " RevolvingUtilizationOfUnsecuredLines | \n",
+ " age | \n",
+ " NumberOfTime30-59DaysPastDueNotWorse | \n",
+ " DebtRatio | \n",
+ " MonthlyIncome | \n",
+ " NumberOfOpenCreditLinesAndLoans | \n",
+ " NumberOfTimes90DaysLate | \n",
+ " NumberRealEstateLoansOrLines | \n",
+ " NumberOfTime60-89DaysPastDueNotWorse | \n",
+ " NumberOfDependents | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0.766127 | \n",
+ " 45.0 | \n",
+ " 2.0 | \n",
+ " 0.802982 | \n",
+ " 9120.0 | \n",
+ " 13.0 | \n",
+ " 0.0 | \n",
+ " 6.0 | \n",
+ " 0.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.957151 | \n",
+ " 40.0 | \n",
+ " 0.0 | \n",
+ " 0.121876 | \n",
+ " 2600.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0.658180 | \n",
+ " 38.0 | \n",
+ " 1.0 | \n",
+ " 0.085113 | \n",
+ " 3042.0 | \n",
+ " 2.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0.233810 | \n",
+ " 30.0 | \n",
+ " 0.0 | \n",
+ " 0.036050 | \n",
+ " 3300.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0.907239 | \n",
+ " 49.0 | \n",
+ " 1.0 | \n",
+ " 0.024926 | \n",
+ " 63588.0 | \n",
+ " 7.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " SeriousDlqin2yrs RevolvingUtilizationOfUnsecuredLines age \\\n",
+ "0 1 0.766127 45.0 \n",
+ "1 0 0.957151 40.0 \n",
+ "2 0 0.658180 38.0 \n",
+ "3 0 0.233810 30.0 \n",
+ "4 0 0.907239 49.0 \n",
+ "\n",
+ " NumberOfTime30-59DaysPastDueNotWorse DebtRatio MonthlyIncome \\\n",
+ "0 2.0 0.802982 9120.0 \n",
+ "1 0.0 0.121876 2600.0 \n",
+ "2 1.0 0.085113 3042.0 \n",
+ "3 0.0 0.036050 3300.0 \n",
+ "4 1.0 0.024926 63588.0 \n",
+ "\n",
+ " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n",
+ "0 13.0 0.0 \n",
+ "1 4.0 0.0 \n",
+ "2 2.0 1.0 \n",
+ "3 5.0 0.0 \n",
+ "4 7.0 0.0 \n",
+ "\n",
+ " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n",
+ "0 6.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 1.0 0.0 \n",
+ "\n",
+ " NumberOfDependents \n",
+ "0 2.0 \n",
+ "1 1.0 \n",
+ "2 0.0 \n",
+ "3 0.0 \n",
+ "4 0.0 "
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read KaggleCredit2.csv directly out of the zip archive; both handles are closed when the with block exits.\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "pd.set_option('display.max_columns', 500)  # display up to 500 columns before pandas truncates a frame\n",
+ "with zipfile.ZipFile('KaggleCredit2.csv.zip', 'r') as z, z.open('KaggleCredit2.csv') as f:\n",
+ "    data = pd.read_csv(f, index_col=0)  # the first CSV column becomes the row index\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(112915, 11)"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 检查数据维度\n",
+ "data.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "SeriousDlqin2yrs 0\n",
+ "RevolvingUtilizationOfUnsecuredLines 0\n",
+ "age 4267\n",
+ "NumberOfTime30-59DaysPastDueNotWorse 0\n",
+ "DebtRatio 0\n",
+ "MonthlyIncome 0\n",
+ "NumberOfOpenCreditLinesAndLoans 0\n",
+ "NumberOfTimes90DaysLate 0\n",
+ "NumberRealEstateLoansOrLines 0\n",
+ "NumberOfTime60-89DaysPastDueNotWorse 0\n",
+ "NumberOfDependents 4267\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count the missing values in each column\n",
+ "data.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_253566/2980780030.py:3: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
+ " data.shapey = data['SeriousDlqin2yrs']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Drop every row that contains a missing value (modifies data in place)\n",
+ "data.dropna(inplace=True)\n",
+ "y = data['SeriousDlqin2yrs']  # labels: a plain variable, not an attribute set on the DataFrame\n",
+ "X = data.drop('SeriousDlqin2yrs', axis=1)  # features: every remaining column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.06742876076872101"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Split the frame into the feature frame X and the label series y\n",
+ "X = data.drop(columns='SeriousDlqin2yrs')\n",
+ "y = data.loc[:, 'SeriousDlqin2yrs']\n",
+ "# Mean of the label column (the original note calls this the average fraud rate)\n",
+ "y.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 以下为操作题"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 1.把数据切分成训练集和测试集"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((76053, 10), (32595, 10), (76053,), (32595,), (108648, 10))"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Hint: look up the train_test_split function\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "# Hold out 30% of the rows as the test set and keep the remaining 70% for training\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.3, random_state=0)\n",
+ "# Shapes of the train and test pieces, followed by the shape of the full feature frame\n",
+ "tuple(part.shape for part in (X_train, X_test, y_train, y_test, X))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Text(0.5, 0, 'Catalog')"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAETCAYAAAD6R0vDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAAsTAAALEwEAmpwYAAAczklEQVR4nO3debxdZX3v8c/XAAoyBCRSSYCg0AGwWkwBX7YWxTJoLdQrVq8taBHsBVs7KlgrVEuL11qE61RuoYITRawSFUpzkWjVMgSxMklJGRMZwgwqAvK7f6zn4M7JOScnyTpnk5PP+/U6r7P2s5611rPWHr57PevZe6eqkCSpT08bdgMkSTOP4SJJ6p3hIknqneEiSeqd4SJJ6p3hIknqneEikrwryT9OMP+NSf5tOtu0ppLsm2TZsNuxOkl2TPJwklnDbkvfksxPUkk26mFdK92fSa5Jsu+6rreta6XHc2vzLn2su63v4STP7Wt96yvDZT2U5OYkP2oP4juTfCLJ5mu7vqr6m6p6S1v3Ki8QVfXpqtq/j7ZvaNp99YqR21V1a1VtXlU/GWa71jdVtXtVLZ6ozmTDrc/Hc5LFSd4yav2bV9WNfax/fWa4rL9eXVWbA3sCC4B3D7k90lNeH2dVmhzDZT1XVcuBC4A9AJL8ZutCuL+9q/qFkbpJ3plkeZKHklyfZL9WfkKST7VqX2//729nRi9O8qYk32h1P5bk7wbbkOS8JH/SprdP8vkkK5LclOQPx2t7klcluTLJg0luS3LCwLyRd6GHJ7k1yd1J/mJg/qbtjO2+JNcCvzzRcWrr+v0kN7Rj85EkGZj/e0mua+u7MMlOA/P2b8frgSQfTfK1kXerSZ6X5KtJ7mlt/HSS2W3eJ4EdgS+1Y/mOwXfXSX47yZJR7fzjJAvb9NOT/F3b/zuTfDzJpuPs3y6tXQ+0dvzzwLxT2vF9MMkVSX51YN4JST6X5FPtcXFVkp9NclySu9py+w/UX5zkb5Nc1tZ3XpJtxmnTVklOT3J7e9z9dcbpDlzd/ZmBM8AkeyVZ0rZ/Z5K/b9XGe+x+M8nJSe4BThh8PA94ZZIb27H7QJKnDRyfkefGSmdHSU4EfhX4cNveh1udJ7vZ2jE4K93z4ZYk7x5Y95uSfKPdx/ele74cNNbxWS9VlX/r2R9wM/CKNr0DcA3wPuBngR8Avw5sDLwDWApsAvwccBuwfVtuPvC8Nn0C8KmB8gI2Gtjem4BvtOmXtvWk3d4a+BGwPd2blSuA97RtPhe4EThgnP3YF3h+W+4XgTuBQ0a14/8CmwIvAH4M/EKbfxLw78A27RhcDSyb4JgV8GVgNt0L/grgwDbv4HacfgHYiO4s8Ftt3rbAg8Br2ry3A48Bb2nzd2nH++nAHLoXuA+NdV+NPr7AZsBDwK4D8y8HXt+mTwYWtn3cAvgS8Lfj7N9ngb9ox/IZwK8MzPsd4Fltm38K3AE8Y+C+fwQ4oM0/C7iprWtj4EjgpoF1LQaW072ZeSbwecZ57ABfAP6h1Xs2cBnw1nHaP+H9ycqP+f8AfrdNbw7ss5rH7uPAH7T925SBx/PAY+Pitu0dgf8auH9PGNm/cfZx8UjdUevbpU2fBZzX7r/5bd1HDLTtsXaMZwH/C/g+7bm1vv8NvQH+rcWd1j3RHgbuB24BPtqeNH8JnDNQ72nthWBfuhfBu4BXABuPWt+TT6AJnqAj4RLgVuCl7faRwFfb9N7AraPWfRzwT5Pcrw8BJ49qx7yB+Zfx0xfeG2nh0G4fxerDZfAF9xzg2DZ9wcgTfuC4/RDYCTgM+I+BeaEL17eMs51DgCtH3Vdjhku7/SngPW16V7qw2axt5we0NwBt/osZeKEftd2zgNMGj9cEx+I+4AUD9/2igXmvbo+tWe32Fq29s9vtxcBJA/V3Ax6le3F8ct+A7ejeDGw6UPcNwMXjtGnC+5OVw+XrwF8B245ax0rHduCxO/ox+SZWDZfBbR8N
XDT6uTHO/bd49GOhzd+lHZNHgd0G5r0VWDzQjqUD8zZry/7MmrwePFX/7BZbfx1SVbOraqeqOrqqRs4ebhmpUFVP0L0Qzq2qpcAf0T1Z7kpydpLt13Sj1T0LzqZ7oQD4n8Cn2/ROwPat2+n+JPcD76J7oVlFkr2TXNy6DB4Afp/uTGHQHQPTP6R7p0rb19sG5t3C6o23rp2AUwbafC/di/vc0dtp+z84imm7diyXJ3mQLixG78NEPsPKx/KLVfVDurOgzYArBtr1r618LO9obb4sXbfo7w208c/Sdfk90Naz1ag23jkw/SPg7vrpgIMftf+DA0ZGH/eNWXWfd2rltw+0/x/ozmDGsib35xF0Z+nfS3J5kt+YoO7o9k6mzi2tPetqW7pjMLgvt9A9rkY8+Zhs9zusfKzXW4bLzPJ9uic1AElC18WwHKCqPlNVv9LqFPD+MdYxma/J/izw2nTXJfam6xqB7gl6Uwu9kb8tquqV46znM3TdPjtU1VbAx+leICfj9rZvI3ac5HJjuY2uu2aw3ZtW1bfaduaNVGzHdN7Asn9Dd8yeX1Vb0nVBDe7D6o7nImBOkhfShcxnWvnddC/suw+0aavqBnGsoqruqKojq2p7unfHH23XYX6VLnheB2xdVbOBB5j8cR7L6OP+WGvvoNvozly2HWj/llW1+zjrnPT9WVU3VNUb6ILq/cC5SZ7J+Md6Mo/p0dv+fpv+AV3Ij/iZNVj33XTHZqeBsh1pz8eZznCZWc4BXpVkvyQb0/Wv/xj4VpKfS/LyJE+n62P/EfDEGOtY0crHHadfVVfSPXH+Ebiwqu5vsy4DHko3cGDTJLOS7JFkvIvtWwD3VtUjSfaie+e+Jvt6XJKtk8yj61NfWx9v69odnrwIe2ib9xXg+UkOSTfS6BhWfoHZgq4b6YEkc4E/H7XuO5n4WD4GfA74AF2f/6JW/gTd9aaTkzy7tWtukgPGWk+SQ9txgK7bq+juxy3orjmsADZK8h5gy9Ucj9X5nSS7JdkMeC9wbo0aWl1VtwP/BnwwyZZJnpZu8MOvjbPOSd+fSX4nyZx2jO5vxU8wicfuBP68bXsHuutqIwMivgO8NN3nk7ai6+YdNO79247JOcCJSbZob8b+hO7sdsYzXGaQqrqe7p3z/6F78X813ZDlR+kuOJ/Uyu+ge9c3+okycmp+IvDN1p2xzzib+wzd9ZvPDCz7E+A3gBfSXRQeCaCtxlnH0cB7kzxENwjgnDXY3b+i62K4ie5F7JNrsOxKquoLdO+Az25dW1cDB7V5dwOHAv8buIfuGsMSutAeaceedGcDXwH+ZdTq/xZ4dzuWfzZOE0aO5eeq6vGB8nfSDTS4pLXr/9ENzBjLLwOXJnmY7mzw7dV91uJCuu60/6I7Xo8wuW6iiXwS+ARtYAAw3ojAw+gGdlxLF3jnAs8Zp+6a3J8HAte0fT2F7jrcj9bgsTuW8+gGo3yH7n48HaCqFtEFzXfb/C+PWu4UurP4+5KcOsZ6/4Du7OdG4Bt09/UZa9Cu9dbIiB9Jk9CGkS4D3lhVFw+7PdMtyWK6C9zjfqODBJ65SKuV5IAks1uX4rvorldcMuRmSU9phou0ei8G/pufdjUe0kbnSRqH3WKSpN555iJJ6p3hIknqnd8Q2my77bY1f/78YTdDktYrV1xxxd1Vtco3Rxguzfz581myZMnqK0qSnpRkzK/qsVtMktQ7w0WS1DvDRZLUO8NFktQ7w0WS1DvDRZLUO8NFktS7KQuXJGckuSvJ1QNl2yRZlOSG9n/rVp4kpyZZmuS7SfYcWObwVv+GJIcPlL8oyVVtmVPbLwSOuw1J0vSZyg9RfgL4MHDWQNmxwEVVdVKSY9vtd9L9MNOu7W9v4GPA3km2AY4HFtD9st4VSRZW1X2tzpHApcD5dD8gdMEE25gR5h/7lWE3Yca4+aRXDbsJ0ow1ZWcuVfV14N5RxQcDZ7bpM4FDBsrPqs4lwOwkzwEOABZV1b0tUBYB
B7Z5W1bVJdV9rfNZo9Y11jYkSdNkuq+5bNd+Wxu6n0jdrk3PZeWfXl3WyiYqXzZG+UTbkCRNk6Fd0G9nHFP6YzKr20aSo5IsSbJkxYoVU9kUSdqgTHe43Nm6tGj/72rly4EdBurNa2UTlc8bo3yibayiqk6rqgVVtWDOnFW+1FOStJamO1wWAiMjvg4HzhsoP6yNGtsHeKB1bV0I7J9k6zbqa3/gwjbvwST7tFFih41a11jbkCRNkykbLZbks8C+wLZJltGN+joJOCfJEcAtwOta9fOBVwJLgR8CbwaoqnuTvA+4vNV7b1WNDBI4mm5E2qZ0o8QuaOXjbUOSNE2mLFyq6g3jzNpvjLoFHDPOes4AzhijfAmwxxjl94y1DUnS9PET+pKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeGS6SpN4ZLpKk3hkukqTeDSVckvxxkmuSXJ3ks0mekWTnJJcmWZrkn5Ns0uo+vd1e2ubPH1jPca38+iQHDJQf2MqWJjl2CLsoSRu0aQ+XJHOBPwQWVNUewCzg9cD7gZOrahfgPuCItsgRwH2t/ORWjyS7teV2Bw4EPppkVpJZwEeAg4DdgDe0upKkaTKsbrGNgE2TbARsBtwOvBw4t80/EzikTR/cbtPm75ckrfzsqvpxVd0ELAX2an9Lq+rGqnoUOLvVlSRNk2kPl6paDvwdcCtdqDwAXAHcX1WPt2rLgLltei5wW1v28Vb/WYPlo5YZr1ySNE2G0S22Nd2ZxM7A9sAz6bq1pl2So5IsSbJkxYoVw2iCJM1Iw+gWewVwU1WtqKrHgH8BXgLMbt1kAPOA5W16ObADQJu/FXDPYPmoZcYrX0VVnVZVC6pqwZw5c/rYN0kSwwmXW4F9kmzWrp3sB1wLXAy8ttU5HDivTS9st2nzv1pV1cpf30aT7QzsClwGXA7s2kafbUJ30X/hNOyXJKnZaPVV+lVVlyY5F/g28DhwJXAa8BXg7CR/3cpOb4ucDnwyyVLgXrqwoKquSXIOXTA9DhxTVT8BSPI24EK6kWhnVNU107V/kqQhhAtAVR0PHD+q+Ea6kV6j6z4CHDrOek4EThyj/Hzg/HVvqSRpbfgJfUlS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvDBdJUu8MF0lS7wwXSVLvVhsuSWYl+V6fG00yO8m5Sb6X5LokL06yTZJFSW5o/7dudZPk1CRLk3w3yZ4D6zm81b8hyeED5S9KclVb5tQk6bP9kqSJrTZcquonwPVJduxxu6cA/1pVPw+8ALgOOBa4qKp2BS5qtwEOAnZtf0cBHwNIsg1wPLA3sBdw/EggtTpHDix3YI9tlyStxkaTrLc1cE2Sy4AfjBRW1W+u6QaTbAW8FHhTW8ejwKNJDgb2bdXOBBYD7wQOBs6qqgIuaWc9z2l1F1XVvW29i4ADkywGtqyqS1r5WcAhwAVr2lZJ0tqZbLj8ZY/b3BlYAfxTkhcAVwBvB7arqttbnTuA7dr0XOC2geWXtbKJypeNUS5JmiaTuqBfVV8DbgY2btOXA99ey21uBOwJfKyqfonuTOjYwQrtLKXWcv2TluSoJEuSLFmxYsVUb06SNhiTCpckRwLnAv/QiuYCX1zLbS4DllXVpe32uXRhc2fr7qL9v6vNXw7sMLD8vFY2Ufm8McpXUVWnVdWCqlowZ86c
tdwdSdJokx2KfAzwEuBBgKq6AXj22mywqu4Abkvyc61oP+BaYCEwMuLrcOC8Nr0QOKyNGtsHeKB1n10I7J9k63Yhf3/gwjbvwST7tFFihw2sS5I0DSZ7zeXHVfXoyIjeJBuxbt1WfwB8OskmwI3Am+mC7pwkRwC3AK9rdc8HXgksBX7Y6lJV9yZ5H10XHcB7Ry7uA0cDnwA2pbuQ78V8SZpGkw2XryV5F7Bpkl+ne/H+0tputKq+AywYY9Z+Y9QtujOnsdZzBnDGGOVLgD3Wtn2SpHUz2W6xY+lGeF0FvJXubOLdU9UoSdL6bVJnLlX1RJIzgUvpusOub2cUkiStYlLhkuRVwMeB/wYC7JzkrVXltQxJ0iome83lg8DLqmopQJLnAV/BC+WSpDFM9prLQyPB0twIPDQF7ZEkzQATnrkkeU2bXJLkfOAcumsuh/LTIcCSJK1kdd1irx6YvhP4tTa9gu4zJJIkrWLCcKmqN09XQyRJM8dkR4vtTPep+vmDy6zNV+5Lkma+yY4W+yJwOt2n8p+YstZIkmaEyYbLI1V16pS2RJI0Y0w2XE5Jcjzwb8CPRwqram1/00WSNINNNlyeD/wu8HJ+2i1W7bYkSSuZbLgcCjy3/d69JEkTmuwn9K8GZk9hOyRJM8hkz1xmA99LcjkrX3NxKLIkaRWTDZfjp7QVkqQZZbK/5/K1qW6IJGnmmOwn9B+iGx0GsAmwMfCDqtpyqhomSVp/TfbMZYuR6SQBDgb2mapGSZLWb5MdLfak6nwROKD/5kiSZoLJdou9ZuDm04AFwCNT0iJJ0npvsqPFBn/X5XHgZrquMUmSVjHZay7+roskadJW9zPH75lgdlXV+3pujyRpBljdmcsPxih7JnAE8CzAcJEkrWJ1P3P8wZHpJFsAbwfeDJwNfHC85SRJG7bVXnNJsg3wJ8AbgTOBPavqvqlumCRp/bW6ay4fAF4DnAY8v6oenpZWSZLWa6v7EOWfAtsD7wa+n+TB9vdQkgenvnmSpPXR6q65rPEn+CVJMjwkSb0bWrgkmZXkyiRfbrd3TnJpkqVJ/jnJJq386e320jZ//sA6jmvl1yc5YKD8wFa2NMmx075zkrSBG+aZy9uB6wZuvx84uap2Ae6j+ywN7f99rfzkVo8kuwGvB3YHDgQ+2gJrFvAR4CBgN+ANra4kaZoMJVySzANeBfxjux3g5cC5rcqZwCFt+uB2mzZ/v4Gv/T+7qn5cVTcBS4G92t/Sqrqxqh6l+0yO34MmSdNoWGcuHwLeATzRbj8LuL+qHm+3lwFz2/Rc4DaANv+BVv/J8lHLjFcuSZom0x4uSX4DuKuqrpjubY/RlqOSLEmyZMWKFcNujiTNGMM4c3kJ8JtJbqbrsno5cAowO8nI0Oh5wPI2vRzYAaDN3wq4Z7B81DLjla+iqk6rqgVVtWDOnDnrvmeSJGAI4VJVx1XVvKqaT3dB/qtV9UbgYuC1rdrhwHltemG7TZv/1aqqVv76NppsZ2BX4DLgcmDXNvpsk7aNhdOwa5KkZrI/FjYd3gmcneSvgSuB01v56cAnkywF7qULC6rqmiTnANfS/YDZMVX1E4AkbwMuBGYBZ1TVNdO6J5K0gRtquFTVYmBxm76RbqTX6DqPAIeOs/yJwIljlJ8PnN9jUyVJa8BP6EuSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6N+3hkmSHJBcnuTbJNUne3sq3SbIoyQ3t/9atPElOTbI0yXeT7DmwrsNb/RuSHD5Q/qIkV7VlTk2S6d5PSdqQDePM5XHgT6tqN2Af4JgkuwHHAhdV1a7ARe02wEHA
ru3vKOBj0IURcDywN7AXcPxIILU6Rw4sd+A07JckqZn2cKmq26vq2236IeA6YC5wMHBmq3YmcEibPhg4qzqXALOTPAc4AFhUVfdW1X3AIuDANm/Lqrqkqgo4a2BdkqRpMNRrLknmA78EXApsV1W3t1l3ANu16bnAbQOLLWtlE5UvG6NckjRNhhYuSTYHPg/8UVU9ODivnXHUNLThqCRLkixZsWLFVG9OkjYYQwmXJBvTBcunq+pfWvGdrUuL9v+uVr4c2GFg8XmtbKLyeWOUr6KqTquqBVW1YM6cOeu2U5KkJw1jtFiA04HrqurvB2YtBEZGfB0OnDdQflgbNbYP8EDrPrsQ2D/J1u1C/v7AhW3eg0n2ads6bGBdkqRpsNEQtvkS4HeBq5J8p5W9CzgJOCfJEcAtwOvavPOBVwJLgR8CbwaoqnuTvA+4vNV7b1Xd26aPBj4BbApc0P4kSdNk2sOlqr4BjPe5k/3GqF/AMeOs6wzgjDHKlwB7rEMzJUnrwE/oS5J6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknpnuEiSeme4SJJ6Z7hIknq30bAbIGlmmH/sV4bdhBnl5pNeNewmrBPPXCRJvTNcJEm9M1wkSb0zXCRJvTNcJEm9M1wkSb0zXCRJvTNcJEm9M1wkSb0zXCRJvTNcJEm9m7HhkuTAJNcnWZrk2GG3R5I2JDMyXJLMAj4CHATsBrwhyW7DbZUkbThmZLgAewFLq+rGqnoUOBs4eMhtkqQNxkz9yv25wG0Dt5cBe4+ulOQo4Kh28+Ek109D2zYU2wJ3D7sRE8n7h90CDclT/rEJ69Xjc6exCmdquExKVZ0GnDbsdsxESZZU1YJht0Mazcfm9Jip3WLLgR0Gbs9rZZKkaTBTw+VyYNckOyfZBHg9sHDIbZKkDcaM7BarqseTvA24EJgFnFFV1wy5WRsauxv1VOVjcxqkqobdBknSDDNTu8UkSUNkuEiSeme4SJJ6NyMv6Gt6Jfl5um9AmNuKlgMLq+q64bVK0jB55qJ1kuSddF+vE+Cy9hfgs35hqJ7Kkrx52G2YyRwtpnWS5L+A3avqsVHlmwDXVNWuw2mZNLEkt1bVjsNux0xlt5jW1RPA9sAto8qf0+ZJQ5Pku+PNArabzrZsaAwXras/Ai5KcgM//bLQHYFdgLcNq1FSsx1wAHDfqPIA35r+5mw4DBetk6r61yQ/S/czB4MX9C+vqp8Mr2USAF8GNq+q74yekWTxtLdmA+I1F0lS7xwtJknqneEiSeqd4SJNoSQ/k+TsJP+d5Iok57drVGPVnZ3k6Emu9+F+Wyr1y3CRpkiSAF8AFlfV86rqRcBxjD8EdjYwqXCRnuoMF2nqvAx4rKo+PlJQVf8JXJnkoiTfTnJVkoPb7JOA5yX5TpIPJNl8nHpPSucDSa5udX67lT8tyUeTfC/JonbG9Nrp2GkJHIosTaU9gCvGKH8E+K2qejDJtsAlSRYCxwJ7VNULAZJsNFa9WnmI52uAFwIvALYFLk/ydeAlwHxgN+DZwHXAGf3vojQ2w0WafgH+JslL6b7FYC5jd5WNV++OgTq/Any2faboziRfA365lX+uqp4A7khy8ZTtjTQGw0WaOtcAY3VFvRGYA7yoqh5LcjPwjHWoJz3leM1FmjpfBZ6e5KiRgiS/COwE3NUC42XtNsBDwBYDy281Tr1B/w78dpJZSeYAL6X7ZupvAv+jXXvZDti3532TJuSZizRFqqqS/BbwofbTBI8ANwMnAKcmuQpYAnyv1b8nyTeTXA1cALwf+NLoeqN8AXgx8J9AAe+oqjuSfB7YD7iW7jvfvg08MFX7Ko3m179IM1SSzavq4STPojubeUlV3bG65aQ+eOYizVxfTjIb2AR4n8Gi6eSZiySpd17QlyT1znCRJPXOcJEk9c5wkST1
znCRJPXOcJEk9e7/A3TJFvTwTwZSAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Count how many samples fall in each SeriousDlqin2yrs class\n",
+ "# Hint: value_counts\n",
+ "data_counts = data['SeriousDlqin2yrs'].value_counts()\n",
+ "\n",
+ "# Draw the per-class counts as a bar chart\n",
+ "# Hint: a pandas object can be plotted directly with plot(kind='bar')\n",
+ "import matplotlib.pyplot as plt\n",
+ "fig, ax = plt.subplots()\n",
+ "data_counts.plot(kind='bar', ax=ax)\n",
+ "ax.set_title('Positive and negative sample distribution')\n",
+ "ax.set_ylabel('Number')\n",
+ "ax.set_xlabel('Catalog')\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 2.数据预处理之离散化"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Discretize age into 3-year buckets.\n",
+ "# Hint: compute the bucket edges first, then bin with the pandas cut function.\n",
+ "low = int(data['age'].min())\n",
+ "high = int(data['age'].max())\n",
+ "step = 3  # bucket width in years\n",
+ "bins = list(range(low, max(high, low + 1) + step, step))  # last edge lands at or past high, so the oldest rows keep a bucket\n",
+ "assert bins[0] <= data['age'].min() and data['age'].max() <= bins[-1], 'bin edges must span every age'\n",
+ "# include_lowest=True closes the first interval on the left; without it rows at the minimum age become NaN\n",
+ "data['age_group'] = pd.cut(data['age'], bins, include_lowest=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 3.数据预处理之独热向量编码"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " SeriousDlqin2yrs | \n",
+ " RevolvingUtilizationOfUnsecuredLines | \n",
+ " age | \n",
+ " NumberOfTime30-59DaysPastDueNotWorse | \n",
+ " DebtRatio | \n",
+ " MonthlyIncome | \n",
+ " NumberOfOpenCreditLinesAndLoans | \n",
+ " NumberOfTimes90DaysLate | \n",
+ " NumberRealEstateLoansOrLines | \n",
+ " NumberOfTime60-89DaysPastDueNotWorse | \n",
+ " NumberOfDependents | \n",
+ " age_group | \n",
+ " age_group_(0, 3] | \n",
+ " age_group_(3, 6] | \n",
+ " age_group_(6, 9] | \n",
+ " age_group_(9, 12] | \n",
+ " age_group_(12, 15] | \n",
+ " age_group_(15, 18] | \n",
+ " age_group_(18, 21] | \n",
+ " age_group_(21, 24] | \n",
+ " age_group_(24, 27] | \n",
+ " age_group_(27, 30] | \n",
+ " age_group_(30, 33] | \n",
+ " age_group_(33, 36] | \n",
+ " age_group_(36, 39] | \n",
+ " age_group_(39, 42] | \n",
+ " age_group_(42, 45] | \n",
+ " age_group_(45, 48] | \n",
+ " age_group_(48, 51] | \n",
+ " age_group_(51, 54] | \n",
+ " age_group_(54, 57] | \n",
+ " age_group_(57, 60] | \n",
+ " age_group_(60, 63] | \n",
+ " age_group_(63, 66] | \n",
+ " age_group_(66, 69] | \n",
+ " age_group_(69, 72] | \n",
+ " age_group_(72, 75] | \n",
+ " age_group_(75, 78] | \n",
+ " age_group_(78, 81] | \n",
+ " age_group_(81, 84] | \n",
+ " age_group_(84, 87] | \n",
+ " age_group_(87, 90] | \n",
+ " age_group_(90, 93] | \n",
+ " age_group_(93, 96] | \n",
+ " age_group_(96, 99] | \n",
+ " age_group_(99, 102] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0.766127 | \n",
+ " 45.0 | \n",
+ " 2.0 | \n",
+ " 0.802982 | \n",
+ " 9120.0 | \n",
+ " 13.0 | \n",
+ " 0.0 | \n",
+ " 6.0 | \n",
+ " 0.0 | \n",
+ " 2.0 | \n",
+ " (42, 45] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.957151 | \n",
+ " 40.0 | \n",
+ " 0.0 | \n",
+ " 0.121876 | \n",
+ " 2600.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " (39, 42] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0.658180 | \n",
+ " 38.0 | \n",
+ " 1.0 | \n",
+ " 0.085113 | \n",
+ " 3042.0 | \n",
+ " 2.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (36, 39] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0.233810 | \n",
+ " 30.0 | \n",
+ " 0.0 | \n",
+ " 0.036050 | \n",
+ " 3300.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (27, 30] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0.907239 | \n",
+ " 49.0 | \n",
+ " 1.0 | \n",
+ " 0.024926 | \n",
+ " 63588.0 | \n",
+ " 7.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (48, 51] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 112910 | \n",
+ " 0 | \n",
+ " 0.385742 | \n",
+ " 50.0 | \n",
+ " 0.0 | \n",
+ " 0.404293 | \n",
+ " 3400.0 | \n",
+ " 7.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (48, 51] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 112911 | \n",
+ " 0 | \n",
+ " 0.040674 | \n",
+ " 74.0 | \n",
+ " 0.0 | \n",
+ " 0.225131 | \n",
+ " 2100.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (72, 75] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 112912 | \n",
+ " 0 | \n",
+ " 0.299745 | \n",
+ " 44.0 | \n",
+ " 0.0 | \n",
+ " 0.716562 | \n",
+ " 5584.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 2.0 | \n",
+ " (42, 45] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 112913 | \n",
+ " 0 | \n",
+ " 0.000000 | \n",
+ " 30.0 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 5716.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (27, 30] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 112914 | \n",
+ " 0 | \n",
+ " 0.850283 | \n",
+ " 64.0 | \n",
+ " 0.0 | \n",
+ " 0.249908 | \n",
+ " 8158.0 | \n",
+ " 8.0 | \n",
+ " 0.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " (63, 66] | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
108648 rows × 46 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " SeriousDlqin2yrs RevolvingUtilizationOfUnsecuredLines age \\\n",
+ "0 1 0.766127 45.0 \n",
+ "1 0 0.957151 40.0 \n",
+ "2 0 0.658180 38.0 \n",
+ "3 0 0.233810 30.0 \n",
+ "4 0 0.907239 49.0 \n",
+ "... ... ... ... \n",
+ "112910 0 0.385742 50.0 \n",
+ "112911 0 0.040674 74.0 \n",
+ "112912 0 0.299745 44.0 \n",
+ "112913 0 0.000000 30.0 \n",
+ "112914 0 0.850283 64.0 \n",
+ "\n",
+ " NumberOfTime30-59DaysPastDueNotWorse DebtRatio MonthlyIncome \\\n",
+ "0 2.0 0.802982 9120.0 \n",
+ "1 0.0 0.121876 2600.0 \n",
+ "2 1.0 0.085113 3042.0 \n",
+ "3 0.0 0.036050 3300.0 \n",
+ "4 1.0 0.024926 63588.0 \n",
+ "... ... ... ... \n",
+ "112910 0.0 0.404293 3400.0 \n",
+ "112911 0.0 0.225131 2100.0 \n",
+ "112912 0.0 0.716562 5584.0 \n",
+ "112913 0.0 0.000000 5716.0 \n",
+ "112914 0.0 0.249908 8158.0 \n",
+ "\n",
+ " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n",
+ "0 13.0 0.0 \n",
+ "1 4.0 0.0 \n",
+ "2 2.0 1.0 \n",
+ "3 5.0 0.0 \n",
+ "4 7.0 0.0 \n",
+ "... ... ... \n",
+ "112910 7.0 0.0 \n",
+ "112911 4.0 0.0 \n",
+ "112912 4.0 0.0 \n",
+ "112913 4.0 0.0 \n",
+ "112914 8.0 0.0 \n",
+ "\n",
+ " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n",
+ "0 6.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 1.0 0.0 \n",
+ "... ... ... \n",
+ "112910 0.0 0.0 \n",
+ "112911 1.0 0.0 \n",
+ "112912 1.0 0.0 \n",
+ "112913 0.0 0.0 \n",
+ "112914 2.0 0.0 \n",
+ "\n",
+ " NumberOfDependents age_group age_group_(0, 3] age_group_(3, 6] \\\n",
+ "0 2.0 (42, 45] 0 0 \n",
+ "1 1.0 (39, 42] 0 0 \n",
+ "2 0.0 (36, 39] 0 0 \n",
+ "3 0.0 (27, 30] 0 0 \n",
+ "4 0.0 (48, 51] 0 0 \n",
+ "... ... ... ... ... \n",
+ "112910 0.0 (48, 51] 0 0 \n",
+ "112911 0.0 (72, 75] 0 0 \n",
+ "112912 2.0 (42, 45] 0 0 \n",
+ "112913 0.0 (27, 30] 0 0 \n",
+ "112914 0.0 (63, 66] 0 0 \n",
+ "\n",
+ " age_group_(6, 9] age_group_(9, 12] age_group_(12, 15] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(15, 18] age_group_(18, 21] age_group_(21, 24] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(24, 27] age_group_(27, 30] age_group_(30, 33] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 1 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 1 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(33, 36] age_group_(36, 39] age_group_(39, 42] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 1 \n",
+ "2 0 1 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(42, 45] age_group_(45, 48] age_group_(48, 51] \\\n",
+ "0 1 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 1 \n",
+ "... ... ... ... \n",
+ "112910 0 0 1 \n",
+ "112911 0 0 0 \n",
+ "112912 1 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(51, 54] age_group_(54, 57] age_group_(57, 60] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(60, 63] age_group_(63, 66] age_group_(66, 69] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 1 0 \n",
+ "\n",
+ " age_group_(69, 72] age_group_(72, 75] age_group_(75, 78] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 1 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(78, 81] age_group_(81, 84] age_group_(84, 87] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(87, 90] age_group_(90, 93] age_group_(93, 96] \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "... ... ... ... \n",
+ "112910 0 0 0 \n",
+ "112911 0 0 0 \n",
+ "112912 0 0 0 \n",
+ "112913 0 0 0 \n",
+ "112914 0 0 0 \n",
+ "\n",
+ " age_group_(96, 99] age_group_(99, 102] \n",
+ "0 0 0 \n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 \n",
+ "... ... ... \n",
+ "112910 0 0 \n",
+ "112911 0 0 \n",
+ "112912 0 0 \n",
+ "112913 0 0 \n",
+ "112914 0 0 \n",
+ "\n",
+ "[108648 rows x 46 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 请对上述分箱后的年龄段进行独热向量编码\n",
+ "# 提示:使用pandas的get_dummies完成\n",
+ "# One-hot encode the binned age ranges: every bin becomes an `age_group_<interval>` indicator column.\n",
+ "one_hot_encoded = pd.get_dummies(data['age_group'], prefix='age_group')\n",
+ "\n",
+ "# Append the indicator columns to the frame. Any indicator columns left over from an earlier\n",
+ "# run of this cell are dropped first, so re-running the cell does not duplicate them\n",
+ "# (errors='ignore' makes the drop a no-op on the first run).\n",
+ "data = pd.concat([data.drop(columns=one_hot_encoded.columns, errors='ignore'), one_hot_encoded], axis=1)\n",
+ "\n",
+ "data\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 4.数据预处理之幅度缩放"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# 请对连续值特征进行幅度缩放\n",
+ "# 提示:可以使用StandardScaler等幅度缩放器进行处理\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "# Learn the per-feature mean and standard deviation from the training split only.\n",
+ "sc = StandardScaler()\n",
+ "X_train_std = sc.fit_transform(X_train)\n",
+ "# Apply the training statistics to the test split. Refitting the scaler here would\n",
+ "# standardise the two splits differently and leak test-set information into preprocessing.\n",
+ "X_test_std = sc.transform(X_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 5.使用logistic regression建模,并且输出一下系数,分析重要度。 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "系数: [[-0.01427294 -0.36429202 1.72869067 0.31207913 -0.11534462 -0.09187206\n",
+ " 1.68994946 -0.19639706 -3.2487085 0.11638382]]\n",
+ "截距: [-2.85903863]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 提示:fit建模,建完模之后可以取出coef属性\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "import seaborn as sns\n",
+ "\n",
+ "# Fit an L2-penalised logistic regression on the standardised training features.\n",
+ "# In scikit-learn, C is the inverse of the regularisation strength, so C=1000.0 is a weak penalty.\n",
+ "lr = LogisticRegression(penalty='l2', C=1000.0, solver='liblinear', random_state=0)\n",
+ "lr.fit(X_train_std, y_train)\n",
+ "\n",
+ "# Keep the fitted weights and bias in their own variables, then print them.\n",
+ "coefficients, intercept = lr.coef_, lr.intercept_\n",
+ "print(\"系数:\", coefficients)\n",
+ "print(\"截距:\", intercept)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 6.使用网格搜索交叉验证进行调参\n",
+ "调整penalty和C参数,其中penalty候选为\"l1\"和\"l2\",C的候选为[1,10,100,500]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "最佳参数: {'C': 1, 'penalty': 'l1'}\n",
+ "最佳模型: LogisticRegression(C=1, penalty='l1', random_state=0, solver='liblinear')\n",
+ "在测试集上的准确率: 0.9339162448228255\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 提示:先按照上面要求准备好网格字典,再使用GridSearchCV进行调参\n",
+ "# 设置参数候选\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "\n",
+ "# Candidate hyper-parameters: both penalty types crossed with four values of C.\n",
+ "param_grid = dict(penalty=['l1', 'l2'], C=[1, 10, 100, 500])\n",
+ "\n",
+ "# 5-fold cross-validated search over the grid, with `lr` as the base estimator.\n",
+ "grid_search = GridSearchCV(estimator=lr, param_grid=param_grid, cv=5)\n",
+ "grid_search.fit(X_train_std, y_train)\n",
+ "\n",
+ "# Report the winning parameter combination and the estimator built from it.\n",
+ "print(\"最佳参数:\", grid_search.best_params_)\n",
+ "best_model = grid_search.best_estimator_\n",
+ "print(\"最佳模型:\", best_model)\n",
+ "\n",
+ "# Score the selected model on the held-out test split (for classifiers, `score` is mean accuracy).\n",
+ "accuracy = best_model.score(X_test_std, y_test)\n",
+ "print(\"在测试集上的准确率:\", accuracy)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 7.在测试集上进行预测,计算 查准率/查全率/auc/混淆矩阵/f1值 等测试指标"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "准确率: 0.9339162448228255\n",
+ "查全率: 0.046061722708429294\n",
+ "AUC: 0.6979862258129022\n",
+ "F1值: 0.08496176720475784\n",
+ "混淆矩阵:\n",
+ "[[30341 83]\n",
+ " [ 2071 100]]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAEICAYAAADIsubvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAAsTAAALEwEAmpwYAAAfZ0lEQVR4nO3de5xWZb338c+XYTgocgblZFAiRqVohGJtHyITdO82dlK015aXubNUyk5PabUfynJXz9NpdxC3JVvtIGpaYpmIpKkVIioqoCSKB5CDnEGUw8zv+WNdA4txZu77hrmZe2a+79drvWbdv7XWta574PWb61rXWtdSRGBmZpkOLV0BM7NK4qRoZpbjpGhmluOkaGaW46RoZpbjpGhmluOkaGYtQlIXSfMlPS5psaRvpPgwSQ9JWibpJkmdUrxz+rwsbR+aK+vyFF8qaUIuPjHFlkm6rKh6VdJ9in17V8XQIdUtXQ0rwT+eOKSlq2AleJ1X2Rk7dCBlTHjvobF+Q01R+z7yxI7ZETGxoW2SBBwaEdskVQMPApcCnwdui4iZkq4GHo+I6ZIuBo6NiE9Jmgx8MCLOljQSuBEYAwwE7gGOTqf5B/B+YAXwMHBORCxpqs4di/pmB8nQIdXMnz2kpathJZgwcFRLV8FK8FDMPeAy1m2o4aHZg4vat3rAs30b2xZZi2xb3a5pCWA8cG6KXw98HZgOTErrAL8FfpoS6yRgZkTsAJZLWkaWIAGWRcRzAJJmpn2bTIruPptZiYKaqC1qKURSlaSFwFpgDvAssCkidqddVgCD0vog4CWAtH0z0Ccfr3dMY/EmVVRL0cwqXwC1FH3Zra+kBbnP10TENXvKiqgBRknqCfwOOKa56rm/nBTNrGS1FG4FJusiYnShnSJik6R7gbFAT0kdU2twMLAy7bYSGAKskNQR6AGsz8Xr5I9pLN4od5/NrCRBsCtqi1qaIqlfaiEiqSvZgMhTwL3AR9JuU4Db0/qs9Jm0/c/puuQsYHIanR4GDAfmkw2sDE+j2Z2AyWnfJrmlaGYlCaCm+O5zUwYA10uqImug3RwRf5C0BJgp6VvAY8C1af9rgV+mgZQNZEmOiFgs6WayAZTdwCWpW46kqcBsoAqYERGLC1XKSdHMSlbCNcVGRcQTwPENxJ9j7+hxPv468NFGyroSuLKB+J3AnaXUy0nRzEoSQE0F3d/c3JwUzaxkRQ+ztEJOimZWkiCa65piRXJSNLOSRMCutpsTnRTNrFSihgN6fLqiOSmaWUkCqHVL0cxsL7cUzcyS7OZtJ0UzMyBLirui7T4h7KRoZiUJRE0bnjbBSdHMSlYb7j6bmQG+pmhmVo+o8TVFM7NMNvO2k6KZGQARYmdUtXQ1ysZJ0cxKVutrimZmmWygxd1nM7PEAy1mZnt4oMXMrJ4a37xtZpYJxK5ou6mj7X4zMysLD7SYmeUEcvfZzCzPAy1mZkkEviXHzKxONtDix/zMzPZoywMtbfebmVlZBKI2iluaImmIpHslLZG0WNKlKf51SSslLUzLGbljLpe0TNJSSRNy8YkptkzSZbn4MEkPpfhNkjoV+n5OimZWsho6FLUUsBv4QkSMBE4CLpE0Mm37YUSMSsudAGnbZOBtwETgKklVkqqAnwGnAyOBc3LlfDeVdRSwEbigUKWcFM2sJNl7nzsUtTRZTsSqiHg0rW8FngIGNXHIJGBmROyIiOXAMmBMWpZFxHMRsROYCUySJGA88Nt0/PXAmYW+n5OimZVI1BS5FF2iNBQ4HngohaZKekLSDEm9UmwQ8FLusBUp1li8D7ApInbXizfJSdHMSpK94rSqqAXoK2lBbrmwfnmSugG3Ap+NiC3AdOAtwChgFfD9g/bl8OizmZUoQgW7xjnrImJ0YxslVZMlxF9HxG1Z+bEmt/3nwB/Sx5XAkNzhg1OMRuLrgZ6SOqbWYn7/RrmlaGYlq4kORS1NSdf8rgWeiogf5OIDcrt9
EFiU1mcBkyV1ljQMGA7MBx4GhqeR5k5kgzGzIiKAe4GPpOOnALcX+m5uKZpZSbL5FJvl2ed3A/8GPClpYYp9hWz0eFQ61fPAJwEiYrGkm4ElZCPXl0REDYCkqcBsoAqYERGLU3lfBmZK+hbwGFkSbpKTopmVqHlm3o6IB6HB7HpnE8dcCVzZQPzOho6LiOfIRqeL5qRoZiXJbsnxLDlmZoCffTYzewNPHWZmlmRTh7n7bGa2h68pmpkl2Sw57j63aztfF1/40FHs2tmBmt3wT/+8mfP+9+oDKnPmT/pz1419qOoQXPStlYwet3XPtpoa+PTEo+kzYBffvGH5gVbfCvjgJ17h9HPXEyGWP92F739uCFP/cyVHH7sdBCuf68z3PjuE17e33cGFUmSP+bXdpFjWb9bYHGetTXXn4P/e8ixX37OU6XOWsuC+w3jqkUOKOva8MSPfEHvhH5257/ZeXHPv01z5m+f46eWDqanZu/33v+jHkOE7mqv61oQ+R+zizAvWMfX0o/nk+BFUdQjGTdrEf08byEXvH8FFp45g7cpq/vXj61q6qhVEzTJLTqUqW60LzHHWqkjQ9dBaAHbvEjW7hATPPNGVL37oKC6ZcDRfOefNrF9TXMP777N7MG7SRjp1Do44cicDh+5g6WNZkn3l5Wrmz+3O6eeuL9v3sX1VdQw6d6mlQ1XQuWst69dUs31bXasw6NwloA1fQ9sftaiopTUqZypvcI6zMp6vrGpq4KJTR3D2sW/n+FO2ctQ7tvOzrw7maz9fzs9m/4PTJm/guu8MKFwQsG5VNf0G7trzue+AXaxfXQ3A1dMG8e9fexm1zj+yrc761dX8dno/fvnwU9y4cDGvbq3i0b8cBsAXfvgiMx9fwpCjXuf2GX1buKaVo270uZilNSrnNcWG5jg7sYznK6uqKph+z1K2ba7iGxcMZcWzXXhhaRcuP/soAGproXf/LNH95r8O54E7egKwfk1HLjp1BABve9c2pn678Uk65s3pTs++uxl+7Gs8/rdu5f1CBkC3HrsZO2ELU058K9u2VPG1a55n/Ic28ufbevH9zx1Jhw7Bxd9ayf/6103cfVPvlq5uxWitXeNitPhAS5pf7UKAIwe1eHUK6tajhuNO3sZf/9SDN414nR/d8cwb9jn30jWce2k2+9F5Y0Yy/Z6l+2zvO2AXr7xcvefzulXV9DliF/Pu7sG8u7vz8NyR7Nwhtm+t4rtTj+TLP32xvF+qHTv+n7ax+qVObN6Q/d/76509GDn6Vf58WzavaW2tuO/2npx18VonxaTuHS1tVTnTfVNzn+0REddExOiIGN2vT2WO7m1aX8W2zVnddrwmHr3/MN488jU2re/IkgXZtcDdu+D5pV2KKu+k07Zw3+292LlDrH6xEyuXd2bE8dv5+FdW8etHlnDD/CVcPv0FjnvPVifEMlu7spq3nvAqnbvWAsGo92zjxWWdGTi0bqArGDthCy89W9y/bXsQwO7oUNTSGpWzabZnjjOyZDgZOLeM5yubDWuq+d6lR1JbK2pr4ZQPbGLshC30H7SLq/5jEK9uraJmd3Zrx9ARrxcsb+iI1znlA5u4cNwxVFUFU/9zBVWV+fegzVv62KE88Mee/Gz2P6jZLZYt6sqfftWH797yLId0q0WC55Z04SeXDW7pqlaUttx9VjYPY5kKz15N+CP2znH2hil/8kYf1yXmzx7S1C5WYSYMHNXSVbASPBRz2RIbDqjv2/uY/vG+GR8uat/fvvvqR5qaebsSlfUiXmNznJlZ69WMk8xWpMof2TCzitOWB1qcFM2sJJ5k1swsJxC7a9vuQIuTopmVzNcUzczqhLvPZmZ7+JqimVk9TopmZkkgajzQYma2lwdazMyS8ECLmdm+og0nxbZ7YcDMyiSbT7GYpclSpCGS7pW0RNJiSZemeG9JcyQ9k372SnFJ+nF659MTkk7IlTUl7f+MpCm5+DslPZmO+bGkgtncSdHMShahopYCdgNf
iIiRwEnAJek9TpcBcyNiODA3fYbsfU/D03IhMB2yJApMI5vZfwwwrS6Rpn0+kTtuYqFKOSmaWUkioKZWRS1NlxOrIuLRtL4VeIrsNSaTgOvTbtcDZ6b1ScANkZkH9JQ0AJgAzImIDRGxEZgDTEzbukfEvMjmSLwhV1ajfE3RzErW3KPPkoYCxwMPAYdHxKq0aTVweFpv6L1PgwrEVzQQb5KTopmVJChpoKWvpAW5z9dExDX5HSR1A24FPhsRW/KX/SIiJJVvJuwGOCmaWYlKenHVuqZm3pZUTZYQfx0Rt6XwGkkDImJV6gKvTfHG3vu0EhhXL35fig9uYP8m+ZqimZUsorilKWkk+FrgqYj4QW7TLKBuBHkKcHsufl4ahT4J2Jy62bOB0yT1SgMspwGz07Ytkk5K5zovV1aj3FI0s5I1032K7wb+DXhS0sIU+wrwHeBmSRcALwBnpW13AmcAy4DtwPlZXWKDpG+SvSwP4IqI2JDWLwauA7oCf0pLk5wUzawk2ejzgXcyI+JBaHTE5n0N7B/AJY2UNQOY0UB8AfD2UurlpGhmJSvjS0BbnJOimZWsLT/m56RoZiUJinpapdVyUjSzkrXh3rOTopmVKCAKPMLXmjkpmlnJ3H02M8tpl6PPkn5CE5cOIuIzZamRmVW0Ep99bnWaaikuaGKbmbVXAbTHpBgR1+c/SzokIraXv0pmVunacve54LM6ksZKWgI8nT4fJ+mqstfMzCqUiNriltaomAcYf0Q2s+16gIh4HDiljHUys0oXRS6tUFGjzxHxUr33vdSUpzpmVvGi/Q601HlJ0slApAkhLyV7l4KZtVettBVYjGK6z58im65nEPAyMIpGpu8xs/ZCRS6tT8GWYkSsAz52EOpiZq1FbUtXoHyKGX1+s6Q7JL0iaa2k2yW9+WBUzswqUN19isUsrVAx3effADcDA4CBwC3AjeWslJlVtuZ4R0ulKiYpHhIRv4yI3Wn5FdCl3BUzswrWHm/JkdQ7rf5J0mXATLKveTbZC2TMrL1qpV3jYjQ10PIIWRKs+/afzG0L4PJyVcrMKtvBfT39wdXUs8/DDmZFzKyVCEErfYSvGEU90SLp7cBIctcSI+KGclXKzCpce2wp1pE0DRhHlhTvBE4HHgScFM3aqzacFIsZff4I2YupV0fE+cBxQI+y1srMKlt7HH3OeS0iaiXtltQdWAsMKXO9zKxStddJZnMWSOoJ/JxsRHob8PdyVsrMKltbHn0u2H2OiIsjYlNEXA28H5iSutFm1l41U/dZ0oz0+PCiXOzrklZKWpiWM3LbLpe0TNJSSRNy8YkptizdV10XHybpoRS/SVKnQnVqNClKOqH+AvQGOqZ1M2unFMUtRbgOmNhA/IcRMSotdwJIGglMBt6WjrlKUpWkKuBnZIPAI4Fz0r4A301lHQVsBC4oVKGmus/fb2JbAOMLFV6qZ5Z054zjT2vuYq2s1rZ0BawlNNM1xYi4X9LQInefBMyMiB3AcknLgDFp27KIeA5A0kxgkqSnyPLUuWmf64GvA9ObOklTN2+/t8iKmll7cnBGlqdKOo/sraJfiIiNZHO6zsvtsyLFAF6qFz8R6ANsiojdDezfqGJuyTEz21fx1xT7SlqQWy4sovTpwFvIJrReRdO91mZX1BMtZmZ5Kn6S2XURMbqUsiNizZ7zSD8H/pA+rmTf2wEHpxiNxNcDPSV1TK3F/P6NckvRzEpXxpu3JQ3IffwgUDcyPQuYLKmzpGHAcGA+8DAwPI00dyIbjJkVEQHcS/YACsAU4PZC5y/mMT+RvY7gzRFxhaQjgSMiYn5R39DM2pQSRpYLlyXdSPYYcV9JK4BpwDhJo8jS6vOkGboiYrGkm4ElwG7gkoioSeVMBWYDVcCMiFicTvFlYKakbwGPAdcWqlMx3eeryN7IMB64AtgK3Aq8q4hjzawtar7R53MaCDeauCLiSuDKBuJ30sA8r2lEekz9eFOKSYonRsQJkh5LJ9lY
zA2QZtaGteEnWopJirvSzZEBIKkfbfpdXmZWSFt+zK+YpPhj4HdAf0lXkl20/FpZa2VmlStKGn1udYp57/OvJT1CNn2YgDMj4qmy18zMKld7bimm0ebtwB35WES8WM6KmVkFa89JEfgje19g1QUYBiwleyjbzNqhdn1NMSLekf+cZsi5uGw1MjNrQSU/5hcRj0o6sRyVMbNWoj23FCV9PvexA3AC8HLZamRmla29jz4Dh+XWd5NdY7y1PNUxs1ahvbYU003bh0XEFw9Sfcyswol2OtBSN92OpHcfzAqZWSvQHpMi2ZQ8JwALJc0CbgFerdsYEbeVuW5mVomacZacSlTMNcUuZJM1jmfv/YoBOCmatVftdKClfxp5XsTeZFinDf+dMLNC2mtLsQroxr7JsE4b/pWYWUFtOAM0lRRXRcQVB60mZtY6HJy3+bWYppJi80yta2ZtTnvtPr/voNXCzFqX9pgUI2LDwayImbUe7f0xPzOzvdrxNUUzszcQbXvAwUnRzErnlqKZ2V7tdfTZzKxhTopmZoknmTUzq6cNtxQ7tHQFzKz1URS3FCxHmiFpraRFuVhvSXMkPZN+9kpxSfqxpGWSnkgv0as7Zkra/xlJU3Lxd0p6Mh3zY0kFB86dFM2sdFHkUth1wMR6scuAuRExHJibPgOcDgxPy4XAdMiSKDANOBEYA0yrS6Rpn0/kjqt/rjdwUjSzkjVXSzEi7gfqPz03Cbg+rV8PnJmL3xCZeUBPSQOACcCciNgQERuBOcDEtK17RMyLiABuyJXVKF9TNLPSBOWeZPbwiFiV1lcDh6f1QcBLuf1WpFhT8RUNxJvkpGhmJSnxxVV9JS3Ifb4mIq4p9uCICOng3hXppGhmpSs+Ta2LiNEllr5G0oCIWJW6wGtTfCUwJLff4BRbCYyrF78vxQc3sH+TfE3RzEqmiKKW/TQLqBtBngLcnoufl0ahTwI2p272bOA0Sb3SAMtpwOy0bYukk9Ko83m5shrllqKZlaYZZ8mRdCNZK6+vpBVko8jfAW6WdAHwAnBW2v1O4AxgGbAdOB+yaQ4lfRN4OO13RW7qw4vJRri7An9KS5OcFM2sZM11lS8izmlk0xsmuU4jyJc0Us4MYEYD8QXA20upk5OimZXMj/mZmeW14cf8nBTNrDRF3pjdWjkpmlnpnBTNzDIl3rzd6jgpmlnJVNt2s6KTopmVxm/zs/r6Hv46X/jmInr12UkE3HXrYG6/8Ui6dd/F5d99gv4DX2Pty1359peOZdvWaj583vOMOyN7vr2qKhgy7FXOGT+ObVuq+ey0xYw55RU2bejExR89uYW/Wfvw+R+8yImnbmXTuo58cvwIAA7ruZuvXP0Chw/eyZoVnbjyk29i2+aOQHDRN19mzPgtvP5aB77/uSEse/KQlv0CFaAt35JTtsf8Gpo8sq2oqRG/+MHRfOrDJ/P588bwL2e/xJA3b+Os85ezcH5vPjHpPSyc35uPnv88ALfeMJRPTx7LpyeP5bqfDGfRI73YtqUagHvuGMh/XHJCE2ez5nb3Tb356seG7RM7a+paHnuwGx9/z1t57MFunD01e9z2XeO3MmjYDs5/9zH815cG8+lvF3x0tn1ovvkUK045n32+jiImdGyNNq7rzLNPdwfgte0deXH5ofTtt4OTxr3CPXcMBLJkN/a9a99w7LiJq7nvriP2fF70aC+2bq4+OBU3ABY91I2tG/ftJI2dsIV7bu4NwD0392bsxC0pvpl7ftsLEE8/eiiH9qihd/9dB7vKFae55lOsRGVLio1MHtnm9B/wGm8ZsZWnF/WgZ5+dbFzXGYCN6zrRs8/Offbt3KWGd568jr/OPbyhoqwF9eq7iw1rsz9OG9Z2pFffLPH1PWIXr7y894/Wuper6XNEO0+KAUQUt7RCLX5NUdKFZFOL06VDtxauTWm6dN3NV7/3ONd872hee7X+r1Jv+D9x4imvsGRhzz1dZ6tUIqLgqzzaNV9TLKOIuCYi
RkfE6E4durZ0dYpW1bGWr37vCe770wD+9ues5bdpfSd69d0BQK++O9i8odM+x5wyYTV/yXWdrXJsXFe9p1vcu/8uNq3P/sitW11Nv4F7W4Z9B+5i/er2/Uet7j5Fd58tJ/jstCW8tPxQfverN+2JzvtLP079wMsAnPqBl5l3X7892w7ptot3vHMjf7+v/0GvrRU27+7unHpWdrXn1LM28PfZ3VO8B6d+ZCMQHHPCq2zf0mFPN7vdKrbr7O5z+zFy1Cbe9y+rWP6Pbvxk5t8BuP6nR3HL/wzl8u8+yWlnrmTtquyWnDonv/cVHp3Xhx2vV+1T1pe+/QTHvnMj3Xvu4oa77udXV7+Fu39f8DUSdgAuu+oFjh27jR69d/OrBUv45fcP56af9uerV7/AxMkbWLsyuyUHYP7cw3jX+7bwP397mh3plhxrva3AYijKlM3zk0cCa4BpEXFtU8f0qO4fY/t+tCz1sfKoWfPGEXarXA/FXLbEhgO6YHpYz8Fx/CmXFrXvA3d86ZH9eB1BiypbS7GJySPNrJVryy1Fd5/NrDQB1LTdrOikaGYlc0vRzCyvlY4sF8NJ0cxK5paimVmdVjzZQzGcFM2sJALkgRYzs73ka4pmZom7z2Zmea33ueZiOCmaWcna8uizZ8kxs9I10yw5kp6X9KSkhZIWpFhvSXMkPZN+9kpxSfqxpGWSnpB0Qq6cKWn/ZyRNOZCv5qRoZqWJbPS5mKVI742IUbmJIy4D5kbEcGBu+gxwOjA8LRcC0yFLosA04ERgDDCtLpHuDydFMytdeV9cNQm4Pq1fD5yZi98QmXlAT0kDgAnAnIjYEBEbgTkcwPuhnBTNrGSKKGoB+kpakFsurFdUAHdLeiS37fCIWJXWVwN1LzUaBLyUO3ZFijUW3y8eaDGz0hU/+ryuwHyK74mIlZL6A3MkPb3vaSKkgzus45aimZUmgNoil0JFRaxMP9cCvyO7JrgmdYtJP+tmMl4J5Kc+H5xijcX3i5OimZVEFNd1LvTUi6RDJR1Wtw6cBiwCZgF1I8hTgNvT+izgvDQKfRKwOXWzZwOnSeqVBlhOS7H94u6zmZWutlnecXo48DtJkOWi30TEXZIeBm6WdAHwAnBW2v9O4AxgGbAdOB8gIjZI+ibwcNrviojY73fOOymaWWnqus8HWkzEc8BxDcTXA+9rIB7AJY2UNQOYceC1clI0s/3gCSHMzPKcFM3M6nhCCDOzvfw2PzOzffmaoplZnpOimVkSQK2ToplZ4oEWM7N9OSmamSUB1DTLY34VyUnRzEoUEE6KZmZ7uftsZpZ49NnMrB63FM3McpwUzcySCKipaelalI2TopmVzi1FM7McJ0Uzszrh0Wczsz0Cwjdvm5nl+DE/M7MkorlecVqRnBTNrHQeaDEz2yvcUjQzq+NJZs3M9vKEEGZmewUQfszPzCwJTzJrZraPcPfZzCynDbcUFRU0iiTpFeCFlq5HGfQF1rV0JawkbfXf7E0R0e9ACpB0F9nvpxjrImLigZzvYKuopNhWSVoQEaNbuh5WPP+btV8dWroCZmaVxEnRzCzHSfHguKalK2Al879ZO+VrimZmOW4pmpnlOCmWkaSJkpZKWibpspaujxUmaYaktZIWtXRdrGU4KZaJpCrgZ8DpwEjgHEkjW7ZWVoTrgFZ1X501LyfF8hkDLIuI5yJiJzATmNTCdbICIuJ+YENL18NajpNi+QwCXsp9XpFiZlbBnBTNzHKcFMtnJTAk93lwiplZBXNSLJ+HgeGShknqBEwGZrVwncysACfFMomI3cBUYDbwFHBzRCxu2VpZIZJuBP4OjJC0QtIFLV0nO7j8RIuZWY5bimZmOU6KZmY5TopmZjlOimZmOU6KZmY5ToqtiKQaSQslLZJ0i6RDDqCs6yR9JK3/oqnJKiSNk3TyfpzjeUlveMFRY/F6+2wr8Vxfl/TFUutoVp+TYuvyWkSMioi3AzuBT+U3StqvV9ZGxL9HxJImdhkH
lJwUzVojJ8XW6wHgqNSKe0DSLGCJpCpJ/0/Sw5KekPRJAGV+muZ3vAfoX1eQpPskjU7rEyU9KulxSXMlDSVLvp9LrdR/ktRP0q3pHA9Lenc6to+kuyUtlvQLQIW+hKTfS3okHXNhvW0/TPG5kvql2Fsk3ZWOeUDSMc3y2zRL9qtlYS0rtQhPB+5KoROAt0fE8pRYNkfEuyR1Bv4q6W7geGAE2dyOhwNLgBn1yu0H/Bw4JZXVOyI2SLoa2BYR30v7/Qb4YUQ8KOlIsqd23gpMAx6MiCsk/TNQzNMgH0/n6Ao8LOnWiFgPHAosiIjPSfo/qeypZO9O+VREPCPpROAqYPx+/BrNGuSk2Lp0lbQwrT8AXEvWrZ0fEctT/DTg2LrrhUAPYDhwCnBjRNQAL0v6cwPlnwTcX1dWRDQ2r+CpwEhpT0Owu6Ru6RwfSsf+UdLGIr7TZyR9MK0PSXVdD9QCN6X4r4Db0jlOBm7JnbtzEecwK5qTYuvyWkSMygdScng1HwI+HRGz6+13RjPWowNwUkS83kBdiiZpHFmCHRsR2yXdB3RpZPdI591U/3dg1px8TbHtmQ1cJKkaQNLRkg4F7gfOTtccBwDvbeDYecApkoalY3un+FbgsNx+dwOfrvsgaVRavR84N8VOB3oVqGsPYGNKiMeQtVTrdADqWrvnknXLtwDLJX00nUOSjitwDrOSOCm2Pb8gu174aHr50n+T9Qh+BzyTtt1ANhPMPiLiFeBCsq7q4+ztvt4BfLBuoAX4DDA6DeQsYe8o+DfIkupism70iwXqehfQUdJTwHfIknKdV4Ex6TuMB65I8Y8BF6T6LcaveLBm5llyzMxy3FI0M8txUjQzy3FSNDPLcVI0M8txUjQzy3FSNDPLcVI0M8txUjQzy/n/wEnEsvsWGi0AAAAASUVORK5CYII=",
+ "text/plain": [
+ "