-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
184 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys\n", | ||
"sys.path.append(\"../\")\n", | ||
"\n", | ||
"import os\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"from scorecardpipeline import *\n", | ||
"\n", | ||
"\n", | ||
"logger = init_setting(seed=8888, logger=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": " duration_in_month credit_amount installment_rate_in_percentage_of_disposable_income present_residence_since age_in_years number_of_existing_credits_at_this_bank number_of_people_being_liable_to_provide_maintenance_for creditability\n0 6 1169 4 4 67 2 1 0\n1 48 5951 2 2 22 1 1 1\n2 12 2096 2 3 49 1 2 0\n3 42 7882 2 4 45 1 2 0\n4 24 4870 3 4 53 2 2 1\n5 36 9055 2 4 35 1 2 0\n6 24 2835 3 4 53 1 1 0\n7 36 6948 2 2 35 1 1 0\n8 12 3059 2 4 61 1 1 0\n9 30 5234 4 2 28 2 1 1", | ||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>duration_in_month</th>\n <th>credit_amount</th>\n <th>installment_rate_in_percentage_of_disposable_income</th>\n <th>present_residence_since</th>\n <th>age_in_years</th>\n <th>number_of_existing_credits_at_this_bank</th>\n <th>number_of_people_being_liable_to_provide_maintenance_for</th>\n <th>creditability</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>6</td>\n <td>1169</td>\n <td>4</td>\n <td>4</td>\n <td>67</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>48</td>\n <td>5951</td>\n <td>2</td>\n <td>2</td>\n <td>22</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12</td>\n <td>2096</td>\n <td>2</td>\n <td>3</td>\n <td>49</td>\n <td>1</td>\n <td>2</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>42</td>\n <td>7882</td>\n <td>2</td>\n <td>4</td>\n <td>45</td>\n <td>1</td>\n <td>2</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>24</td>\n <td>4870</td>\n <td>3</td>\n <td>4</td>\n <td>53</td>\n <td>2</td>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>5</th>\n <td>36</td>\n <td>9055</td>\n <td>2</td>\n <td>4</td>\n <td>35</td>\n <td>1</td>\n <td>2</td>\n <td>0</td>\n </tr>\n <tr>\n <th>6</th>\n <td>24</td>\n <td>2835</td>\n <td>3</td>\n <td>4</td>\n <td>53</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>7</th>\n <td>36</td>\n <td>6948</td>\n <td>2</td>\n <td>2</td>\n <td>35</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>8</th>\n <td>12</td>\n <td>3059</td>\n <td>2</td>\n <td>4</td>\n <td>61</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>9</th>\n <td>30</td>\n <td>5234</td>\n <td>4</td>\n <td>2</td>\n <td>28</td>\n <td>2</td>\n <td>1</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>" | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# 加载数据集,标签转换为 0 和 1\n", | ||
"target = \"creditability\"\n", | ||
"data = germancredit()\n", | ||
"data[target] = data[target].map({\"good\": 0, \"bad\": 1})\n", | ||
"\n", | ||
"# 目前仅支持数值型变量\n", | ||
"data = data.select_dtypes(\"number\")\n", | ||
"data.head(10)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"rule1 = Rule(\"duration_in_month < 4\")\n", | ||
"rule2 = Rule(\"credit_amount < 500\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 duration_in_month < 4 命中 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.7000 0.0000 0.0000 0.0000 0.0000\n1 duration_in_month < 4 未命中 1000.0000 1.0000 700.0000 1.0000 300.0000 1.0000 0.3000 1.0000 0.3000 0.3000 1.0000 0.4615 NaN", | ||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>duration_in_month < 4</td>\n <td>命中</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.7000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>duration_in_month < 4</td>\n <td>未命中</td>\n <td>1000.0000</td>\n <td>1.0000</td>\n <td>700.0000</td>\n <td>1.0000</td>\n <td>300.0000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.4615</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>" | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"rule1.report(data, target=target)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 credit_amount < 500 命中 18 0.0180 15 0.0214 3 0.0100 0.1667 0.5556 0.6880 0.1667 0.0100 0.0189 -0.0081\n1 credit_amount < 500 未命中 982 0.9820 685 0.9786 297 0.9900 0.3024 1.0081 0.3120 0.3024 0.9900 0.4633 0.4444", | ||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>credit_amount < 500</td>\n <td>命中</td>\n <td>18</td>\n <td>0.0180</td>\n <td>15</td>\n <td>0.0214</td>\n <td>3</td>\n <td>0.0100</td>\n <td>0.1667</td>\n <td>0.5556</td>\n <td>0.6880</td>\n <td>0.1667</td>\n <td>0.0100</td>\n <td>0.0189</td>\n <td>-0.0081</td>\n </tr>\n <tr>\n <th>1</th>\n <td>credit_amount < 500</td>\n <td>未命中</td>\n <td>982</td>\n <td>0.9820</td>\n <td>685</td>\n <td>0.9786</td>\n <td>297</td>\n <td>0.9900</td>\n <td>0.3024</td>\n <td>1.0081</td>\n <td>0.3120</td>\n <td>0.3024</td>\n <td>0.9900</td>\n <td>0.4633</td>\n <td>0.4444</td>\n </tr>\n </tbody>\n</table>\n</div>" | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"rule2.report(data, target=target)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 (duration_in_month < 4) | (credit_amount < 500) 命中 18 0.0180 15 0.0214 3 0.0100 0.1667 0.5556 0.6880 0.1667 0.0100 0.0189 -0.0081\n1 (duration_in_month < 4) | (credit_amount < 500) 未命中 982 0.9820 685 0.9786 297 0.9900 0.3024 1.0081 0.3120 0.3024 0.9900 0.4633 0.4444", | ||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>(duration_in_month < 4) | (credit_amount < 500)</td>\n <td>命中</td>\n <td>18</td>\n <td>0.0180</td>\n <td>15</td>\n <td>0.0214</td>\n <td>3</td>\n <td>0.0100</td>\n <td>0.1667</td>\n <td>0.5556</td>\n <td>0.6880</td>\n <td>0.1667</td>\n <td>0.0100</td>\n <td>0.0189</td>\n <td>-0.0081</td>\n </tr>\n <tr>\n <th>1</th>\n <td>(duration_in_month < 4) | (credit_amount < 500)</td>\n <td>未命中</td>\n <td>982</td>\n <td>0.9820</td>\n <td>685</td>\n <td>0.9786</td>\n <td>297</td>\n <td>0.9900</td>\n <td>0.3024</td>\n <td>1.0081</td>\n <td>0.3120</td>\n <td>0.3024</td>\n <td>0.9900</td>\n <td>0.4633</td>\n <td>0.4444</td>\n </tr>\n </tbody>\n</table>\n</div>" | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"(rule1 | rule2).report(data, target=target)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 (duration_in_month < 4) & (credit_amount < 500) 命中 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.7000 0.0000 0.0000 0.0000 0.0000\n1 (duration_in_month < 4) & (credit_amount < 500) 未命中 1000.0000 1.0000 700.0000 1.0000 300.0000 1.0000 0.3000 1.0000 0.3000 0.3000 1.0000 0.4615 NaN", | ||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>(duration_in_month < 4) & (credit_amount < 500)</td>\n <td>命中</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.7000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>(duration_in_month < 4) & (credit_amount < 500)</td>\n <td>未命中</td>\n <td>1000.0000</td>\n <td>1.0000</td>\n <td>700.0000</td>\n <td>1.0000</td>\n <td>300.0000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.4615</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>" | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"(rule1 & rule2).report(data, target=target)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"outputs": [], | ||
"source": [], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 42, | ||
"outputs": [], | ||
"source": [], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "score", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |