Skip to content

Commit

Permalink
add rule efficiency report
Browse files Browse the repository at this point in the history
  • Loading branch information
itlubber committed Feb 28, 2024
1 parent 5667447 commit b7b4d3c
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 0 deletions.
9 changes: 9 additions & 0 deletions docs/source/scorecardpipeline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ scorecardpipeline.excel\_writer
:show-inheritance:


scorecardpipeline.rule
======================================

.. automodule:: scorecardpipeline.rule
   :members:
   :undoc-members:
   :show-inheritance:


scorecardpipeline.logger
======================================

Expand Down
175 changes: 175 additions & 0 deletions examples/rule_efficiency.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"# Put the parent directory on the import path before importing scorecardpipeline\n",
"sys.path.append(\"../\")\n",
"\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"# Explicit names instead of a wildcard import, so each helper's origin is visible\n",
"from scorecardpipeline import Rule, germancredit, init_setting\n",
"\n",
"\n",
"# NOTE(review): presumably seeds the RNGs and enables logging, judging by the keyword names - confirm\n",
"logger = init_setting(seed=8888, logger=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " duration_in_month credit_amount installment_rate_in_percentage_of_disposable_income present_residence_since age_in_years number_of_existing_credits_at_this_bank number_of_people_being_liable_to_provide_maintenance_for creditability\n0 6 1169 4 4 67 2 1 0\n1 48 5951 2 2 22 1 1 1\n2 12 2096 2 3 49 1 2 0\n3 42 7882 2 4 45 1 2 0\n4 24 4870 3 4 53 2 2 1\n5 36 9055 2 4 35 1 2 0\n6 24 2835 3 4 53 1 1 0\n7 36 6948 2 2 35 1 1 0\n8 12 3059 2 4 61 1 1 0\n9 30 5234 4 2 28 2 1 1",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>duration_in_month</th>\n <th>credit_amount</th>\n <th>installment_rate_in_percentage_of_disposable_income</th>\n <th>present_residence_since</th>\n <th>age_in_years</th>\n <th>number_of_existing_credits_at_this_bank</th>\n <th>number_of_people_being_liable_to_provide_maintenance_for</th>\n <th>creditability</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>6</td>\n <td>1169</td>\n <td>4</td>\n <td>4</td>\n <td>67</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>48</td>\n <td>5951</td>\n <td>2</td>\n <td>2</td>\n <td>22</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12</td>\n <td>2096</td>\n <td>2</td>\n <td>3</td>\n <td>49</td>\n <td>1</td>\n <td>2</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>42</td>\n <td>7882</td>\n <td>2</td>\n <td>4</td>\n <td>45</td>\n <td>1</td>\n <td>2</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>24</td>\n <td>4870</td>\n <td>3</td>\n <td>4</td>\n <td>53</td>\n <td>2</td>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>5</th>\n <td>36</td>\n <td>9055</td>\n <td>2</td>\n <td>4</td>\n <td>35</td>\n <td>1</td>\n <td>2</td>\n <td>0</td>\n </tr>\n <tr>\n <th>6</th>\n <td>24</td>\n <td>2835</td>\n <td>3</td>\n <td>4</td>\n <td>53</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>7</th>\n <td>36</td>\n <td>6948</td>\n <td>2</td>\n <td>2</td>\n <td>35</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>8</th>\n <td>12</td>\n <td>3059</td>\n <td>2</td>\n <td>4</td>\n <td>61</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>9</th>\n <td>30</td>\n <td>5234</td>\n <td>4</td>\n <td>2</td>\n <td>28</td>\n 
<td>2</td>\n <td>1</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the dataset and convert the label to 0 and 1\n",
"target = \"creditability\"\n",
"data = germancredit()\n",
"data[target] = data[target].map({\"good\": 0, \"bad\": 1})\n",
"\n",
"# Only numeric variables are supported for now\n",
"data = data.select_dtypes(\"number\")\n",
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"rule1 = Rule(\"duration_in_month < 4\")\n",
"rule2 = Rule(\"credit_amount < 500\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 duration_in_month < 4 命中 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.7000 0.0000 0.0000 0.0000 0.0000\n1 duration_in_month < 4 未命中 1000.0000 1.0000 700.0000 1.0000 300.0000 1.0000 0.3000 1.0000 0.3000 0.3000 1.0000 0.4615 NaN",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>duration_in_month &lt; 4</td>\n <td>命中</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.7000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>duration_in_month &lt; 4</td>\n <td>未命中</td>\n <td>1000.0000</td>\n <td>1.0000</td>\n <td>700.0000</td>\n <td>1.0000</td>\n <td>300.0000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.4615</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rule1.report(data, target=target)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 credit_amount < 500 命中 18 0.0180 15 0.0214 3 0.0100 0.1667 0.5556 0.6880 0.1667 0.0100 0.0189 -0.0081\n1 credit_amount < 500 未命中 982 0.9820 685 0.9786 297 0.9900 0.3024 1.0081 0.3120 0.3024 0.9900 0.4633 0.4444",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>credit_amount &lt; 500</td>\n <td>命中</td>\n <td>18</td>\n <td>0.0180</td>\n <td>15</td>\n <td>0.0214</td>\n <td>3</td>\n <td>0.0100</td>\n <td>0.1667</td>\n <td>0.5556</td>\n <td>0.6880</td>\n <td>0.1667</td>\n <td>0.0100</td>\n <td>0.0189</td>\n <td>-0.0081</td>\n </tr>\n <tr>\n <th>1</th>\n <td>credit_amount &lt; 500</td>\n <td>未命中</td>\n <td>982</td>\n <td>0.9820</td>\n <td>685</td>\n <td>0.9786</td>\n <td>297</td>\n <td>0.9900</td>\n <td>0.3024</td>\n <td>1.0081</td>\n <td>0.3120</td>\n <td>0.3024</td>\n <td>0.9900</td>\n <td>0.4633</td>\n <td>0.4444</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rule2.report(data, target=target)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 (duration_in_month < 4) | (credit_amount < 500) 命中 18 0.0180 15 0.0214 3 0.0100 0.1667 0.5556 0.6880 0.1667 0.0100 0.0189 -0.0081\n1 (duration_in_month < 4) | (credit_amount < 500) 未命中 982 0.9820 685 0.9786 297 0.9900 0.3024 1.0081 0.3120 0.3024 0.9900 0.4633 0.4444",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>(duration_in_month &lt; 4) | (credit_amount &lt; 500)</td>\n <td>命中</td>\n <td>18</td>\n <td>0.0180</td>\n <td>15</td>\n <td>0.0214</td>\n <td>3</td>\n <td>0.0100</td>\n <td>0.1667</td>\n <td>0.5556</td>\n <td>0.6880</td>\n <td>0.1667</td>\n <td>0.0100</td>\n <td>0.0189</td>\n <td>-0.0081</td>\n </tr>\n <tr>\n <th>1</th>\n <td>(duration_in_month &lt; 4) | (credit_amount &lt; 500)</td>\n <td>未命中</td>\n <td>982</td>\n <td>0.9820</td>\n <td>685</td>\n <td>0.9786</td>\n <td>297</td>\n <td>0.9900</td>\n <td>0.3024</td>\n <td>1.0081</td>\n <td>0.3120</td>\n <td>0.3024</td>\n <td>0.9900</td>\n <td>0.4633</td>\n <td>0.4444</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(rule1 | rule2).report(data, target=target)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " 指标名称 分箱 样本总数 样本占比 好样本数 好样本占比 坏样本数 坏样本占比 坏样本率 LIFT值 准确率 精确率 召回率 F1分数 坏账改善\n0 (duration_in_month < 4) & (credit_amount < 500) 命中 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.7000 0.0000 0.0000 0.0000 0.0000\n1 (duration_in_month < 4) & (credit_amount < 500) 未命中 1000.0000 1.0000 700.0000 1.0000 300.0000 1.0000 0.3000 1.0000 0.3000 0.3000 1.0000 0.4615 NaN",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>指标名称</th>\n <th>分箱</th>\n <th>样本总数</th>\n <th>样本占比</th>\n <th>好样本数</th>\n <th>好样本占比</th>\n <th>坏样本数</th>\n <th>坏样本占比</th>\n <th>坏样本率</th>\n <th>LIFT值</th>\n <th>准确率</th>\n <th>精确率</th>\n <th>召回率</th>\n <th>F1分数</th>\n <th>坏账改善</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>(duration_in_month &lt; 4) &amp; (credit_amount &lt; 500)</td>\n <td>命中</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.7000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n <td>0.0000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>(duration_in_month &lt; 4) &amp; (credit_amount &lt; 500)</td>\n <td>未命中</td>\n <td>1000.0000</td>\n <td>1.0000</td>\n <td>700.0000</td>\n <td>1.0000</td>\n <td>300.0000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.3000</td>\n <td>0.3000</td>\n <td>1.0000</td>\n <td>0.4615</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(rule1 & rule2).report(data, target=target)"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "score",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit b7b4d3c

Please sign in to comment.