From 1a442fe3e8fbd9fa6d9a54736c56d1e86cb9422f Mon Sep 17 00:00:00 2001 From: itlubber <1830611168@qq.com> Date: Tue, 23 Jan 2024 12:22:59 +0800 Subject: [PATCH] 0.1.30 --- scorecardpipeline/__init__.py | 2 +- scorecardpipeline/auto_report.py | 2 +- scorecardpipeline/excel_writer.py | 17 +++++++++-------- setup.py | 2 ++ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/scorecardpipeline/__init__.py b/scorecardpipeline/__init__.py index 5416a01..837dfff 100644 --- a/scorecardpipeline/__init__.py +++ b/scorecardpipeline/__init__.py @@ -21,7 +21,7 @@ feature_bin_stats = Combiner.feature_bin_stats -__version__ = "0.1.29" +__version__ = "0.1.30" __all__ = ( "__version__" , "FeatureSelection", "FeatureImportanceSelector", "StepwiseSelection", "Combiner", "WOETransformer" diff --git a/scorecardpipeline/auto_report.py b/scorecardpipeline/auto_report.py index dc824db..f8f0ab8 100644 --- a/scorecardpipeline/auto_report.py +++ b/scorecardpipeline/auto_report.py @@ -98,7 +98,7 @@ def auto_data_testing_report(data, features=None, target="target", date=None, da end_row, end_col = writer.insert_pic2sheet(worksheet, f"model_report/feature_ks_plot_{col}.png", (ks_row, end_col - 1), figsize=(600, 350)) end_row, end_col = writer.insert_pic2sheet(worksheet, f"model_report/feature_hist_plot_{col}.png", (ks_row, end_col - 1), figsize=(600, 350)) - end_row, end_col = dataframe2excel(score_table_train, writer, worksheet, percent_cols=["样本占比", "好样本占比", "坏样本占比", "坏样本率", "LIFT值", "累积LIFT值"], condition_cols=["坏样本率", "LIFT值"], start_row=end_row) + end_row, end_col = dataframe2excel(score_table_train, writer, worksheet, percent_cols=["样本占比", "好样本占比", "坏样本占比", "坏样本率", "LIFT值", "累积LIFT值"], condition_cols=["坏样本率", "LIFT值"], merge_column=["指标名称"], merge=True, fill=True, start_row=end_row) if not isinstance(excel_writer, ExcelWriter) and not isinstance(sheet, Worksheet): writer.save(excel_writer) diff --git a/scorecardpipeline/excel_writer.py b/scorecardpipeline/excel_writer.py index c63b251..9a6c65a 100644 --- a/scorecardpipeline/excel_writer.py +++ b/scorecardpipeline/excel_writer.py @@ -269,7 +269,8 @@ def insert_df2sheet(self, worksheet, data, insert_space, merge_column=None, head if isinstance(merge_column[0], (int, float)): merge_column = [df.columns.tolist()[col] if col not in df.columns else col for col in merge_column] - # df = df.sort_values(merge_column).reset_index(drop=True) + if df[merge_column].values.tolist() != df[merge_column].sort_values(merge_column).values.tolist(): + df = df.sort_values(merge_column).reset_index(drop=True) merge_cols = [get_column_letter(df.columns.get_loc(col) + column_index_from_string(start_col)) for col in merge_column] if header: @@ -685,7 +686,7 @@ def dataframe2excel(data, excel_writer, sheet_name=None, title=None, header=True if __name__ == "__main__": - writer = ExcelWriter() + writer = ExcelWriter(theme_color='3f1dba') worksheet = writer.get_sheet_by_name("模型报告") end_row, end_col = writer.insert_value2sheet(worksheet, "B2", value="模型报告", style="header") end_row, end_col = writer.insert_value2sheet(worksheet, "B3", value="模型报告", style="header", end_space="D3") @@ -701,6 +702,7 @@ def dataframe2excel(data, excel_writer, sheet_name=None, title=None, header=True end_row, end_col = dataframe2excel(sample, writer, sheet_name="模型报告", start_row=end_row + 2, percent_cols=["B2", "B6"], condition_cols=["B3", "B9"], color_cols=["B4"], title="测试样例") end_row, end_col = dataframe2excel(sample, writer, sheet_name="模型报告", start_row=end_row + 2, percent_cols=["B2", "B6"], condition_cols=["B3", "B9"], color_cols=["B4"], title="测试样例", figures=[ "../examples/model_report/feature_ks_plot_number_of_existing_credits_at_this_bank.png", "../examples/model_report/psi_duration_in_month.png"]) + multi_sample = pd.DataFrame(np.random.randint(0, 150, size=(8, 12)), columns=pd.MultiIndex.from_product([['模拟考', '正式考'], ['数学', '语文', '英语', '物理', '化学', '生物']]), index=pd.MultiIndex.from_product([['期中', '期末'], ['雷军', '李斌'], ['测试一', '测试二']])) multi_sample.index.names = ["考试类型", "姓名", "测试"] end_row, end_col = dataframe2excel(multi_sample, writer, sheet_name="模型报告", start_row=end_row + 2, title="测试样例", index=True, header=False) @@ -709,14 +711,13 @@ def dataframe2excel(data, excel_writer, sheet_name=None, title=None, header=True end_row, end_col = dataframe2excel(multi_sample.reset_index(names=multi_sample.index.names, col_level=-1), writer, sheet_name="模型报告", start_row=end_row + 2, title="测试样例", index=False, fill=False, merge_column=[('', '考试类型'), ('', '姓名')]) end_row, end_col = dataframe2excel(multi_sample.reset_index(names=multi_sample.index.names, col_level=-1), writer, sheet_name="模型报告", start_row=end_row + 2, title="测试样例", index=False, fill=False, merge_column=[('', '考试类型')], merge=True) - data = pd.read_pickle("/Users/lubberit/Downloads/black_list.pkl").reset_index(names=[("数据指标", ""), ("渠道", "时间")]).sort_values([("数据指标", ""), ("渠道", "时间")]).reset_index(drop=True) - end_row, end_col = dataframe2excel(data, writer, sheet_name="模型报告", start_row=end_row + 2, title="测试样例", index=False, fill=False, merge_column=[("数据指标", "")], merge=True) - - end_row, end_col = dataframe2excel(data, writer, sheet_name="模型报告", start_row=end_row + 2, index=False, fill=True, merge_column=[("数据指标", "")], merge=True) + data = pd.read_pickle("/Users/lubberit/Downloads/black_list.pkl") + data = data.reset_index(names=[("", ""), ("渠道", "时间")]).sort_values([("", ""), ("渠道", "时间")]).reset_index(drop=True) + # end_row, end_col = dataframe2excel(data, writer, sheet_name="模型报告", start_row=end_row + 2, title="测试样例", index=False, fill=False, merge_column=[("", "")], merge=True) + end_row, end_col = dataframe2excel(data, writer, sheet_name="模型报告", start_row=end_row + 2, index=False, fill=False, merge_column=[("", "")], merge=True) for color_rows in data[data[("渠道", "时间")] == "命中率"].index: - rule = ColorScaleRule(start_type='num', start_value=0, start_color='7d5fff', end_type='num', end_value=data.iloc[color_rows, 2:].max(), end_color='ff3838') + rule = ColorScaleRule(start_type='num', start_value=0, start_color='3f1dba', end_type='num', end_value=data.iloc[color_rows, 2:].max(), end_color='c04d9c') worksheet.conditional_formatting.add(f"{get_column_letter(2 + 2)}{end_row - len(data) + color_rows}:{get_column_letter(2 + len(data.columns))}{end_row - len(data) + color_rows}", rule) writer.set_number_format(worksheet, f"{get_column_letter(2 + 2)}{end_row - len(data) + color_rows}:{get_column_letter(2 + len(data.columns))}{end_row - len(data) + color_rows}", "0.00%") writer.save("测试样例.xlsx") - diff --git a/setup.py b/setup.py index e844cfe..db2aff4 100644 --- a/setup.py +++ b/setup.py @@ -52,5 +52,7 @@ def get_requirements(stage = None): 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ], ) \ No newline at end of file