From af3279472d1af853c17b31ebb6a8f06a1c2df79e Mon Sep 17 00:00:00 2001 From: itlubber <1830611168@qq.com> Date: Thu, 5 Dec 2024 00:22:35 +0800 Subject: [PATCH] fix groupby_feature_describe methods --- scorecardpipeline/utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scorecardpipeline/utils.py b/scorecardpipeline/utils.py index 073e9b7..3f43d00 100644 --- a/scorecardpipeline/utils.py +++ b/scorecardpipeline/utils.py @@ -178,17 +178,21 @@ def feature_describe(data, feature=None, percentiles=None, missing=None, cardina def groupby_feature_describe(data, by=None, **kwargs): + if not isinstance(by, (tuple, list, np.ndarray)): + by = [by] + describe = pd.DataFrame() for p, group in data.groupby(by=by): _describe = pd.DataFrame() for f in group.columns: - temp = feature_describe(group[f], **kwargs) - temp.index = pd.MultiIndex.from_product([[f], temp.index]) - temp = pd.DataFrame(temp, columns=[p]) - - _describe = pd.concat([_describe, temp]) + if f not in by: + temp = feature_describe(group[f], **kwargs) + temp.index = pd.MultiIndex.from_product([[f], temp.index]) + temp = pd.DataFrame(temp, columns=[p]) + + _describe = pd.concat([_describe, temp]) describe[p] = _describe[p]