Skip to content

Commit

Permalink
ENH add metric and non-metric features in summarized info (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
joaopfonseca committed Jan 4, 2023
1 parent ab0dcd7 commit 1e604d1
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 3 deletions.
37 changes: 37 additions & 0 deletions mlresearch/datasets/_multiclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,43 @@ def download(self):
self.content_.append((name, data))
return self

def summarize_datasets(self):
    """
    Create a summary of the downloaded datasets.

    The summary produced by the parent class is extended with two columns,
    inserted immediately after ``"Features"``:

    - ``"Metric"``: number of feature columns whose name does not start
      with ``"cat_"``.
    - ``"Non-Metric"``: number of feature columns whose name starts with
      ``"cat_"``.

    The ``"target"`` column of each dataset is not counted as a feature.

    Returns
    -------
    datasets_summary : pd.DataFrame
        Dataframe with summary statistics of all datasets.

    Raises
    ------
    KeyError
        If a dataset has no ``"target"`` column.
    ValueError
        If the parent summary has no ``"Features"`` column.
    """
    datasets_summary = super(
        ContinuousCategoricalDatasets, self
    ).summarize_datasets()
    columns = datasets_summary.columns.tolist()

    # Names of the columns added on top of the parent summary
    summary_columns = ["Metric", "Non-Metric"]

    # Count metric / non-metric features per dataset. Only the column labels
    # are needed, so the labels are filtered instead of copying the data.
    feature_counts = []
    for _, dataset in self.content_:
        feature_names = dataset.columns.drop("target")
        n_non_metric = sum(
            str(label).startswith("cat_") for label in feature_names
        )
        feature_counts.append([len(feature_names) - n_non_metric, n_non_metric])

    # pd.concat(axis=1) aligns rows on index labels, so the extra columns
    # reuse the parent's index rather than a fresh RangeIndex.
    # NOTE(review): assumes the parent summary has one row per entry of
    # ``self.content_``, in the same order - confirm against the base class.
    extended_summary = pd.DataFrame(
        feature_counts,
        columns=summary_columns,
        index=datasets_summary.index,
    )
    datasets_summary = pd.concat([datasets_summary, extended_summary], axis=1)

    # Reorder columns so the new ones sit right after "Features"
    split_at = columns.index("Features") + 1
    ordered_columns = [
        *columns[:split_at],
        *summary_columns,
        *columns[split_at:],
    ]
    return datasets_summary[ordered_columns]

def fetch_adult(self):
"""Download and transform the Adult Data Set.
Expand Down
6 changes: 3 additions & 3 deletions mlresearch/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,9 @@ def summarize_datasets(self):
summary_columns = [
"Dataset name",
"Features",
"Instances",
"Minority instances",
"Majority instances",
"Observations",
"Minority Obs.",
"Majority Obs.",
"Imbalance Ratio",
"Classes",
]
Expand Down

0 comments on commit 1e604d1

Please sign in to comment.