Skip to content

Commit

Permalink
add rank_df util
Browse files Browse the repository at this point in the history
  • Loading branch information
chapmanjacobd committed Dec 15, 2024
1 parent bd143d4 commit 5e1b7c7
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions library/utils/pd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,34 @@ def available_name(df, column_name):
else:
column_name = f"{column_name}_1"
return column_name


def rank_dataframe(df, column_weights):
    """
    Return the rows of ``df`` ordered by a weighted sum of per-column ranks.

    Each column named in ``column_weights`` is ranked independently
    (``method="min"``, missing values ranked last), multiplied by its weight,
    and the weighted ranks are summed per row. Rows with the lowest total
    come first. Rows with equal totals keep their original relative order.

    Args:
        df: DataFrame to sort. It is not modified.
        column_weights: Mapping of column name to an options dict with the
            optional keys:

            - ``"direction"``: ``"asc"`` puts the smallest values first; any
              other value (or a missing key) puts the largest values first.
            - ``"weight"``: multiplier applied to the column's rank
              (default ``1``).

    Returns:
        A new DataFrame containing the rows of ``df`` in ranked order, with
        the index reset to ``0..n-1``.

    Side effects:
        Prints a hint to stdout listing numeric columns of ``df`` that are
        not present in ``column_weights``.

    Example::

        ranked_df = rank_dataframe(
            df,
            column_weights={
                "progress": {"direction": "desc", "weight": 6},
                "size": {"direction": "asc", "weight": 3},
            },
        )
    """
    # A plain list is the documented way to select several columns.
    ranked_columns = list(column_weights)

    def weighted_rank(series):
        options = column_weights.get(series.name, {})
        ranked = series.rank(
            method="min",
            na_option="bottom",
            ascending=options.get("direction") == "asc",
        )
        return ranked * options.get("weight", 1)

    ranks = df[ranked_columns].apply(weighted_rank)

    # Keep DataFrame column order so the printed hint is deterministic.
    already_ranked = set(ranks.columns)
    unranked_columns = [
        name for name in df.select_dtypes(include=["number"]).columns if name not in already_ranked
    ]
    if unranked_columns:
        print(
            "Unranked columns:\n"
            + "\n".join([f""" "{s}": {{ 'direction': 'desc' }}, """ for s in unranked_columns]),
        )

    # The divisor is a constant and never changes the ordering; clamp it so
    # that ranking a single column does not divide by zero (which would turn
    # every total into NaN/inf and scramble the result).
    divisor = max(len(ranks.columns) - 1, 1)
    scaled_ranks = (ranks - 1) / divisor

    # Sort by position rather than by index label so frames with a
    # non-default index (e.g. after filtering) are reordered correctly, and
    # use a stable sort so ties keep their original relative order.
    order = scaled_ranks.sum(axis=1).to_numpy().argsort(kind="stable")
    return df.iloc[order].reset_index(drop=True)

0 comments on commit 5e1b7c7

Please sign in to comment.