add rank_df util

chapmanjacobd · Dec 15, 2024 · 5e1b7c7 · 5e1b7c7
1 parent bd143d4
commit 5e1b7c7
Showing 1 changed file with 31 additions and 0 deletions.
diff --git a/library/utils/pd_utils.py b/library/utils/pd_utils.py
@@ -49,3 +49,34 @@ def available_name(df, column_name):
         else:
             column_name = f"{column_name}_1"
     return column_name
+
+
+def rank_dataframe(df, column_weights):
+    """
+    Return the rows of ``df`` ordered by a weighted sum of per-column ranks.
+
+    Every column named in ``column_weights`` is ranked on its own
+    (``method="min"``, missing values ranked last) and the rank is multiplied
+    by the column's weight. Rows are sorted by the sum of those weighted
+    ranks, lowest sum first; rows with equal sums keep their original
+    relative order. The result has a fresh 0..n-1 index.
+
+    ``column_weights`` maps a column name to an options dict:
+        "direction": "asc" gives the best rank to the smallest value; any
+                     other value, or a missing key, gives it to the largest
+        "weight":    multiplier applied to the column's rank (default 1)
+
+    Numeric columns of ``df`` that are not listed in ``column_weights`` are
+    printed as a copy-pasteable hint.
+
+    ranked_df = rank_dataframe(
+        df,
+        column_weights={
+            "progress": {"direction": "desc", "weight": 6},
+            "size": {"direction": "asc", "weight": 3}
+        }
+    )
+    """
+    # A plain list is an unambiguous column selector; a dict view is not.
+    ranks = df[list(column_weights)].apply(
+        lambda x: x.rank(
+            method="min",
+            na_option="bottom",
+            ascending=column_weights.get(x.name, {}).get("direction") == "asc",
+        )
+        * column_weights.get(x.name, {}).get("weight", 1),
+    )
+
+    unranked_columns = set(df.select_dtypes(include=["number"]).columns) - set(ranks.columns)
+    if unranked_columns:
+        print(
+            "Unranked columns:\n"
+            + "\n".join([f"""    "{s}": {{ 'direction': 'desc' }}, """ for s in unranked_columns]),
+        )
+
+    # Dividing every score by one shared constant cannot change the order, so
+    # the weighted ranks are summed as they are. This also keeps a
+    # single-column ranking free of any division by zero.
+    scores = ranks.sum(axis=1)
+
+    # argsort yields row *positions*, which is what iloc expects; index labels
+    # would only coincide with positions for a default RangeIndex. The stable
+    # sort keeps tied rows in their original order.
+    order = scores.to_numpy().argsort(kind="stable")
+    return df.iloc[order].reset_index(drop=True)