Skip to content

Commit

Permalink
Modify arena leaderboard to include input & output token price columns
Browse files Browse the repository at this point in the history
  • Loading branch information
Jackie-Lian committed Oct 31, 2024
1 parent 7ca0d43 commit eb2c58c
Showing 1 changed file with 42 additions and 20 deletions.
62 changes: 42 additions & 20 deletions fastchat/serve/monitor/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def load_leaderboard_table_csv(filename, add_hyperlink=True):
v = round(ast.literal_eval(v), 2)
else:
v = np.nan
elif h == "input_token_price" or h == "output_token_price":
if v == "null":
v = "-"
item[h] = v
if add_hyperlink:
item["Model"] = model_hyperlink(item["Model"], item["Link"])
Expand Down Expand Up @@ -242,7 +245,11 @@ def get_full_table(arena_df, model_table_df, model_to_score):
row.append(model_table_df.iloc[i]["Organization"])
# license
row.append(model_table_df.iloc[i]["License"])

# Input Price
row.append(model_table_df.iloc[i]["input_token_price"])
# Output Price
row.append(model_table_df.iloc[i]["output_token_price"])

values.append(row)
values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
return values
Expand Down Expand Up @@ -350,12 +357,14 @@ def process_row(row):
f"+{round(row['rating_q975'] - row['rating'])}/-{round(row['rating'] - row['rating_q025'])}",
round(row["num_battles"]),
model_info.get("Organization", "Unknown"),
model_info.get("License", "Unknown"),
(
"Unknown"
if model_info.get("Knowledge cutoff date", "-") == "-"
else model_info.get("Knowledge cutoff date", "Unknown")
),
# model_info.get("License", "Unknown"),
# (
# "Unknown"
# if model_info.get("Knowledge cutoff date", "-") == "-"
# else model_info.get("Knowledge cutoff date", "Unknown")
# ),
model_info.get("input_token_price", "-"),
model_info.get("output_token_price", "-"),
]
)
return result
Expand All @@ -378,8 +387,10 @@ def update_leaderboard_df(arena_table_vals):
"95% CI",
"Votes",
"Organization",
"License",
"Knowledge Cutoff",
# "License",
# "Knowledge Cutoff",
"Input Token Price",
"Output Token Price",
]
elo_dataframe = pd.DataFrame(arena_table_vals, columns=columns)

Expand Down Expand Up @@ -407,8 +418,10 @@ def update_overall_leaderboard_df(arena_table_vals):
"95% CI",
"Votes",
"Organization",
"License",
"Knowledge Cutoff",
# "License",
# "Knowledge Cutoff",
"Input Token Price",
"Output Token Price",
]
elo_dataframe = pd.DataFrame(arena_table_vals, columns=columns)

Expand Down Expand Up @@ -514,8 +527,10 @@ def update_leaderboard_and_plots(category, filters):
"95% CI",
"Votes",
"Organization",
"License",
"Knowledge Cutoff",
# "License",
# "Knowledge Cutoff",
"Input Token Price",
"Output Token Price",
],
datatype=[
"number",
Expand All @@ -531,7 +546,7 @@ def update_leaderboard_and_plots(category, filters):
value=arena_values,
elem_id="arena_leaderboard_dataframe",
height=1000,
column_widths=[75, 75, 180, 60, 60, 60, 70, 80, 60],
column_widths=[75, 75, 180, 60, 60, 60, 70, 55, 55],
wrap=True,
)
else:
Expand All @@ -545,8 +560,10 @@ def update_leaderboard_and_plots(category, filters):
"95% CI",
"Votes",
"Organization",
"License",
"Knowledge Cutoff",
# "License",
# "Knowledge Cutoff",
"Input Token Price",
"Output Token Price",
],
datatype=[
"number",
Expand All @@ -562,7 +579,7 @@ def update_leaderboard_and_plots(category, filters):
value=arena_values,
elem_id="arena_leaderboard_dataframe",
height=1000,
column_widths=[75, 75, 180, 60, 60, 60, 70, 80, 60],
column_widths=[75, 75, 180, 60, 60, 60, 70, 55, 55],
wrap=True,
)

Expand Down Expand Up @@ -631,8 +648,10 @@ def update_leaderboard_and_plots(category, filters):
"95% CI",
"Votes",
"Organization",
"License",
"Knowledge Cutoff",
# "License",
# "Knowledge Cutoff",
"Input Token Price",
"Output Token Price",
],
datatype=[
"number",
Expand All @@ -648,7 +667,7 @@ def update_leaderboard_and_plots(category, filters):
value=arena_vals,
elem_id="arena_leaderboard_dataframe",
height=1000,
column_widths=[75, 75, 180, 60, 60, 60, 70, 80, 60],
column_widths=[75, 75, 180, 60, 60, 60, 70, 55, 55],
wrap=True,
)

Expand Down Expand Up @@ -895,6 +914,8 @@ def get_model_name(model_key):

combined_table = []
for category in elo_results.keys():
if category not in key_to_category_name:
continue
df = elo_results[category]["leaderboard_table_df"].copy()
# remove deprecated models
df = df.loc[~df.index.isin(deprecated_model_name)]
Expand Down Expand Up @@ -1187,3 +1208,4 @@ def build_demo(elo_results_file, leaderboard_table_file, arena_hard_leaderboard)
max_threads=200,
auth=(args.password[0], args.password[1]) if args.password else None,
)

0 comments on commit eb2c58c

Please sign in to comment.