From 5d1128282ad4e4c67f4c76789a4413751ed9485d Mon Sep 17 00:00:00 2001 From: Kelly Tang Date: Mon, 25 Nov 2024 18:10:13 -0800 Subject: [PATCH 1/9] added data explorer tab --- fastchat/serve/gradio_web_server_multi.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index 7a255d59e..72fbb3bbd 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -200,6 +200,14 @@ def build_demo( show_plot=True, ) + with gr.Tab("🔍 Data Explorer", id=5): + frame = """ + + """ + gr.HTML(frame) + with gr.Tab("ℹ️ About Us", id=4): about = build_about() From 273c6858c576e0ba190f02188e44a370818e4f6d Mon Sep 17 00:00:00 2001 From: ygtangg Date: Tue, 10 Dec 2024 18:17:23 +0000 Subject: [PATCH 2/9] updated data visualizer UI --- fastchat/serve/gradio_web_server.py | 43 ++++++++++++++++++++++- fastchat/serve/gradio_web_server_multi.py | 10 ++---- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/fastchat/serve/gradio_web_server.py b/fastchat/serve/gradio_web_server.py index 4f0521da0..edaa3d58c 100644 --- a/fastchat/serve/gradio_web_server.py +++ b/fastchat/serve/gradio_web_server.py @@ -791,7 +791,48 @@ def get_model_description_md(models): ct += 1 return model_description_md - +def build_visualizer(): + visualizer_markdown = """ + # 🧭 Arena Visualizer + Data explorer provides interactive tools to explore and draw insights from our leaderboard data. + """ + + gr.Markdown(visualizer_markdown, elem_id="visualizer_markdown") + + with gr.Tabs() as tabs: + with gr.Tab("Topic Explorer", id=0): + topic_markdown = """ + ## *Welcome to the Topic Explorer* + This tool lets you dive into user-submitted prompts, organized into general categories and detailed subcategories. Using the sunburst chart, you can easily explore the data and understand how different topics are distributed.
+ + ### How to Use: + - Hover Over Segments: View the category name, the number of prompts, and their percentage. + - Click to Explore: + - Click on a main category to see its subcategories. + - Click on subcategories to see example prompts in the sidebar. + - Undo and Reset: Click the center of the chart to return to the top level. + + Start exploring and discover interesting trends in the data! + + """ + gr.Markdown(topic_markdown) + + frame = """ + + """ + gr.HTML(frame) + + + with gr.Tab("Price Analysis", id=1): + price_markdown = """ + ## *Price Control Data Visualizations* + Below are scatter-plots depicting a model's arena score against its cost effectiveness + and output token price. + """ + gr.Markdown(price_markdown) + def build_about(): about_markdown = """ # About Us diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index 72fbb3bbd..99f94c12a 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -37,6 +37,7 @@ set_global_vars, block_css, build_single_model_ui, + build_visualizer, build_about, get_model_list, load_demo_single, @@ -200,13 +201,8 @@ def build_demo( show_plot=True, ) - with gr.Tab("🔍 Data Explorer", id=5): - frame = """ - - """ - gr.HTML(frame) + with gr.Tab("🔍 Data Visualizer", id=5): + build_visualizer() with gr.Tab("ℹ️ About Us", id=4): about = build_about() From c1eb2fdde6cde51cd204e11fa60dcd3fde52e4e8 Mon Sep 17 00:00:00 2001 From: ygtangg Date: Tue, 10 Dec 2024 18:28:18 +0000 Subject: [PATCH 3/9] reformatting --- fastchat/serve/gradio_web_server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fastchat/serve/gradio_web_server.py b/fastchat/serve/gradio_web_server.py index edaa3d58c..dd8ce9b20 100644 --- a/fastchat/serve/gradio_web_server.py +++ b/fastchat/serve/gradio_web_server.py @@ -791,6 +791,7 @@ def get_model_description_md(models): ct += 1 return model_description_md + def build_visualizer(): visualizer_markdown
= """ # 🧭 Arena Visualizer @@ -833,6 +834,7 @@ def build_visualizer(): """ gr.Markdown(price_markdown) + def build_about(): about_markdown = """ # About Us From 251a745f6167f0514a9ca2404161a5ef729d380d Mon Sep 17 00:00:00 2001 From: Kelly Tang Date: Tue, 10 Dec 2024 15:30:20 -0800 Subject: [PATCH 4/9] retry formatting --- fastchat/serve/gradio_web_server.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fastchat/serve/gradio_web_server.py b/fastchat/serve/gradio_web_server.py index dd8ce9b20..db5cf3a56 100644 --- a/fastchat/serve/gradio_web_server.py +++ b/fastchat/serve/gradio_web_server.py @@ -800,11 +800,13 @@ def build_visualizer(): gr.Markdown(visualizer_markdown, elem_id="visualizer_markdown") - with gr.Tabs() as tabs: + with gr.Tabs(): with gr.Tab("Topic Explorer", id=0): topic_markdown = """ ## *Welcome to the Topic Explorer* - This tool lets you dive into user-submitted prompts, organized into general categories and detailed subcategories. Using the sunburst chart, you can easily explore the data and understand how different topics are distributed. + This tool lets you dive into user-submitted prompts, organized into general + categories and detailed subcategories. Using the sunburst chart, you can easily + explore the data and understand how different topics are distributed. ### How to Use: - Hover Over Segments: View the category name, the number of prompts, and their percentage. @@ -825,7 +827,6 @@ def build_visualizer(): """ gr.HTML(frame) - with gr.Tab("Price Analysis", id=1): price_markdown = """ ## *Price Control Data Visualizations* @@ -833,7 +834,7 @@ def build_visualizer(): and output token price.
""" gr.Markdown(price_markdown) - + def build_about(): about_markdown = """ From 892aaee108e4b4e19c42375a79af057fc56e4dad Mon Sep 17 00:00:00 2001 From: ygtangg Date: Tue, 10 Dec 2024 23:59:49 +0000 Subject: [PATCH 5/9] added visualizer option --- fastchat/serve/gradio_web_server.py | 43 ----------------- fastchat/serve/gradio_web_server_multi.py | 58 +++++++++++++++++++++-- 2 files changed, 54 insertions(+), 47 deletions(-) diff --git a/fastchat/serve/gradio_web_server.py b/fastchat/serve/gradio_web_server.py index db5cf3a56..f847b7ed9 100644 --- a/fastchat/serve/gradio_web_server.py +++ b/fastchat/serve/gradio_web_server.py @@ -792,49 +792,6 @@ def get_model_description_md(models): return model_description_md -def build_visualizer(): - visualizer_markdown = """ - # 🧭 Arena Visualizer - Data explorer provides interactive tools to explore and draw insights from our leaderboard data. - """ - - gr.Markdown(visualizer_markdown, elem_id="visualizer_markdown") - - with gr.Tabs(): - with gr.Tab("Topic Explorer", id=0): - topic_markdown = """ - ## *Welcome to the Topic Explorer* - This tool lets you dive into user-submitted prompts, organized into general - categories and detailed subcategories. Using the sunburst chart, you can easily - explore the data and understand how different topics are distributed. - - ### How to Use: - - Hover Over Segments: View the category name, the number of prompts, and their percentage. - - Click to Explore: - - Click on a main category to see its subcategories. - - Click on subcategories to see example prompts in the sidebar. - - Undo and Reset: Click the center of the chart to return to the top level. - - Start exploring and discover interesting trends in the data!
- - """ - gr.Markdown(topic_markdown) - - frame = """ - - """ - gr.HTML(frame) - - with gr.Tab("Price Analysis", id=1): - price_markdown = """ - ## *Price Control Data Visualizations* - Below are scatter-plots depicting a model's arena score against its cost effectiveness - and output token price. - """ - gr.Markdown(price_markdown) - def build_about(): about_markdown = """ diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index 99f94c12a..bb365a77d 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -37,7 +37,6 @@ set_global_vars, block_css, build_single_model_ui, - build_visualizer, build_about, get_model_list, load_demo_single, @@ -55,6 +54,50 @@ logger = build_logger("gradio_web_server_multi", "gradio_web_server_multi.log") +def build_visualizer(): + visualizer_markdown = """ + # 🧭 Arena Visualizer + Data explorer provides interactive tools to explore and draw insights from our leaderboard data. + """ + + gr.Markdown(visualizer_markdown, elem_id="visualizer_markdown") + + with gr.Tabs(): + with gr.Tab("Topic Explorer", id=0): + topic_markdown = """ + ## *Welcome to the Topic Explorer* + This tool lets you dive into user-submitted prompts, organized into general + categories and detailed subcategories. Using the sunburst chart, you can easily + explore the data and understand how different topics are distributed. + + ### How to Use: + - Hover Over Segments: View the category name, the number of prompts, and their percentage. + - Click to Explore: + - Click on a main category to see its subcategories. + - Click on subcategories to see example prompts in the sidebar. + - Undo and Reset: Click the center of the chart to return to the top level. + + Start exploring and discover interesting trends in the data!
+ + """ + gr.Markdown(topic_markdown) + + frame = """ + + """ + gr.HTML(frame) + + with gr.Tab("Price Analysis", id=1): + price_markdown = """ + ## *Price Control Data Visualizations* + Below are scatter-plots depicting a model's arena score against its cost effectiveness + and output token price. + """ + gr.Markdown(price_markdown) + + def load_demo(context: Context, request: gr.Request): ip = get_ip(request) logger.info(f"load_demo. ip: {ip}. params: {request.query_params}") @@ -200,9 +243,10 @@ def build_demo( arena_hard_table, show_plot=True, ) - - with gr.Tab("🔍 Data Visualizer", id=5): - build_visualizer() + + if args.show_visualizer: + with gr.Tab("🔍 Data Visualizer", id=5): + build_visualizer() with gr.Tab("ℹ️ About Us", id=4): about = build_about() @@ -309,6 +353,12 @@ def build_demo( type=str, help="Set the password for the gradio web server", ) + parser.add_argument( + "--show-visualizer", + action="store_true", + default=False, + help="Show the Data Visualizer tab", + ) args = parser.parse_args() logger.info(f"args: {args}") From 3ac88b8bdedf22b225df3edda77ed263a0d9b05c Mon Sep 17 00:00:00 2001 From: Kelly Tang Date: Tue, 10 Dec 2024 16:08:26 -0800 Subject: [PATCH 6/9] reformatting --- fastchat/serve/gradio_web_server.py | 1 - fastchat/serve/gradio_web_server_multi.py | 1 - 2 files changed, 2 deletions(-) diff --git a/fastchat/serve/gradio_web_server.py b/fastchat/serve/gradio_web_server.py index f847b7ed9..4f0521da0 100644 --- a/fastchat/serve/gradio_web_server.py +++ b/fastchat/serve/gradio_web_server.py @@ -792,7 +792,6 @@ def get_model_description_md(models): return model_description_md - def build_about(): about_markdown = """ # About Us diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index bb365a77d..25e1d1bf0 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -243,7 +243,6 @@ def build_demo( arena_hard_table, show_plot=True, ) - if
args.show_visualizer: with gr.Tab("🔍 Data Visualizer", id=5): build_visualizer() From 81aedb999e01e1f11ce0322ea6fe0ca1e9951be6 Mon Sep 17 00:00:00 2001 From: ygtangg Date: Thu, 12 Dec 2024 18:24:59 +0000 Subject: [PATCH 7/9] small fixes --- fastchat/serve/gradio_web_server_multi.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index 25e1d1bf0..ec3f803eb 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -56,7 +56,7 @@ def build_visualizer(): visualizer_markdown = """ - # 🧭 Arena Visualizer + # 🔍 Arena Visualizer Data explorer provides interactive tools to explore and draw insights from our leaderboard data. """ @@ -92,8 +92,7 @@ def build_visualizer(): with gr.Tab("Price Analysis", id=1): price_markdown = """ ## *Price Control Data Visualizations* - Below are scatter-plots depicting a model's arena score against its cost effectiveness - and output token price. + Coming soon: Visualizations showing models' arena scores compared to their cost-effectiveness and output token prices.
""" gr.Markdown(price_markdown) @@ -244,7 +243,7 @@ def build_demo( show_plot=True, ) if args.show_visualizer: - with gr.Tab("🔍 Data Visualizer", id=5): + with gr.Tab("🔍 Arena Visualizer", id=5): build_visualizer() with gr.Tab("ℹ️ About Us", id=4): From 6a40877c262b1861663b9dac0075d373fbf2d7eb Mon Sep 17 00:00:00 2001 From: Sophie Xie Date: Thu, 12 Dec 2024 23:44:50 -0800 Subject: [PATCH 8/9] add price plots --- fastchat/serve/gradio_web_server_multi.py | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index ec3f803eb..3d3ffc09c 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -7,6 +7,7 @@ import pickle import time from typing import List +import plotly.express as px import gradio as gr @@ -91,10 +92,34 @@ def build_visualizer(): with gr.Tab("Price Analysis", id=1): price_markdown = """ - ## *Price Control Data Visualizations* - Coming soon: Visualizations showing models' arena scores compared to their cost-effectiveness and output token prices. + ## *Price Analysis Visualizations* + Below is a scatterplot depicting a model’s arena score against its cost effectiveness. Start exploring and discover some interesting trends in the data!
""" gr.Markdown(price_markdown) + model_keys = ['chatgpt-4o-latest', 'gemini-1.5-pro-exp-0827','gpt-4o-mini-2024-07-18','claude-3-5-sonnet-20240620','gemini-1.5-flash-exp-0827','llama-3.1-405b-instruct','gemini-1.5-pro-api-0514','mistral-large-2407','reka-core-20240722','gemini-1.5-flash-api-0514', 'deepseek-coder-v2-0724','yi-large','llama-3-70b-instruct','qwen2-72b-instruct','claude-3-haiku-20240307','llama-3.1-8b-instruct','mistral-large-2402','command-r','mixtral-8x22b-instruct-v0.1','gpt-3.5-turbo-0613'] + output_tokens_per_USD = [66.66666667000001,200.0,1666.666667,66.66666667000001,3333.333333,333.3333333,200.0,166.6666667,166.6666667,3333.333333,3333.333333,333.3333333,1265.8227849999998,1111.111111,800.0,11111.11111,166.6666667,666.6666667,166.6666667,500.0] + score=[1316.1559008799543,1300.8583398843484,1273.6004783067303,1270.113546648134,1270.530573909608,1266.244657076764,1259.2844314017723,1249.8268751367714,1229.2148108171098,1226.8769924152105,1214.5634252743123,1212.4668382698005,1206.3236747009742,1186.7832147344182,1178.5484948812955,1167.8793593807711,1157.271872307139,1148.6665817312062,1147.0325504217642,1117.0289441863001] + fig = px.scatter(x=output_tokens_per_USD, y=score, title="Quality vs. Cost Effectiveness", labels={ + "output_tokens_per_USD": "# of output tokens per USD (in thousands)", + "score": "Arena Score"}, log_x=True, text=model_keys) + fig.update_traces( + textposition="bottom center", + textfont=dict(size=16), + texttemplate='%{text}', + marker=dict(size=8), + hovertemplate=( + 'Model: %{text}
<br>' # Show the model name + 'Output Tokens Per USD: %{x}<br>' # Show the x value (Output Price) + 'Arena Score: %{y}<br>' # Show the y value (Arena Score) + ) + ) + fig.update_xaxes(range=[1,4.5]) + fig.update_yaxes(range=[1100,1320]) + fig.update_layout(autosize=True, height=850, width=None, xaxis_title="# of output tokens per USD (in thousands)", yaxis_title= "Arena Score") + + + gr.Plot(fig, elem_id="plotly-graph") + def load_demo(context: Context, request: gr.Request): From 8936d10daf94cb67a34b6cef02e2111603726764 Mon Sep 17 00:00:00 2001 From: Sophie Xie Date: Thu, 19 Dec 2024 09:45:23 -0800 Subject: [PATCH 9/9] update price scatterplot --- fastchat/serve/gradio_web_server_multi.py | 44 +++++++++-------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/fastchat/serve/gradio_web_server_multi.py b/fastchat/serve/gradio_web_server_multi.py index 3d3ffc09c..f3abd02d6 100644 --- a/fastchat/serve/gradio_web_server_multi.py +++ b/fastchat/serve/gradio_web_server_multi.py @@ -7,8 +7,6 @@ import pickle import time from typing import List -import plotly.express as px - import gradio as gr from fastchat.serve.gradio_block_arena_anony import ( @@ -92,33 +90,25 @@ def build_visualizer(): with gr.Tab("Price Analysis", id=1): price_markdown = """ - ## *Price Analysis Visualizations* - Below is a scatterplot depicting a model’s arena score against its cost effectiveness. Start exploring and discover some interesting trends in the data!
- """ - gr.Markdown(price_markdown) - model_keys = ['chatgpt-4o-latest', 'gemini-1.5-pro-exp-0827','gpt-4o-mini-2024-07-18','claude-3-5-sonnet-20240620','gemini-1.5-flash-exp-0827','llama-3.1-405b-instruct','gemini-1.5-pro-api-0514','mistral-large-2407','reka-core-20240722','gemini-1.5-flash-api-0514', 'deepseek-coder-v2-0724','yi-large','llama-3-70b-instruct','qwen2-72b-instruct','claude-3-haiku-20240307','llama-3.1-8b-instruct','mistral-large-2402','command-r','mixtral-8x22b-instruct-v0.1','gpt-3.5-turbo-0613'] - output_tokens_per_USD = [66.66666667000001,200.0,1666.666667,66.66666667000001,3333.333333,333.3333333,200.0,166.6666667,166.6666667,3333.333333,3333.333333,333.3333333,1265.8227849999998,1111.111111,800.0,11111.11111,166.6666667,666.6666667,166.6666667,500.0] - score=[1316.1559008799543,1300.8583398843484,1273.6004783067303,1270.113546648134,1270.530573909608,1266.244657076764,1259.2844314017723,1249.8268751367714,1229.2148108171098,1226.8769924152105,1214.5634252743123,1212.4668382698005,1206.3236747009742,1186.7832147344182,1178.5484948812955,1167.8793593807711,1157.271872307139,1148.6665817312062,1147.0325504217642,1117.0289441863001] - fig = px.scatter(x=output_tokens_per_USD, y=score, title="Quality vs. Cost Effectiveness", labels={ - "output_tokens_per_USD": "# of output tokens per USD (in thousands)", - "score": "Arena Score"}, log_x=True, text=model_keys) - fig.update_traces( - textposition="bottom center", - textfont=dict(size=16), - texttemplate='%{text}', - marker=dict(size=8), - hovertemplate=( - 'Model: %{text}
<br>' # Show the model name - 'Output Tokens Per USD: %{x}<br>' # Show the x value (Output Price) - 'Arena Score: %{y}<br>' # Show the y value (Arena Score) - ) - ) - fig.update_xaxes(range=[1,4.5]) - fig.update_yaxes(range=[1100,1320]) - fig.update_layout(autosize=True, height=850, width=None, xaxis_title="# of output tokens per USD (in thousands)", yaxis_title= "Arena Score") + ## *Welcome to the Price Explorer* + This scatterplot displays a selection of the arena's models, showing their scores plotted against their cost-effectiveness. Using the plot, you can easily explore the model's price and compare it with their arena score. + ### How to Use: + - Hover Over Points: View the model's price, arena score, and organization. + - Click to Explore: + - Double-click a legend point to show only that organization's models on the scatterplot. + - Single-click a legend point to hide that organization's models from the scatterplot. + + Start exploring and discover interesting trends in the data! + """ - gr.Plot(fig, elem_id="plotly-graph") + gr.Markdown(price_markdown) + frame = """ + + """ + gr.HTML(frame)