Skip to content

Commit

Permalink
Added notebook for uploading vllm eval results to wandb post-hoc
Browse files Browse the repository at this point in the history
  • Loading branch information
wongjingping committed Jun 13, 2024
1 parent 7b3cded commit 4e27761
Showing 1 changed file with 250 additions and 0 deletions.
250 changes: 250 additions & 0 deletions upload_wandb.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import wandb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"### Read Results CSV Files\n",
"\n",
"This assumes that you already have at least 1 result csv file for each step. We would recommend using `run_checkpoints.sh` and `run_checkpoints_cot.sh` to generate the result csv files, where it would automatically save the results in the following format:\n",
"```\n",
"{run_name}/c{checkpoint_number}_api_{benchmark}_{if cot}.csv\n",
"```\n",
"\n",
"You can choose either method to import in the result csv files:\n",
"1. Specify the folder that contains the csv files. This will import all of the csv files.\n",
"2. Manually specify the csv files that you want to import.\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 32 csv files in results/sqlcoder_8b_fullft_ds_013_llama3_mgn1_b1_0900_b2_0990_steps_1000\n"
]
}
],
"source": [
"# 1. Specify the folder where the result csv files are stored\n",
"result_folder = 'results/sqlcoder_8b_fullft_ds_013_llama3_mgn1_b1_0900_b2_0990_steps_1000'\n",
"csv_files = []\n",
"for f in os.listdir(result_folder):\n",
" if f.endswith('.csv'):\n",
" csv_files.append(f)\n",
"# 2. Manually specify the list of csv files. Uncomment if you want to use this method\n",
"# csv_files = []\n",
"print(f\"Found {len(csv_files)} csv files in {result_folder}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /Users/jp/.netrc\n"
]
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.17.1"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/Users/jp/workspace/sql-eval/results/wandb/run-20240613_112335-qcbad5rx</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Resuming run <strong><a href='https://wandb.ai/defog/huggingface/runs/qcbad5rx' target=\"_blank\">sqlcoder_8b_fullft_ds_013_llama3_mgn1_b1_0900_b2_0990_steps_1000</a></strong> to <a href='https://wandb.ai/defog/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/defog/huggingface' target=\"_blank\">https://wandb.ai/defog/huggingface</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/defog/huggingface/runs/qcbad5rx' target=\"_blank\">https://wandb.ai/defog/huggingface/runs/qcbad5rx</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5403234ec9b7496e8fbdc78eda30fb7f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Label(value='0.000 MB of 0.020 MB uploaded\\r'), FloatProgress(value=0.0, max=1.0)))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<style>\n",
" table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
" .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
" .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
" </style>\n",
"<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>vllm/advanced</td><td>▁</td></tr><tr><td>vllm/advanced_cot</td><td>▁</td></tr><tr><td>vllm/basic</td><td>▁</td></tr><tr><td>vllm/basic_cot</td><td>▁</td></tr><tr><td>vllm/idk</td><td>▁</td></tr><tr><td>vllm/idk_cot</td><td>▁</td></tr><tr><td>vllm/v1</td><td>▁</td></tr><tr><td>vllm/v1_cot</td><td>▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>advanced</td><td>0.45312</td></tr><tr><td>basic</td><td>0.95</td></tr><tr><td>basic_group_order_limit</td><td>1</td></tr><tr><td>basic_join_date_group_order_limit</td><td>0.875</td></tr><tr><td>basic_join_distinct</td><td>1</td></tr><tr><td>basic_join_group_order_limit</td><td>0.875</td></tr><tr><td>basic_left_join</td><td>1</td></tr><tr><td>cat_a</td><td>0</td></tr><tr><td>cat_b</td><td>0</td></tr><tr><td>cat_c</td><td>0</td></tr><tr><td>date_functions</td><td>0.84</td></tr><tr><td>eval/count_mismatch_i_diff_avg</td><td>5.375</td></tr><tr><td>eval/first_index_mismatch_avg</td><td>11.20833</td></tr><tr><td>eval/loss</td><td>0.1782</td></tr><tr><td>eval/mean_mismatch_i_diff_avg</td><td>15.33588</td></tr><tr><td>eval/runtime</td><td>15.7635</td></tr><tr><td>eval/samples_per_second</td><td>1.523</td></tr><tr><td>eval/sql_exact_match_string</td><td>3</td></tr><tr><td>eval/steps_per_second</td><td>0.127</td></tr><tr><td>eval/tokens_match_avg</td><td>0.94784</td></tr><tr><td>group_by</td><td>0.97143</td></tr><tr><td>idk</td><td>0</td></tr><tr><td>instruct</td><td>0.8</td></tr><tr><td>instructions_cte_join</td><td>0.75</td></tr><tr><td>instructions_cte_window</td><td>0</td></tr><tr><td>instructions_date_join</td><td>0.375</td></tr><tr><td>instructions_string_matching</td><td>0.75</td></tr><tr><td>keywords_aggregate</td><td>0.625</td></tr><tr><td>keywords_ratio</td><td>0</td></tr><tr><td>order_by</td><td>0.85714</td></tr><tr><td>overall</td><td>0.5868</td></tr><tr><td>ratio</td><td>0.85714</td></tr><tr><td>table_join</td><td>0.85714</td></tr><tr><td>total_flos</td><td>9.784632598246196e+17</td></tr><tr><td>train/epoch</td><td>1</td></tr><tr><td>train/global_step</td><td>1000</td></tr><tr><td>train/grad_norm</td><td>4</td></tr><tr><td>train/learning_rate</td><td>0.0</td></tr><tr><td>train/loss</td><td>0.1368</td></tr><tr><td>train_loss</td><td>0.15225</td></tr><tr><td>train_runtime</td><td>8061.5451</td></tr><tr><td>train_samples_per_second</td><td>2.977</td></tr><tr><td>train_steps_per_second</td><td>0.124</td></tr><tr><td>v1</td><td>0.865</td></tr><tr><td>vllm/advanced</td><td>0.78125</td></tr><tr><td>vllm/advanced_cot</td><td>0.78125</td></tr><tr><td>vllm/basic</td><td>0.9</td></tr><tr><td>vllm/basic_cot</td><td>0.925</td></tr><tr><td>vllm/idk</td><td>0.87619</td></tr><tr><td>vllm/idk_cot</td><td>0.95238</td></tr><tr><td>vllm/v1</td><td>0.845</td></tr><tr><td>vllm/v1_cot</td><td>0.865</td></tr></table><br/></div></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run <strong style=\"color:#cdcd00\">sqlcoder_8b_fullft_ds_013_llama3_mgn1_b1_0900_b2_0990_steps_1000</strong> at: <a href='https://wandb.ai/defog/huggingface/runs/qcbad5rx' target=\"_blank\">https://wandb.ai/defog/huggingface/runs/qcbad5rx</a><br/> View project at: <a href='https://wandb.ai/defog/huggingface' target=\"_blank\">https://wandb.ai/defog/huggingface</a><br/>Synced 3 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Find logs at: <code>./wandb/run-20240613_112335-qcbad5rx/logs</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"\n",
"# Initialize a new run, specifying the project and the run ID\n",
"run = wandb.init(project=\"huggingface\", id=\"qcbad5rx\", resume=\"must\")\n",
"\n",
"# Define your new metrics\n",
"step = 1000\n",
"new_metrics = {\n",
" 'vllm/advanced': 0.781250,\n",
" 'vllm/advanced_cot': 0.781250,\n",
" 'vllm/basic': 0.900000,\n",
" 'vllm/basic_cot': 0.925000,\n",
" 'vllm/v1': 0.845000,\n",
" 'vllm/v1_cot': 0.865000,\n",
" 'vllm/idk': 0.876190,\n",
" 'vllm/idk_cot': 0.952381,\n",
"}\n",
"\n",
"# Log the new metrics to the run\n",
"wandb.log(new_metrics, step=step)\n",
"\n",
"# Finish the run\n",
"run.finish()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 4e27761

Please sign in to comment.