-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First version of visualization using dummy data
- Loading branch information
Showing
1 changed file
with
173 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Generate Dummy Data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"PosixPath('/var/folders/5k/7nfpl0cs5999pzhndyybcn800000gn/T/dummy_data.csv')" | ||
] | ||
}, | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"import tempfile\n", | ||
"import pathlib\n", | ||
"\n", | ||
"# The CSV is written to the system temp directory.\n", | ||
"tmpdir = pathlib.Path(tempfile.gettempdir())\n", | ||
"dummy_csv_path = tmpdir / \"dummy_data.csv\"\n", | ||
"\n", | ||
"# Fixed seed so that Restart & Run All regenerates exactly the same dummy data.\n", | ||
"SEED = 0\n", | ||
"rng = np.random.default_rng(SEED)\n", | ||
"\n", | ||
"token_classes = [\"Nouns\", \"Verbs\", \"Adjectives\"]\n", | ||
"training_steps_options = [100000, 200000, 400000]\n", | ||
"model_sizes_options = [\"Small\", \"Medium\", \"Large\"]\n", | ||
"\n", | ||
"# Column-oriented accumulator; each key becomes one DataFrame column.\n", | ||
"data = {\n", | ||
"    \"model_size\": [],\n", | ||
"    \"training_steps\": [],\n", | ||
"    \"loss\": [],\n", | ||
"    \"token_class\": [],\n", | ||
"}\n", | ||
"\n", | ||
"# Generate dummy data: one row per (model size, training steps, token class).\n", | ||
"for size_rank, size in enumerate(model_sizes_options, start=1):\n", | ||
"    for steps_rank, steps in enumerate(training_steps_options, start=1):\n", | ||
"        # Noise-free loss decreases with both size and steps. It does not depend\n", | ||
"        # on the token class, and it ranges from 1/3 down to -1, so most\n", | ||
"        # generated losses are negative.\n", | ||
"        loss = (\n", | ||
"            1\n", | ||
"            - size_rank / len(model_sizes_options)\n", | ||
"            - steps_rank / len(training_steps_options)\n", | ||
"        )\n", | ||
"        for token_class in token_classes:\n", | ||
"            data[\"model_size\"].append(size)\n", | ||
"            data[\"training_steps\"].append(steps)\n", | ||
"            data[\"token_class\"].append(token_class)\n", | ||
"            # Independent Gaussian noise (std 0.1) per row, from the seeded generator.\n", | ||
"            noisy_loss = loss + rng.normal(0, 0.1)\n", | ||
"            data[\"loss\"].append(noisy_loss)\n", | ||
"\n", | ||
"# Create DataFrame\n", | ||
"dummy_df = pd.DataFrame(data)\n", | ||
"\n", | ||
"# Save DataFrame to a CSV file\n", | ||
"dummy_df.to_csv(dummy_csv_path, index=False)\n", | ||
"\n", | ||
"dummy_csv_path" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Build Visualization" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"application/vnd.jupyter.widget-view+json": { | ||
"model_id": "8ef207924213434babfadb9160dd8605", | ||
"version_major": 2, | ||
"version_minor": 0 | ||
}, | ||
"text/plain": [ | ||
"interactive(children=(Dropdown(description='comparison_type', options=('model_size', 'training_steps'), value=…" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import plotly.graph_objs as go\n", | ||
"from ipywidgets import interact, Dropdown\n", | ||
"import plotly.express as px\n", | ||
"\n", | ||
"# Load your data\n", | ||
"df = pd.read_csv(dummy_csv_path)  # replace with your actual path\n", | ||
"\n", | ||
"\n", | ||
"def update_figure(comparison_type, model_size, training_steps, token_class):\n", | ||
"    \"\"\"Render a line chart of loss along one axis for the current selection.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        comparison_type: 'model_size' plots loss against model size for the\n", | ||
"            selected training_steps; any other value plots loss against\n", | ||
"            training steps for the selected model_size.\n", | ||
"        model_size: Model size held fixed when comparing training steps\n", | ||
"            (unused when comparing model sizes).\n", | ||
"        training_steps: Step count held fixed when comparing model sizes\n", | ||
"            (unused when comparing training steps).\n", | ||
"        token_class: Token class the rows are filtered on in both modes.\n", | ||
"\n", | ||
"    Reads the module-level DataFrame `df`, shows the figure, and returns None.\n", | ||
"    \"\"\"\n", | ||
"    same_token_class = df['token_class'] == token_class\n", | ||
"    if comparison_type == 'model_size':\n", | ||
"        filtered_df = df[(df['training_steps'] == training_steps) & same_token_class]\n", | ||
"        fig = px.line(filtered_df, x='model_size', y='loss', title='Loss by Model Size')\n", | ||
"    else:\n", | ||
"        filtered_df = df[(df['model_size'] == model_size) & same_token_class]\n", | ||
"        fig = px.line(filtered_df, x='training_steps', y='loss', title='Loss by Training Steps')\n", | ||
"\n", | ||
"    fig.show()\n", | ||
"\n", | ||
"\n", | ||
"# Interactive widgets\n", | ||
"comparison_type = Dropdown(options=['model_size', 'training_steps'])\n", | ||
"model_size = Dropdown(options=sorted(df['model_size'].unique()))\n", | ||
"training_steps = Dropdown(options=sorted(df['training_steps'].unique()))\n", | ||
"token_class = Dropdown(options=df['token_class'].unique())\n", | ||
"\n", | ||
"# Only render the chart on demand, after all the widgets have been rendered.\n", | ||
"# Manual mode is requested through interact.options(); a `__manual=True` keyword\n", | ||
"# is not a documented way to enable it. In manual mode update_figure runs only\n", | ||
"# when the button is clicked, so no guard against an initial call is needed.\n", | ||
"_ = interact.options(manual=True)(\n", | ||
"    update_figure,\n", | ||
"    comparison_type=comparison_type,\n", | ||
"    model_size=model_size,\n", | ||
"    training_steps=training_steps,\n", | ||
"    token_class=token_class,\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "tinyevals", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |