Skip to content

Commit

Permalink
First version of visualization using dummy data
Browse files Browse the repository at this point in the history
  • Loading branch information
jaidhyani committed Feb 14, 2024
1 parent 4ed8b19 commit 6d46f93
Showing 1 changed file with 173 additions and 0 deletions.
173 changes: 173 additions & 0 deletions notebooks/end2end_demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generate Dummy Data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/var/folders/5k/7nfpl0cs5999pzhndyybcn800000gn/T/dummy_data.csv')"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"import tempfile\n",
"import pathlib\n",
"\n",
"# Fixed seed so that a fresh Restart & Run All regenerates identical dummy data\n",
"SEED = 0\n",
"rng = np.random.default_rng(SEED)\n",
"\n",
"# The CSV is written to the platform temp directory and read back by the visualization cell\n",
"tmpdir = pathlib.Path(tempfile.gettempdir())\n",
"dummy_csv_path = tmpdir / \"dummy_data.csv\"\n",
"\n",
"token_classes = [\"Nouns\", \"Verbs\", \"Adjectives\"]\n",
"training_steps_options = [100000, 200000, 400000]\n",
"model_sizes_options = [\"Small\", \"Medium\", \"Large\"]\n",
"\n",
"# Column-oriented accumulator: one list per CSV column, filled in lockstep\n",
"data = {\n",
"    \"model_size\": [],\n",
"    \"training_steps\": [],\n",
"    \"loss\": [],\n",
"    \"token_class\": [],\n",
"}\n",
"\n",
"# Generate dummy data: one row per (model size, training steps, token class)\n",
"for size_rank, size in enumerate(model_sizes_options, start=1):\n",
"    for steps_rank, steps in enumerate(training_steps_options, start=1):\n",
"        # loss should be random but decrease with size and steps;\n",
"        # before noise it runs from 1/3 (smallest, fewest steps) down to -1 (largest, most steps)\n",
"        loss = (\n",
"            1\n",
"            - size_rank / len(model_sizes_options)\n",
"            - steps_rank / len(training_steps_options)\n",
"        )\n",
"        for token_class in token_classes:\n",
"            # every token class shares the baseline and gets its own Gaussian noise draw (std 0.1)\n",
"            noisy_loss = loss + rng.normal(0, 0.1)\n",
"            data[\"model_size\"].append(size)\n",
"            data[\"training_steps\"].append(steps)\n",
"            data[\"token_class\"].append(token_class)\n",
"            data[\"loss\"].append(noisy_loss)\n",
"\n",
"# Create DataFrame\n",
"dummy_df = pd.DataFrame(data)\n",
"\n",
"# Save DataFrame to a CSV file\n",
"dummy_df.to_csv(dummy_csv_path, index=False)\n",
"\n",
"dummy_csv_path"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Build Visualization"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8ef207924213434babfadb9160dd8605",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(Dropdown(description='comparison_type', options=('model_size', 'training_steps'), value=…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import plotly.graph_objs as go\n",
"from ipywidgets import interact, Dropdown\n",
"import plotly.express as px\n",
"\n",
"# Read back the CSV written by the data-generation cell\n",
"df = pd.read_csv(dummy_csv_path)  # replace with your actual path\n",
"\n",
"# Workaround flag (per the original author's note): swallow the first call to\n",
"# update_figure so that it does not render a duplicate chart\n",
"_first_call = True\n",
"\n",
"\n",
"def update_figure(comparison_type, model_size, training_steps, token_class):\n",
"    \"\"\"Build a loss line chart for one token class and show it.\n",
"\n",
"    When comparison_type is 'model_size', rows are restricted to the chosen\n",
"    training_steps and model_size goes on the x axis. For any other value,\n",
"    rows are restricted to the chosen model_size and training_steps goes on\n",
"    the x axis. The first invocation only clears the _first_call flag and\n",
"    does not display the figure.\n",
"    \"\"\"\n",
"    global _first_call\n",
"\n",
"    if comparison_type == 'model_size':\n",
"        fixed_col, fixed_val = 'training_steps', training_steps\n",
"        x_col, chart_title = 'model_size', 'Loss by Model Size'\n",
"    else:\n",
"        fixed_col, fixed_val = 'model_size', model_size\n",
"        x_col, chart_title = 'training_steps', 'Loss by Training Steps'\n",
"\n",
"    row_mask = (df[fixed_col] == fixed_val) & (df['token_class'] == token_class)\n",
"    fig = px.line(df[row_mask], x=x_col, y='loss', title=chart_title)\n",
"\n",
"    if _first_call:\n",
"        _first_call = False\n",
"        return\n",
"    fig.show()\n",
"\n",
"\n",
"# Interactive widgets: one dropdown per update_figure argument\n",
"comparison_type = Dropdown(options=['model_size', 'training_steps'])\n",
"model_size = Dropdown(options=sorted(df['model_size'].unique()))\n",
"training_steps = Dropdown(options=sorted(df['training_steps'].unique()))\n",
"token_class = Dropdown(options=df['token_class'].unique())\n",
"\n",
"# only render the chart after all the widgets have been rendered\n",
"# NOTE(review): `__manual` is not a parameter of update_figure; confirm interact actually\n",
"# acts on it. The documented on-demand mode is interact_manual / interact.options(manual=True).\n",
"_ = interact(\n",
"    update_figure,\n",
"    comparison_type=comparison_type,\n",
"    model_size=model_size,\n",
"    training_steps=training_steps,\n",
"    token_class=token_class,\n",
"    __manual=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "tinyevals",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 6d46f93

Please sign in to comment.