diff --git a/notebooks/end2end_demo.ipynb b/notebooks/end2end_demo.ipynb
new file mode 100644
index 00000000..5034dca5
--- /dev/null
+++ b/notebooks/end2end_demo.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generate Dummy Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PosixPath('/var/folders/5k/7nfpl0cs5999pzhndyybcn800000gn/T/dummy_data.csv')"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "import tempfile\n",
+    "import pathlib\n",
+    "\n",
+    "# The CSV goes to the system temp directory under a fixed name,\n",
+    "# so re-running this cell overwrites the previous file.\n",
+    "tmpdir = pathlib.Path(tempfile.gettempdir())\n",
+    "dummy_csv_path = tmpdir / \"dummy_data.csv\"\n",
+    "\n",
+    "token_classes = [\"Nouns\", \"Verbs\", \"Adjectives\"]\n",
+    "training_steps_options = [100000, 200000, 400000]\n",
+    "model_sizes_options = [\"Small\", \"Medium\", \"Large\"]\n",
+    "\n",
+    "# Seeded generator so the dummy data is identical on every run\n",
+    "rng = np.random.default_rng(0)\n",
+    "\n",
+    "data = {\n",
+    "    \"model_size\": [],\n",
+    "    \"training_steps\": [],\n",
+    "    \"loss\": [],\n",
+    "    \"token_class\": [],\n",
+    "}\n",
+    "\n",
+    "# Generate dummy data: one row per (model size, training steps, token class)\n",
+    "for size_rank, size in enumerate(model_sizes_options, start=1):\n",
+    "    for steps_rank, steps in enumerate(training_steps_options, start=1):\n",
+    "        for token_class in token_classes:\n",
+    "            data[\"model_size\"].append(size)\n",
+    "            data[\"training_steps\"].append(steps)\n",
+    "            data[\"token_class\"].append(token_class)\n",
+    "            # loss should be random but decrease with size and steps;\n",
+    "            # before noise it runs from 1/3 (smallest size, fewest steps)\n",
+    "            # down to -1 (largest size, most steps)\n",
+    "            loss = (\n",
+    "                1\n",
+    "                - size_rank / len(model_sizes_options)\n",
+    "                - steps_rank / len(training_steps_options)\n",
+    "            )\n",
+    "            noisy_loss = loss + rng.normal(0, 0.1)\n",
+    "            data[\"loss\"].append(noisy_loss)\n",
+    "\n",
+    "# Create DataFrame\n",
+    "dummy_df = pd.DataFrame(data)\n",
+    "\n",
+    "# Save DataFrame to a CSV file\n",
+    "dummy_df.to_csv(dummy_csv_path, index=False)\n",
+    "\n",
+    "dummy_csv_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Build Visualization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8ef207924213434babfadb9160dd8605",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "interactive(children=(Dropdown(description='comparison_type', options=('model_size', 'training_steps'), value=…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import plotly.graph_objs as go\n",
+    "from ipywidgets import interact, Dropdown\n",
+    "import plotly.express as px\n",
+    "\n",
+    "# Load your data\n",
+    "df = pd.read_csv(dummy_csv_path)  # replace with your actual path\n",
+    "\n",
+    "\n",
+    "def update_figure(comparison_type, model_size, training_steps, token_class):\n",
+    "    \"\"\"Show a line chart of loss for the selected token class.\n",
+    "\n",
+    "    When comparison_type is 'model_size', loss is plotted against model size\n",
+    "    for the selected training_steps; for any other value, loss is plotted\n",
+    "    against training steps for the selected model_size.\n",
+    "    \"\"\"\n",
+    "    if comparison_type == 'model_size':\n",
+    "        filtered_df = df[(df['training_steps'] == training_steps) & (df['token_class'] == token_class)]\n",
+    "        fig = px.line(filtered_df, x='model_size', y='loss', title='Loss by Model Size')\n",
+    "    else:\n",
+    "        filtered_df = df[(df['model_size'] == model_size) & (df['token_class'] == token_class)]\n",
+    "        fig = px.line(filtered_df, x='training_steps', y='loss', title='Loss by Training Steps')\n",
+    "\n",
+    "    fig.show()\n",
+    "\n",
+    "\n",
+    "# Interactive widgets\n",
+    "comparison_type = Dropdown(options=['model_size', 'training_steps'])\n",
+    "model_size = Dropdown(options=sorted(df['model_size'].unique()))\n",
+    "training_steps = Dropdown(options=sorted(df['training_steps'].unique()))\n",
+    "token_class = Dropdown(options=df['token_class'].unique())\n",
+    "\n",
+    "# Manual mode adds a \"Run Interact\" button and skips the automatic initial\n",
+    "# call to update_figure, so the chart is only rendered on request, after all\n",
+    "# the widgets have been rendered.\n",
+    "_ = interact.options(manual=True)(\n",
+    "    update_figure,\n",
+    "    comparison_type=comparison_type,\n",
+    "    model_size=model_size,\n",
+    "    training_steps=training_steps,\n",
+    "    token_class=token_class,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tinyevals",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}