First version of visualization using dummy data

delphi-suite · Feb 14, 2024 · 6d46f93 · 6d46f93
1 parent 4ed8b19
commit 6d46f93
Showing 1 changed file with 173 additions and 0 deletions.
diff --git a/notebooks/end2end_demo.ipynb b/notebooks/end2end_demo.ipynb
@@ -0,0 +1,173 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generate Dummy Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PosixPath('/var/folders/5k/7nfpl0cs5999pzhndyybcn800000gn/T/dummy_data.csv')"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "import tempfile\n",
+    "import pathlib\n",
+    "\n",
+    "# Fixed seed so that Restart & Run All regenerates exactly the same dummy losses.\n",
+    "RANDOM_SEED = 0\n",
+    "rng = np.random.default_rng(RANDOM_SEED)\n",
+    "\n",
+    "# The CSV is written into the OS temporary directory.\n",
+    "tmpdir = pathlib.Path(tempfile.gettempdir())\n",
+    "dummy_csv_path = tmpdir / \"dummy_data.csv\"\n",
+    "\n",
+    "token_classes = [\"Nouns\", \"Verbs\", \"Adjectives\"]\n",
+    "training_steps_options = [100000, 200000, 400000]\n",
+    "model_sizes_options = [\"Small\", \"Medium\", \"Large\"]\n",
+    "\n",
+    "# Column-oriented storage; key order determines the CSV column order.\n",
+    "data = {\n",
+    "    \"model_size\": [],\n",
+    "    \"training_steps\": [],\n",
+    "    \"loss\": [],\n",
+    "    \"token_class\": [],\n",
+    "}\n",
+    "\n",
+    "# Generate dummy data: one row per (model size, training steps, token class).\n",
+    "for size_rank, size in enumerate(model_sizes_options, start=1):\n",
+    "    for steps_rank, steps in enumerate(training_steps_options, start=1):\n",
+    "        # Noise-free loss decreases with size and steps: it runs from 1/3\n",
+    "        # (smallest model, fewest steps) down to -1 (largest model, most steps).\n",
+    "        base_loss = (\n",
+    "            1\n",
+    "            - size_rank / len(model_sizes_options)\n",
+    "            - steps_rank / len(training_steps_options)\n",
+    "        )\n",
+    "        for token_class in token_classes:\n",
+    "            data[\"model_size\"].append(size)\n",
+    "            data[\"training_steps\"].append(steps)\n",
+    "            data[\"token_class\"].append(token_class)\n",
+    "            # Every row gets its own Gaussian noise (mean 0, std 0.1).\n",
+    "            data[\"loss\"].append(base_loss + rng.normal(0, 0.1))\n",
+    "\n",
+    "# Create DataFrame\n",
+    "dummy_df = pd.DataFrame(data)\n",
+    "\n",
+    "# Save DataFrame to a CSV file\n",
+    "dummy_df.to_csv(dummy_csv_path, index=False)\n",
+    "\n",
+    "dummy_csv_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Build Visualization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8ef207924213434babfadb9160dd8605",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "interactive(children=(Dropdown(description='comparison_type', options=('model_size', 'training_steps'), value=…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import plotly.graph_objs as go\n",
+    "from ipywidgets import interact, Dropdown\n",
+    "import plotly.express as px\n",
+    "\n",
+    "# Load the CSV written by the \"Generate Dummy Data\" cell\n",
+    "df = pd.read_csv(dummy_csv_path)  # replace with your actual path\n",
+    "\n",
+    "\n",
+    "def update_figure(comparison_type, model_size, training_steps, token_class):\n",
+    "    \"\"\"Draw a loss line chart for one token class.\n",
+    "\n",
+    "    comparison_type: 'model_size' plots loss against model size for the selected\n",
+    "        `training_steps`; any other value plots loss against training steps for\n",
+    "        the selected `model_size`.\n",
+    "    model_size, training_steps, token_class: values used to filter `df`.\n",
+    "\n",
+    "    Reads the notebook-level `df`, shows the figure and returns None.\n",
+    "    \"\"\"\n",
+    "    # Both comparison modes restrict the rows to the chosen token class.\n",
+    "    same_token_class = df['token_class'] == token_class\n",
+    "    if comparison_type == 'model_size':\n",
+    "        filtered_df = df[(df['training_steps'] == training_steps) & same_token_class]\n",
+    "        fig = px.line(filtered_df, x='model_size', y='loss', title='Loss by Model Size')\n",
+    "    else:\n",
+    "        filtered_df = df[(df['model_size'] == model_size) & same_token_class]\n",
+    "        fig = px.line(filtered_df, x='training_steps', y='loss', title='Loss by Training Steps')\n",
+    "    fig.show()\n",
+    "\n",
+    "\n",
+    "# Interactive widgets\n",
+    "comparison_type = Dropdown(options=['model_size', 'training_steps'])\n",
+    "model_size = Dropdown(options=sorted(df['model_size'].unique()))\n",
+    "training_steps = Dropdown(options=sorted(df['training_steps'].unique()))\n",
+    "token_class = Dropdown(options=df['token_class'].unique())\n",
+    "\n",
+    "# Manual mode: ipywidgets adds a button and calls update_figure only when it is\n",
+    "# clicked, so nothing is drawn while the widgets themselves are being rendered.\n",
+    "# Manual mode has to be requested through interact.options(manual=True).\n",
+    "_ = interact.options(manual=True, manual_name='Update chart')(\n",
+    "    update_figure,\n",
+    "    comparison_type=comparison_type,\n",
+    "    model_size=model_size,\n",
+    "    training_steps=training_steps,\n",
+    "    token_class=token_class,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tinyevals",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}