Merge pull request #26 from philip-ndikum/feature/tutorial-3

Feature/tutorial 3
philip-ndikum · Nov 13, 2024 · 736accf · 736accf
2 parents c307f6c + a98e112
commit 736accf
Showing 1 changed file with 290 additions and 0 deletions.
diff --git a/tutorial_notebooks/3_health_monitoring_analysis.ipynb b/tutorial_notebooks/3_health_monitoring_analysis.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TemporalScope Tutorial: Health Monitoring Analysis\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "This tutorial demonstrates how to analyze temporal biological data using the **TemporalScope** framework. We'll work with multiple health metrics to showcase both machine learning and deep learning approaches to temporal analysis.\n",
+    "\n",
+    "### Summary\n",
+    "\n",
+    "| **Step**  | **Description**                                                                 |\n",
+    "|-----------|---------------------------------------------------------------------------------|\n",
+    "| **1**     | **Data Generation**: Create synthetic health data with realistic patterns        |\n",
+    "| **2**     | **TimeFrame Setup**: Initialize temporal data structures for each health metric  |\n",
+    "| **3**     | **ML Processing**: Prepare data for one-step-ahead forecasting                  |\n",
+    "| **4**     | **DL Processing**: Prepare sequence data for deep learning models               |\n",
+    "| **5**     | **Temporal Splits**: Create proper train/test partitions                        |\n",
+    "\n",
+    "### Key Concepts\n",
+    "\n",
+    "- **Multiple Health Metrics**: Blood pressure, stress levels, and heart rate\n",
+    "- **Temporal Patterns**: Daily, weekly, and seasonal variations\n",
+    "- **Forecasting Approaches**: Both one-step-ahead and sequence-based predictions\n",
+    "- **Proper Validation**: Time-aware train/test splitting\n",
+    "\n",
+    "### Steps\n",
+    "\n",
+    "1. **Generate Health Data**\n",
+    "   - Create synthetic but realistic health measurements\n",
+    "   - Include known physiological patterns and correlations\n",
+    "\n",
+    "2. **Initialize TimeFrames**\n",
+    "   - Separate temporal structures for each health metric\n",
+    "   - Enable parallel processing capabilities\n",
+    "\n",
+    "3. **Prepare Forecasting Data**\n",
+    "   - Machine learning mode for immediate predictions\n",
+    "   - Deep learning mode for sequence-based analysis\n",
+    "\n",
+    "4. **Create Temporal Splits**\n",
+    "   - Sliding window approach\n",
+    "   - Maintain temporal ordering\n",
+    "   - Multiple validation periods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from datetime import datetime, timedelta\n",
+    "\n",
+    "from temporalscope.core.temporal_data_loader import TimeFrame\n",
+    "from temporalscope.core.temporal_target_shifter import TemporalTargetShifter\n",
+    "from temporalscope.partition.sliding_window import SlidingWindowPartitioner\n",
+    "from temporalscope.core.core_utils import print_divider"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_health_data(start_date: str = '2023-01-01', days: int = 365):\n",
+    "    \"\"\"Generate synthetic health monitoring data.\n",
+    "    \n",
+    "    Args:\n",
+    "        start_date (str): Starting date for the data\n",
+    "        days (int): Number of days to generate\n",
+    "    \"\"\"\n",
+    "    # Create date range for daily measurements\n",
+    "    dates = pd.date_range(start=start_date, periods=days, freq='D')\n",
+    "    \n",
+    "    # Time array for generating patterns\n",
+    "    t = np.arange(days)\n",
+    "    \n",
+    "    # Seasonal effect (yearly cycle)\n",
+    "    # - Amplitude of 5 represents typical seasonal BP variation\n",
+    "    # - 2π/365 gives us one complete cycle per year\n",
+    "    seasonal_effect = 5 * np.sin(2 * np.pi * t / 365)\n",
+    "    \n",
+    "    # Weekly pattern (work week stress)\n",
+    "    # - Amplitude of 3 for weekly BP fluctuation\n",
+    "    # - 2π/7 gives us one complete cycle per week\n",
+    "    weekly_effect = 3 * np.sin(2 * np.pi * t / 7)\n",
+    "    \n",
+    "    # Blood Pressure Generation\n",
+    "    # Systolic (120 typical baseline)\n",
+    "    # - Stronger influence from seasonal & weekly patterns\n",
+    "    # - Random variation (σ=3) for daily fluctuations\n",
+    "    systolic = 120 + seasonal_effect + weekly_effect + np.random.normal(0, 3, days)\n",
+    "    \n",
+    "    # Diastolic (80 typical baseline)\n",
+    "    # - Less affected by external patterns (multiplied by 0.5)\n",
+    "    # - Smaller random variation (σ=2)\n",
+    "    diastolic = 80 + seasonal_effect * 0.5 + weekly_effect * 0.5 + np.random.normal(0, 2, days)\n",
+    "    \n",
+    "    # Stress Level Generation (0-100 scale)\n",
+    "    # - Heavily influenced by weekly pattern (work stress)\n",
+    "    # - Larger random variation (σ=5) for daily life events\n",
+    "    # - Clipped to valid range [0,100]\n",
+    "    stress = 50 + weekly_effect + np.random.normal(0, 5, days)\n",
+    "    stress = np.clip(stress, 0, 100)\n",
+    "    \n",
+    "    # Heart Rate Generation\n",
+    "    # - Base offset of 70 bpm (the stress term below adds ~15 bpm at the mean stress of 50)\n",
+    "    # - Correlates with stress (0.3 coefficient)\n",
+    "    # - Weekly pattern influence\n",
+    "    # - Moderate random variation (σ=3)\n",
+    "    heart_rate = 70 + 0.3 * stress + weekly_effect + np.random.normal(0, 3, days)\n",
+    "    \n",
+    "    return pd.DataFrame({\n",
+    "        'ds': dates,\n",
+    "        'systolic': systolic,\n",
+    "        'diastolic': diastolic,\n",
+    "        'stress_level': stress,\n",
+    "        'heart_rate': heart_rate\n",
+    "    })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_metric_timeframes(df):\n",
+    "    \"\"\"Create TimeFrame objects for each health metric.\n",
+    "    \n",
+    "    Why separate TimeFrames?\n",
+    "    - Each metric might need different forecasting horizons\n",
+    "    - Allows parallel processing of different metrics\n",
+    "    - Can apply different temporal transformations per metric\n",
+    "    \"\"\"\n",
+    "    metrics = ['systolic', 'diastolic', 'stress_level', 'heart_rate']\n",
+    "    timeframes = {}\n",
+    "    \n",
+    "    for metric in metrics:\n",
+    "        # Using pandas backend for simplicity\n",
+    "        # Could switch to Modin/Polars for larger datasets\n",
+    "        timeframes[metric] = TimeFrame(\n",
+    "            df=df,\n",
+    "            time_col='ds',  # datetime column\n",
+    "            target_col=metric,  # metric to forecast\n",
+    "            backend='pd'\n",
+    "        )\n",
+    "    \n",
+    "    return timeframes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_forecasting_data(timeframe, mode='machine_learning', sequence_length=7):\n",
+    "    \"\"\"Prepare data for forecasting using TemporalTargetShifter.\n",
+    "    \n",
+    "    Two modes supported:\n",
+    "    1. Machine Learning mode (mode='machine_learning'):\n",
+    "       - One-step-ahead prediction\n",
+    "       - Useful for immediate forecasts (next day)\n",
+    "       - Better for interpretable models (regression, etc.)\n",
+    "    \n",
+    "    2. Deep Learning mode (mode='deep_learning'):\n",
+    "       - Sequence-to-sequence prediction\n",
+    "       - Captures longer temporal patterns\n",
+    "       - Better for complex patterns (LSTM, etc.)\n",
+    "       - sequence_length (default 7, i.e. one week of daily data) is only used in this mode\n",
+    "    \"\"\"\n",
+    "    shifter = TemporalTargetShifter(\n",
+    "        n_lags=1,  # How many steps to look ahead\n",
+    "        mode=mode,\n",
+    "        sequence_length=sequence_length if mode == 'deep_learning' else None,\n",
+    "        verbose=True\n",
+    "    )\n",
+    "    \n",
+    "    return shifter.fit_transform(timeframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_temporal_splits(timeframe, num_partitions=3):\n",
+    "    \"\"\"Create temporal train/test splits using sliding window.\n",
+    "    \n",
+    "    Why sliding window?\n",
+    "    - Maintains temporal ordering (crucial for time series)\n",
+    "    - Multiple partitions to assess model stability\n",
+    "    - Each partition moves forward in time\n",
+    "    - 70/30 split preserves enough history for training\n",
+    "    \n",
+    "    Why num_partitions=3?\n",
+    "    - Tests model on different time periods\n",
+    "    - Captures seasonal variations\n",
+    "    - Balance between validation and data usage\n",
+    "    \"\"\"\n",
+    "    partitioner = SlidingWindowPartitioner(\n",
+    "        tf=timeframe,\n",
+    "        num_partitions=num_partitions,  # Number of temporal splits\n",
+    "        train_pct=0.7,  # 70% for training\n",
+    "        test_pct=0.3    # 30% for testing\n",
+    "    )\n",
+    "    \n",
+    "    return list(partitioner.fit_transform())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if __name__ == \"__main__\":\n",
+    "    # Step 1: Generate synthetic health data\n",
+    "    print_divider()\n",
+    "    print(\"Generating synthetic health data...\")\n",
+    "    health_df = generate_health_data()\n",
+    "    print(\"Preview of generated health data:\")\n",
+    "    print(health_df.head())\n",
+    "    print_divider()\n",
+    "    \n",
+    "    # Step 2: Create TimeFrames for each metric\n",
+    "    print(\"Initializing TimeFrames for each health metric...\")\n",
+    "    metric_timeframes = create_metric_timeframes(health_df)\n",
+    "    \n",
+    "    # Step 3: Demonstrate both ML and DL approaches\n",
+    "    print(\"\\nPreparing data for different forecasting approaches:\")\n",
+    "    for metric in ['heart_rate', 'stress_level']:\n",
+    "        print(f\"\\nProcessing {metric}:\")\n",
+    "        \n",
+    "        # ML mode (one-step-ahead)\n",
+    "        print(\"\\nMachine Learning mode (one-step-ahead):\")\n",
+    "        ml_data = prepare_forecasting_data(metric_timeframes[metric], mode='machine_learning')\n",
+    "        print(ml_data.head())\n",
+    "        \n",
+    "        # DL mode (sequence)\n",
+    "        print(\"\\nDeep Learning mode (sequence-based):\")\n",
+    "        dl_data = prepare_forecasting_data(metric_timeframes[metric], mode='deep_learning')\n",
+    "        print(dl_data.head())\n",
+    "        \n",
+    "        print_divider()\n",
+    "    \n",
+    "    # Step 4: Create and demonstrate temporal splits\n",
+    "    print(\"\\nCreating temporal splits for validation:\")\n",
+    "    heart_rate_splits = create_temporal_splits(metric_timeframes['heart_rate'])\n",
+    "    \n",
+    "    for i, partition in enumerate(heart_rate_splits):\n",
+    "        print(f\"\\nPartition {i+1}:\")\n",
+    "        print(f\"Train shape: {partition['partition_1']['train'].shape}\")\n",
+    "        print(f\"Test shape: {partition['partition_1']['test'].shape}\")\n",
+    "    \n",
+    "    print_divider()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}