Skip to content

Commit

Permalink
Merge pull request #26 from philip-ndikum/feature/tutorial-3
Browse files Browse the repository at this point in the history
Feature/tutorial 3
  • Loading branch information
philip-ndikum authored Nov 13, 2024
2 parents c307f6c + a98e112 commit 736accf
Showing 1 changed file with 290 additions and 0 deletions.
290 changes: 290 additions & 0 deletions tutorial_notebooks/3_health_monitoring_analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TemporalScope Tutorial: Health Monitoring Analysis\n",
"\n",
"## Overview\n",
"\n",
"This tutorial demonstrates how to prepare temporal biological data for analysis using the **TemporalScope** framework. We'll work with multiple synthetic health metrics to showcase data preparation for both machine learning and deep learning forecasting, followed by time-aware train/test splitting.\n",
"\n",
"### Summary\n",
"\n",
"| **Step** | **Description** |\n",
"|-----------|---------------------------------------------------------------------------------|\n",
"| **1** | **Data Generation**: Create synthetic health data with realistic patterns |\n",
"| **2** | **TimeFrame Setup**: Initialize temporal data structures for each health metric |\n",
"| **3** | **ML Processing**: Prepare data for one-step-ahead forecasting |\n",
"| **4** | **DL Processing**: Prepare sequence data for deep learning models |\n",
"| **5** | **Temporal Splits**: Create proper train/test partitions |\n",
"\n",
"### Key Concepts\n",
"\n",
"- **Multiple Health Metrics**: Blood pressure, stress levels, and heart rate\n",
"- **Temporal Patterns**: Weekly and seasonal (yearly) cycles in daily measurements\n",
"- **Forecasting Approaches**: Both one-step-ahead and sequence-based predictions\n",
"- **Proper Validation**: Time-aware train/test splitting\n",
"\n",
"### Steps\n",
"\n",
"1. **Generate Health Data**\n",
" - Create synthetic but realistic health measurements\n",
" - Include known physiological patterns and correlations\n",
"\n",
"2. **Initialize TimeFrames**\n",
" - Separate temporal structures for each health metric\n",
" - Enable parallel processing capabilities\n",
"\n",
"3. **Prepare Forecasting Data**\n",
" - Machine learning mode for immediate predictions\n",
" - Deep learning mode for sequence-based analysis\n",
"\n",
"4. **Create Temporal Splits**\n",
" - Sliding window approach\n",
" - Maintain temporal ordering\n",
" - Multiple validation periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from datetime import datetime, timedelta\n",
"\n",
"from temporalscope.core.temporal_data_loader import TimeFrame\n",
"from temporalscope.core.temporal_target_shifter import TemporalTargetShifter\n",
"from temporalscope.partition.sliding_window import SlidingWindowPartitioner\n",
"from temporalscope.core.core_utils import print_divider"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_health_data(start_date: str = '2023-01-01', days: int = 365):\n",
" \"\"\"Generate synthetic health monitoring data.\n",
" \n",
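" Args:\n",
" start_date (str): Date of the first daily measurement\n",
" days (int): Number of consecutive daily rows to generate\n",
" \n",
" Returns:\n",
" pd.DataFrame: One row per day with columns 'ds', 'systolic',\n",
" 'diastolic', 'stress_level' and 'heart_rate'\n",
" \n",
" Note:\n",
" The noise comes from np.random.normal and no seed is set in this\n",
" function, so repeated calls return different values. Seed NumPy\n",
" before calling if reproducible output is needed.\n",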
" \"\"\"\n",
" # Create date range for daily measurements\n",
" dates = pd.date_range(start=start_date, periods=days, freq='D')\n",
" \n",
" # Time array for generating patterns\n",
" t = np.arange(days)\n",
" \n",
" # Seasonal effect (yearly cycle)\n",
" # - Amplitude of 5 represents typical seasonal BP variation\n",
" # - 2π/365 gives us one complete cycle per year\n",
" seasonal_effect = 5 * np.sin(2 * np.pi * t / 365)\n",
" \n",
" # Weekly pattern (work week stress)\n",
" # - Amplitude of 3 for weekly BP fluctuation\n",
" # - 2π/7 gives us one complete cycle per week\n",
" weekly_effect = 3 * np.sin(2 * np.pi * t / 7)\n",
" \n",
" # Blood Pressure Generation\n",
" # Systolic (120 typical baseline)\n",
" # - Stronger influence from seasonal & weekly patterns\n",
" # - Random variation (σ=3) for daily fluctuations\n",
" systolic = 120 + seasonal_effect + weekly_effect + np.random.normal(0, 3, days)\n",
" \n",
" # Diastolic (80 typical baseline)\n",
" # - Less affected by external patterns (multiplied by 0.5)\n",
" # - Smaller random variation (σ=2)\n",
" diastolic = 80 + seasonal_effect * 0.5 + weekly_effect * 0.5 + np.random.normal(0, 2, days)\n",
" \n",
" # Stress Level Generation (0-100 scale)\n",
" # - Midpoint baseline of 50\n",
" # - Follows the weekly pattern (work stress, amplitude 3)\n",
" # - Larger random variation (σ=5) for daily life events, which\n",
" # outweighs the weekly swing on any single day\n",
" # - Clipped to valid range [0,100]\n",
" stress = 50 + weekly_effect + np.random.normal(0, 5, days)\n",
" stress = np.clip(stress, 0, 100)\n",
" \n",
" # Heart Rate Generation\n",
" # - Base term of 70 bpm\n",
" # - Correlates with stress (0.3 coefficient); at the mean stress of 50\n",
" # this adds about 15 bpm, so values centre near 85 bpm\n",
" # - Weekly pattern influence\n",
" # - Moderate random variation (σ=3)\n",
" heart_rate = 70 + 0.3 * stress + weekly_effect + np.random.normal(0, 3, days)\n",
" \n",
" return pd.DataFrame({\n",
" 'ds': dates,\n",
" 'systolic': systolic,\n",
" 'diastolic': diastolic,\n",
" 'stress_level': stress,\n",
" 'heart_rate': heart_rate\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def create_metric_timeframes(df):\n",
" \"\"\"Create TimeFrame objects for each health metric.\n",
" \n",
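" Why separate TimeFrames?\n",
" - Each metric might need different forecasting horizons\n",
" - Allows parallel processing of different metrics\n",
" - Can apply different temporal transformations per metric\n",
" \n",
" Note: every TimeFrame receives the same full DataFrame; only\n",
" `target_col` differs between them.\n",
" \n",
" Returns:\n",
" dict: Maps each metric name to its TimeFrame\n",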
" \"\"\"\n",
" metrics = ['systolic', 'diastolic', 'stress_level', 'heart_rate']\n",
" timeframes = {}\n",
" \n",
" for metric in metrics:\n",
" # Using pandas backend for simplicity\n",
" # Could switch to Modin/Polars for larger datasets\n",
" timeframes[metric] = TimeFrame(\n",
" df=df,\n",
" time_col='ds', # datetime column\n",
" target_col=metric, # metric to forecast\n",
" backend='pd'\n",
" )\n",
" \n",
" return timeframes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def prepare_forecasting_data(timeframe, mode='machine_learning', sequence_length=7):\n",
" \"\"\"Prepare data for forecasting using TemporalTargetShifter.\n",
" \n",
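" Two modes supported (pass the full name as `mode`):\n",
" 1. Machine Learning mode (mode='machine_learning'):\n",
" - One-step-ahead prediction\n",
" - Useful for immediate forecasts (next day)\n",
" - Better for interpretable models (regression, etc.)\n",
" \n",
" 2. Deep Learning mode (mode='deep_learning'):\n",
" - Sequence-to-sequence prediction\n",
" - Captures longer temporal patterns\n",
" - Better for complex patterns (LSTM, etc.)\n",
" - sequence_length=7 for weekly patterns\n",
" \n",
" `sequence_length` is forwarded to the shifter only when\n",
" mode == 'deep_learning'; in any other mode None is passed instead.\n",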
" \"\"\"\n",
" shifter = TemporalTargetShifter(\n",
" n_lags=1, # How many steps to look ahead\n",
" mode=mode,\n",
" sequence_length=sequence_length if mode == 'deep_learning' else None,\n",
" verbose=True\n",
" )\n",
" \n",
" return shifter.fit_transform(timeframe)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def create_temporal_splits(timeframe, num_partitions=3):\n",
" \"\"\"Create temporal train/test splits using sliding window.\n",
" \n",
" Why sliding window?\n",
" - Maintains temporal ordering (crucial for time series)\n",
" - Multiple partitions to assess model stability\n",
" - Each partition moves forward in time\n",
" - 70/30 split preserves enough history for training\n",
" \n",
" Why num_partitions=3?\n",
" - Tests model on different time periods\n",
" - Captures seasonal variations\n",
" - Balance between validation and data usage\n",
" \"\"\"\n",
" partitioner = SlidingWindowPartitioner(\n",
" tf=timeframe,\n",
" num_partitions=num_partitions, # Number of temporal splits\n",
" train_pct=0.7, # 70% for training\n",
" test_pct=0.3 # 30% for testing\n",
" )\n",
" \n",
" return list(partitioner.fit_transform())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
" # Step 1: Generate synthetic health data\n",
" print_divider()\n",
" print(\"Generating synthetic health data...\")\n",
" health_df = generate_health_data()\n",
" print(\"Preview of generated health data:\")\n",
" print(health_df.head())\n",
" print_divider()\n",
" \n",
" # Step 2: Create TimeFrames for each metric\n",
" print(\"Initializing TimeFrames for each health metric...\")\n",
" metric_timeframes = create_metric_timeframes(health_df)\n",
" \n",
" # Step 3: Demonstrate both ML and DL approaches\n",
" print(\"\\nPreparing data for different forecasting approaches:\")\n",
" for metric in ['heart_rate', 'stress_level']:\n",
" print(f\"\\nProcessing {metric}:\")\n",
" \n",
" # ML mode (one-step-ahead)\n",
" print(\"\\nMachine Learning mode (one-step-ahead):\")\n",
" ml_data = prepare_forecasting_data(metric_timeframes[metric], mode='machine_learning')\n",
" print(ml_data.head())\n",
" \n",
" # DL mode (sequence)\n",
" print(\"\\nDeep Learning mode (sequence-based):\")\n",
" dl_data = prepare_forecasting_data(metric_timeframes[metric], mode='deep_learning')\n",
" print(dl_data.head())\n",
" \n",
" print_divider()\n",
" \n",
" # Step 4: Create and demonstrate temporal splits\n",
" print(\"\\nCreating temporal splits for validation:\")\n",
" heart_rate_splits = create_temporal_splits(metric_timeframes['heart_rate'])\n",
" \n",
" for i, partition in enumerate(heart_rate_splits):\n",
" print(f\"\\nPartition {i+1}:\")\n",
" print(f\"Train shape: {partition['partition_1']['train'].shape}\")\n",
" print(f\"Test shape: {partition['partition_1']['test'].shape}\")\n",
" \n",
" print_divider()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

0 comments on commit 736accf

Please sign in to comment.