-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #26 from philip-ndikum/feature/tutorial-3
Feature/tutorial 3
- Loading branch information
Showing
1 changed file
with
290 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,290 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Temporal Scope Tutorial: Health Monitoring Analysis\n", | ||
"\n", | ||
"## Overview\n", | ||
"\n", | ||
"This tutorial demonstrates how to analyze temporal biological data using the **TemporalScope** framework. We'll work with multiple health metrics to showcase both machine learning and deep learning approaches to temporal analysis.\n", | ||
"\n", | ||
"### Summary\n", | ||
"\n", | ||
"| **Step** | **Description** |\n", | ||
"|-----------|---------------------------------------------------------------------------------|\n", | ||
"| **1** | **Data Generation**: Create synthetic health data with realistic patterns |\n", | ||
"| **2** | **TimeFrame Setup**: Initialize temporal data structures for each health metric |\n", | ||
"| **3** | **ML Processing**: Prepare data for one-step-ahead forecasting |\n", | ||
"| **4** | **DL Processing**: Prepare sequence data for deep learning models |\n", | ||
"| **5** | **Temporal Splits**: Create proper train/test partitions |\n", | ||
"\n", | ||
"### Key Concepts\n", | ||
"\n", | ||
"- **Multiple Health Metrics**: Blood pressure, stress levels, and heart rate\n", | ||
"- **Temporal Patterns**: Daily, weekly, and seasonal variations\n", | ||
"- **Forecasting Approaches**: Both one-step-ahead and sequence-based predictions\n", | ||
"- **Proper Validation**: Time-aware train/test splitting\n", | ||
"\n", | ||
"### Steps\n", | ||
"\n", | ||
"1. **Generate Health Data**\n", | ||
" - Create synthetic but realistic health measurements\n", | ||
" - Include known physiological patterns and correlations\n", | ||
"\n", | ||
"2. **Initialize TimeFrames**\n", | ||
" - Separate temporal structures for each health metric\n", | ||
" - Enable parallel processing capabilities\n", | ||
"\n", | ||
"3. **Prepare Forecasting Data**\n", | ||
" - Machine learning mode for immediate predictions\n", | ||
" - Deep learning mode for sequence-based analysis\n", | ||
"\n", | ||
"4. **Create Temporal Splits**\n", | ||
" - Sliding window approach\n", | ||
" - Maintain temporal ordering\n", | ||
" - Multiple validation periods" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"from datetime import datetime, timedelta\n", | ||
"\n", | ||
"from temporalscope.core.temporal_data_loader import TimeFrame\n", | ||
"from temporalscope.core.temporal_target_shifter import TemporalTargetShifter\n", | ||
"from temporalscope.partition.sliding_window import SlidingWindowPartitioner\n", | ||
"from temporalscope.core.core_utils import print_divider" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def generate_health_data(start_date: str = '2023-01-01', days: int = 365):\n", | ||
" \"\"\"Generate synthetic health monitoring data.\n", | ||
" \n", | ||
" Args:\n", | ||
" start_date (str): Starting date for the data\n", | ||
" days (int): Number of days to generate\n", | ||
" \"\"\"\n", | ||
" # Create date range for daily measurements\n", | ||
" dates = pd.date_range(start=start_date, periods=days, freq='D')\n", | ||
" \n", | ||
" # Time array for generating patterns\n", | ||
" t = np.arange(days)\n", | ||
" \n", | ||
" # Seasonal effect (yearly cycle)\n", | ||
" # - Amplitude of 5 represents typical seasonal BP variation\n", | ||
" # - 2π/365 gives us one complete cycle per year\n", | ||
" seasonal_effect = 5 * np.sin(2 * np.pi * t / 365)\n", | ||
" \n", | ||
" # Weekly pattern (work week stress)\n", | ||
" # - Amplitude of 3 for weekly BP fluctuation\n", | ||
" # - 2π/7 gives us one complete cycle per week\n", | ||
" weekly_effect = 3 * np.sin(2 * np.pi * t / 7)\n", | ||
" \n", | ||
" # Blood Pressure Generation\n", | ||
" # Systolic (120 typical baseline)\n", | ||
" # - Stronger influence from seasonal & weekly patterns\n", | ||
" # - Random variation (σ=3) for daily fluctuations\n", | ||
" systolic = 120 + seasonal_effect + weekly_effect + np.random.normal(0, 3, days)\n", | ||
" \n", | ||
" # Diastolic (80 typical baseline)\n", | ||
" # - Less affected by external patterns (multiplied by 0.5)\n", | ||
" # - Smaller random variation (σ=2)\n", | ||
" diastolic = 80 + seasonal_effect * 0.5 + weekly_effect * 0.5 + np.random.normal(0, 2, days)\n", | ||
" \n", | ||
" # Stress Level Generation (0-100 scale)\n", | ||
" # - Heavily influenced by weekly pattern (work stress)\n", | ||
" # - Larger random variation (σ=5) for daily life events\n", | ||
" # - Clipped to valid range [0,100]\n", | ||
" stress = 50 + weekly_effect + np.random.normal(0, 5, days)\n", | ||
" stress = np.clip(stress, 0, 100)\n", | ||
" \n", | ||
" # Heart Rate Generation\n", | ||
" # - Baseline of 70 bpm\n", | ||
" # - Correlates with stress (0.3 coefficient)\n", | ||
" # - Weekly pattern influence\n", | ||
" # - Moderate random variation (σ=3)\n", | ||
" heart_rate = 70 + 0.3 * stress + weekly_effect + np.random.normal(0, 3, days)\n", | ||
" \n", | ||
" return pd.DataFrame({\n", | ||
" 'ds': dates,\n", | ||
" 'systolic': systolic,\n", | ||
" 'diastolic': diastolic,\n", | ||
" 'stress_level': stress,\n", | ||
" 'heart_rate': heart_rate\n", | ||
" })" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def create_metric_timeframes(df):\n", | ||
" \"\"\"Create TimeFrame objects for each health metric.\n", | ||
" \n", | ||
" Why separate TimeFrames?\n", | ||
" - Each metric might need different forecasting horizons\n", | ||
" - Allows parallel processing of different metrics\n", | ||
" - Can apply different temporal transformations per metric\n", | ||
" \"\"\"\n", | ||
" metrics = ['systolic', 'diastolic', 'stress_level', 'heart_rate']\n", | ||
" timeframes = {}\n", | ||
" \n", | ||
" for metric in metrics:\n", | ||
" # Using pandas backend for simplicity\n", | ||
" # Could switch to Modin/Polars for larger datasets\n", | ||
" timeframes[metric] = TimeFrame(\n", | ||
" df=df,\n", | ||
" time_col='ds', # datetime column\n", | ||
" target_col=metric, # metric to forecast\n", | ||
" backend='pd'\n", | ||
" )\n", | ||
" \n", | ||
" return timeframes" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def prepare_forecasting_data(timeframe, mode='machine_learning', sequence_length=7):\n", | ||
" \"\"\"Prepare data for forecasting using TemporalTargetShifter.\n", | ||
" \n", | ||
" Two modes supported:\n", | ||
" 1. Machine Learning (ml) mode:\n", | ||
" - One-step-ahead prediction\n", | ||
" - Useful for immediate forecasts (next day)\n", | ||
" - Better for interpretable models (regression, etc.)\n", | ||
" \n", | ||
" 2. Deep Learning (dl) mode:\n", | ||
" - Sequence-to-sequence prediction\n", | ||
" - Captures longer temporal patterns\n", | ||
" - Better for complex patterns (LSTM, etc.)\n", | ||
" - sequence_length=7 for weekly patterns\n", | ||
" \"\"\"\n", | ||
" shifter = TemporalTargetShifter(\n", | ||
" n_lags=1, # How many steps to look ahead\n", | ||
" mode=mode,\n", | ||
" sequence_length=sequence_length if mode == 'deep_learning' else None,\n", | ||
" verbose=True\n", | ||
" )\n", | ||
" \n", | ||
" return shifter.fit_transform(timeframe)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def create_temporal_splits(timeframe, num_partitions=3):\n", | ||
" \"\"\"Create temporal train/test splits using sliding window.\n", | ||
" \n", | ||
" Why sliding window?\n", | ||
" - Maintains temporal ordering (crucial for time series)\n", | ||
" - Multiple partitions to assess model stability\n", | ||
" - Each partition moves forward in time\n", | ||
" - 70/30 split preserves enough history for training\n", | ||
" \n", | ||
" Why num_partitions=3?\n", | ||
" - Tests model on different time periods\n", | ||
" - Captures seasonal variations\n", | ||
" - Balance between validation and data usage\n", | ||
" \"\"\"\n", | ||
" partitioner = SlidingWindowPartitioner(\n", | ||
" tf=timeframe,\n", | ||
" num_partitions=num_partitions, # Number of temporal splits\n", | ||
" train_pct=0.7, # 70% for training\n", | ||
" test_pct=0.3 # 30% for testing\n", | ||
" )\n", | ||
" \n", | ||
" return list(partitioner.fit_transform())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"if __name__ == \"__main__\":\n", | ||
" # Step 1: Generate synthetic health data\n", | ||
" print_divider()\n", | ||
" print(\"Generating synthetic health data...\")\n", | ||
" health_df = generate_health_data()\n", | ||
" print(\"Preview of generated health data:\")\n", | ||
" print(health_df.head())\n", | ||
" print_divider()\n", | ||
" \n", | ||
" # Step 2: Create TimeFrames for each metric\n", | ||
" print(\"Initializing TimeFrames for each health metric...\")\n", | ||
" metric_timeframes = create_metric_timeframes(health_df)\n", | ||
" \n", | ||
" # Step 3: Demonstrate both ML and DL approaches\n", | ||
" print(\"\\nPreparing data for different forecasting approaches:\")\n", | ||
" for metric in ['heart_rate', 'stress_level']:\n", | ||
" print(f\"\\nProcessing {metric}:\")\n", | ||
" \n", | ||
" # ML mode (one-step-ahead)\n", | ||
" print(\"\\nMachine Learning mode (one-step-ahead):\")\n", | ||
" ml_data = prepare_forecasting_data(metric_timeframes[metric], mode='machine_learning')\n", | ||
" print(ml_data.head())\n", | ||
" \n", | ||
" # DL mode (sequence)\n", | ||
" print(\"\\nDeep Learning mode (sequence-based):\")\n", | ||
" dl_data = prepare_forecasting_data(metric_timeframes[metric], mode='deep_learning')\n", | ||
" print(dl_data.head())\n", | ||
" \n", | ||
" print_divider()\n", | ||
" \n", | ||
" # Step 4: Create and demonstrate temporal splits\n", | ||
" print(\"\\nCreating temporal splits for validation:\")\n", | ||
" heart_rate_splits = create_temporal_splits(metric_timeframes['heart_rate'])\n", | ||
" \n", | ||
" for i, partition in enumerate(heart_rate_splits):\n", | ||
" print(f\"\\nPartition {i+1}:\")\n", | ||
" print(f\"Train shape: {partition['partition_1']['train'].shape}\")\n", | ||
" print(f\"Test shape: {partition['partition_1']['test'].shape}\")\n", | ||
" \n", | ||
" print_divider()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |