From f8b1eef2b82f044306f0665fde6a7bc96313021c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:20:12 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- fossil/DA_methods.py | 1 + notebooks/DA_demo_L96.ipynb | 80 +++++----- notebooks/DA_methods.py | 1 + notebooks/L96_offline_training_NN.ipynb | 146 ++++++++--------- notebooks/L96_online_training_NN.ipynb | 92 +++++------ notebooks/Learning-DA-increments.ipynb | 140 ++++++++--------- notebooks/citing.ipynb | 2 +- notebooks/closing_remarks.ipynb | 2 +- notebooks/constraints.ipynb | 108 ++++++------- notebooks/estimating-gcm-parameters.ipynb | 98 ++++++------ notebooks/gcm-analogue.ipynb | 70 ++++----- notebooks/gcm-parameterization-problem.ipynb | 126 +++++++-------- .../old_notebooks/gcm-analogue-old.ipynb | 78 ++++----- .../gcm-parameterization-problem-old.ipynb | 142 ++++++++--------- notebooks/rvm.py | 3 +- notebooks/sindy_L96_2scale.ipynb | 124 +++++++-------- notebooks/symbolic_methods_comparison.ipynb | 148 +++++++++--------- notebooks/symbolic_vs_nn_multiscale_L96.ipynb | 140 ++++++++--------- 18 files changed, 751 insertions(+), 750 deletions(-) diff --git a/fossil/DA_methods.py b/fossil/DA_methods.py index 5ec69319..8bcd5515 100644 --- a/fossil/DA_methods.py +++ b/fossil/DA_methods.py @@ -2,6 +2,7 @@ Adapted form PyDA project: https://github.com/Shady-Ahmed/PyDA Reference: https://www.mdpi.com/2311-5521/5/4/225 """ + import numpy as np from numba import jit diff --git a/notebooks/DA_demo_L96.ipynb b/notebooks/DA_demo_L96.ipynb index 1b8f2da4..0fc5a918 100644 --- a/notebooks/DA_demo_L96.ipynb +++ b/notebooks/DA_demo_L96.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "a013986b", + "id": "0", "metadata": {}, "source": [ "# Data Assimilation demo in the Lorenz 96 (L96) two time-scale model\n" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "c33e3435", + "id": "1", "metadata": {}, "source": [ "## What is DA? Why do we do it?\n", @@ -31,7 +31,7 @@ }, { "cell_type": "markdown", - "id": "208a8f14", + "id": "2", "metadata": {}, "source": [ "## Design of the data assimilation experiments in L96\n", @@ -54,7 +54,7 @@ }, { "cell_type": "markdown", - "id": "66584b6f", + "id": "3", "metadata": {}, "source": [ "### 1. Define our \"GCM\" and DA parameters to use throughout notebook" @@ -63,7 +63,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9f81b262-6d04-44d8-9a61-ada0ea0c9acb", + "id": "4", "metadata": { "tags": [] }, @@ -83,7 +83,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ebf223e6-ed8a-425a-b808-55b0cc59c69c", + "id": "5", "metadata": { "tags": [] }, @@ -121,7 +121,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3d2efdbe", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -159,7 +159,7 @@ }, { "cell_type": "markdown", - "id": "6d3052af", + "id": "7", "metadata": { "user_expressions": [] }, @@ -174,7 +174,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e393d1bd-5838-402d-a381-a617c987d1f8", + "id": "8", "metadata": { "tags": [] }, @@ -186,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "a4696c6b-cfbd-4623-9952-bcf7a3e75783", + "id": "9", "metadata": { "user_expressions": [] }, @@ -197,7 +197,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18d8e02a-2dec-4724-9175-f5809414533e", + "id": "10", "metadata": { "tags": [] }, @@ -216,7 +216,7 @@ }, { "cell_type": "markdown", - "id": "a68c402e-e083-4cfe-b678-d4d8cd5e098f", + "id": "11", "metadata": { "user_expressions": [] }, @@ -227,7 +227,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae0f3b33-7cf7-47ae-9460-f735f8d46627", + "id": "12", "metadata": { "tags": [] }, @@ -238,7 +238,7 @@ }, { "cell_type": "markdown", - "id": "4916b273-2823-4c6c-b3e4-fd1f3b5ce805", + "id": "13", "metadata": { "user_expressions": [] }, @@ -249,7 +249,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fceac2dd-543e-4688-99ce-a7eb2bcc4e41", + "id": "14", "metadata": { "tags": [] }, @@ -273,7 +273,7 @@ }, { "cell_type": "markdown", - "id": "c90c0fef-53be-4d0b-9105-9601862f6690", + "id": "15", "metadata": { "user_expressions": [] }, @@ -284,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "66ac4faf-ff3f-4152-bfb9-fcdb1f2c2234", + "id": "16", "metadata": { "tags": [] }, @@ -295,7 +295,7 @@ }, { "cell_type": "markdown", - "id": "61df79fe-b945-43be-b08a-b63b93bca400", + "id": "17", "metadata": { "user_expressions": [] }, @@ -306,7 +306,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1e66be99-a6d1-4134-92a4-e1a2ab8a6916", + "id": "18", "metadata": { "tags": [] }, @@ -350,7 +350,7 @@ }, { "cell_type": "markdown", - "id": "8a17c582-090e-48bf-b26a-adbd66fdbefb", + "id": "19", "metadata": { "user_expressions": [] }, @@ -361,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dd8b5cc7", + "id": "20", "metadata": { "tags": [] }, @@ -375,7 +375,7 @@ }, { "cell_type": "markdown", - "id": "d3f14f72", + "id": "21", "metadata": { "user_expressions": [] }, @@ -388,7 +388,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e01dbe50-741b-4ce5-88ac-4f583e19d11c", + "id": "22", "metadata": { "tags": [] }, @@ -411,7 +411,7 @@ }, { "cell_type": "markdown", - "id": "89ee8b4d-0539-4621-8d45-30769452a155", + "id": "23", "metadata": { "user_expressions": [] }, @@ -422,7 +422,7 @@ { "cell_type": "code", "execution_count": null, - "id": "abc69e52", + "id": "24", "metadata": { "tags": [] }, @@ -435,7 +435,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34000d8e-bb3c-4ca7-b760-e186ec7c2f80", + "id": "25", "metadata": { "tags": [] }, @@ -454,7 +454,7 @@ }, { "cell_type": "markdown", - "id": "c72bc6d5", + "id": "26", "metadata": { "user_expressions": [] }, @@ -467,7 +467,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c053ebb0-0e36-41cc-bca5-141ddd15a165", + "id": "27", "metadata": { "tags": [] }, @@ -484,7 +484,7 @@ { "cell_type": "code", "execution_count": null, - "id": "577e9b3d", + "id": "28", "metadata": { "tags": [] }, @@ -503,7 +503,7 @@ }, { "cell_type": "markdown", - "id": "760a249a", + "id": "29", "metadata": { "user_expressions": [] }, @@ -524,7 +524,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7c5c9f4-0ae8-4d07-a458-6d2b1ec4af33", + "id": "30", "metadata": { "tags": [] }, @@ -543,7 +543,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8d49ce4c-2a11-4207-9bd1-05b16607b2fd", + "id": "31", "metadata": { "tags": [] }, @@ -558,7 +558,7 @@ { "cell_type": "code", "execution_count": null, - "id": "236c576b-be9f-4353-9172-455853f08d2e", + "id": "32", "metadata": { "tags": [] }, @@ -583,7 +583,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50e95487-435f-4bd4-bfab-6e0cb6893b30", + "id": "33", "metadata": { "tags": [] }, @@ -599,7 +599,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46ef8b1f", + "id": "34", "metadata": { "tags": [] }, @@ -699,7 +699,7 @@ }, { "cell_type": "markdown", - "id": "5d243a69", + "id": "35", "metadata": { "user_expressions": [] }, @@ -710,7 +710,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f3c37a85-433c-4d2e-a49c-1daf4f6a9ca8", + "id": "36", "metadata": { "tags": [] }, @@ -723,7 +723,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3f6f05d0", + "id": "37", "metadata": { "tags": [] }, @@ -879,7 +879,7 @@ }, { "cell_type": "markdown", - "id": "63475b82-5e45-4a2c-93f5-f0e089320c7b", + "id": "38", "metadata": { "user_expressions": [] }, @@ -890,7 +890,7 @@ { "cell_type": "code", "execution_count": null, - "id": "65993585", + "id": "39", "metadata": {}, "outputs": [], "source": [ diff --git a/notebooks/DA_methods.py b/notebooks/DA_methods.py index 6f49fe10..9152ef1d 100644 --- a/notebooks/DA_methods.py +++ b/notebooks/DA_methods.py @@ -2,6 +2,7 @@ Partly adapted form PyDA project: https://github.com/Shady-Ahmed/PyDA Reference: https://www.mdpi.com/2311-5521/5/4/225 """ + import numpy as np from numba import njit diff --git a/notebooks/L96_offline_training_NN.ipynb b/notebooks/L96_offline_training_NN.ipynb index 759c6baf..911bf1c0 100644 --- a/notebooks/L96_offline_training_NN.ipynb +++ b/notebooks/L96_offline_training_NN.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c0d2675f-796a-4466-a76a-c9a354518d1b", + "id": "0", "metadata": { "user_expressions": [] }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "aa918476-598e-4ec0-9d9c-b2fd20d2e699", + "id": "1", "metadata": { "user_expressions": [] }, @@ -27,7 +27,7 @@ }, { "cell_type": "markdown", - "id": "e2c6ba62-1b14-408c-ac05-146c96a130b3", + "id": "2", "metadata": {}, "source": [ "## Generating the data" @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "129a18f4-f3d5-4503-8c0b-e81895ee2406", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79044f9a-ac01-415a-a88a-0385718c8b02", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -67,7 +67,7 @@ }, { "cell_type": "markdown", - "id": "d555922c-f58e-4343-8cf0-1ad027046c6a", + "id": "5", "metadata": {}, "source": [ "### Build the *Real World* to Generate the Ground Truth Dataset\n", @@ -89,7 +89,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae0738e8-fd6f-4f0c-befa-aa5db97f4521", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ }, { "cell_type": "markdown", - "id": "a8df4af7-7283-4a64-8a3d-cbb7dada4363", + "id": "7", "metadata": {}, "source": [ "### Run the simulation to generate data\n", @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ae3e6b4-269f-4ac5-82e8-366eb2fb6dee", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -126,7 +126,7 @@ }, { "cell_type": "markdown", - "id": "c4586ecb-43a0-4764-9b6f-c968be40eca9", + "id": "9", "metadata": {}, "source": [ "### Split the Data into Training and Testing Set\n", @@ -137,7 +137,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9f99293a-df6d-4d60-b177-03fc2f8ae361", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -158,7 +158,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bded5ed8-7cd8-4c50-940b-93c37d82d984", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -167,7 +167,7 @@ }, { "cell_type": "markdown", - "id": "7ac52993-712b-4aba-8012-7ca83a4d605e", + "id": "12", "metadata": {}, "source": [ "## The Dataloaders \n", @@ -180,7 +180,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1e751231-d506-4bee-982d-c7522a4481e4", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -191,7 +191,7 @@ }, { "cell_type": "markdown", - "id": "4615518a-e6d8-4a9d-ad05-b334fd7f8224", + "id": "14", "metadata": {}, "source": [ "Define the X (state), Y (subgrid tendency) pairs for the linear regression local network." @@ -200,7 +200,7 @@ { "cell_type": "code", "execution_count": null, - "id": "060e3982-1791-418e-bf8d-81475da84a50", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "markdown", - "id": "5d9ef6f0-9441-49c5-8cea-904c992d75c1", + "id": "16", "metadata": {}, "source": [ "Define the dataloader for the test set." @@ -225,7 +225,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3eec82d6-91e3-4851-993f-fa28836af5d5", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -241,7 +241,7 @@ }, { "cell_type": "markdown", - "id": "33ff24b3-f8c9-4b5e-a36f-82d6b0f177d1", + "id": "18", "metadata": {}, "source": [ "Display a batch of samples from the dataset." @@ -250,7 +250,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8527ff9a-01c8-419a-9ca4-c28a38a54b88", + "id": "19", "metadata": { "tags": [] }, @@ -272,7 +272,7 @@ }, { "cell_type": "markdown", - "id": "edfeefb9-cfb5-44d9-b258-a8701ce4e4c7", + "id": "20", "metadata": {}, "source": [ "Clearly there is a relationship between $U_k$ and $X_k$, we plan to learn this with the help of ML models." @@ -280,7 +280,7 @@ }, { "cell_type": "markdown", - "id": "5c54b68a-bae5-4bce-b4f0-9bdb7f4235bc", + "id": "21", "metadata": {}, "source": [ "## The *local* ML models\n", @@ -292,7 +292,7 @@ }, { "cell_type": "markdown", - "id": "ed3e53bd-fb12-4aad-bcff-be495935ef80", + "id": "22", "metadata": {}, "source": [ "### Linear regression network" @@ -301,7 +301,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eed1ac73-a213-4d2c-81af-d74b093d2ac2", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -320,7 +320,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3c9a3d2f-7f95-4906-a3b0-dd2ecf81822d", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -330,7 +330,7 @@ }, { "cell_type": "markdown", - "id": "1f5abff9-0d16-4bcb-a929-f39c010a4b92", + "id": "25", "metadata": {}, "source": [ "### Fully connected neural network (FCNN)" @@ -339,7 +339,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10044fa0-de73-4240-be0f-55bb30af2513", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e4e55f54-c315-4aba-b00f-85e00812f75d", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -372,7 +372,7 @@ }, { "cell_type": "markdown", - "id": "1287a0d4-f7e0-4a4f-bbee-3666ff2c1adc", + "id": "28", "metadata": {}, "source": [ "We do not expect the linear regression to do well in this problem, as the dependence seenin the figure is clearly more complex than a simple line. We include it here as a baseline. \n", @@ -382,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "b3b259d3-3478-4a9c-b23c-8e9a8042404a", + "id": "29", "metadata": {}, "source": [ "```{admonition} Need for Activation Functions\n", @@ -406,7 +406,7 @@ }, { "cell_type": "markdown", - "id": "419f6fd5-0055-4583-8677-ae31ac9953e9", + "id": "30", "metadata": {}, "source": [ "**Test forward function** \n", @@ -416,7 +416,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2b236b7f-dd79-4f88-8e8a-e60bfbc2a045", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -431,7 +431,7 @@ }, { "cell_type": "markdown", - "id": "6471c571-5ee4-48ef-bfce-e2349bcdfe5b", + "id": "32", "metadata": {}, "source": [ "## The Loss Function\n", @@ -444,7 +444,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7c42de43-27de-4896-91ee-caf695f212df", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -455,7 +455,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d33c7361-f717-4a74-b296-075f398d7072", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -476,7 +476,7 @@ }, { "cell_type": "markdown", - "id": "1eb9cd59-ef25-48e8-9522-355fdda85d07", + "id": "35", "metadata": {}, "source": [ "Since at this point neither of the networks have been trained, the resultant loss is just arbitrary. If one model is doing better than the other, it is just a result of random chance. " @@ -484,7 +484,7 @@ }, { "cell_type": "markdown", - "id": "98716e66-ed26-48f4-8509-9f25e0ddeebf", + "id": "36", "metadata": {}, "source": [ "## The Optimizer\n", @@ -513,7 +513,7 @@ }, { "cell_type": "markdown", - "id": "4b28b5f2-11b1-4c30-b385-471d7a6c9e79", + "id": "37", "metadata": {}, "source": [ "See in the code sample below how taking a step with the optimizer changes the model parameters. In what comes next we will take these steps to reach a parameter state such that the loss function is as small as possible. " @@ -522,7 +522,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d46984b4-14c6-43ff-b143-c936282b6a24", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7ed28fd9-441e-4e01-9e3c-7571ff7a311b", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -551,7 +551,7 @@ { "cell_type": "code", "execution_count": null, - "id": "72cdeebe-87c1-49fc-9cba-eacb264bcdf6", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -561,7 +561,7 @@ }, { "cell_type": "markdown", - "id": "3f28f566-bb81-4d2d-9acc-b24ae80712c8", + "id": "41", "metadata": { "tags": [] }, @@ -571,7 +571,7 @@ }, { "cell_type": "markdown", - "id": "fa66f8a8-8c11-4897-b71e-ac06d03271ac", + "id": "42", "metadata": {}, "source": [ "### Define the Training and Test Functions\n", @@ -582,7 +582,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a800f28f-69c5-40eb-9b0c-cb27059f2f32", + "id": "43", "metadata": {}, "outputs": [], "source": [ @@ -616,7 +616,7 @@ }, { "cell_type": "markdown", - "id": "08dd95be-0a9e-4214-aced-ca19460217c0", + "id": "44", "metadata": {}, "source": [ "The `test_model` function is meant to test the skill of the model, but does not update the weights to further improve it. " @@ -625,7 +625,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f963a83c-c7ba-42a6-9642-002e5d254c4b", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -656,7 +656,7 @@ }, { "cell_type": "markdown", - "id": "3ea21cd1-d42f-44b0-a7b4-4ca1763abeba", + "id": "46", "metadata": {}, "source": [ "The `fit_model` iterates the `train_model` function over multiple epochs, such that the loss is reduced. " @@ -665,7 +665,7 @@ { "cell_type": "code", "execution_count": null, - "id": "74e8b24a-3107-48b4-8b52-d26776fa95f8", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -686,7 +686,7 @@ }, { "cell_type": "markdown", - "id": "7a3c15e2-c4ea-4de7-9fe6-2f5cbc656f03", + "id": "48", "metadata": {}, "source": [ "### Train the networks\n", @@ -697,7 +697,7 @@ { "cell_type": "code", "execution_count": null, - "id": "447887a6-4c7e-4146-b7c9-0962594ad997", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -708,7 +708,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e39cc83-2cb4-4e54-95cf-e11b6240dcfb", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -720,7 +720,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e455c22a-5cf7-4d16-b060-49a747612b60", + "id": "51", "metadata": {}, "outputs": [], "source": [ @@ -732,7 +732,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46e10b48-3253-4ef8-8223-945f15726d24", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -754,7 +754,7 @@ }, { "cell_type": "markdown", - "id": "9615e08d-6399-49e4-9c7f-0cbd171d084f", + "id": "53", "metadata": {}, "source": [ "### Compare Predictions with Ground Truth" @@ -763,7 +763,7 @@ { "cell_type": "code", "execution_count": null, - "id": "84aab879-0337-4b5e-8dc9-cdb9497c9103", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -787,7 +787,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20ebb0e1-4e83-4c11-9d90-f3c828c52802", + "id": "55", "metadata": {}, "outputs": [], "source": [ @@ -812,7 +812,7 @@ }, { "cell_type": "markdown", - "id": "7aae0beb-27e2-4db2-95be-875f01cabfb4", + "id": "56", "metadata": {}, "source": [ "As seen above, the FCNN does a better job at learning the relationship between the $X_k$ and $U_k$, when compared to the linear model.\n", @@ -821,7 +821,7 @@ }, { "cell_type": "markdown", - "id": "e8732e89-6841-460f-98f4-84b2e7f3d73a", + "id": "57", "metadata": {}, "source": [ "## The *non-local* ML models" @@ -829,7 +829,7 @@ }, { "cell_type": "markdown", - "id": "286cb239-4868-47a1-84e7-35618970d09a", + "id": "58", "metadata": {}, "source": [ "It is possible that sub-grid tendency at $k$ depends on the neighbouring points (as shown in figure below), or points farther away. " @@ -838,7 +838,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f66b3446-9a0f-4ecc-8038-937266a30fda", + "id": "59", "metadata": {}, "outputs": [], "source": [ @@ -877,7 +877,7 @@ }, { "cell_type": "markdown", - "id": "4836e71a-5afe-4d51-bc58-ff6d68c74e7a", + "id": "60", "metadata": {}, "source": [ "So to make more accurate model, one may want to build a model that learns relationship between not only the specific k point but also other k points. \n", @@ -894,7 +894,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0681dc84-64ca-4c81-9dc5-eae2293cc1d9", + "id": "61", "metadata": {}, "outputs": [], "source": [ @@ -922,7 +922,7 @@ { "cell_type": "code", "execution_count": null, - "id": "89867c3f-914a-4504-bf16-c03f9c26874a", + "id": "62", "metadata": {}, "outputs": [], "source": [ @@ -945,7 +945,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c79eeaa0-2682-4b0a-84bb-89b0b2145cb4", + "id": "63", "metadata": {}, "outputs": [], "source": [ @@ -955,7 +955,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df7bb279-7eff-4760-b0ba-0e2bad2bc676", + "id": "64", "metadata": {}, "outputs": [], "source": [ @@ -969,7 +969,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6222280d-2ecb-4d00-97c6-969fad2360d5", + "id": "65", "metadata": {}, "outputs": [], "source": [ @@ -987,7 +987,7 @@ { "cell_type": "code", "execution_count": null, - "id": "93184419-875c-4b07-b5bd-38f528bea6dd", + "id": "66", "metadata": {}, "outputs": [], "source": [ @@ -1011,7 +1011,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b4541ce4-fb05-4153-9413-9da1618ca1c9", + "id": "67", "metadata": {}, "outputs": [], "source": [ @@ -1043,7 +1043,7 @@ }, { "cell_type": "markdown", - "id": "dad69d69-960f-4d24-8ba1-676f619da1cf", + "id": "68", "metadata": {}, "source": [ "As seen above, the non-local model does about as good as the local model. It may be possible to improve the skill of the model by adjusting the hyperparameters associated with the training or model architecture, but we did not pursue this much more in this notebook. " @@ -1051,7 +1051,7 @@ }, { "cell_type": "markdown", - "id": "d208ea6f-a2ff-49e1-9369-a26a7095483c", + "id": "69", "metadata": {}, "source": [ "## Save the networks\n", @@ -1062,7 +1062,7 @@ { "cell_type": "code", "execution_count": null, - "id": "85b2a542-4012-4be3-a897-7219c0ce71ae", + "id": "70", "metadata": {}, "outputs": [], "source": [ @@ -1079,7 +1079,7 @@ }, { "cell_type": "markdown", - "id": "4ab018fe-819f-4dd2-a208-161ea0368d82", + "id": "71", "metadata": {}, "source": [ "## Summary\n", @@ -1090,7 +1090,7 @@ { "cell_type": "code", "execution_count": null, - "id": "839b02a1-c8f5-4dbd-91b8-cbe2f8d138aa", + "id": "72", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/L96_online_training_NN.ipynb b/notebooks/L96_online_training_NN.ipynb index 748fe94a..2df58d37 100644 --- a/notebooks/L96_online_training_NN.ipynb +++ b/notebooks/L96_online_training_NN.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c0d2675f-796a-4466-a76a-c9a354518d1b", + "id": "0", "metadata": { "user_expressions": [] }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "aa918476-598e-4ec0-9d9c-b2fd20d2e699", + "id": "1", "metadata": { "user_expressions": [] }, @@ -23,7 +23,7 @@ }, { "cell_type": "markdown", - "id": "c2f632ad-af84-4958-b940-20b96755460d", + "id": "2", "metadata": {}, "source": [ "## Online vs offline training\n", @@ -52,7 +52,7 @@ { "cell_type": "code", "execution_count": null, - "id": "129a18f4-f3d5-4503-8c0b-e81895ee2406", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -72,7 +72,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79044f9a-ac01-415a-a88a-0385718c8b02", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59c9e8fe-9e64-4a58-8cf9-0223192a0d6e", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -94,7 +94,7 @@ }, { "cell_type": "markdown", - "id": "d555922c-f58e-4343-8cf0-1ad027046c6a", + "id": "6", "metadata": {}, "source": [ "## Generate the Ground Truth Dataset from the *Real World* \n", @@ -107,7 +107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae0738e8-fd6f-4f0c-befa-aa5db97f4521", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +122,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ae3e6b4-269f-4ac5-82e8-366eb2fb6dee", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -139,7 +139,7 @@ }, { "cell_type": "markdown", - "id": "afd23807-0b39-4a92-9bef-d8edfcfd2871", + "id": "9", "metadata": {}, "source": [ "We now need to set the number of time steps that the training process will use for every sample (how long of a trajectory are we trying to match). " @@ -148,7 +148,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0367d3f3-037d-4316-a827-bc45f5bb3b04", + "id": "10", "metadata": { "tags": [] }, @@ -165,7 +165,7 @@ { "cell_type": "code", "execution_count": null, - "id": "97974dc3-09d8-4a41-8b53-e3fa36ba959d", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -184,7 +184,7 @@ { "cell_type": "code", "execution_count": null, - "id": "69869e4a-2cb3-481c-9070-1385e2c34084", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -194,7 +194,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26cd66c5-c845-42ad-8d53-dc6a77c9f8f8", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -204,7 +204,7 @@ }, { "cell_type": "markdown", - "id": "16ceba35-c4ae-452a-820b-5f2bd45105c4", + "id": "14", "metadata": {}, "source": [ "Notice that in the training and testing datasets defined below the input to the model is the initial condition, and the output that the model will be evaluated against is a time series from the simulation. " @@ -213,7 +213,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f988c083-3931-474c-bd73-53e8447d24b1", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -240,7 +240,7 @@ }, { "cell_type": "markdown", - "id": "d68c2036-7b57-4172-a6f0-0e164765510e", + "id": "16", "metadata": {}, "source": [ "## Create a differentiable 1 time-scale L96 model \n", @@ -261,7 +261,7 @@ { "cell_type": "code", "execution_count": null, - "id": "078ad47f-cec7-446a-ad65-8cd900469341", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -281,7 +281,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2ec5406a-9992-430d-a61f-008ef44b380f", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -306,7 +306,7 @@ }, { "cell_type": "markdown", - "id": "8b7788d6-13cd-42ae-9249-0926be461470", + "id": "19", "metadata": {}, "source": [ "## Define a Neural Network for the parameterization\n", @@ -317,7 +317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6a6a5262-39e2-4d50-a8ea-c8b028e5fe5f", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -340,7 +340,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0d72c643-f0ac-42f8-99ee-615e9e2087fa", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -349,7 +349,7 @@ }, { "cell_type": "markdown", - "id": "2f76d74f-26fa-47ba-b19f-819cafeeee03", + "id": "22", "metadata": {}, "source": [ "## Loss function and optimizer for online training\n", @@ -362,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea9fffa9-07fa-4b40-84da-470f8202474c", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -390,7 +390,7 @@ }, { "cell_type": "markdown", - "id": "f5c0ad15-5790-4236-8011-fbff56c55910", + "id": "24", "metadata": {}, "source": [ "Since the only free parameters correspond to the weights of the neural network, they are passed to the optimizer. Notice that even though the loss function is much more complex than the offline training case, the parameters that are being optimized are the same." @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dfbc3b3b-46c3-45a5-9d5c-7dab607015fc", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -410,7 +410,7 @@ }, { "cell_type": "markdown", - "id": "0ea74669-66f7-4637-acbf-8b8457668c08", + "id": "26", "metadata": {}, "source": [ "## Training\n", @@ -421,7 +421,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33c342c1-f430-4085-bc10-efcafa1364d8", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -481,7 +481,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a329dace-bd5f-4625-9cc3-1b9a6604b6e4", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -492,7 +492,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cea48b6c-cb48-47fd-af13-99c215cf683f", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -510,7 +510,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c0e6329c-1b1f-4a85-8b4d-ea5387936a84", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -525,7 +525,7 @@ }, { "cell_type": "markdown", - "id": "30448682-8d16-48ca-b907-92ffaba2ea9b", + "id": "31", "metadata": {}, "source": [ "The loss curve above shows that online model is training well, and parameters have been optimized in some sense. Let us check below how this online trained model compares against the offline trained model from the previous notebook. " @@ -533,7 +533,7 @@ }, { "cell_type": "markdown", - "id": "f2932cc0-bada-4649-8121-f84b6e0daff3", + "id": "32", "metadata": {}, "source": [ "## Test in a simulation: Online testing" @@ -542,7 +542,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ec9f9d38-2320-4490-a4a2-803de8df8770", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -556,7 +556,7 @@ { "cell_type": "code", "execution_count": null, - "id": "90aeb502-5f07-4966-8cfe-4082757d8e72", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -659,7 +659,7 @@ }, { "cell_type": "markdown", - "id": "5e6a3ac2-d75d-4643-8aa8-33c3b7a353d1", + "id": "35", "metadata": {}, "source": [ "Now let us pick a random point in the simulation as our initial condition, and compare if there is some drastic difference between offline and online parameterization that can be seen visually. " @@ -668,7 +668,7 @@ { "cell_type": "code", "execution_count": null, - "id": "57d35701-3fb8-4360-af23-01dff0ff28ca", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -682,7 +682,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ef4586d4-55f9-4c28-b7dc-73ea5284d0fc", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -693,7 +693,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8f023ad5-b6b0-4e3e-b75b-834d14daa2b2", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -707,7 +707,7 @@ { "cell_type": "code", "execution_count": null, - "id": "63fa095e-9d5d-4d99-aa6b-31b8f7e56df1", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -721,7 +721,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1b8ff02e-6acc-4c66-b0a5-461e9358c12b", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -750,7 +750,7 @@ }, { "cell_type": "markdown", - "id": "ae711913-49b6-43b9-a70b-368fcfcac88e", + "id": "41", "metadata": {}, "source": [ "The above plot shows that both offline and online trained models perform much better than the simulation without any parameterization. However, it is unclear if there is any signficant gain in the online case. To be more precise, we compare the different cases below over many simulation. " @@ -759,7 +759,7 @@ { "cell_type": "code", "execution_count": null, - "id": "163c3563-51d7-4949-b8ff-14df98afed1a", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -807,7 +807,7 @@ }, { "cell_type": "markdown", - "id": "d336de28-783d-4dfe-ae60-8774f39057bf", + "id": "43", "metadata": {}, "source": [ "This assessment shows that the online parameterization performs about the same as offline parameterzation. However, atleast for the L96 model the gains (if any), which come at the cost of signfiant complexity, are not drastic. " @@ -815,7 +815,7 @@ }, { "cell_type": "markdown", - "id": "4ab018fe-819f-4dd2-a208-161ea0368d82", + "id": "44", "metadata": {}, "source": [ "## Summary\n", @@ -828,7 +828,7 @@ { "cell_type": "code", "execution_count": null, - "id": "839b02a1-c8f5-4dbd-91b8-cbe2f8d138aa", + "id": "45", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/Learning-DA-increments.ipynb b/notebooks/Learning-DA-increments.ipynb index 75dbf1aa..f72e6304 100644 --- a/notebooks/Learning-DA-increments.ipynb +++ b/notebooks/Learning-DA-increments.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "5f25c823", + "id": "0", "metadata": {}, "source": [ "# Learning Data Assimilation Increments\n", @@ -15,7 +15,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53133bbf", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ { "cell_type": "code", "execution_count": null, - "id": "86ef077a", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ }, { "cell_type": "markdown", - "id": "b97ec0e3-55d5-469c-9178-8e749893e79b", + "id": "3", "metadata": {}, "source": [ "## Defining the Model, its Parameters, and Other Utility Functions" @@ -54,7 +54,7 @@ }, { "cell_type": "markdown", - "id": "f05b1f30-b274-4408-a065-6a86e05c06ba", + "id": "4", "metadata": {}, "source": [ "### Defining the General Circulation Model (GCM)" @@ -63,7 +63,7 @@ { "cell_type": "code", "execution_count": null, - "id": "99c22825-e3e8-4329-8707-0da40b1700a8", + "id": "5", "metadata": { "tags": [] }, @@ -103,7 +103,7 @@ }, { "cell_type": "markdown", - "id": "99f9d561-6f0b-4cc7-8148-9666c6601e01", + "id": "6", "metadata": {}, "source": [ "### Defining the Utility Functions" @@ -112,7 +112,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0a810c15-17d4-4b8d-acff-03aeb4177f73", + "id": "7", "metadata": { "tags": [] }, @@ -126,7 +126,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bdf41747-d694-4098-930f-85516f2df60a", + "id": "8", "metadata": { "tags": [] }, @@ -150,7 +150,7 @@ { "cell_type": "code", "execution_count": null, - "id": "470b52bb-a77a-4635-b104-b60a4279ad3f", + "id": "9", "metadata": { "tags": [] }, @@ -178,7 +178,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3d1e6168-70a5-4354-a006-f0657f57bf5d", + "id": "10", "metadata": { "tags": [] }, @@ -227,7 +227,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f86db1f1-c20a-4906-b473-d2960cfec200", + "id": "11", "metadata": { "tags": [] }, @@ -255,7 +255,7 @@ { "cell_type": "code", "execution_count": null, - "id": "62cbac5e-a06b-499e-a404-0a00f9d8d1f9", + "id": "12", "metadata": { "tags": [] }, @@ -284,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d23da076-8d00-466a-9975-4ecccdf0d7d8", + "id": "13", "metadata": { "tags": [] }, @@ -303,7 +303,7 @@ }, { "cell_type": "markdown", - "id": "22367ca5-242e-4d5f-8cf7-9a82699469ac", + "id": "14", "metadata": {}, "source": [ "### Initializing the Lorenz 1996 Model Parameters\n", @@ -314,7 +314,7 @@ { "cell_type": "code", "execution_count": null, - "id": "54ff8a4a-641a-41ea-ad9a-71f129b760cf", + "id": "15", "metadata": { "tags": [] }, @@ -347,7 +347,7 @@ }, { "cell_type": "markdown", - "id": "530a7fdf-f1c5-4134-9d12-67c9cd4c8c97", + "id": "16", "metadata": {}, "source": [ "#### Suggestions for Modifying the L96 Model Paramters\n", @@ -392,7 +392,7 @@ }, { "cell_type": "markdown", - "id": "1e0e4ef9", + "id": "17", "metadata": {}, "source": [ "## Generate `Truth` Run from Two Time-Scale L96 Model\n", @@ -405,7 +405,7 @@ { "cell_type": "code", "execution_count": null, - "id": "becd0f6a", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -427,7 +427,7 @@ }, { "cell_type": "markdown", - "id": "1df7ce87", + "id": "19", "metadata": {}, "source": [ "## Generate Synthetic Observations\n", @@ -438,7 +438,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ff525d9d", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -460,7 +460,7 @@ { "cell_type": "code", "execution_count": null, - "id": "93879f7c-3c7c-4f83-93cc-8e6c6a43ca1d", + "id": "21", "metadata": { "tags": [] }, @@ -473,7 +473,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1f8ffbbd-2cd8-4db9-82d7-82ac2bbcfcc2", + "id": "22", "metadata": { "tags": [] }, @@ -495,7 +495,7 @@ }, { "cell_type": "markdown", - "id": "7d63c749-f646-4455-97cc-aa781a5550d0", + "id": "23", "metadata": {}, "source": [ "## Apply Localization to the Background Model Covariance" @@ -503,7 +503,7 @@ }, { "cell_type": "markdown", - "id": "b4f2c954", + "id": "24", "metadata": { "user_expressions": [] }, @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c18eb58", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f8e083da-e89d-41e1-89dc-452106797ae6", + "id": "26", "metadata": { "tags": [] }, @@ -551,7 +551,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0919ecf0-375b-4aca-b3ec-38b5d04897e8", + "id": "27", "metadata": { "tags": [] }, @@ -574,7 +574,7 @@ }, { "cell_type": "markdown", - "id": "502fcb30", + "id": "28", "metadata": {}, "source": [ "## Run Data Assimilation\n", @@ -587,7 +587,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1b30ed0d-10e0-46ef-b0c4-22b44623195a", + "id": "29", "metadata": { "tags": [] }, @@ -603,7 +603,7 @@ { "cell_type": "code", "execution_count": null, - "id": "80eca511-68f9-401e-9500-567fc61492d1", + "id": "30", "metadata": { "tags": [] }, @@ -618,7 +618,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30694d88-ec62-4c27-86b3-1c0fb97dbeae", + "id": "31", "metadata": { "tags": [] }, @@ -630,7 +630,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5bdecb3b", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -684,7 +684,7 @@ }, { "cell_type": "markdown", - "id": "ecbff140", + "id": "33", "metadata": {}, "source": [ "## Post Processing and Visualization\n", @@ -695,7 +695,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5a25d44f-cdc7-417b-b8bb-6513062948cc", + "id": "34", "metadata": { "tags": [] }, @@ -708,7 +708,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53048cb9", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -818,7 +818,7 @@ }, { "cell_type": "markdown", - "id": "e3cdeb59", + "id": "36", "metadata": {}, "source": [ "## Examining the Relationship between the Members and their Increments\n", @@ -830,7 +830,7 @@ }, { "cell_type": "markdown", - "id": "fe35d752-3a29-4d3d-a073-96d82359d731", + "id": "37", "metadata": {}, "source": [ "### Individual Ensemble Members" @@ -839,7 +839,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cdc24313-1928-4a43-aa02-ffce97ba5a74", + "id": "38", "metadata": { "tags": [] }, @@ -863,7 +863,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3d79e858", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -889,7 +889,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3ce5f1ce-e50d-4d99-8e99-2e4106bb302b", + "id": "40", "metadata": { "tags": [] }, @@ -921,7 +921,7 @@ }, { "cell_type": "markdown", - "id": "17ae15ec-d330-4ae2-864a-1a553e5956ee", + "id": "41", "metadata": {}, "source": [ "### Mean over Ensemble Members" @@ -930,7 +930,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4d970ec4-6d21-41df-9c75-b96efacbf528", + "id": "42", "metadata": { "tags": [] }, @@ -954,7 +954,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9925ad58", + "id": "43", "metadata": {}, "outputs": [], "source": [ @@ -980,7 +980,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12a15ad9", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -1017,7 +1017,7 @@ }, { "cell_type": "markdown", - "id": "380408db", + "id": "45", "metadata": {}, "source": [ "With this, we have successfully **created the DA increments**." @@ -1025,7 +1025,7 @@ }, { "cell_type": "markdown", - "id": "25bedb98-8922-495e-ba7c-34da6cca5f91", + "id": "46", "metadata": {}, "source": [ "## Learning the DA Increments\n", @@ -1036,7 +1036,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3f0ee34b", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -1055,7 +1055,7 @@ }, { "cell_type": "markdown", - "id": "7af4eb68", + "id": "48", "metadata": {}, "source": [ "### Observing the Dataset\n", @@ -1066,7 +1066,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1bf0c4eb", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -1084,7 +1084,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ada1933", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -1106,7 +1106,7 @@ }, { "cell_type": "markdown", - "id": "79538913", + "id": "51", "metadata": {}, "source": [ "### Creating the Dataset Split\n", @@ -1117,7 +1117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea024c20", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -1136,7 +1136,7 @@ }, { "cell_type": "markdown", - "id": "576c14cb-1e75-48ee-b74c-21fb7b4afe12", + "id": "53", "metadata": {}, "source": [ "### Building the Dataset" @@ -1145,7 +1145,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4517f34", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -1162,7 +1162,7 @@ }, { "cell_type": "markdown", - "id": "a176df90-1ab9-4bb2-8809-46f52a4cae73", + "id": "55", "metadata": {}, "source": [ "### Define the Model" @@ -1171,7 +1171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2ec20b34", + "id": "56", "metadata": {}, "outputs": [], "source": [ @@ -1193,7 +1193,7 @@ }, { "cell_type": "markdown", - "id": "fe90c2a2-a0b8-4f7c-9061-7ac5336cdd64", + "id": "57", "metadata": {}, "source": [ "### Define the Training and Test Functions" @@ -1202,7 +1202,7 @@ { "cell_type": "code", "execution_count": null, - "id": "816fe6b3", + "id": "58", "metadata": {}, "outputs": [], "source": [ @@ -1237,7 +1237,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ec59c249-95c9-426d-b409-04f42e7528f8", + "id": "59", "metadata": { "tags": [] }, @@ -1271,7 +1271,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f684cd81-67f9-4ccd-ac08-2ed75e922669", + "id": "60", "metadata": { "tags": [] }, @@ -1294,7 +1294,7 @@ }, { "cell_type": "markdown", - "id": "d0893a8d-86a8-40d4-9cbc-6116200ec943", + "id": "61", "metadata": {}, "source": [ "### Train the Network" @@ -1303,7 +1303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3c5ca08d-3971-4230-b64e-8846ec0b4cda", + "id": "62", "metadata": { "tags": [] }, @@ -1322,7 +1322,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dabaf445-3e69-4e7e-af52-a190b5848d2c", + "id": "63", "metadata": { "tags": [] }, @@ -1336,7 +1336,7 @@ }, { "cell_type": "markdown", - "id": "203109dc-7817-4a68-a468-bcd62515a90c", + "id": "64", "metadata": {}, "source": [ "### Visualizing the Results" @@ -1344,7 +1344,7 @@ }, { "cell_type": "markdown", - "id": "15f2f411-6f21-4095-972a-76745d5eb02f", + "id": "65", "metadata": {}, "source": [ "Comparing the training and validation loss curves" @@ -1353,7 +1353,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cda2716e-26f2-4701-b036-cbf321cb5058", + "id": "66", "metadata": { "tags": [] }, @@ -1367,7 +1367,7 @@ }, { "cell_type": "markdown", - "id": "26b1e20b", + "id": "67", "metadata": {}, "source": [ "Since the NN has one input and one output, we can plot it as a function $nn(X)$ (orange), and compare it to the polyfit (blue) and Wilks 2005 polynomial (black dashed)." @@ -1376,7 +1376,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0409ce7a", + "id": "68", "metadata": {}, "outputs": [], "source": [ @@ -1403,7 +1403,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2b2c4439-b9f0-4139-9568-6d0863f52993", + "id": "69", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/citing.ipynb b/notebooks/citing.ipynb index 7ce0aeb2..ea3ffe88 100644 --- a/notebooks/citing.ipynb +++ b/notebooks/citing.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "bfb1c039-bf51-4923-9ce9-d0e869b3d8c3", + "id": "0", "metadata": {}, "source": [ "# Citing this book\n", diff --git a/notebooks/closing_remarks.ipynb b/notebooks/closing_remarks.ipynb index 8fbacbf9..ea27fba1 100644 --- a/notebooks/closing_remarks.ipynb +++ b/notebooks/closing_remarks.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "fe395e90-f86b-4251-9300-b80389a26d69", + "id": "0", "metadata": {}, "source": [ "# Outlook\n", diff --git a/notebooks/constraints.ipynb b/notebooks/constraints.ipynb index 74dd6e92..8c3ec826 100644 --- a/notebooks/constraints.ipynb +++ b/notebooks/constraints.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "deb0775b", + "id": "0", "metadata": { "user_expressions": [] }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "acd1a857-c84c-4fe6-9494-22cc0aeffefd", + "id": "1", "metadata": { "user_expressions": [] }, @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "fb0c32a3", + "id": "2", "metadata": { "user_expressions": [] }, @@ -57,7 +57,7 @@ }, { "cell_type": "markdown", - "id": "6acaa8ea", + "id": "3", "metadata": { "user_expressions": [] }, @@ -86,7 +86,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5dcd5a54", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -105,7 +105,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1e97474a", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -123,7 +123,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8e603f29", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -134,7 +134,7 @@ { "cell_type": "code", "execution_count": null, - "id": "982eda0b", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -156,7 +156,7 @@ { "cell_type": "code", "execution_count": null, - "id": "afee3788", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -167,7 +167,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7ca46561", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -190,7 +190,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c91c9c96", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -213,7 +213,7 @@ }, { "cell_type": "markdown", - "id": "2a3ac2b8", + "id": "11", "metadata": { "user_expressions": [] }, @@ -224,7 +224,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50d2aa0b", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -251,7 +251,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8f20a1ac", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -277,7 +277,7 @@ }, { "cell_type": "markdown", - "id": "fcc3a04e", + "id": "14", "metadata": { "user_expressions": [] }, @@ -288,7 +288,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f91a3b74", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +308,7 @@ }, { "cell_type": "markdown", - "id": "29380dc3", + "id": "16", "metadata": { "user_expressions": [] }, @@ -318,7 +318,7 @@ }, { "cell_type": "markdown", - "id": "a1aa6cd0", + "id": "17", "metadata": { "user_expressions": [] }, @@ -336,7 +336,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8224f915", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -409,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2c7554df", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -434,7 +434,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5c136578", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -466,7 +466,7 @@ { "cell_type": "code", "execution_count": null, - "id": "42e24fa0", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8c5c2985", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -542,7 +542,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c75fc425-813d-417d-b130-8b7231566849", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -557,7 +557,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e6248e2-4606-4cd6-bbe8-b668ecaa2e53", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -574,7 +574,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36f29ca4-7b20-4abd-8704-f3a9d52423cf", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -589,7 +589,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fe69ee5e-0019-4a97-9350-7df9ad38e378", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -600,7 +600,7 @@ }, { "cell_type": "markdown", - "id": "a4555c47", + "id": "27", "metadata": { "user_expressions": [] }, @@ -613,7 +613,7 @@ }, { "cell_type": "markdown", - "id": "bfa56c74", + "id": "28", "metadata": { "user_expressions": [] }, @@ -624,7 +624,7 @@ { "cell_type": "code", "execution_count": null, - "id": "91e8e9f8", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -681,7 +681,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dcc11a1b-34b6-422a-889a-1a044d7accd1", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -696,7 +696,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28337301-42ec-40db-a5be-b0f00543c56c", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -761,7 +761,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ba797430", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -794,7 +794,7 @@ }, { "cell_type": "markdown", - "id": "0e3fea6f", + "id": "33", "metadata": { "user_expressions": [] }, @@ -809,7 +809,7 @@ { "cell_type": "code", "execution_count": null, - "id": "987ef53e", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -833,7 +833,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9cf6224f", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -858,7 +858,7 @@ { "cell_type": "code", "execution_count": null, - "id": "07c4f1e0", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -908,7 +908,7 @@ { "cell_type": "code", "execution_count": null, - "id": "47ee238e", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -924,7 +924,7 @@ { "cell_type": "code", "execution_count": null, - "id": "71ce4b88", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -941,7 +941,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7b7c9696", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -958,7 +958,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc58bc76", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -991,7 +991,7 @@ }, { "cell_type": "markdown", - "id": "6ad595b3", + "id": "41", "metadata": { "user_expressions": [] }, @@ -1004,7 +1004,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4842bdb0", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -1034,7 +1034,7 @@ }, { "cell_type": "markdown", - "id": "a4c44fc3", + "id": "43", "metadata": { "user_expressions": [] }, @@ -1044,7 +1044,7 @@ }, { "cell_type": "markdown", - "id": "63aee82d", + "id": "44", "metadata": { "user_expressions": [] }, @@ -1055,7 +1055,7 @@ { "cell_type": "code", "execution_count": null, - "id": "596d1e8e", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -1093,7 +1093,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39d140cb-4602-44b3-91e0-06fc15b90866", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -1144,7 +1144,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8a7ebac1", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -1198,7 +1198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59080546", + "id": "48", "metadata": {}, "outputs": [], "source": [ @@ -1228,7 +1228,7 @@ { "cell_type": "code", "execution_count": null, - "id": "868ac884", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -1276,7 +1276,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46274421", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -1327,7 +1327,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e6649bc", + "id": "51", "metadata": {}, "outputs": [], "source": [ @@ -1375,7 +1375,7 @@ }, { "cell_type": "markdown", - "id": "fc7d3fd8-b460-4c9c-a064-2a6c7dccb973", + "id": "52", "metadata": {}, "source": [ "Given the simplicity of the parameterization problem in L96 model, all the different ML parameterizations performed relatively well. We expect to see much larger gains by adding constraints in more complex realistic problems, particularly the ones that are more data limited. " @@ -1383,7 +1383,7 @@ }, { "cell_type": "markdown", - "id": "e0ee7ea5", + "id": "53", "metadata": { "tags": [], "user_expressions": [] diff --git a/notebooks/estimating-gcm-parameters.ipynb b/notebooks/estimating-gcm-parameters.ipynb index a6933f30..9c318d25 100644 --- a/notebooks/estimating-gcm-parameters.ipynb +++ b/notebooks/estimating-gcm-parameters.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "33938dd5-f864-4642-8ce0-c44bb81bd60d", + "id": "0", "metadata": {}, "source": [ "# Tuning GCM Parameterizations" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "686988e8", + "id": "1", "metadata": {}, "source": [ "The objective of this notebook is to show how GCM closures can be tuned in practice. \n", @@ -19,7 +19,7 @@ }, { "cell_type": "markdown", - "id": "8cb1f6aa", + "id": "2", "metadata": {}, "source": [ "**Resources** : We have used material from Emmanuel Cosme's nice GitHub [repository](https://github.com/ecosme38/Data-Assimilation-Notebooks). " @@ -27,7 +27,7 @@ }, { "cell_type": "markdown", - "id": "daa836f4", + "id": "3", "metadata": {}, "source": [ "## The GCM Parameterization Problem " @@ -35,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "33a03d0b", + "id": "4", "metadata": {}, "source": [ "Here we quickly reintroduce the problem we are trying to solve, our starting point here is {doc}`gcm-parameterization-problem`." @@ -44,7 +44,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb6ac7ba", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "515d089a", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -72,7 +72,7 @@ }, { "cell_type": "markdown", - "id": "4c697b25", + "id": "7", "metadata": {}, "source": [ "Run \"real world\" for 3 days to forget initial conditons." @@ -81,7 +81,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d6d7e234", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -91,7 +91,7 @@ }, { "cell_type": "markdown", - "id": "777fef0d", + "id": "9", "metadata": {}, "source": [ "From here on we can use `W.X` as perfect initial conditions for a model and sample the real world using `W.run(dt,T)`" @@ -99,7 +99,7 @@ }, { "cell_type": "markdown", - "id": "9647aa43-90ce-45f0-9756-02ed10780e2f", + "id": "10", "metadata": {}, "source": [ "The class defined below is just a sophisticated version of the single time-scale L96 gcm analogue that was defined in 'The Lorenz-96 GCM Analog' notebook." @@ -108,7 +108,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5b97a051", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -156,7 +156,7 @@ }, { "cell_type": "markdown", - "id": "66a2385b", + "id": "12", "metadata": {}, "source": [ "We illustrate the concepts here with the help of the simple linear regression parameterization, which was also introduced in the {doc}`gcm-analogue`. Remember that this was not a very good parameterization, but it is used to show how imperfect parameterizations can be tuned in different ways. Here we use parameters that were estimated by fitting the linear regression to the true sub-grid tendencies. Such parameter estimates are sometimes also referred to as \"offline fitting\" estimates." @@ -165,7 +165,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1ec03cc6", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -175,7 +175,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18baf519", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -189,7 +189,7 @@ }, { "cell_type": "markdown", - "id": "235aed7d", + "id": "15", "metadata": {}, "source": [ "Now we compare the model with the parameterization and trajectories from the \"real world\" truth." @@ -198,7 +198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1de79307", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -209,7 +209,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5d260623", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -224,7 +224,7 @@ }, { "cell_type": "markdown", - "id": "2c928cf0-72d6-4cc1-9897-fdb141a9f2f9", + "id": "18", "metadata": {}, "source": [ "The figure above show that the model with linear parameterization outperforms the model with no parameterization. Next we discuss if these parameter estimates can be made even better. " @@ -232,7 +232,7 @@ }, { "cell_type": "markdown", - "id": "3200004c", + "id": "19", "metadata": {}, "source": [ "## Variational estimation of optimal parameters for a predefined closure\n", @@ -243,7 +243,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9b5141f2", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -253,7 +253,7 @@ }, { "cell_type": "markdown", - "id": "96b14903", + "id": "21", "metadata": {}, "source": [ "### Estimating parameters based on one initial condition and one time step" @@ -261,7 +261,7 @@ }, { "cell_type": "markdown", - "id": "a57669f4", + "id": "22", "metadata": {}, "source": [ "#### Cost function " @@ -269,7 +269,7 @@ }, { "cell_type": "markdown", - "id": "91466a50", + "id": "23", "metadata": {}, "source": [ "What we will be doing here is very close to what is done with classical variational data assimilation, where we try to estimate the state of the parameters of a model through the minimization of a cost function $J$. This is also very close to what is done when parameterizations are encoded as neural networks. " @@ -277,7 +277,7 @@ }, { "cell_type": "markdown", - "id": "53f9ab28", + "id": "24", "metadata": {}, "source": [ "We introduce a cost function $J(p)$ which depends on the parameters of the closure. \n", @@ -290,7 +290,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0f356baf-71c3-4f70-91c6-cd7da6415191", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -303,7 +303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ce939e3c", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -315,7 +315,7 @@ }, { "cell_type": "markdown", - "id": "c6198df7", + "id": "27", "metadata": {}, "source": [ "#### Minimization " @@ -323,7 +323,7 @@ }, { "cell_type": "markdown", - "id": "35d38b2b", + "id": "28", "metadata": {}, "source": [ "Since the problem dimension is only two ( $p=[p1,p2]$ ), we can use efficient derivative-free optimization methods." @@ -332,7 +332,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4b23971d", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +344,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4bdffeae", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -353,7 +353,7 @@ }, { "cell_type": "markdown", - "id": "e6b43c9e", + "id": "31", "metadata": {}, "source": [ "#### Let's test the closure." @@ -362,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6a8c65cd", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -377,7 +377,7 @@ }, { "cell_type": "markdown", - "id": "9172f27e", + "id": "33", "metadata": {}, "source": [ "#### Results" @@ -386,7 +386,7 @@ { "cell_type": "code", "execution_count": null, - "id": "328e5851", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -401,7 +401,7 @@ }, { "cell_type": "markdown", - "id": "fcc951db", + "id": "35", "metadata": {}, "source": [ "The results are better but not great. This problem is related to the question of *a priori* versus *a posteriori* skill in Large Eddy Simulation (LES) closures. \n" @@ -409,7 +409,7 @@ }, { "cell_type": "markdown", - "id": "de43a461", + "id": "36", "metadata": {}, "source": [ "### Estimating parameters which optimize longer trajectories" @@ -418,7 +418,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f5bb63b3", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -430,7 +430,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dfa2d4aa", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -440,7 +440,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1722b186-8ba6-4ff0-b369-18320b9381a5", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -454,7 +454,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5a48582d", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -467,7 +467,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9b152335", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -479,7 +479,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c8c7eb69-fcc1-411e-bbb5-480bd7923b00", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -488,7 +488,7 @@ }, { "cell_type": "markdown", - "id": "c5613d4a", + "id": "43", "metadata": {}, "source": [ "#### Let's test the closure." @@ -497,7 +497,7 @@ { "cell_type": "code", "execution_count": null, - "id": "56a96080", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -510,7 +510,7 @@ }, { "cell_type": "markdown", - "id": "a44d0463", + "id": "45", "metadata": {}, "source": [ "#### Results" @@ -519,7 +519,7 @@ { "cell_type": "code", "execution_count": null, - "id": "72ad455c", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -534,7 +534,7 @@ }, { "cell_type": "markdown", - "id": "47764aff", + "id": "47", "metadata": {}, "source": [ "The closure produces better results but it is not clear how this would generalize to unseen initial conditions. " @@ -542,7 +542,7 @@ }, { "cell_type": "markdown", - "id": "f814ad48", + "id": "48", "metadata": {}, "source": [ "## Discussion and possible next steps:\n", diff --git a/notebooks/gcm-analogue.ipynb b/notebooks/gcm-analogue.ipynb index 8df3312b..d498abcd 100644 --- a/notebooks/gcm-analogue.ipynb +++ b/notebooks/gcm-analogue.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "08d71bbc", + "id": "0", "metadata": {}, "source": [ "# The Lorenz-96 and its GCM Analog\n", @@ -15,7 +15,7 @@ }, { "cell_type": "markdown", - "id": "15939db9-43b8-4d20-8a31-84bdbc80fd2d", + "id": "1", "metadata": {}, "source": [ "## The two time-scale model: Analog for the real atmosphere\n", @@ -31,7 +31,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7f0d7a3f", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -43,7 +43,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1d6f4844", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "fd78d2c3-b1f6-41d9-bc9c-a8da4898b9b0", + "id": "4", "metadata": {}, "source": [ "Here is the what the solution looks like:" @@ -64,7 +64,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1a01ce7b", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -88,7 +88,7 @@ }, { "cell_type": "markdown", - "id": "f63cb015", + "id": "6", "metadata": {}, "source": [ "## The single time-scale model: Analog for a general circulation model (GCM)\n", @@ -109,7 +109,7 @@ }, { "cell_type": "markdown", - "id": "2851e9da-c8f9-4411-9db5-241cee04bc79", + "id": "7", "metadata": {}, "source": [ "In the following code, we show how the single time scale model can be solved. We use `L96_eq1_xdot` (code shown below), which returns the tendency (RHS) corresponding to the following equation, where there are no coupling or parameterization terms, \n", @@ -124,7 +124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9535c3a-4a5f-4268-a285-cc3ae2c02d4e", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -135,7 +135,7 @@ { "cell_type": "code", "execution_count": null, - "id": "60f0a64e-1cf3-4ac9-90ac-8b3f4745ab36", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -144,7 +144,7 @@ }, { "cell_type": "markdown", - "id": "2f5deb49-44b3-41ad-97cd-29a099d21169", + "id": "10", "metadata": {}, "source": [ "Now we define `GCM`, which solves for the temporal evolution of $X$s using a simple Euler integration." @@ -153,7 +153,7 @@ { "cell_type": "code", "execution_count": null, - "id": "61a2e64f-dc90-43a4-8488-eac7f7c6c995", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "7304546c-cd9e-4034-948e-4fa17f7e376e", + "id": "12", "metadata": {}, "source": [ "Notice that we have added the possibility of adding a parameterization that may take the form a polynomial function, this will be discussed futher below." @@ -180,7 +180,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1317602e-a3b1-4250-b837-a68a67a3746e", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fb291e41-31e7-48bb-9f86-2b7435f8f984", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -213,7 +213,7 @@ }, { "cell_type": "markdown", - "id": "18bcf631-1aac-4115-8eda-752bc1244d47", + "id": "15", "metadata": {}, "source": [ "Clearly, the evolution the single time scale system does not even qualitatively match the evolution of the two time scale system. For example, the leftward propagation is completely missing. " @@ -221,7 +221,7 @@ }, { "cell_type": "markdown", - "id": "fc65effa", + "id": "16", "metadata": {}, "source": [ "## Adding a parametization to the single time-scale model\n", @@ -252,7 +252,7 @@ }, { "cell_type": "markdown", - "id": "58a6bada-ad09-4d51-a017-c32cde4d5b55", + "id": "17", "metadata": {}, "source": [ "All parameterizations have some unknown parameters, which need to be determined in some way. These parameters may be guessed based on some intuition about the physics, or determined from data collected in the real system (two time-scale model here), or optimized to make the evolution of the reduced (single time-scale) model match the evolution of the real world or full (two time-scale model) model. In this notebook we will use the second approach, and the last approach will be discussed in {doc}`estimating-gcm-parameters`. " @@ -260,7 +260,7 @@ }, { "cell_type": "markdown", - "id": "39830287-61de-4069-b547-e89a9547b2a7", + "id": "18", "metadata": {}, "source": [ "\n", @@ -270,7 +270,7 @@ { "cell_type": "code", "execution_count": null, - "id": "934f3a28", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -280,7 +280,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ff240f58", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eae31105-17af-4c41-9d4d-5bef728ff21c", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -317,7 +317,7 @@ }, { "cell_type": "markdown", - "id": "ed0a0e4b", + "id": "22", "metadata": {}, "source": [ "We now compare the results obtained with a linear polynomial approximation, {cite:t}`Wilks2005` polynomial parameterization and the \"truth\" values of the coupling terms." @@ -326,7 +326,7 @@ { "cell_type": "code", "execution_count": null, - "id": "06794e2b", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -348,7 +348,7 @@ }, { "cell_type": "markdown", - "id": "02669085-8725-456d-9100-5a31f92f2db0", + "id": "24", "metadata": {}, "source": [ "(sub-grid-hist-label)=\n", @@ -357,7 +357,7 @@ }, { "cell_type": "markdown", - "id": "cc34b767-cf90-4503-b708-5dd560d903ff", + "id": "25", "metadata": {}, "source": [ "We had already setup the code in `GCM` to accept polynomial parameterizations, which can be turned on by passing the parameters. We will use this in the next section to test the effect that the parameterization has on the single time-scale model." @@ -365,7 +365,7 @@ }, { "cell_type": "markdown", - "id": "e42cd286", + "id": "26", "metadata": {}, "source": [ "## Testing the effect of parameterizations in the \"GCM\" model\n", @@ -380,7 +380,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9579b622", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -401,7 +401,7 @@ }, { "cell_type": "markdown", - "id": "6a3f04e9-78bc-4af1-bb9b-31370693a20a", + "id": "28", "metadata": {}, "source": [ "Now we look at Hovmoeller plots of the different solutions." @@ -410,7 +410,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1bb832df", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -442,7 +442,7 @@ }, { "cell_type": "markdown", - "id": "aed7579a", + "id": "30", "metadata": {}, "source": [ "The next plot shows the temporal evolution of the variable $X_3$ obtained with the four models listed above." @@ -451,7 +451,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f5815467", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -469,7 +469,7 @@ }, { "cell_type": "markdown", - "id": "082fcc26-72a5-43bb-a764-159d14155605", + "id": "32", "metadata": {}, "source": [ "As seen above, all the simulation diverge at long times,and the unparameterized simulation diverges very rapidly. On the other hand, the parameterized GCMs track the \"real world\" solution better. The Wilks parameterization does better than the linear fit." @@ -477,7 +477,7 @@ }, { "cell_type": "markdown", - "id": "b33704fc", + "id": "33", "metadata": {}, "source": [ "## Summary\n", @@ -494,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "06d61ed7-705f-40dc-9d39-0383dcefcd36", + "id": "34", "metadata": {}, "source": [] } diff --git a/notebooks/gcm-parameterization-problem.ipynb b/notebooks/gcm-parameterization-problem.ipynb index 40ab4baa..bc15b9f0 100644 --- a/notebooks/gcm-parameterization-problem.ipynb +++ b/notebooks/gcm-parameterization-problem.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b0650213-5f2e-4821-8489-b969f0da4b63", + "id": "0", "metadata": { "user_expressions": [] }, @@ -12,7 +12,7 @@ }, { "cell_type": "markdown", - "id": "2a9645ec", + "id": "1", "metadata": { "user_expressions": [] }, @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "69c17d19", + "id": "2", "metadata": { "user_expressions": [] }, @@ -34,7 +34,7 @@ }, { "cell_type": "markdown", - "id": "f4905750", + "id": "3", "metadata": { "user_expressions": [] }, @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f8255144", + "id": "4", "metadata": { "tags": [] }, @@ -60,7 +60,7 @@ }, { "cell_type": "markdown", - "id": "338aff9c-2891-4583-9efa-b1a56669fe2f", + "id": "5", "metadata": { "user_expressions": [] }, @@ -71,7 +71,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5ba242b", + "id": "6", "metadata": { "tags": [] }, @@ -86,7 +86,7 @@ }, { "cell_type": "markdown", - "id": "4b4b5606-be7c-4fd3-8f3d-fa7fbdbec74f", + "id": "7", "metadata": { "user_expressions": [] }, @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c52019dd-0d33-45d2-a10a-08429c6374a2", + "id": "8", "metadata": { "tags": [] }, @@ -110,7 +110,7 @@ }, { "cell_type": "markdown", - "id": "8e1c62ff", + "id": "9", "metadata": { "user_expressions": [] }, @@ -130,7 +130,7 @@ }, { "cell_type": "markdown", - "id": "fa072bc6-f5de-4917-92a3-2db493e36a35", + "id": "10", "metadata": { "user_expressions": [] }, @@ -141,7 +141,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53afa120", + "id": "11", "metadata": { "tags": [] }, @@ -170,7 +170,7 @@ }, { "cell_type": "markdown", - "id": "0a620522-8a1a-4eda-9b0a-04e304b49255", + "id": "12", "metadata": { "user_expressions": [] }, @@ -181,7 +181,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6f113734", + "id": "13", "metadata": { "tags": [] }, @@ -194,7 +194,7 @@ { "cell_type": "code", "execution_count": null, - "id": "66f3c2ea", + "id": "14", "metadata": { "tags": [] }, @@ -213,7 +213,7 @@ }, { "cell_type": "markdown", - "id": "dad9cbd6", + "id": "15", "metadata": { "user_expressions": [] }, @@ -228,7 +228,7 @@ }, { "cell_type": "markdown", - "id": "64e94c31", + "id": "16", "metadata": { "user_expressions": [] }, @@ -239,7 +239,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c797ab03", + "id": "17", "metadata": { "tags": [] }, @@ -281,7 +281,7 @@ }, { "cell_type": "markdown", - "id": "5f6fd74a-3427-4dd3-b8b4-013acf2c49ff", + "id": "18", "metadata": { "user_expressions": [] }, @@ -292,7 +292,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6d08038a", + "id": "19", "metadata": { "tags": [] }, @@ -308,7 +308,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca979ba1", + "id": "20", "metadata": { "tags": [] }, @@ -327,7 +327,7 @@ }, { "cell_type": "markdown", - "id": "96a97034", + "id": "21", "metadata": { "user_expressions": [] }, @@ -341,7 +341,7 @@ }, { "cell_type": "markdown", - "id": "42ca8ac3", + "id": "22", "metadata": { "user_expressions": [] }, @@ -351,7 +351,7 @@ }, { "cell_type": "markdown", - "id": "36678885", + "id": "23", "metadata": { "user_expressions": [] }, @@ -361,7 +361,7 @@ }, { "cell_type": "markdown", - "id": "a6083799", + "id": "24", "metadata": { "user_expressions": [] }, @@ -373,7 +373,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bd825114", + "id": "25", "metadata": { "tags": [] }, @@ -395,7 +395,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8a856077", + "id": "26", "metadata": { "tags": [] }, @@ -410,7 +410,7 @@ { "cell_type": "code", "execution_count": null, - "id": "04d98879-6013-4161-b6b6-12f45ee7227e", + "id": "27", "metadata": { "tags": [] }, @@ -427,7 +427,7 @@ }, { "cell_type": "markdown", - "id": "81b73260", + "id": "28", "metadata": { "user_expressions": [] }, @@ -442,7 +442,7 @@ }, { "cell_type": "markdown", - "id": "a6cf8c10", + "id": "29", "metadata": { "user_expressions": [] }, @@ -452,7 +452,7 @@ }, { "cell_type": "markdown", - "id": "3a6f4cd9", + "id": "30", "metadata": { "user_expressions": [] }, @@ -465,7 +465,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35609fa7", + "id": "31", "metadata": { "tags": [] }, @@ -487,7 +487,7 @@ { "cell_type": "code", "execution_count": null, - "id": "354b8190-af9b-4bc2-a4c4-de6a9266cc56", + "id": "32", "metadata": { "tags": [] }, @@ -527,7 +527,7 @@ }, { "cell_type": "markdown", - "id": "baa78beb", + "id": "33", "metadata": { "user_expressions": [] }, @@ -547,7 +547,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33368242-30cf-4b30-8023-02ad1b1de22d", + "id": "34", "metadata": { "tags": [] }, @@ -564,7 +564,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7ac888c7-9a97-4881-8ffc-ab5a8af49ec1", + "id": "35", "metadata": { "tags": [] }, @@ -577,7 +577,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b49636d6", + "id": "36", "metadata": { "tags": [] }, @@ -605,7 +605,7 @@ }, { "cell_type": "markdown", - "id": "c6c31304-6c60-4864-a3ad-d40d6e5117b8", + "id": "37", "metadata": { "user_expressions": [] }, @@ -615,7 +615,7 @@ }, { "cell_type": "markdown", - "id": "4aea7896", + "id": "38", "metadata": { "user_expressions": [] }, @@ -636,7 +636,7 @@ { "cell_type": "code", "execution_count": null, - "id": "572bedd5-4aa2-4441-9555-31261de10aa9", + "id": "39", "metadata": { "tags": [] }, @@ -692,7 +692,7 @@ }, { "cell_type": "markdown", - "id": "7b9692de-d650-4054-84e2-7e4b2925a7aa", + "id": "40", "metadata": { "user_expressions": [] }, @@ -704,7 +704,7 @@ }, { "cell_type": "markdown", - "id": "242f83d4-9112-4552-8a54-0cb69a1939cb", + "id": "41", "metadata": {}, "source": [ "**Distribution based error metric**:\n", @@ -721,7 +721,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8b8ac189-f5e2-4bb6-a807-082c20f44c36", + "id": "42", "metadata": { "tags": [] }, @@ -733,7 +733,7 @@ { "cell_type": "code", "execution_count": null, - "id": "aeb3d098-1d52-4253-a283-10c7a59158ab", + "id": "43", "metadata": { "tags": [] }, @@ -758,7 +758,7 @@ }, { "cell_type": "markdown", - "id": "e750512f-1aa0-481f-91c6-a7913464ce59", + "id": "44", "metadata": {}, "source": [ "The vertical lines in the figure above indicate the means of the respective empirical distributions. As we saw earlier, the mean of the GCM without parameterization is slightly closer to the real mean than the mean of the GCM with parameterization. However, if we take into account the entire distribution, the GCM with parameterization seems to be a closer match to the true distribution. Our eyeball estimate is quantitatively confirmed by the next figure, which computes the distributional error, $E_{distr}$." @@ -767,7 +767,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e41ec23b-a0d0-44ab-9757-d958b2171e06", + "id": "45", "metadata": { "tags": [] }, @@ -787,7 +787,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10af9a85-fdac-422d-bedf-7dee947be990", + "id": "46", "metadata": { "tags": [] }, @@ -800,7 +800,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2f8c907c-98a7-4499-8a81-323dcd7dadbe", + "id": "47", "metadata": { "tags": [] }, @@ -832,7 +832,7 @@ }, { "cell_type": "markdown", - "id": "c769ec8e", + "id": "48", "metadata": { "user_expressions": [] }, @@ -849,7 +849,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c75e73f6", + "id": "49", "metadata": { "tags": [] }, @@ -863,7 +863,7 @@ }, { "cell_type": "markdown", - "id": "13a52dd0", + "id": "50", "metadata": { "user_expressions": [] }, @@ -874,7 +874,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2d331c18", + "id": "51", "metadata": { "tags": [] }, @@ -906,7 +906,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44ac3775", + "id": "52", "metadata": { "tags": [] }, @@ -922,7 +922,7 @@ }, { "cell_type": "markdown", - "id": "3b65357a", + "id": "53", "metadata": { "user_expressions": [] }, @@ -932,7 +932,7 @@ }, { "cell_type": "markdown", - "id": "91d9ad08", + "id": "54", "metadata": { "tags": [], "user_expressions": [] @@ -970,7 +970,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8644d36e", + "id": "55", "metadata": { "tags": [] }, @@ -1013,7 +1013,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca94d484-d1e7-464b-98b2-d34bda5cfa40", + "id": "56", "metadata": { "tags": [] }, @@ -1038,7 +1038,7 @@ }, { "cell_type": "markdown", - "id": "24427fd3-5ea3-4d78-909c-43f9d60396f2", + "id": "57", "metadata": { "user_expressions": [] }, @@ -1049,7 +1049,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79aeab71-cf42-4d8d-9e4b-3dbb616a5971", + "id": "58", "metadata": { "tags": [] }, @@ -1062,7 +1062,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7326aa7b-e804-41b0-9d52-8bffb19fad6e", + "id": "59", "metadata": { "tags": [] }, @@ -1079,7 +1079,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a4ed0a93-9560-4466-a91f-2a048bfea1d1", + "id": "60", "metadata": { "tags": [] }, @@ -1100,7 +1100,7 @@ }, { "cell_type": "markdown", - "id": "3a885f6f-0055-478d-9093-3d7080ee9ff4", + "id": "61", "metadata": { "user_expressions": [] }, @@ -1113,7 +1113,7 @@ }, { "cell_type": "markdown", - "id": "dcdba996-0dd4-4ef8-adea-b8b221b56d53", + "id": "62", "metadata": { "tags": [], "user_expressions": [] diff --git a/notebooks/old_notebooks/gcm-analogue-old.ipynb b/notebooks/old_notebooks/gcm-analogue-old.ipynb index 7dd648dd..64583d34 100644 --- a/notebooks/old_notebooks/gcm-analogue-old.ipynb +++ b/notebooks/old_notebooks/gcm-analogue-old.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "08d71bbc", + "id": "0", "metadata": {}, "source": [ "# The Lorenz-96 and its GCM Analog\n", @@ -15,7 +15,7 @@ }, { "cell_type": "markdown", - "id": "15939db9-43b8-4d20-8a31-84bdbc80fd2d", + "id": "1", "metadata": {}, "source": [ "## The two time-scale model: analog for the real atmosphere\n", @@ -31,7 +31,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7f0d7a3f", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -44,7 +44,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1d6f4844", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "markdown", - "id": "fd78d2c3-b1f6-41d9-bc9c-a8da4898b9b0", + "id": "4", "metadata": {}, "source": [ "Here is the what the solution looks like:" @@ -65,7 +65,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1a01ce7b", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -88,7 +88,7 @@ }, { "cell_type": "markdown", - "id": "f63cb015", + "id": "6", "metadata": {}, "source": [ "## The single time-scale model: Analog for a general circulation model (GCM)\n", @@ -109,7 +109,7 @@ }, { "cell_type": "markdown", - "id": "fc65effa", + "id": "7", "metadata": {}, "source": [ "### The parametization $P(X_k)$\n", @@ -139,7 +139,7 @@ }, { "cell_type": "markdown", - "id": "3f8afa32-cf28-4401-becd-65bb311e320d", + "id": "8", "metadata": {}, "source": [ "\n", @@ -149,7 +149,7 @@ { "cell_type": "code", "execution_count": null, - "id": "934f3a28", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -159,7 +159,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ff240f58", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -170,7 +170,7 @@ }, { "cell_type": "markdown", - "id": "ed0a0e4b", + "id": "11", "metadata": {}, "source": [ "We now compare the results obtained with a linear polynomial approximation, {cite:t}`Wilks2005` polynomial parameterization and the \"truth\" values of the coupling terms." @@ -179,7 +179,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eae31105-17af-4c41-9d4d-5bef728ff21c", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -201,7 +201,7 @@ { "cell_type": "code", "execution_count": null, - "id": "06794e2b", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -223,7 +223,7 @@ }, { "cell_type": "markdown", - "id": "02669085-8725-456d-9100-5a31f92f2db0", + "id": "14", "metadata": {}, "source": [ "The figure above shows that the the relationship between the slow variables ($X_k$) and the observed coupling term ($U_k$) is non-linear. The higher order polynomlial, since it is more flexible, does a better job at capturing the overall pattern, relative to the linear fit. " @@ -231,7 +231,7 @@ }, { "cell_type": "markdown", - "id": "e42cd286", + "id": "15", "metadata": {}, "source": [ "## Testing the effect of parameterizations in the \"GCM\" model\n", @@ -246,7 +246,7 @@ { "cell_type": "code", "execution_count": null, - "id": "724df9e2", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -265,7 +265,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9579b622", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -286,7 +286,7 @@ }, { "cell_type": "markdown", - "id": "6a3f04e9-78bc-4af1-bb9b-31370693a20a", + "id": "18", "metadata": {}, "source": [ "Now we look at Hovmoeller plots of the different solutions." @@ -295,7 +295,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1bb832df", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -327,7 +327,7 @@ }, { "cell_type": "markdown", - "id": "aed7579a", + "id": "20", "metadata": {}, "source": [ "The next plot shows the temporal evolution of the variable $X_3$ obtained with the four models listed above." @@ -336,7 +336,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f5815467", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -354,7 +354,7 @@ }, { "cell_type": "markdown", - "id": "082fcc26-72a5-43bb-a764-159d14155605", + "id": "22", "metadata": {}, "source": [ "As seen above, all the simulation diverge at long times,and the unparameterized simulation diverges very rapidly. On the other hand, the parameterized GCMs track the \"real world\" solution better. The Wilks parameterization does better than the linear fit." @@ -362,7 +362,7 @@ }, { "cell_type": "markdown", - "id": "c200d831", + "id": "23", "metadata": {}, "source": [ "## Sources of model error\n", @@ -393,7 +393,7 @@ { "cell_type": "code", "execution_count": null, - "id": "94266f66", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -404,7 +404,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ac08e1e7", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -422,7 +422,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0e27da0b", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -440,7 +440,7 @@ }, { "cell_type": "markdown", - "id": "885d4942", + "id": "27", "metadata": {}, "source": [ "For this particular set of parameter values, the removal of the coupling term results in the highest error. The second and third sources of error correspond to the numerical error (introduced by increasing the time-step) and to the error resulting form poorly parameterized unresolved physics (modeled by consideing a first-order polynomial approximation of the coupling terms) respectively.\n", @@ -450,7 +450,7 @@ }, { "cell_type": "markdown", - "id": "43f0dd3f", + "id": "28", "metadata": {}, "source": [ "## Stochastic parameterizations\n", @@ -463,7 +463,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d0d5f104", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -477,7 +477,7 @@ }, { "cell_type": "markdown", - "id": "20aec9e7", + "id": "30", "metadata": {}, "source": [ "We compare the mean values of the slow variables $X_k$ , $k=1,\\ldots,K$." @@ -486,7 +486,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a739eaab", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -504,7 +504,7 @@ { "cell_type": "code", "execution_count": null, - "id": "db7c78d1", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -515,7 +515,7 @@ }, { "cell_type": "markdown", - "id": "c195d28d", + "id": "33", "metadata": {}, "source": [ "As expected, the {cite}`Wilks2005` parameterization gives a mean value of the slow variables that is closer to the one obtained with the \"truth\" model compared to the model with linear parameterization since {cite}`Wilks2005` coupling term is modeled with a third-order polynomial.\n", @@ -526,7 +526,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fec51c41", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -547,7 +547,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2ef1a258", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -564,7 +564,7 @@ }, { "cell_type": "markdown", - "id": "fcd4a79b", + "id": "36", "metadata": {}, "source": [ "By comparing the mean and standard deviation values of the slow variables $X_k$ , $k=1,\\ldots,K$, we do not see a significant improvement when adding the stochastic component $e_k$ to the third-order polynomial parameterization." @@ -573,7 +573,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fafb47e7", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -595,7 +595,7 @@ }, { "cell_type": "markdown", - "id": "b33704fc", + "id": "38", "metadata": {}, "source": [ "## Summary\n", diff --git a/notebooks/old_notebooks/gcm-parameterization-problem-old.ipynb b/notebooks/old_notebooks/gcm-parameterization-problem-old.ipynb index 4d158500..b67660a2 100644 --- a/notebooks/old_notebooks/gcm-parameterization-problem-old.ipynb +++ b/notebooks/old_notebooks/gcm-parameterization-problem-old.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b0650213-5f2e-4821-8489-b969f0da4b63", + "id": "0", "metadata": {}, "source": [ "# GCM parameterizations, skill metrics, and other sources of uncertainity" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "2a9645ec", + "id": "1", "metadata": {}, "source": [ "In the last notebook we provided a very quick overview of the problem we run into when trying to simulate a real system (e.g. the atmosphere) on a computer using a General Circulation Model (GCM), even when the exact equations to be solved are known in principle. The problem of limited computational resources translates into an inability to resolve all scales of motion in a GCM, and the unresolved scales need to be parameterized. \n", @@ -20,7 +20,7 @@ }, { "cell_type": "markdown", - "id": "69c17d19", + "id": "2", "metadata": {}, "source": [ "## The need for GCM parameterizations" @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "f4905750", + "id": "3", "metadata": {}, "source": [ "Let's first quickly review some concepts from the last notebook, using a slightly modified framing that might benefit some readers. \n", @@ -38,7 +38,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f8255144", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "markdown", - "id": "338aff9c-2891-4583-9efa-b1a56669fe2f", + "id": "5", "metadata": {}, "source": [ "Here, `L96` serves as the **\"real world\"** or two time-scale model, whereas `L96_eq1_xdot` represents the **beginning of rhs of X tendency** or the tendency in the single time-scale model." @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5ba242b", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -72,7 +72,7 @@ }, { "cell_type": "markdown", - "id": "4b4b5606-be7c-4fd3-8f3d-fa7fbdbec74f", + "id": "7", "metadata": {}, "source": [ "Since we start the model with a random initial condition, there is no reason to expect that these initial conditions are an actual solution to the model. These arbitrary states can result in initial shocks to the system, which will are unrealistic features but get dissipated after some time.\n", @@ -82,7 +82,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c52019dd-0d33-45d2-a10a-08429c6374a2", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "markdown", - "id": "8e1c62ff", + "id": "9", "metadata": {}, "source": [ "From here on we can use `W.X` as perfect initial conditions for a model and sample the \"real world\" using `W.run(dt, T)`.\n", @@ -110,7 +110,7 @@ }, { "cell_type": "markdown", - "id": "fa072bc6-f5de-4917-92a3-2db493e36a35", + "id": "10", "metadata": {}, "source": [ "**GCM without parameterization**" @@ -119,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53afa120", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -146,7 +146,7 @@ }, { "cell_type": "markdown", - "id": "0a620522-8a1a-4eda-9b0a-04e304b49255", + "id": "12", "metadata": {}, "source": [ "This GCM is unstable due to Euler forward time stepping scheme, so we don't integrate it for too long and compare it to the real world with the same time interval as `dt` used by the model." @@ -155,7 +155,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6f113734", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -166,7 +166,7 @@ { "cell_type": "code", "execution_count": null, - "id": "66f3c2ea", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -183,7 +183,7 @@ }, { "cell_type": "markdown", - "id": "dad9cbd6", + "id": "15", "metadata": { "user_expressions": [] }, @@ -198,7 +198,7 @@ }, { "cell_type": "markdown", - "id": "64e94c31", + "id": "16", "metadata": {}, "source": [ "**GCM with parameterization**" @@ -207,7 +207,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c797ab03", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -247,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "5f6fd74a-3427-4dd3-b8b4-013acf2c49ff", + "id": "18", "metadata": {}, "source": [ "As a first step, we illustrate introducing a polynomial parameterization to GCM and then compare the model to the true trajectories obtained from the real world with the same time interval as `dt` used by the model. This is the same as what was done in the previous notebook, but is shown again for completeness." @@ -256,7 +256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6d08038a", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -270,7 +270,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca979ba1", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -287,7 +287,7 @@ }, { "cell_type": "markdown", - "id": "96a97034", + "id": "21", "metadata": {}, "source": [ "While the GCM with parameterization is better than the GCM without parameterization, it is still not very good at reproducing the true evolution of the full system. It also remains to find the most appropriate coefficients of the polynomial parameterization to make the Model as close as possible to the Truth.\n", @@ -299,7 +299,7 @@ }, { "cell_type": "markdown", - "id": "42ca8ac3", + "id": "22", "metadata": {}, "source": [ "## Should parameterizations be deterministic or stochastic ? " @@ -307,7 +307,7 @@ }, { "cell_type": "markdown", - "id": "36678885", + "id": "23", "metadata": {}, "source": [ "The `naive_parameterization` above has no particular physical nor mathematical justification. Most importantly, it relies on a very strong assumption, that the time rate of change of $X$ at time $t$ is a function of $X(t)$. This assumption implies that the future evolution of the reduced dimension system $X(t)$ is *deterministically* related to the initial reduced dimension state $X(0)$. " @@ -315,7 +315,7 @@ }, { "cell_type": "markdown", - "id": "a6083799", + "id": "24", "metadata": {}, "source": [ "But this is not a good assumption because the two identical reduced dimension states ($X$, macro-state) can be associated with very different fine scale states ($Y$, micro-state). This can also be seen visually by considering the [plot](sub-grid-hist-label) in the previous notebook, which shows that the for each value of $X$ there is a range of possible values for the sub-grid effects.\n", @@ -325,7 +325,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bd825114", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -345,7 +345,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8a856077", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -358,7 +358,7 @@ { "cell_type": "code", "execution_count": null, - "id": "04d98879-6013-4161-b6b6-12f45ee7227e", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ }, { "cell_type": "markdown", - "id": "81b73260", + "id": "28", "metadata": {}, "source": [ "So even very small uncertainties in the micro-state ($Y$) of L96 can lead to large scale changes (i.e. of the variable $X$) over short time.\n", @@ -386,7 +386,7 @@ }, { "cell_type": "markdown", - "id": "a6cf8c10", + "id": "29", "metadata": {}, "source": [ "## How to measure parameterization skill ? " @@ -394,7 +394,7 @@ }, { "cell_type": "markdown", - "id": "3a6f4cd9", + "id": "30", "metadata": {}, "source": [ "We would like to build our closures by systematically measuring their skills, so that we can compare different fomulations using these \"skill scores\". \n", @@ -405,7 +405,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35609fa7", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -424,7 +424,7 @@ { "cell_type": "code", "execution_count": null, - "id": "354b8190-af9b-4bc2-a4c4-de6a9266cc56", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ }, { "cell_type": "markdown", - "id": "baa78beb", + "id": "33", "metadata": {}, "source": [ "**Error metric based on model evolution**:\n", @@ -471,7 +471,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1102657f", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -484,7 +484,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7c31b065", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -500,7 +500,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b49636d6", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -524,7 +524,7 @@ }, { "cell_type": "markdown", - "id": "c6c31304-6c60-4864-a3ad-d40d6e5117b8", + "id": "37", "metadata": {}, "source": [ "The error grows with time, but saturates to some a constant after the truth and GCM have gotten completely decorrelated. This constant is equal to the sum of the variance of the truth and GCM states. Also, as expected, the error grows much more rapidly without a parameterization, showing that adding the parameterization has resulted in a quantitative improvement in the solution." @@ -532,7 +532,7 @@ }, { "cell_type": "markdown", - "id": "4aea7896", + "id": "38", "metadata": {}, "source": [ "**Climatology based error metric:**\n", @@ -551,7 +551,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c94cfa2f-fd42-44ae-b816-04cb929ff0a9", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -570,7 +570,7 @@ }, { "cell_type": "markdown", - "id": "7b9692de-d650-4054-84e2-7e4b2925a7aa", + "id": "40", "metadata": {}, "source": [ "As seen in the above figure, the long term mean state of the true solution is very different from the solution without the parameterization. The solution with the parameterization is closer to the truth.\n", @@ -581,7 +581,7 @@ { "cell_type": "code", "execution_count": null, - "id": "06f4c23c-6985-4d79-82cc-899c5d0316b6", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -591,7 +591,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59b6da87-2cd4-4f9e-aff4-cd4b9bd95643", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -606,7 +606,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30dd6820-9422-4c1f-ae02-8e5593d9d45b", + "id": "43", "metadata": { "tags": [] }, @@ -622,7 +622,7 @@ { "cell_type": "code", "execution_count": null, - "id": "05c053b7-f5eb-4ece-be3c-88b4d93c38ae", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -638,7 +638,7 @@ }, { "cell_type": "markdown", - "id": "a0d1b84e-e649-431f-9f9e-4265c49e2232", + "id": "45", "metadata": {}, "source": [ "The RMSE difference in the mean state (climatology) is larger when the there is no parameterization. At long time, this metric stabalizes, as the time average starts to become more representative of the long time state of the system. " @@ -646,7 +646,7 @@ }, { "cell_type": "markdown", - "id": "c769ec8e", + "id": "46", "metadata": {}, "source": [ "**Tendency based error metric**:\n", @@ -661,7 +661,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c75e73f6", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -673,7 +673,7 @@ }, { "cell_type": "markdown", - "id": "13a52dd0", + "id": "48", "metadata": {}, "source": [ "Because this metric is cheap to evaluate, as we do not need to integrate the GCM more than 1 time-step, we can start a sensitivity analysis in order to identify good optimal parameters for the specific formulation `naive_parameterization`" @@ -682,7 +682,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2d331c18", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -712,7 +712,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44ac3775", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -726,7 +726,7 @@ }, { "cell_type": "markdown", - "id": "3b65357a", + "id": "51", "metadata": {}, "source": [ "From this analysis, we see that the optimisation problem is probably well posed as the cost function appears pretty smooth. One can also see that the parameter $p_1$ is more important than $p_2$. " @@ -734,7 +734,7 @@ }, { "cell_type": "markdown", - "id": "91d9ad08", + "id": "52", "metadata": { "user_expressions": [] }, @@ -771,7 +771,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8644d36e", + "id": "53", "metadata": {}, "outputs": [], "source": [ @@ -812,7 +812,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca94d484-d1e7-464b-98b2-d34bda5cfa40", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -836,7 +836,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79aeab71-cf42-4d8d-9e4b-3dbb616a5971", + "id": "55", "metadata": {}, "outputs": [], "source": [ @@ -849,7 +849,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7326aa7b-e804-41b0-9d52-8bffb19fad6e", + "id": "56", "metadata": {}, "outputs": [], "source": [ @@ -864,7 +864,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d18f8c6d-8a76-4dd9-ad14-a9e54f1d3713", + "id": "57", "metadata": {}, "outputs": [], "source": [ @@ -874,7 +874,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a4ed0a93-9560-4466-a91f-2a048bfea1d1", + "id": "58", "metadata": {}, "outputs": [], "source": [ @@ -893,7 +893,7 @@ }, { "cell_type": "markdown", - "id": "3a885f6f-0055-478d-9093-3d7080ee9ff4", + "id": "59", "metadata": {}, "source": [ "Under the perturbations considered above, the lack of missing physics contributes the most error to the GCM. \n", @@ -904,7 +904,7 @@ }, { "cell_type": "markdown", - "id": "e3e6cb41", + "id": "60", "metadata": {}, "source": [ "### Comparing GCMs with small errors in the forcing F" @@ -913,7 +913,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79c71ecb", + "id": "61", "metadata": {}, "outputs": [], "source": [ @@ -932,7 +932,7 @@ { "cell_type": "code", "execution_count": null, - "id": "68560d2f", + "id": "62", "metadata": {}, "outputs": [], "source": [ @@ -948,7 +948,7 @@ }, { "cell_type": "markdown", - "id": "424e69d2", + "id": "63", "metadata": {}, "source": [ "### Comparing GCM solutions with slight errors in the (resolved) initial condition" @@ -957,7 +957,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7c059fc3", + "id": "64", "metadata": {}, "outputs": [], "source": [ @@ -979,7 +979,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5adef508", + "id": "65", "metadata": {}, "outputs": [], "source": [ @@ -995,7 +995,7 @@ }, { "cell_type": "markdown", - "id": "d8ebe8ba", + "id": "66", "metadata": {}, "source": [ "The above two experiments illustrate that the definition of our distance metrics should take into account the other sources of errors in our GCMs:\n", @@ -1006,7 +1006,7 @@ }, { "cell_type": "markdown", - "id": "fce3cb8f", + "id": "67", "metadata": {}, "source": [ "### Comparing GCMs with identical parameters but different time discretization " @@ -1015,7 +1015,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5648a815", + "id": "68", "metadata": {}, "outputs": [], "source": [ @@ -1034,7 +1034,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8f3909f2", + "id": "69", "metadata": {}, "outputs": [], "source": [ @@ -1049,7 +1049,7 @@ }, { "cell_type": "markdown", - "id": "69c2b3ac", + "id": "70", "metadata": {}, "source": [ "This last experiment illustrates that depending on the numerical schemes of the GCM, the distance between the model prediction and the true state can be substantial. There again, we have an additional constraint on the definition of our distance metrics.\n", diff --git a/notebooks/rvm.py b/notebooks/rvm.py index a995ee5e..1b73d066 100644 --- a/notebooks/rvm.py +++ b/notebooks/rvm.py @@ -26,6 +26,7 @@ calculation is conducted iteratively. """ + import numpy as np from scipy.optimize import minimize @@ -36,7 +37,6 @@ class BaseRVM(BaseEstimator): - """Base Relevance Vector Machine class. Implementation of Mike Tipping's Relevance Vector Machine using the @@ -236,7 +236,6 @@ def fit(self, X, y, X_labels, standardise=False): class RVR(BaseRVM, RegressorMixin): - """Relevance Vector Machine Regression. Implementation of Mike Tipping's Relevance Vector Machine for regression diff --git a/notebooks/sindy_L96_2scale.ipynb b/notebooks/sindy_L96_2scale.ipynb index e4596235..568d784e 100644 --- a/notebooks/sindy_L96_2scale.ipynb +++ b/notebooks/sindy_L96_2scale.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f37e1144-636c-4e98-b40d-1205c665d96f", + "id": "0", "metadata": {}, "source": [ "# Applying SINDy equation identification to L96" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "7cd0aa75-b52a-4b0b-b5ec-df2ac8c1bd86", + "id": "1", "metadata": { "tags": [] }, @@ -20,7 +20,7 @@ }, { "cell_type": "markdown", - "id": "42566028-fe9c-4c50-8582-b5e217d6394d", + "id": "2", "metadata": {}, "source": [ "This notebook provides an example of SINDy equation identification in application to the two-scale L96 model with the use of `pysindy` library. The objective is to try to identify the governing ODEs for the large-scale variable ($X_i$) if we know only their time measurements. In other words, we want to see how well the SINDy model can capture the form of the governing equations for large-scale variables in the presence of the subgrid forcing by small-scale variables ($Y$) when there is no measurements of the small scales." @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "1f222842-0a8a-4069-addf-55fc44d24dd2", + "id": "3", "metadata": {}, "source": [ "## Import" @@ -37,7 +37,7 @@ { "cell_type": "code", "execution_count": null, - "id": "73bbfeec-72e6-4d81-89b0-e5369671dff9", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "markdown", - "id": "866c8025-f383-4b05-ab30-b75945223edc", + "id": "5", "metadata": { "tags": [] }, @@ -61,7 +61,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22c47249-5ac0-475e-a28b-54f725ad5ecf", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -82,7 +82,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a0461f22-0ffd-4c84-abe0-3e2844a135f6", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -123,7 +123,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d37f0cf2-e597-428a-bb64-4fbe0e424ce4", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -134,7 +134,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10ce16bc-e99a-4260-b132-940e6af15bb8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -157,7 +157,7 @@ }, { "cell_type": "markdown", - "id": "28330e77-e8c5-42ba-b01d-d4088155c397", + "id": "10", "metadata": { "tags": [], "user_expressions": [] @@ -175,7 +175,7 @@ }, { "cell_type": "markdown", - "id": "a5fa3dff-b284-4f82-a641-8c5ef42880f8", + "id": "11", "metadata": {}, "source": [ "Two-time-scale Lorenz 96 model:\n", @@ -185,7 +185,7 @@ }, { "cell_type": "markdown", - "id": "f6611d39-a079-4809-b7ce-a740f0ecf374", + "id": "12", "metadata": {}, "source": [ "We will put K=8 large-scale variables ($X$), for each of them there is J=32 small-scale variables ($Y$). For the first example, we will consider the case of **weak** interaction between the scales, which is defined by the parameter $h$ in the L96 class." @@ -194,7 +194,7 @@ { "cell_type": "code", "execution_count": null, - "id": "771cc013-8731-4885-ae69-00cc19161486", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -207,7 +207,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32c16a5e-5db9-4a96-88bd-f5e9c39d7179", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -219,7 +219,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88ba9965-c64f-4194-86b4-d2ddba8081ef", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -234,7 +234,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2225caa3-bcd8-4b9f-8049-6fb17a8f7c66", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -245,7 +245,7 @@ { "cell_type": "code", "execution_count": null, - "id": "128356d3-b283-4b07-97f3-bc675ab77c10", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -259,7 +259,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8958b6da-690f-451a-8d83-88cbc3403b29", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,7 @@ }, { "cell_type": "markdown", - "id": "12b86c0a-3618-4a2f-abf8-0c9f74dcc240", + "id": "19", "metadata": {}, "source": [ "The data array $X$ will be used for the system identification by SINDy method. We suppose that we do not dispose measurements for the small-scale variables $Y$, so their contribution to the large-scale dymanics will basically treated as noise." @@ -280,7 +280,7 @@ }, { "cell_type": "markdown", - "id": "48efefd9-6716-43fc-b45e-7428249a366d", + "id": "20", "metadata": {}, "source": [ "## Define SINDy model" @@ -288,7 +288,7 @@ }, { "cell_type": "markdown", - "id": "9357c676-12bd-4a64-b609-3fe594201f4e", + "id": "21", "metadata": {}, "source": [ "The goal is to identify the governing equations for the large-scale variables $X$, having the measurement for them." @@ -297,7 +297,7 @@ { "cell_type": "code", "execution_count": null, - "id": "71be9da9-d2c4-40d4-b69a-d9eee9c694b7", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -307,7 +307,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a87dddfb-1980-47c2-9d35-622a1e8bcc58", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -317,7 +317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e386c41-5682-4b3d-bcf4-399b14b36f2f", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -328,7 +328,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1ee1517b-85da-4231-a8f6-9c8966aae990", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +342,7 @@ }, { "cell_type": "markdown", - "id": "720353b2-8448-4836-ac52-e462b5afe5ef", + "id": "26", "metadata": {}, "source": [ "# Optimize coefficients" @@ -351,7 +351,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d0ef6b41-cd32-4cbb-9514-c238b2a7b8ad", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -361,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "725d681c-b7fb-4d9a-aac0-65ffa9f970fa", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -372,7 +372,7 @@ { "cell_type": "code", "execution_count": null, - "id": "254779b5-cc18-4280-9a1b-421844bddffc", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -382,7 +382,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ed037830-d9ec-4a4a-98d5-227f5395eb72", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -392,7 +392,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4caf05c-7ae9-44da-b456-63f3db11ea3c", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9966ea81-0724-4a1b-87d6-79f40ffcc6bf", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -426,7 +426,7 @@ }, { "cell_type": "markdown", - "id": "9da63499-fe14-49c3-a99b-c96416b764b6", + "id": "33", "metadata": {}, "source": [ "We can see that the SINDy model captures correctly the dominant terms and gives quite accurate predictions for the model coefficients and the value of the forcing. The subgrid forcing does not appear in this prediction." @@ -434,7 +434,7 @@ }, { "cell_type": "markdown", - "id": "fa412ba4-4c36-494d-a805-638a632ac731", + "id": "34", "metadata": {}, "source": [ "## Compare" @@ -442,7 +442,7 @@ }, { "cell_type": "markdown", - "id": "1a6c62ed-60e5-4496-84df-32d10716d874", + "id": "35", "metadata": {}, "source": [ "### Same initial conditions" @@ -451,7 +451,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2824415c-95bd-4fdb-8b47-2432500108df", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -461,7 +461,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a49f38e6-581e-46b0-ab85-1efb33156f08", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -482,7 +482,7 @@ }, { "cell_type": "markdown", - "id": "0757ead7-8fc1-418e-8b31-b72b43ae384f", + "id": "38", "metadata": {}, "source": [ "## Other initial conditions" @@ -491,7 +491,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9de3b4aa-cf64-4b34-ab4c-1ce89d4763a2", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -504,7 +504,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36d64b56-4591-4b85-9402-703a12365f95", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -514,7 +514,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a6790161-afc1-4db0-af1e-aa751f5f72b3", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -535,7 +535,7 @@ }, { "cell_type": "markdown", - "id": "af3d1e9f-ef45-4c2c-9eb7-fee641376e62", + "id": "42", "metadata": {}, "source": [ "Similar to the case with L63 model, the system predicted by SINDy gives an accurate prediction of trajectories for a certain time, then the prediction strts deviating from the original model. It is related to the error in the coefficients obtained in optimization anf the chaotical nature of the Lorenz system." @@ -544,7 +544,7 @@ { "cell_type": "code", "execution_count": null, - "id": "74e8f259-a795-47c7-a4b0-cce3faea6c12", + "id": "43", "metadata": {}, "outputs": [], "source": [ @@ -554,7 +554,7 @@ { "cell_type": "code", "execution_count": null, - "id": "624b632a-787f-4942-a192-62e516fb4704", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -564,7 +564,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ed1605e6-809b-46d4-9762-f1e908e15378", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -574,7 +574,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5cbf9dc-ffdd-4447-ac44-ce098508b5f2", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -595,7 +595,7 @@ }, { "cell_type": "markdown", - "id": "3b1cad42-fda7-4764-af7a-5217a09ce8c9", + "id": "47", "metadata": { "tags": [] }, @@ -605,7 +605,7 @@ }, { "cell_type": "markdown", - "id": "b827bc2c-127d-48ba-a262-7942ff0f5e2b", + "id": "48", "metadata": {}, "source": [ "Now, let us consider the L96 two-scale model with a stronger coupling between the large and small scales. The coupling coefficient here is put to be 10 times higher." @@ -614,7 +614,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6c79ad20-f6db-43be-9bf5-c69a53ba1956", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -628,7 +628,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e84bddb0-2a1b-4224-93d9-3d1c5ea48fe8", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -641,7 +641,7 @@ }, { "cell_type": "markdown", - "id": "af054c0e-014b-4dc6-8b4f-b38f918dd943", + "id": "51", "metadata": {}, "source": [ "### Build the SINDy model" @@ -650,7 +650,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3b352eaa-2aa6-4d62-ae70-6494fb83d84a", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -665,7 +665,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17c9b317-bb0b-4299-9e3f-967836c7d3ef", + "id": "53", "metadata": {}, "outputs": [], "source": [ @@ -675,7 +675,7 @@ { "cell_type": "code", "execution_count": null, - "id": "816f9bb0-c168-4bf6-bd98-c939ef929c04", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -685,7 +685,7 @@ { "cell_type": "code", "execution_count": null, - "id": "37168b11-a774-4b61-8300-20b759c84bdf", + "id": "55", "metadata": {}, "outputs": [], "source": [ @@ -709,7 +709,7 @@ }, { "cell_type": "markdown", - "id": "eceb3597-f4d7-4f43-ba45-5fbaf472b523", + "id": "56", "metadata": {}, "source": [ "We can see that despite the fact that in general SINDy captures the structure of ODEs, the coefficients become different from the original model, due to the influence of the interaction with the small scale. Interestingly, the coefficients in front of linear terms in the ODEs are now all close to 1.7-1.8 (instead of 1.0 in the true model), and the predicted forcing value is systematically smaller than the true value 18. " @@ -717,7 +717,7 @@ }, { "cell_type": "markdown", - "id": "b8fcce95-8330-4f29-b385-d2943deb5ad2", + "id": "57", "metadata": {}, "source": [ "### Run the predicted model" @@ -726,7 +726,7 @@ { "cell_type": "code", "execution_count": null, - "id": "89d3d8d3-b82f-433b-b239-c530b6d4b06b", + "id": "58", "metadata": {}, "outputs": [], "source": [ @@ -736,7 +736,7 @@ { "cell_type": "code", "execution_count": null, - "id": "975e70b0-2bfb-4ab7-8b6c-a4d5d68b5e39", + "id": "59", "metadata": {}, "outputs": [], "source": [ @@ -757,7 +757,7 @@ }, { "cell_type": "markdown", - "id": "4bb4f383-606b-45e8-99a7-bcbe5a5bb860", + "id": "60", "metadata": { "tags": [] }, @@ -767,7 +767,7 @@ }, { "cell_type": "markdown", - "id": "8042a757-85e5-4113-a568-fcb0cfcec4d6", + "id": "61", "metadata": {}, "source": [ "## Conclusions\n", diff --git a/notebooks/symbolic_methods_comparison.ipynb b/notebooks/symbolic_methods_comparison.ipynb index 83e8231c..3ff098fc 100644 --- a/notebooks/symbolic_methods_comparison.ipynb +++ b/notebooks/symbolic_methods_comparison.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f65214e4-a677-4ede-871e-5f3fef45f2b1", + "id": "0", "metadata": {}, "source": [ "# Introduction to Equation Discovery - Comparing Symbolic Regression Methods" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "25d66ef6-0c47-4725-8d01-eb8141171d6c", + "id": "1", "metadata": {}, "source": [ "Upto now we have seen that the climate models we developed are using physical equations that are based on our understanding of the physical processes that govern the climate. However, these equations are often complex and difficult to solve, and they can only be used to model the climate at a coarse resolution.\n", @@ -23,7 +23,7 @@ }, { "cell_type": "markdown", - "id": "7a104c71", + "id": "2", "metadata": {}, "source": [ "In this notebook, we'll review some common techniques for **Symbolic Regression (SR)**, a family of methods of discovering (simple) equations that relate inputs to outputs.\n", @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9070565d", + "id": "3", "metadata": { "tags": [] }, @@ -64,7 +64,7 @@ }, { "cell_type": "markdown", - "id": "c74c2213-8d54-477b-b483-0dc38d65a219", + "id": "4", "metadata": {}, "source": [ "## Data\n", @@ -81,7 +81,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58b37b2c", + "id": "5", "metadata": { "tags": [] }, @@ -96,7 +96,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c04e4072", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -116,7 +116,7 @@ { "cell_type": "code", "execution_count": null, - "id": "98e6a5fc-5a49-4db8-b0fd-a4d34806f465", + "id": "7", "metadata": { "tags": [] }, @@ -129,7 +129,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5930af35", + "id": "8", "metadata": { "tags": [] }, @@ -145,7 +145,7 @@ }, { "cell_type": "markdown", - "id": "752732b1", + "id": "9", "metadata": {}, "source": [ "(genetic-programming-section)=\n", @@ -173,7 +173,7 @@ }, { "cell_type": "markdown", - "id": "eb6501f9", + "id": "10", "metadata": {}, "source": [ "(gplearn-sec)=\n", @@ -187,7 +187,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1100219a-ac1a-4b47-a6f9-c9606b8c852e", + "id": "11", "metadata": { "tags": [] }, @@ -200,7 +200,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1423685b", + "id": "12", "metadata": { "tags": [] }, @@ -223,7 +223,7 @@ }, { "cell_type": "markdown", - "id": "65cd4ab0", + "id": "13", "metadata": {}, "source": [ "#### Interpret results" @@ -232,7 +232,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fa7a7000", + "id": "14", "metadata": { "tags": [] }, @@ -243,7 +243,7 @@ }, { "cell_type": "markdown", - "id": "0b9ccf66", + "id": "15", "metadata": {}, "source": [ "This looks very close to the right answer, though the constants are slightly off." @@ -252,7 +252,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7fa571b", + "id": "16", "metadata": { "tags": [] }, @@ -263,7 +263,7 @@ }, { "cell_type": "markdown", - "id": "8e187826-75be-4522-a2eb-46b70e103847", + "id": "17", "metadata": {}, "source": [ "This is likely because gplearn mutations which add or update constants always pick values by drawing random uniform values (within pre-specified ranges, by default -1 to 1). In my limited experience so far, this is one of the major inefficiencies." @@ -271,7 +271,7 @@ }, { "cell_type": "markdown", - "id": "7a5c4b50", + "id": "18", "metadata": {}, "source": [ "(pysr-sec)=\n", @@ -285,7 +285,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8fff8157-5658-4a29-a97e-a5f0e4d3d265", + "id": "19", "metadata": { "tags": [] }, @@ -297,7 +297,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5f15cf43", + "id": "20", "metadata": { "tags": [] }, @@ -313,7 +313,7 @@ }, { "cell_type": "markdown", - "id": "2e79683d", + "id": "21", "metadata": {}, "source": [ "#### Interpret results" @@ -322,7 +322,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0037d292", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -333,7 +333,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5adf39e9", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +342,7 @@ }, { "cell_type": "markdown", - "id": "f1de78e9", + "id": "24", "metadata": {}, "source": [ "It looks like PySR is able to not only discover the correct equation, but also format it for us nicely.\n", @@ -353,7 +353,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ac318f36", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -363,7 +363,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8f3ec9c9-517b-420a-854a-824577068130", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a3724a38", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -412,7 +412,7 @@ }, { "cell_type": "markdown", - "id": "acfbc136", + "id": "28", "metadata": {}, "source": [ "(sparse-regression-sec)=\n", @@ -432,7 +432,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c82ac622-e269-4c67-a484-d826b9746c6d", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -458,7 +458,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3df85a0", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -473,7 +473,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59368f96-e85b-4086-bee0-133e31f231eb", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -482,7 +482,7 @@ }, { "cell_type": "markdown", - "id": "2b7b11c2", + "id": "32", "metadata": {}, "source": [ "Applying this twice brings us from 2 features up to 37:" @@ -491,7 +491,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1221f8a1", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -500,7 +500,7 @@ }, { "cell_type": "markdown", - "id": "598e13c9", + "id": "34", "metadata": {}, "source": [ "We can see that the complexity of this approach starts to blow up exponentially, though, as we increase the maximum depth of an expression:" @@ -509,7 +509,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0c865291", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -519,7 +519,7 @@ { "cell_type": "code", "execution_count": null, - "id": "47403518", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -528,7 +528,7 @@ }, { "cell_type": "markdown", - "id": "094e5f3c", + "id": "37", "metadata": {}, "source": [ "If we need arbitrary polynomials of up to 4th order, we end up with orders of magnitude more features than samples, which will cause problems.\n", @@ -541,7 +541,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2e53fc88", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -555,7 +555,7 @@ }, { "cell_type": "markdown", - "id": "1d678e8b", + "id": "39", "metadata": {}, "source": [ "(linear-regression-sec)=\n", @@ -567,7 +567,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a9175e41", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -578,7 +578,7 @@ }, { "cell_type": "markdown", - "id": "8cd9b0d9-f8f7-4cc9-985b-daa322894112", + "id": "41", "metadata": {}, "source": [ "Unfortunately, this doesn't seem to give us a very sparse solution in this case." @@ -586,7 +586,7 @@ }, { "cell_type": "markdown", - "id": "d6fd81cd", + "id": "42", "metadata": {}, "source": [ "(lasso-sec)=\n", @@ -600,7 +600,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d485daad", + "id": "43", "metadata": {}, "outputs": [], "source": [ @@ -611,7 +611,7 @@ }, { "cell_type": "markdown", - "id": "605f5bbc", + "id": "44", "metadata": {}, "source": [ "Here, LASSO ends up doing a pretty good job finding the first two terms of the ground-truth model ($x_0^2 - 0.5x_1^2$), with an approximation of the remaining $\\sin(0.5 x_0 x_1)$ term as $ 0.2(x_0\\sin(x_1)+x_1\\sin(x_0)) + 0.1(\\sin(x_0 x_1) + x_0x_1)$:" @@ -620,7 +620,7 @@ { "cell_type": "code", "execution_count": null, - "id": "732ab3e2", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -643,7 +643,7 @@ }, { "cell_type": "markdown", - "id": "83068241", + "id": "46", "metadata": {}, "source": [ "(rvm-sec)=\n", @@ -655,7 +655,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b96ed603", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -666,7 +666,7 @@ }, { "cell_type": "markdown", - "id": "037f08eb-2e3c-4ecd-9f0f-962413e31d8c", + "id": "48", "metadata": {}, "source": [ "RVM does discover the $x_0^2$ and $-0.5x_1^2$ terms almost exactly, but ends up with an even more complex approximate expression for the missing $\\sin(0.5x_0x_1)$ term." @@ -674,7 +674,7 @@ }, { "cell_type": "markdown", - "id": "38dff9b2", + "id": "49", "metadata": {}, "source": [ "(stlsq-sec)=\n", @@ -686,7 +686,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6ae57fce", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -697,7 +697,7 @@ }, { "cell_type": "markdown", - "id": "7c57ec9d", + "id": "51", "metadata": {}, "source": [ "This method ends up finding a sparser solution than Lasso, though I did tweak the parameters a bit to make that happen :)\n", @@ -707,7 +707,7 @@ }, { "cell_type": "markdown", - "id": "3016804a", + "id": "52", "metadata": {}, "source": [ "Overall, all of these methods were able to find the $x_0^2$ and $-0.5x_1^2$ terms, but all of them also needed to find approximations for the remaining term because it wasn't in the feature library. Meaning the expressions are both less accurate and more complex." @@ -715,7 +715,7 @@ }, { "cell_type": "markdown", - "id": "da109831", + "id": "53", "metadata": {}, "source": [ "### Tweaking the Feature Library\n", @@ -726,7 +726,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b0e30e71", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -741,7 +741,7 @@ }, { "cell_type": "markdown", - "id": "0d31d7c4", + "id": "55", "metadata": {}, "source": [ "In this case, the correct model should be expressed as\n", @@ -756,7 +756,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2dc1a7bf", + "id": "56", "metadata": {}, "outputs": [], "source": [ @@ -768,7 +768,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9f9c67f7", + "id": "57", "metadata": {}, "outputs": [], "source": [ @@ -780,7 +780,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6ff1bb78", + "id": "58", "metadata": {}, "outputs": [], "source": [ @@ -792,7 +792,7 @@ { "cell_type": "code", "execution_count": null, - "id": "65cb4ad3", + "id": "59", "metadata": {}, "outputs": [], "source": [ @@ -803,7 +803,7 @@ }, { "cell_type": "markdown", - "id": "a1f55c25", + "id": "60", "metadata": {}, "source": [ "In this case, every method except Lasso finds the true model exactly (which held true across many choices of regularization parameter).\n", @@ -816,7 +816,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a9159d34", + "id": "61", "metadata": {}, "outputs": [], "source": [ @@ -826,7 +826,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a3ab527f", + "id": "62", "metadata": {}, "outputs": [], "source": [ @@ -838,7 +838,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ce01433d", + "id": "63", "metadata": {}, "outputs": [], "source": [ @@ -850,7 +850,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7f3d131", + "id": "64", "metadata": {}, "outputs": [], "source": [ @@ -862,7 +862,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a0e87cc2", + "id": "65", "metadata": {}, "outputs": [], "source": [ @@ -873,7 +873,7 @@ }, { "cell_type": "markdown", - "id": "90e5a4d7", + "id": "66", "metadata": {}, "source": [ "In this case, it actually becomes _Lasso_ which gets closest to the true model (i.e. is the only regression technique whose first three leading terms match the ground-truth model). This reversal illustrates how noise can strongly impact the effectiveness of different methods for learning symbolic models.\n", @@ -884,7 +884,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1de9bcf5", + "id": "67", "metadata": {}, "outputs": [], "source": [ @@ -899,7 +899,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1834ef0d", + "id": "68", "metadata": {}, "outputs": [], "source": [ @@ -909,7 +909,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ad12ddb5", + "id": "69", "metadata": {}, "outputs": [], "source": [ @@ -919,7 +919,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3f4831fe", + "id": "70", "metadata": {}, "outputs": [], "source": [ @@ -928,7 +928,7 @@ }, { "cell_type": "markdown", - "id": "6fd761c3", + "id": "71", "metadata": {}, "source": [ "It looks like PySR was still able to discover the true expression as part of its Pareto frontier, though with the noise it's only rated third-best for the default tradeoff between complexity and performance (with a sin-less version taking first)." @@ -936,7 +936,7 @@ }, { "cell_type": "markdown", - "id": "68ffe19c", + "id": "72", "metadata": {}, "source": [ "## Other Methods for Symbolic Regression\n", @@ -952,7 +952,7 @@ { "cell_type": "code", "execution_count": null, - "id": "282ec194-148a-4e70-9570-104e18b007bf", + "id": "73", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/symbolic_vs_nn_multiscale_L96.ipynb b/notebooks/symbolic_vs_nn_multiscale_L96.ipynb index b89ef174..aa27a055 100644 --- a/notebooks/symbolic_vs_nn_multiscale_L96.ipynb +++ b/notebooks/symbolic_vs_nn_multiscale_L96.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "d41242f2", + "id": "0", "metadata": { "user_expressions": [] }, @@ -31,7 +31,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9070565d", + "id": "1", "metadata": { "tags": [] }, @@ -62,7 +62,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bf5ecc5a-b99e-4dbb-8b46-3920ef9e3513", + "id": "2", "metadata": { "tags": [] }, @@ -75,7 +75,7 @@ }, { "cell_type": "markdown", - "id": "379d8bd2-e489-44d2-9791-fe7b453c5636", + "id": "3", "metadata": { "user_expressions": [] }, @@ -90,7 +90,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fbc1437d-188e-427f-8da9-5e955cdc58b0", + "id": "4", "metadata": { "tags": [] }, @@ -110,7 +110,7 @@ }, { "cell_type": "markdown", - "id": "b0350f65", + "id": "5", "metadata": { "user_expressions": [] }, @@ -121,7 +121,7 @@ { "cell_type": "code", "execution_count": null, - "id": "54892735", + "id": "6", "metadata": { "tags": [] }, @@ -138,7 +138,7 @@ { "cell_type": "code", "execution_count": null, - "id": "351d3e5a", + "id": "7", "metadata": { "tags": [ "hide-input" @@ -159,7 +159,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7be290f", + "id": "8", "metadata": { "tags": [ "hide-input" @@ -182,7 +182,7 @@ }, { "cell_type": "markdown", - "id": "04e202f0", + "id": "9", "metadata": { "user_expressions": [] }, @@ -194,7 +194,7 @@ }, { "cell_type": "markdown", - "id": "aa935b26-4491-4678-a94a-48675c9dea1b", + "id": "10", "metadata": { "user_expressions": [] }, @@ -205,7 +205,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cd24ec2c-f8ad-4481-b81e-397af8239330", + "id": "11", "metadata": { "tags": [] }, @@ -221,7 +221,7 @@ }, { "cell_type": "markdown", - "id": "5b46ea2a-eb12-4be7-822d-bc0f8f93fb58", + "id": "12", "metadata": { "tags": [], "user_expressions": [] @@ -233,7 +233,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b288a4a4-078b-44d5-8e41-1e058de861ed", + "id": "13", "metadata": { "tags": [] }, @@ -256,7 +256,7 @@ }, { "cell_type": "markdown", - "id": "745bcc69-4d0f-4b3e-a1de-41e341b4eb6d", + "id": "14", "metadata": { "user_expressions": [] }, @@ -268,7 +268,7 @@ }, { "cell_type": "markdown", - "id": "c6bcfcb9-b79e-4777-8e55-a31373e5241b", + "id": "15", "metadata": { "user_expressions": [] }, @@ -283,7 +283,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a0ed1572-d504-47db-b962-98a1300ec4a1", + "id": "16", "metadata": { "tags": [ "hide-cell" @@ -361,7 +361,7 @@ }, { "cell_type": "markdown", - "id": "6526b2d4", + "id": "17", "metadata": { "user_expressions": [] }, @@ -372,7 +372,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6445e5d8", + "id": "18", "metadata": { "tags": [] }, @@ -394,7 +394,7 @@ }, { "cell_type": "markdown", - "id": "22dadd73-44d8-4aa4-b7a7-72352dc9ccaa", + "id": "19", "metadata": { "user_expressions": [] }, @@ -405,7 +405,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ab123e48", + "id": "20", "metadata": { "tags": [] }, @@ -423,7 +423,7 @@ }, { "cell_type": "markdown", - "id": "26fd373a-2f8e-427e-9ac4-67eb365f9675", + "id": "21", "metadata": { "user_expressions": [] }, @@ -434,7 +434,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26a0eec2-007c-49fa-96ca-8b32ac839d4b", + "id": "22", "metadata": { "tags": [] }, @@ -449,7 +449,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d5965164", + "id": "23", "metadata": { "tags": [] }, @@ -461,7 +461,7 @@ }, { "cell_type": "markdown", - "id": "1ba1dacc", + "id": "24", "metadata": { "user_expressions": [] }, @@ -471,7 +471,7 @@ }, { "cell_type": "markdown", - "id": "4e3aaddc-fcc8-41eb-ab37-cc923a83aeb8", + "id": "25", "metadata": { "user_expressions": [] }, @@ -482,7 +482,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ebec28d9", + "id": "26", "metadata": { "tags": [] }, @@ -495,7 +495,7 @@ }, { "cell_type": "markdown", - "id": "1677913e", + "id": "27", "metadata": { "user_expressions": [] }, @@ -506,7 +506,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4f3f9d6c", + "id": "28", "metadata": { "tags": [] }, @@ -521,7 +521,7 @@ }, { "cell_type": "markdown", - "id": "01bb0893", + "id": "29", "metadata": { "user_expressions": [] }, @@ -532,7 +532,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4f2a4893", + "id": "30", "metadata": { "tags": [] }, @@ -559,7 +559,7 @@ }, { "cell_type": "markdown", - "id": "d29faede", + "id": "31", "metadata": { "user_expressions": [] }, @@ -572,7 +572,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3947cb34", + "id": "32", "metadata": { "tags": [] }, @@ -598,7 +598,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cd991e1b", + "id": "33", "metadata": { "tags": [] }, @@ -610,7 +610,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ccd32a51", + "id": "34", "metadata": { "tags": [] }, @@ -623,7 +623,7 @@ }, { "cell_type": "markdown", - "id": "6510bdd5", + "id": "35", "metadata": { "user_expressions": [] }, @@ -634,7 +634,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f88adb60-93a2-44f9-8314-c411e59d43e7", + "id": "36", "metadata": { "tags": [] }, @@ -650,7 +650,7 @@ }, { "cell_type": "markdown", - "id": "59da9e31", + "id": "37", "metadata": { "user_expressions": [] }, @@ -663,7 +663,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bc2f7f44", + "id": "38", "metadata": { "tags": [] }, @@ -686,7 +686,7 @@ { "cell_type": "code", "execution_count": null, - "id": "67789b73", + "id": "39", "metadata": { "tags": [] }, @@ -712,7 +712,7 @@ { "cell_type": "code", "execution_count": null, - "id": "55fca58a-911e-46c3-9a15-4a2ba8590eee", + "id": "40", "metadata": { "tags": [] }, @@ -723,7 +723,7 @@ }, { "cell_type": "markdown", - "id": "905cfb5c", + "id": "41", "metadata": { "user_expressions": [] }, @@ -733,7 +733,7 @@ }, { "cell_type": "markdown", - "id": "bfecdaba", + "id": "42", "metadata": { "user_expressions": [] }, @@ -746,7 +746,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a30255fd", + "id": "43", "metadata": { "tags": [] }, @@ -766,7 +766,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b469cac9", + "id": "44", "metadata": { "tags": [] }, @@ -779,7 +779,7 @@ }, { "cell_type": "markdown", - "id": "30121a3d", + "id": "45", "metadata": { "user_expressions": [] }, @@ -790,7 +790,7 @@ { "cell_type": "code", "execution_count": null, - "id": "721fd64e-eae8-4b1c-b2f9-053aab36902a", + "id": "46", "metadata": { "tags": [] }, @@ -804,7 +804,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c928dc9b-eb83-45bc-a416-4357f1cb6c30", + "id": "47", "metadata": { "tags": [] }, @@ -815,7 +815,7 @@ }, { "cell_type": "markdown", - "id": "222c9020", + "id": "48", "metadata": { "user_expressions": [] }, @@ -828,7 +828,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0d60408a", + "id": "49", "metadata": { "tags": [] }, @@ -842,7 +842,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f3918224", + "id": "50", "metadata": { "tags": [] }, @@ -853,7 +853,7 @@ }, { "cell_type": "markdown", - "id": "45196067", + "id": "51", "metadata": { "user_expressions": [] }, @@ -864,7 +864,7 @@ { "cell_type": "code", "execution_count": null, - "id": "210d8e5f", + "id": "52", "metadata": { "tags": [] }, @@ -878,7 +878,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2b3aaf12", + "id": "53", "metadata": { "tags": [] }, @@ -889,7 +889,7 @@ }, { "cell_type": "markdown", - "id": "d666fd8e", + "id": "54", "metadata": { "user_expressions": [] }, @@ -900,7 +900,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4165960d", + "id": "55", "metadata": { "tags": [] }, @@ -931,7 +931,7 @@ }, { "cell_type": "markdown", - "id": "bdbfaa2c", + "id": "56", "metadata": { "user_expressions": [] }, @@ -944,7 +944,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24aa3a4b", + "id": "57", "metadata": { "tags": [] }, @@ -960,7 +960,7 @@ }, { "cell_type": "markdown", - "id": "0e5bb5ba", + "id": "58", "metadata": { "user_expressions": [] }, @@ -973,7 +973,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ddd19c3d", + "id": "59", "metadata": { "tags": [] }, @@ -1001,7 +1001,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da7ab7fb", + "id": "60", "metadata": { "tags": [] }, @@ -1015,7 +1015,7 @@ { "cell_type": "code", "execution_count": null, - "id": "794ef1ce", + "id": "61", "metadata": { "tags": [] }, @@ -1027,7 +1027,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4d853e84", + "id": "62", "metadata": { "tags": [] }, @@ -1055,7 +1055,7 @@ }, { "cell_type": "markdown", - "id": "3a6f67e0", + "id": "63", "metadata": { "user_expressions": [] }, @@ -1070,7 +1070,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8310dfba", + "id": "64", "metadata": { "tags": [] }, @@ -1095,7 +1095,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9a9027c", + "id": "65", "metadata": { "tags": [] }, @@ -1107,7 +1107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f06a133d", + "id": "66", "metadata": { "tags": [] }, @@ -1121,7 +1121,7 @@ }, { "cell_type": "markdown", - "id": "e828836f", + "id": "67", "metadata": { "user_expressions": [] }, @@ -1132,7 +1132,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45225961-2844-4d42-8756-b99eed311443", + "id": "68", "metadata": { "tags": [] }, @@ -1150,7 +1150,7 @@ }, { "cell_type": "markdown", - "id": "84b55c24", + "id": "69", "metadata": { "tags": [], "user_expressions": []