Merge pull request #31 from jukent/main

add bias section
ProjectPythia · Nov 30, 2023 · 5318d03 · 5318d03
2 parents d190612 + c7e7a0e
commit 5318d03
Showing 1 changed file with 145 additions and 90 deletions.
diff --git a/notebooks/taylor-diagrams.ipynb b/notebooks/taylor-diagrams.ipynb
@@ -17,13 +17,14 @@
     "\n",
     "Taylor diagrams are popular for displaying climatological data because the normalization of variances helps account for the widely varying numerical values of geoscientific variables such as temperature or precipitation.\n",
     "\n",
-    "This notebook explores how to create and customize Taylor diagrams using `geocat-viz`.\n",
+    "This notebook explores how to create and customize Taylor diagrams using `geocat-viz`. See the more information on [`geocat-viz.TaylorDiagram`](https://geocat-viz.readthedocs.io/en/latest/user_api/generated/geocat.viz.taylor.TaylorDiagram.html).\n",
     "\n",
     "1. Creating a Simple Taylor Diagram\n",
     "1. Necessary Statistical Analysis\n",
     "1. Plotting Different Ensemble Members\n",
-    "1. Plotting Multiple Variables\n",
     "1. Plotting Multiple Models\n",
+    "1. Plotting Multiple Variables\n",
+    "1. Plotting Bias\n",
     "1. Variants"
    ]
   },
@@ -257,11 +258,11 @@
     "taylor.add_model_set(\n",
     "    temp_rcp85_std,\n",
     "    temp_rcp85_corr,\n",
-    "    fontsize=20,\n",
+    "    fontsize=20,  # specify font size\n",
     "    xytext=(-5, 10),  # marker label location, in pixels\n",
-    "    color='red',\n",
-    "    marker='o',\n",
-    "    facecolors='none',\n",
+    "    color='red', # specify marker color\n",
+    "    marker='o', # specify marker shape\n",
+    "    facecolors='none', # specify marker fill\n",
     "    s=100)  # marker size\n",
     "\n",
     "# Add legend of ensemble names\n",
@@ -276,11 +277,15 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Plotting Multiple Variables\n",
+    "## Plotting Multiple Models\n",
     "\n",
-    "A Taylor Diagram can support multiple model sets, you simply need to call `taylor.add_model_set()` multiple times. By adding the `label` kwarg and calling `taylor.add_legend()` you can add a label distinguishing between the two sets.\n",
+    "Another potential use case for a Taylor diagram is to plot multiple models. Here we compare RCP2.6, RCP4.5, and RCP8.5 together. \n",
+    "\n",
+    "Because it isn't meaningful to compare ensemble members across model runs (the nature of the perturbations isn't reliably similar across RCPs or labs), we will look at the first ensemble `r1i1p1` for all models. For your analysis, you might find it more meaningful to average across ensemble members, but we'll keep it simple for this plotting example.\n",
     "\n",
-    "Let's repeat our calculations but for the pressure variable."
+    "Of course, you could still chose to display more information on one graph, but there is no real conection between the first ensemble of one model versus another.\n",
+    "\n",
+    "In this final example, we'll add another layer of complexity to our Taylor Diagram plot with contour lines of constant normalized standard deviation."
    ]
   },
   {
@@ -289,8 +294,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ps_rcp85 = xr.open_dataset(gdf.get('netcdf_files/ps_Amon_CanESM2_rcp85_2022_xyav.nc'))\n",
-    "ps_rcp85['time'] = ps_rcp85.indexes['time'].to_datetimeindex()"
+    "# Open RCP26 and RCP45 files\n",
+    "tas_rcp26 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp26_2022_xyav.nc'))\n",
+    "tas_rcp26['time'] = tas_rcp26.indexes['time'].to_datetimeindex()\n",
+    "\n",
+    "tas_rcp45 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp45_2022_xyav.nc'))\n",
+    "tas_rcp45['time'] = tas_rcp45.indexes['time'].to_datetimeindex()"
    ]
   },
   {
@@ -299,20 +308,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ps_rcp85_std = []\n",
-    "ps_rcp85_corr = []\n",
-    "\n",
-    "std_ps_obsv = float(era5_ps.std().values)\n",
-    "\n",
-    "for em in list(ps_rcp85.data_vars): # for each ensemble member\n",
-    "    std = float(ps_rcp85[em].std().values)\n",
-    "    std_norm = std / std_ps_obsv\n",
+    "# Perform statistical analysis to create our standard deviation and correlation coefficient lists\n",
+    "temp_rcp26_std = float(tas_rcp26['r1i1p1'].std().values) \n",
+    "temp_rcp26_std_norm = temp_rcp26_std / std_temp_obsv\n",
+    "temp_rcp26_corr = float(xr.corr(era5_temp, tas_rcp26['r1i1p1']).values)\n",
     "\n",
-    "    corr = float(xr.corr(era5_ps, ps_rcp85[em]).values)\n",
+    "temp_rcp45_std = float(tas_rcp45['r1i1p1'].std().values)\n",
+    "temp_rcp45_std_norm = temp_rcp45_std / std_temp_obsv\n",
+    "temp_rcp45_corr = float(xr.corr(era5_temp, tas_rcp45['r1i1p1']).values)\n",
     "\n",
-    "    ps_rcp85_std.append(std)\n",
-    "    corr = 0 if corr < 0 else corr # This is producing negative correlations for some values which is destroying the plot\n",
-    "    ps_rcp85_corr.append(corr) "
+    "temp_std = [temp_rcp26_std_norm, temp_rcp45_std_norm, temp_rcp85_std[0]]\n",
+    "temp_corr = [temp_rcp26_corr, temp_rcp45_corr, temp_rcp85_corr[0]]"
    ]
   },
   {
@@ -330,69 +336,49 @@
     "# Also enforces proper X-Y ratio\n",
     "taylor.add_xgrid(np.array([0.6, 0.9]))\n",
     "\n",
-    "# Add temperature data\n",
+    "# Add model set for temp dataset\n",
     "taylor.add_model_set(\n",
-    "    temp_rcp85_std,\n",
-    "    temp_rcp85_corr,\n",
+    "    temp_std,\n",
+    "    temp_corr,\n",
     "    fontsize=20,\n",
     "    xytext=(-5, 10),  # marker label location, in pixels\n",
     "    color='red',\n",
     "    marker='o',\n",
     "    facecolors='none',\n",
-    "    label='Near Surface Temperature',\n",
     "    s=100)  # marker size\n",
     "\n",
-    "# Add pressure data\n",
-    "taylor.add_model_set(\n",
-    "    ps_rcp85_std,\n",
-    "    ps_rcp85_corr,\n",
-    "    fontsize=20,\n",
-    "    xytext=(-5, 10),  # marker label location, in pixels\n",
-    "    color='blue',\n",
-    "    marker='D',\n",
-    "    facecolors='none',\n",
-    "    label='Surface Pressure',\n",
-    "    s=100)\n",
-    "\n",
-    "# Add figure title\n",
-    "plt.title(\"RCP85\", size=26, pad=45);\n",
+    "#gv.util.set_axes_limits_and_ticks(ax, xlim=[0,2])\n",
     "\n",
-    "# Add legend of ensemble names\n",
-    "namearr = list(ps_rcp85.data_vars)\n",
+    "namearr = ['rcp26', 'rcp45', 'rcp85']\n",
     "taylor.add_model_name(namearr, fontsize=16)\n",
     "\n",
-    "# Enforce x axis\n",
-    "gv.set_axes_limits_and_ticks(ax, xlim=[0, 2])\n",
+    "# Add figure title\n",
+    "plt.title(\"CMIP5 Temperature - First Ensemble Member\", size=26, pad=45)\n",
     "\n",
-    "# Add figure legend\n",
-    "taylor.add_legend(fontsize=16, pad=45);"
+    "# Add constant centered RMS difference contours.\n",
+    "taylor.add_contours(levels=np.arange(0, 1.1, 0.25),\n",
+    "                 colors='lightgrey',\n",
+    "                 linewidths=0.5);"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Plotting Multiple Models\n",
-    "\n",
-    "Another potential use case for a Taylor diagram is to plot multiple models. Here we compare RCP2.6, RCP4.5, and RCP8.5 together. \n",
-    "\n",
-    "Because it isn't meaningful to compare ensemble members across model runs (the nature of the perturbations isn't reliably similar across RCPs or labs), we will look at the first ensemble `r1i1p1` for all models. For your analysis, you might find it more meaningful to average across ensemble members, but we'll keep it simple for this plotting example.\n",
-    "\n",
-    "In this final example, we'll add another layer of complexity to our Taylor Diagram plot with contour lines of constant normalized standard deviation."
+    "Based on these three RCPs it looks like RCP8.5 has the closest correlation to our observed climate behavior, but RCP2.6 has a closer standard deviation to what we experience. Based on your selected data, scientific interpretations may vary."
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# Open RCP26 and RCP45 files\n",
-    "tas_rcp26 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp26_2022_xyav.nc'))\n",
-    "tas_rcp26['time'] = tas_rcp26.indexes['time'].to_datetimeindex()\n",
+    "## Plotting Multiple Variables\n",
     "\n",
-    "tas_rcp45 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp45_2022_xyav.nc'))\n",
-    "tas_rcp45['time'] = tas_rcp45.indexes['time'].to_datetimeindex()"
+    "A Taylor Diagram can support multiple model sets, you simply need to call `taylor.add_model_set()` multiple times. By adding the `label` kwarg and calling `taylor.add_legend()` you can add a label distinguishing between the two sets.\n",
+    "\n",
+    "Since we've already demonstrated the statistical analysis necessary to perform Taylor Diagrams, the following example will be using sample data.\n",
+    "\n",
+    "Here we make sample data for 7 common climate model variables, for two different models."
    ]
   },
   {
@@ -401,17 +387,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Perform statistical analysis to create our standard deviation and correlation coefficient lists\n",
-    "temp_rcp26_std = float(tas_rcp26['r1i1p1'].std().values) \n",
-    "temp_rcp26_std_norm = temp_rcp26_std / std_temp_obsv\n",
-    "temp_rcp26_corr = float(xr.corr(era5_temp, tas_rcp26['r1i1p1']).values)\n",
+    "# Create sample data\n",
     "\n",
-    "temp_rcp45_std = float(tas_rcp45['r1i1p1'].std().values)\n",
-    "temp_rcp45_std_norm = temp_rcp45_std / std_temp_obsv\n",
-    "temp_rcp45_corr = float(xr.corr(era5_temp, tas_rcp45['r1i1p1']).values)\n",
+    "# Model A\n",
+    "a_sdev = [1.230, 0.988, 1.092, 1.172, 1.064, 0.966, 1.079]  # normalized standard deviation\n",
+    "a_ccorr = [0.958, 0.973, 0.740, 0.743, 0.922, 0.982, 0.952]  # correlation coefficient\n",
     "\n",
-    "temp_std = [temp_rcp26_std_norm, temp_rcp45_std_norm, temp_rcp85_std[0]]\n",
-    "temp_corr = [temp_rcp26_corr, temp_rcp45_corr, temp_rcp85_corr[0]]"
+    "# Model B\n",
+    "b_sdev = [1.129, 0.996, 1.016, 1.134, 1.023, 0.962, 1.048]  # normalized standard deviation\n",
+    "b_ccorr = [0.963, 0.975, 0.801, 0.814, 0.946, 0.984, 0.968]  # correlation coefficient\n",
+    "\n",
+    "# Sample Variable List\n",
+    "var_list = ['Surface Pressure', '2m Temp', 'Dew Point Temp', 'U Wind', 'V Wind', 'Precip', 'Cloud Cov']"
    ]
   },
   {
@@ -420,33 +407,34 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Create figure and Taylor Diagram instance\n",
-    "fig = plt.figure(figsize=(12, 12))\n",
+    "# Create figure and TaylorDiagram instance\n",
+    "fig = plt.figure(figsize=(10, 10))\n",
     "taylor = gv.TaylorDiagram(fig=fig, label='REF')\n",
-    "ax = plt.gca()\n",
     "\n",
     "# Draw diagonal dashed lines from origin to correlation values\n",
     "# Also enforces proper X-Y ratio\n",
     "taylor.add_xgrid(np.array([0.6, 0.9]))\n",
     "\n",
-    "# Add model sets for p and t datasets\n",
-    "taylor.add_model_set(\n",
-    "    temp_std,\n",
-    "    temp_corr,\n",
-    "    fontsize=20,\n",
-    "    xytext=(-5, 10),  # marker label location, in pixels\n",
-    "    color='red',\n",
-    "    marker='o',\n",
-    "    facecolors='none',\n",
-    "    s=100)  # marker size\n",
+    "# Add models to Taylor diagram\n",
+    "taylor.add_model_set(a_sdev,\n",
+    "                  a_ccorr,\n",
+    "                  color='red',\n",
+    "                  marker='o',\n",
+    "                  label='Model A', # add model set legend label\n",
+    "                  fontsize=16)\n",
     "\n",
-    "#gv.util.set_axes_limits_and_ticks(ax, xlim=[0,2])\n",
+    "taylor.add_model_set(b_sdev,\n",
+    "                  b_ccorr,\n",
+    "                  color='blue',\n",
+    "                  marker='o',\n",
+    "                  label='Model B',\n",
+    "                  fontsize=16)\n",
     "\n",
-    "namearr = ['rcp26', 'rcp45', 'rcp85']\n",
-    "taylor.add_model_name(namearr, fontsize=16)\n",
+    "# Add model name\n",
+    "taylor.add_model_name(var_list, fontsize=16)\n",
     "\n",
-    "# Add figure title\n",
-    "plt.title(\"CMIP5 Temperature - First Ensemble Member\", size=26, pad=45)\n",
+    "# Add figure legend\n",
+    "taylor.add_legend(fontsize=16)\n",
     "\n",
     "# Add constant centered RMS difference contours.\n",
     "taylor.add_contours(levels=np.arange(0, 1.1, 0.25),\n",
@@ -458,7 +446,74 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Based on these three RCPs it looks like RCP8.5 has the closest correlation to our observed climate behavior, but RCP2.6 has a closer standard deviation to what we experience. Based on your selected data, scientific interpretations may vary."
+    "## Plotting Bias\n",
+    "\n",
+    "We can add another layer of information to the Taylor Diagram by changing the marker size and shape depending on a third variable. Most commonly this is done to demonstrate bias, a statistical definition of the difference between the observed and estimated values.\n",
+    "\n",
+    "We do this by adding a `bias_array` kwarg to the `add_model_set()` method. Doing so necessitates removing the `marker` specification, since they are overriden with up or down arrows of varrying sizes. Bias values are in percentages.\n",
+    "\n",
+    "Indicate the meaning of these new bias symbols with a third legend with the call `add_bias_legend()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sample corresponding bias data.\n",
+    "\n",
+    "# Case A\n",
+    "a_bias = [2.7, -1.5, 17.31, -20.11, 12.5, 8.341, -4.7]  # bias (%)\n",
+    "\n",
+    "# Case B\n",
+    "b_bias = [1.7, 2.5, -17.31, 20.11, 19.5, 7.341, 9.2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create figure and TaylorDiagram instance\n",
+    "fig = plt.figure(figsize=(10, 10))\n",
+    "taylor = gv.TaylorDiagram(fig=fig, label='REF')\n",
+    "\n",
+    "# Draw diagonal dashed lines from origin to correlation values\n",
+    "# Also enforces proper X-Y ratio\n",
+    "taylor.add_xgrid(np.array([0.6, 0.9]))\n",
+    "\n",
+    "# Add models to Taylor diagram\n",
+    "taylor.add_model_set(a_sdev,\n",
+    "                  a_ccorr,\n",
+    "                  percent_bias_on=True, # indicate marker and size to be plotted based on bias_array\n",
+    "                  bias_array=a_bias, # specify bias array\n",
+    "                  color='red',\n",
+    "                  label='Model A',\n",
+    "                  fontsize=16)\n",
+    "\n",
+    "taylor.add_model_set(b_sdev,\n",
+    "                  b_ccorr,\n",
+    "                  percent_bias_on=True,\n",
+    "                  bias_array=b_bias,\n",
+    "                  color='blue',\n",
+    "                  label='Model B',\n",
+    "                  fontsize=16)\n",
+    "\n",
+    "# Add model name\n",
+    "taylor.add_model_name(var_list, fontsize=16)\n",
+    "\n",
+    "# Add figure legend\n",
+    "taylor.add_legend(fontsize=16)\n",
+    "\n",
+    "# Add bias legend\n",
+    "taylor.add_bias_legend()\n",
+    "\n",
+    "# Add constant centered RMS difference contours.\n",
+    "taylor.add_contours(levels=np.arange(0, 1.1, 0.25),\n",
+    "                 colors='lightgrey',\n",
+    "                 linewidths=0.5);"
    ]
   },
   {