Skip to content

Commit

Permalink
Merge pull request #31 from jukent/main
Browse files Browse the repository at this point in the history
add bias section
  • Loading branch information
jukent authored Nov 30, 2023
2 parents d190612 + c7e7a0e commit 5318d03
Showing 1 changed file with 145 additions and 90 deletions.
235 changes: 145 additions & 90 deletions notebooks/taylor-diagrams.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
"\n",
"Taylor diagrams are popular for displaying climatological data because the normalization of variances helps account for the widely varying numerical values of geoscientific variables such as temperature or precipitation.\n",
"\n",
"This notebook explores how to create and customize Taylor diagrams using `geocat-viz`.\n",
"This notebook explores how to create and customize Taylor diagrams using `geocat-viz`. See the more information on [`geocat-viz.TaylorDiagram`](https://geocat-viz.readthedocs.io/en/latest/user_api/generated/geocat.viz.taylor.TaylorDiagram.html).\n",
"\n",
"1. Creating a Simple Taylor Diagram\n",
"1. Necessary Statistical Analysis\n",
"1. Plotting Different Ensemble Members\n",
"1. Plotting Multiple Variables\n",
"1. Plotting Multiple Models\n",
"1. Plotting Multiple Variables\n",
"1. Plotting Bias\n",
"1. Variants"
]
},
Expand Down Expand Up @@ -257,11 +258,11 @@
"taylor.add_model_set(\n",
" temp_rcp85_std,\n",
" temp_rcp85_corr,\n",
" fontsize=20,\n",
" fontsize=20, # specify font size\n",
" xytext=(-5, 10), # marker label location, in pixels\n",
" color='red',\n",
" marker='o',\n",
" facecolors='none',\n",
" color='red', # specify marker color\n",
" marker='o', # specify marker shape\n",
" facecolors='none', # specify marker fill\n",
" s=100) # marker size\n",
"\n",
"# Add legend of ensemble names\n",
Expand All @@ -276,11 +277,15 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting Multiple Variables\n",
"## Plotting Multiple Models\n",
"\n",
"A Taylor Diagram can support multiple model sets, you simply need to call `taylor.add_model_set()` multiple times. By adding the `label` kwarg and calling `taylor.add_legend()` you can add a label distinguishing between the two sets.\n",
"Another potential use case for a Taylor diagram is to plot multiple models. Here we compare RCP2.6, RCP4.5, and RCP8.5 together. \n",
"\n",
"Because it isn't meaningful to compare ensemble members across model runs (the nature of the perturbations isn't reliably similar across RCPs or labs), we will look at the first ensemble `r1i1p1` for all models. For your analysis, you might find it more meaningful to average across ensemble members, but we'll keep it simple for this plotting example.\n",
"\n",
"Let's repeat our calculations but for the pressure variable."
"Of course, you could still chose to display more information on one graph, but there is no real conection between the first ensemble of one model versus another.\n",
"\n",
"In this final example, we'll add another layer of complexity to our Taylor Diagram plot with contour lines of constant normalized standard deviation."
]
},
{
Expand All @@ -289,8 +294,12 @@
"metadata": {},
"outputs": [],
"source": [
"ps_rcp85 = xr.open_dataset(gdf.get('netcdf_files/ps_Amon_CanESM2_rcp85_2022_xyav.nc'))\n",
"ps_rcp85['time'] = ps_rcp85.indexes['time'].to_datetimeindex()"
"# Open RCP26 and RCP45 files\n",
"tas_rcp26 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp26_2022_xyav.nc'))\n",
"tas_rcp26['time'] = tas_rcp26.indexes['time'].to_datetimeindex()\n",
"\n",
"tas_rcp45 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp45_2022_xyav.nc'))\n",
"tas_rcp45['time'] = tas_rcp45.indexes['time'].to_datetimeindex()"
]
},
{
Expand All @@ -299,20 +308,17 @@
"metadata": {},
"outputs": [],
"source": [
"ps_rcp85_std = []\n",
"ps_rcp85_corr = []\n",
"\n",
"std_ps_obsv = float(era5_ps.std().values)\n",
"\n",
"for em in list(ps_rcp85.data_vars): # for each ensemble member\n",
" std = float(ps_rcp85[em].std().values)\n",
" std_norm = std / std_ps_obsv\n",
"# Perform statistical analysis to create our standard deviation and correlation coefficient lists\n",
"temp_rcp26_std = float(tas_rcp26['r1i1p1'].std().values) \n",
"temp_rcp26_std_norm = temp_rcp26_std / std_temp_obsv\n",
"temp_rcp26_corr = float(xr.corr(era5_temp, tas_rcp26['r1i1p1']).values)\n",
"\n",
" corr = float(xr.corr(era5_ps, ps_rcp85[em]).values)\n",
"temp_rcp45_std = float(tas_rcp45['r1i1p1'].std().values)\n",
"temp_rcp45_std_norm = temp_rcp45_std / std_temp_obsv\n",
"temp_rcp45_corr = float(xr.corr(era5_temp, tas_rcp45['r1i1p1']).values)\n",
"\n",
" ps_rcp85_std.append(std)\n",
" corr = 0 if corr < 0 else corr # This is producing negative correlations for some values which is destroying the plot\n",
" ps_rcp85_corr.append(corr) "
"temp_std = [temp_rcp26_std_norm, temp_rcp45_std_norm, temp_rcp85_std[0]]\n",
"temp_corr = [temp_rcp26_corr, temp_rcp45_corr, temp_rcp85_corr[0]]"
]
},
{
Expand All @@ -330,69 +336,49 @@
"# Also enforces proper X-Y ratio\n",
"taylor.add_xgrid(np.array([0.6, 0.9]))\n",
"\n",
"# Add temperature data\n",
"# Add model set for temp dataset\n",
"taylor.add_model_set(\n",
" temp_rcp85_std,\n",
" temp_rcp85_corr,\n",
" temp_std,\n",
" temp_corr,\n",
" fontsize=20,\n",
" xytext=(-5, 10), # marker label location, in pixels\n",
" color='red',\n",
" marker='o',\n",
" facecolors='none',\n",
" label='Near Surface Temperature',\n",
" s=100) # marker size\n",
"\n",
"# Add pressure data\n",
"taylor.add_model_set(\n",
" ps_rcp85_std,\n",
" ps_rcp85_corr,\n",
" fontsize=20,\n",
" xytext=(-5, 10), # marker label location, in pixels\n",
" color='blue',\n",
" marker='D',\n",
" facecolors='none',\n",
" label='Surface Pressure',\n",
" s=100)\n",
"\n",
"# Add figure title\n",
"plt.title(\"RCP85\", size=26, pad=45);\n",
"#gv.util.set_axes_limits_and_ticks(ax, xlim=[0,2])\n",
"\n",
"# Add legend of ensemble names\n",
"namearr = list(ps_rcp85.data_vars)\n",
"namearr = ['rcp26', 'rcp45', 'rcp85']\n",
"taylor.add_model_name(namearr, fontsize=16)\n",
"\n",
"# Enforce x axis\n",
"gv.set_axes_limits_and_ticks(ax, xlim=[0, 2])\n",
"# Add figure title\n",
"plt.title(\"CMIP5 Temperature - First Ensemble Member\", size=26, pad=45)\n",
"\n",
"# Add figure legend\n",
"taylor.add_legend(fontsize=16, pad=45);"
"# Add constant centered RMS difference contours.\n",
"taylor.add_contours(levels=np.arange(0, 1.1, 0.25),\n",
" colors='lightgrey',\n",
" linewidths=0.5);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting Multiple Models\n",
"\n",
"Another potential use case for a Taylor diagram is to plot multiple models. Here we compare RCP2.6, RCP4.5, and RCP8.5 together. \n",
"\n",
"Because it isn't meaningful to compare ensemble members across model runs (the nature of the perturbations isn't reliably similar across RCPs or labs), we will look at the first ensemble `r1i1p1` for all models. For your analysis, you might find it more meaningful to average across ensemble members, but we'll keep it simple for this plotting example.\n",
"\n",
"In this final example, we'll add another layer of complexity to our Taylor Diagram plot with contour lines of constant normalized standard deviation."
"Based on these three RCPs it looks like RCP8.5 has the closest correlation to our observed climate behavior, but RCP2.6 has a closer standard deviation to what we experience. Based on your selected data, scientific interpretations may vary."
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"# Open RCP26 and RCP45 files\n",
"tas_rcp26 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp26_2022_xyav.nc'))\n",
"tas_rcp26['time'] = tas_rcp26.indexes['time'].to_datetimeindex()\n",
"## Plotting Multiple Variables\n",
"\n",
"tas_rcp45 = xr.open_dataset(gdf.get('netcdf_files/tas_Amon_CanESM2_rcp45_2022_xyav.nc'))\n",
"tas_rcp45['time'] = tas_rcp45.indexes['time'].to_datetimeindex()"
"A Taylor Diagram can support multiple model sets, you simply need to call `taylor.add_model_set()` multiple times. By adding the `label` kwarg and calling `taylor.add_legend()` you can add a label distinguishing between the two sets.\n",
"\n",
"Since we've already demonstrated the statistical analysis necessary to perform Taylor Diagrams, the following example will be using sample data.\n",
"\n",
"Here we make sample data for 7 common climate model variables, for two different models."
]
},
{
Expand All @@ -401,17 +387,18 @@
"metadata": {},
"outputs": [],
"source": [
"# Perform statistical analysis to create our standard deviation and correlation coefficient lists\n",
"temp_rcp26_std = float(tas_rcp26['r1i1p1'].std().values) \n",
"temp_rcp26_std_norm = temp_rcp26_std / std_temp_obsv\n",
"temp_rcp26_corr = float(xr.corr(era5_temp, tas_rcp26['r1i1p1']).values)\n",
"# Create sample data\n",
"\n",
"temp_rcp45_std = float(tas_rcp45['r1i1p1'].std().values)\n",
"temp_rcp45_std_norm = temp_rcp45_std / std_temp_obsv\n",
"temp_rcp45_corr = float(xr.corr(era5_temp, tas_rcp45['r1i1p1']).values)\n",
"# Model A\n",
"a_sdev = [1.230, 0.988, 1.092, 1.172, 1.064, 0.966, 1.079] # normalized standard deviation\n",
"a_ccorr = [0.958, 0.973, 0.740, 0.743, 0.922, 0.982, 0.952] # correlation coefficient\n",
"\n",
"temp_std = [temp_rcp26_std_norm, temp_rcp45_std_norm, temp_rcp85_std[0]]\n",
"temp_corr = [temp_rcp26_corr, temp_rcp45_corr, temp_rcp85_corr[0]]"
"# Model B\n",
"b_sdev = [1.129, 0.996, 1.016, 1.134, 1.023, 0.962, 1.048] # normalized standard deviation\n",
"b_ccorr = [0.963, 0.975, 0.801, 0.814, 0.946, 0.984, 0.968] # correlation coefficient\n",
"\n",
"# Sample Variable List\n",
"var_list = ['Surface Pressure', '2m Temp', 'Dew Point Temp', 'U Wind', 'V Wind', 'Precip', 'Cloud Cov']"
]
},
{
Expand All @@ -420,33 +407,34 @@
"metadata": {},
"outputs": [],
"source": [
"# Create figure and Taylor Diagram instance\n",
"fig = plt.figure(figsize=(12, 12))\n",
"# Create figure and TaylorDiagram instance\n",
"fig = plt.figure(figsize=(10, 10))\n",
"taylor = gv.TaylorDiagram(fig=fig, label='REF')\n",
"ax = plt.gca()\n",
"\n",
"# Draw diagonal dashed lines from origin to correlation values\n",
"# Also enforces proper X-Y ratio\n",
"taylor.add_xgrid(np.array([0.6, 0.9]))\n",
"\n",
"# Add model sets for p and t datasets\n",
"taylor.add_model_set(\n",
" temp_std,\n",
" temp_corr,\n",
" fontsize=20,\n",
" xytext=(-5, 10), # marker label location, in pixels\n",
" color='red',\n",
" marker='o',\n",
" facecolors='none',\n",
" s=100) # marker size\n",
"# Add models to Taylor diagram\n",
"taylor.add_model_set(a_sdev,\n",
" a_ccorr,\n",
" color='red',\n",
" marker='o',\n",
" label='Model A', # add model set legend label\n",
" fontsize=16)\n",
"\n",
"#gv.util.set_axes_limits_and_ticks(ax, xlim=[0,2])\n",
"taylor.add_model_set(b_sdev,\n",
" b_ccorr,\n",
" color='blue',\n",
" marker='o',\n",
" label='Model B',\n",
" fontsize=16)\n",
"\n",
"namearr = ['rcp26', 'rcp45', 'rcp85']\n",
"taylor.add_model_name(namearr, fontsize=16)\n",
"# Add model name\n",
"taylor.add_model_name(var_list, fontsize=16)\n",
"\n",
"# Add figure title\n",
"plt.title(\"CMIP5 Temperature - First Ensemble Member\", size=26, pad=45)\n",
"# Add figure legend\n",
"taylor.add_legend(fontsize=16)\n",
"\n",
"# Add constant centered RMS difference contours.\n",
"taylor.add_contours(levels=np.arange(0, 1.1, 0.25),\n",
Expand All @@ -458,7 +446,74 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Based on these three RCPs it looks like RCP8.5 has the closest correlation to our observed climate behavior, but RCP2.6 has a closer standard deviation to what we experience. Based on your selected data, scientific interpretations may vary."
"## Plotting Bias\n",
"\n",
"We can add another layer of information to the Taylor Diagram by changing the marker size and shape depending on a third variable. Most commonly this is done to demonstrate bias, a statistical definition of the difference between the observed and estimated values.\n",
"\n",
"We do this by adding a `bias_array` kwarg to the `add_model_set()` method. Doing so necessitates removing the `marker` specification, since they are overriden with up or down arrows of varrying sizes. Bias values are in percentages.\n",
"\n",
"Indicate the meaning of these new bias symbols with a third legend with the call `add_bias_legend()`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sample corresponding bias data.\n",
"\n",
"# Case A\n",
"a_bias = [2.7, -1.5, 17.31, -20.11, 12.5, 8.341, -4.7] # bias (%)\n",
"\n",
"# Case B\n",
"b_bias = [1.7, 2.5, -17.31, 20.11, 19.5, 7.341, 9.2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create figure and TaylorDiagram instance\n",
"fig = plt.figure(figsize=(10, 10))\n",
"taylor = gv.TaylorDiagram(fig=fig, label='REF')\n",
"\n",
"# Draw diagonal dashed lines from origin to correlation values\n",
"# Also enforces proper X-Y ratio\n",
"taylor.add_xgrid(np.array([0.6, 0.9]))\n",
"\n",
"# Add models to Taylor diagram\n",
"taylor.add_model_set(a_sdev,\n",
" a_ccorr,\n",
" percent_bias_on=True, # indicate marker and size to be plotted based on bias_array\n",
" bias_array=a_bias, # specify bias array\n",
" color='red',\n",
" label='Model A',\n",
" fontsize=16)\n",
"\n",
"taylor.add_model_set(b_sdev,\n",
" b_ccorr,\n",
" percent_bias_on=True,\n",
" bias_array=b_bias,\n",
" color='blue',\n",
" label='Model B',\n",
" fontsize=16)\n",
"\n",
"# Add model name\n",
"taylor.add_model_name(var_list, fontsize=16)\n",
"\n",
"# Add figure legend\n",
"taylor.add_legend(fontsize=16)\n",
"\n",
"# Add bias legend\n",
"taylor.add_bias_legend()\n",
"\n",
"# Add constant centered RMS difference contours.\n",
"taylor.add_contours(levels=np.arange(0, 1.1, 0.25),\n",
" colors='lightgrey',\n",
" linewidths=0.5);"
]
},
{
Expand Down

0 comments on commit 5318d03

Please sign in to comment.