diff --git a/src/notebooks/visualization_notebook.ipynb b/src/notebooks/visualization_notebook.ipynb index 0811a23b..4d1e7588 100644 --- a/src/notebooks/visualization_notebook.ipynb +++ b/src/notebooks/visualization_notebook.ipynb @@ -47,7 +47,8 @@ "from utils import plot_field, load_obj, sum_total_emissions, count_obs_in_mask\n", "\n", "import warnings\n", - "warnings.filterwarnings(\"ignore\", category=FutureWarning) " + "\n", + "warnings.filterwarnings(\"ignore\", category=FutureWarning)" ] }, { @@ -75,12 +76,12 @@ "outputs": [], "source": [ "# Open the state vector file\n", - "state_vector_filepath = './../StateVector.nc'\n", + "state_vector_filepath = \"./../StateVector.nc\"\n", "state_vector = xr.load_dataset(state_vector_filepath)\n", - "state_vector_labels = state_vector['StateVector']\n", + "state_vector_labels = state_vector[\"StateVector\"]\n", "\n", "# Identify the last element of the region of interest\n", - "last_ROI_element = int(np.nanmax(state_vector_labels.values) - config['nBufferClusters'])\n", + "last_ROI_element = int(np.nanmax(state_vector_labels.values) - config[\"nBufferClusters\"])\n", "\n", "# Define mask for region of interest\n", "mask = (state_vector_labels <= last_ROI_element)" @@ -96,7 +97,7 @@ "# update prior prefix and period based on whether using kalman mode\n", "if config[\"KalmanMode\"]:\n", " prior_prefix = \"./../..\"\n", - " period = int(os.getcwd().split(\"kf_inversions/period\")[-1]) # get current period\n", + " period = int(os.getcwd().split(\"kf_inversions/period\")[-1]) # get current period\n", " periods_df = pd.read_csv(\"./../../periods.csv\")\n", " start_date = periods_df.iloc[period - 1, 0]\n", "else:\n", @@ -104,12 +105,12 @@ " start_date = config[\"StartDate\"]\n", "\n", "prior_pth = f'{prior_prefix}/jacobian_runs/{config[\"RunName\"]}_0000/OutputDir/HEMCO_diagnostics.{start_date}0000.nc'\n", - "results_pth = './gridded_posterior.nc'\n", - "satdat_dir = './data_converted'\n", - "inversion_result_path = './inversion_result.nc'\n", - "posterior_dir = './data_converted_posterior'\n", - "visualization_dir = './data_visualization'\n", - "posterior_viz_dir = './data_visualization_posterior'" + "results_pth = \"./gridded_posterior.nc\"\n", + "satdat_dir = \"./data_converted\"\n", + "inversion_result_path = \"./inversion_result.nc\"\n", + "posterior_dir = \"./data_converted_posterior\"\n", + "visualization_dir = \"./data_visualization\"\n", + "posterior_viz_dir = \"./data_visualization_posterior\"" ] }, { @@ -121,15 +122,15 @@ "# Set latitude/longitude bounds for plots\n", "\n", "# Trim 1-2.5 degrees to remove GEOS-Chem buffer zone\n", - "if config['Res'] == '0.25x0.3125':\n", + "if config[\"Res\"] == \"0.25x0.3125\":\n", " degx = 4 * 0.3125\n", " degy = 4 * 0.25\n", - "elif config['Res'] == '0.5x0.625':\n", + "elif config[\"Res\"] == \"0.5x0.625\":\n", " degx = 4 * 0.625\n", " degy = 4 * 0.5\n", "\n", - "lon_bounds = [np.min(state_vector.lon.values)+degx, np.max(state_vector.lon.values)-degx]\n", - "lat_bounds = [np.min(state_vector.lat.values)+degy, np.max(state_vector.lat.values)-degy]" + "lon_bounds = [np.min(state_vector.lon.values) + degx, np.max(state_vector.lon.values) - degx]\n", + "lat_bounds = [np.min(state_vector.lat.values) + degy, np.max(state_vector.lat.values) - degy]" ] }, { @@ -146,12 +147,17 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plt.figure(figsize=(8,8))\n", - "plt.rcParams.update({'font.size': 16})\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "plot_field(ax, state_vector_labels, cmap=cc.cm.glasbey, \n", - " title='State vector elements', cbar_label='Element Id')" + "plot_field(\n", + " ax,\n", + " state_vector_labels,\n", + " cmap=cc.cm.glasbey,\n", + " title=\"State vector elements\",\n", + " cbar_label=\"Element Id\",\n", + ")" ] }, { @@ -169,10 +175,10 @@ "outputs": [], "source": [ "# Prior emissions\n", - "prior = xr.load_dataset(prior_pth)['EmisCH4_Total'].isel(time=0)\n", + "prior = xr.load_dataset(prior_pth)[\"EmisCH4_Total\"].isel(time=0)\n", "\n", "# Optimized scale factors\n", - "scale = xr.load_dataset(results_pth)['ScaleFactor']\n", + "scale = xr.load_dataset(results_pth)[\"ScaleFactor\"]\n", "\n", "# Posterior emissions\n", "posterior = prior * scale" @@ -186,13 +192,13 @@ "source": [ "# Total emissions in the region of interest\n", "\n", - "areas = xr.load_dataset(prior_pth)['AREA']\n", + "areas = xr.load_dataset(prior_pth)[\"AREA\"]\n", "\n", "total_prior_emissions = sum_total_emissions(prior, areas, mask)\n", "total_posterior_emissions = sum_total_emissions(posterior, areas, mask)\n", "\n", - "print('Prior emissions :', total_prior_emissions, 'Tg/y')\n", - "print('Posterior emissions :', total_posterior_emissions, 'Tg/y')" + "print(\"Prior emissions :\", total_prior_emissions, \"Tg/y\")\n", + "print(\"Posterior emissions :\", total_posterior_emissions, \"Tg/y\")" ] }, { @@ -202,16 +208,26 @@ "outputs": [], "source": [ "# Plot prior emissions\n", - "fig = plt.figure(figsize=(8,8))\n", - "plt.rcParams.update({'font.size': 16})\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "prior_kgkm2h = prior * (1000 ** 2) * 60 * 60 # Units kg/km2/h\n", + "prior_kgkm2h = prior * (1000**2) * 60 * 60 # Units kg/km2/h\n", "\n", - "plot_field(ax, prior_kgkm2h, cmap=cc.cm.linear_kryw_5_100_c67_r, \n", - " lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " vmin=0, vmax=14, title='Prior emissions', cbar_label='Emissions (kg km$^{-2}$ h$^{-1}$)',\n", - " only_ROI=True, state_vector_labels=state_vector_labels, last_ROI_element=last_ROI_element)" + "plot_field(\n", + " ax,\n", + " prior_kgkm2h,\n", + " cmap=cc.cm.linear_kryw_5_100_c67_r,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " vmin=0,\n", + " vmax=14,\n", + " title=\"Prior emissions\",\n", + " cbar_label=\"Emissions (kg km$^{-2}$ h$^{-1}$)\",\n", + " only_ROI=True,\n", + " state_vector_labels=state_vector_labels,\n", + " last_ROI_element=last_ROI_element,\n", + ")" ] }, { @@ -221,16 +237,26 @@ "outputs": [], "source": [ "# Plot posterior emissions\n", - "fig = plt.figure(figsize=(8,8))\n", - "plt.rcParams.update({'font.size': 16})\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "posterior_kgkm2h = posterior * (1000 ** 2) * 60 * 60 # Units kg/km2/h\n", + "posterior_kgkm2h = posterior * (1000**2) * 60 * 60 # Units kg/km2/h\n", "\n", - "plot_field(ax, posterior_kgkm2h, cmap=cc.cm.linear_kryw_5_100_c67_r, \n", - " lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " vmin=0, vmax=14, title='Posterior emissions', cbar_label='Emissions (kg km$^{-2}$ h$^{-1}$)',\n", - " only_ROI=True, state_vector_labels=state_vector_labels, last_ROI_element=last_ROI_element)" + "plot_field(\n", + " ax,\n", + " posterior_kgkm2h,\n", + " cmap=cc.cm.linear_kryw_5_100_c67_r,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " vmin=0,\n", + " vmax=14,\n", + " title=\"Posterior emissions\",\n", + " cbar_label=\"Emissions (kg km$^{-2}$ h$^{-1}$)\",\n", + " only_ROI=True,\n", + " state_vector_labels=state_vector_labels,\n", + " last_ROI_element=last_ROI_element,\n", + ")" ] }, { @@ -244,38 +270,77 @@ "posterior = prior * scale\n", "\n", "emission_types = {\n", - "'SoilPost emissions' : sum_total_emissions(posterior[\"EmisCH4_SoilAbsorb\"].isel(time=0), areas, mask),\n", - "'Termites emissions' : sum_total_emissions(posterior[\"EmisCH4_Termites\"].isel(time=0), areas, mask),\n", - "'Lakes emissions' : sum_total_emissions(posterior[\"EmisCH4_Lakes\"].isel(time=0), areas, mask),\n", - "'Seeps emissions' : sum_total_emissions(posterior[\"EmisCH4_Seeps\"].isel(time=0), areas, mask),\n", - "'Wetlands emissions' : sum_total_emissions(posterior[\"EmisCH4_Wetlands\"].isel(time=0), areas, mask),\n", - "'BiomassBurn emissions' :sum_total_emissions(posterior[\"EmisCH4_BiomassBurn\"].isel(time=0), areas, mask),\n", - "'OtherAnth emissions' : sum_total_emissions(posterior[\"EmisCH4_OtherAnth\"].isel(time=0), areas, mask),\n", - "'Rice emissions' : sum_total_emissions(posterior[\"EmisCH4_Rice\"].isel(time=0), areas, mask),\n", - "'Wastewater emissions' : sum_total_emissions(posterior[\"EmisCH4_Wastewater\"].isel(time=0), areas, mask),\n", - "'Landfills emissions' : sum_total_emissions(posterior[\"EmisCH4_Landfills\"].isel(time=0), areas, mask),\n", - "'Livestock emissions' : sum_total_emissions(posterior[\"EmisCH4_Livestock\"].isel(time=0), areas, mask),\n", - "'Coal emissions' : sum_total_emissions(posterior[\"EmisCH4_Coal\"].isel(time=0), areas, mask),\n", - "'Gas emissions' : sum_total_emissions(posterior[\"EmisCH4_Gas\"].isel(time=0), areas, mask) }\n", + " \"SoilPost emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_SoilAbsorb\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Termites emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Termites\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Lakes emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Lakes\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Seeps emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Seeps\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Wetlands emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Wetlands\"].isel(time=0), areas, mask\n", + " ),\n", + " \"BiomassBurn emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_BiomassBurn\"].isel(time=0), areas, mask\n", + " ),\n", + " \"OtherAnth emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_OtherAnth\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Rice emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Rice\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Wastewater emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Wastewater\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Landfills emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Landfills\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Livestock emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Livestock\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Coal emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Coal\"].isel(time=0), areas, mask\n", + " ),\n", + " \"Gas emissions\": sum_total_emissions(\n", + " posterior[\"EmisCH4_Gas\"].isel(time=0), areas, mask\n", + " ),\n", + "}\n", "\n", "labels = []\n", "values = []\n", "\n", "for subemission in emission_types:\n", " print(subemission, \"=\", emission_types[subemission], \"Tg/y\")\n", - " if (emission_types[subemission]>0):\n", + " if emission_types[subemission] > 0:\n", " labels.append(subemission)\n", " values.append(emission_types[subemission])\n", "\n", "\n", - "title = plt.title('CH4 emissions by sector')\n", + "title = plt.title(\"CH4 emissions by sector\")\n", "title.set_ha(\"center\")\n", "plt.gca().axis(\"equal\")\n", - "def my_autopct(pct): # Only plots the data labels for sectoral emissions that are larger than 15%\n", - " return ('%.1f%%' % pct) if pct > 15 else ''\n", + "\n", + "\n", + "def my_autopct(\n", + " pct,\n", + "): # Only plots the data labels for sectoral emissions that are larger than 15%\n", + " return (\"%.1f%%\" % pct) if pct > 15 else \"\"\n", + "\n", + "\n", "pie = plt.pie(values, startangle=0, autopct=my_autopct)\n", - "plt.legend(pie[0],labels, bbox_to_anchor=(1,0.5), loc=\"center right\", fontsize=10, \n", - " bbox_transform=plt.gcf().transFigure)\n", + "plt.legend(\n", + " pie[0],\n", + " labels,\n", + " bbox_to_anchor=(1, 0.5),\n", + " loc=\"center right\",\n", + " fontsize=10,\n", + " bbox_transform=plt.gcf().transFigure,\n", + ")\n", "plt.subplots_adjust(left=0.0, bottom=0.1, right=0.65)" ] }, @@ -293,14 +358,24 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plt.figure(figsize=(8,8))\n", - "plt.rcParams.update({'font.size': 16})\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "plot_field(ax, scale, cmap='RdBu_r',\n", - " lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " vmin=0, vmax=2, title='Scale factors', cbar_label='Scale factor',\n", - " only_ROI=True, state_vector_labels=state_vector_labels, last_ROI_element=last_ROI_element)" + "plot_field(\n", + " ax,\n", + " scale,\n", + " cmap=\"RdBu_r\",\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " vmin=0,\n", + " vmax=2,\n", + " title=\"Scale factors\",\n", + " cbar_label=\"Scale factor\",\n", + " only_ROI=True,\n", + " state_vector_labels=state_vector_labels,\n", + " last_ROI_element=last_ROI_element,\n", + ")" ] }, { @@ -317,8 +392,8 @@ "metadata": {}, "outputs": [], "source": [ - "S_post_grid = xr.load_dataset(results_pth)['S_post']\n", - "A_grid = xr.load_dataset(results_pth)['A']\n", + "S_post_grid = xr.load_dataset(results_pth)[\"S_post\"]\n", + "A_grid = xr.load_dataset(results_pth)[\"A\"]\n", "avkern_ROI = A_grid.where(state_vector_labels <= last_ROI_element)" ] }, @@ -328,14 +403,22 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plt.figure(figsize=(8,8))\n", - "plt.rcParams.update({'font.size': 16})\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "plot_field(ax, avkern_ROI, cmap=cc.cm.CET_L19,\n", - " lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='Averaging kernel sensitivities', cbar_label='Sensitivity', \n", - " only_ROI=True, state_vector_labels=state_vector_labels, last_ROI_element=last_ROI_element)" + "plot_field(\n", + " ax,\n", + " avkern_ROI,\n", + " cmap=cc.cm.CET_L19,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"Averaging kernel sensitivities\",\n", + " cbar_label=\"Sensitivity\",\n", + " only_ROI=True,\n", + " state_vector_labels=state_vector_labels,\n", + " last_ROI_element=last_ROI_element,\n", + ")" ] }, { @@ -345,9 +428,9 @@ "outputs": [], "source": [ "# ungridded inversion result is used to calculate DOFS using the trace of the averaging kernel\n", - "A_ROI = xr.load_dataset(inversion_result_path)['A'].values[:last_ROI_element, :last_ROI_element]\n", + "A_ROI = xr.load_dataset(inversion_result_path)[\"A\"].values[:last_ROI_element, :last_ROI_element]\n", "DOFS = np.trace(A_ROI)\n", - "print('DOFS =', DOFS)" + "print(\"DOFS =\", DOFS)" ] }, { @@ -376,48 +459,56 @@ "\n", " for f in files:\n", " # Get paths\n", - " pth = os.path.join(data_dir,f)\n", - " pth_posterior = os.path.join(data_posterior,f)\n", + " pth = os.path.join(data_dir, f)\n", + " pth_posterior = os.path.join(data_posterior, f)\n", " # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file\n", " obj = load_obj(pth)\n", " obj_posterior = load_obj(pth_posterior)\n", " # If there aren't any TROPOMI observations on this day, skip\n", - " if obj['obs_GC'].shape[0] == 0:\n", + " if obj[\"obs_GC\"].shape[0] == 0:\n", " continue\n", " # Otherwise, grab the TROPOMI/GEOS-Chem data\n", - " obs_GC = obj['obs_GC']\n", - " obs_GC_posterior = obj_posterior['obs_GC']\n", + " obs_GC = obj[\"obs_GC\"]\n", + " obs_GC_posterior = obj_posterior[\"obs_GC\"]\n", " # Only consider data within latitude and longitude bounds\n", - " ind = np.where((obs_GC[:,2]>=lon_bounds[0]) & (obs_GC[:,2]<=lon_bounds[1]) & \n", - " (obs_GC[:,3]>=lat_bounds[0]) & (obs_GC[:,3]<=lat_bounds[1]))\n", - " if (len(ind[0]) == 0): # Skip if no data in bounds\n", + " ind = np.where(\n", + " (obs_GC[:, 2] >= lon_bounds[0])\n", + " & (obs_GC[:, 2] <= lon_bounds[1])\n", + " & (obs_GC[:, 3] >= lat_bounds[0])\n", + " & (obs_GC[:, 3] <= lat_bounds[1])\n", + " )\n", + " if len(ind[0]) == 0: # Skip if no data in bounds\n", " continue\n", - " obs_GC = obs_GC[ind[0],:] # TROPOMI and GEOS-Chem data within bounds\n", - " obs_GC_posterior = obs_GC_posterior[ind[0],:]\n", + " obs_GC = obs_GC[ind[0], :] # TROPOMI and GEOS-Chem data within bounds\n", + " obs_GC_posterior = obs_GC_posterior[ind[0], :]\n", " # Record lat, lon, tropomi ch4, and geos ch4\n", - " lat = np.concatenate((lat, obs_GC[:,3]))\n", - " lon = np.concatenate((lon, obs_GC[:,2]))\n", - " tropomi = np.concatenate((tropomi, obs_GC[:,0]))\n", - " geos_prior = np.concatenate((geos_prior, obs_GC[:,1]))\n", - " observation_count = np.concatenate((observation_count, obs_GC[:,4]))\n", - " geos_posterior = np.concatenate((geos_posterior, obs_GC_posterior[:,1]))\n", - " \n", + " lat = np.concatenate((lat, obs_GC[:, 3]))\n", + " lon = np.concatenate((lon, obs_GC[:, 2]))\n", + " tropomi = np.concatenate((tropomi, obs_GC[:, 0]))\n", + " geos_prior = np.concatenate((geos_prior, obs_GC[:, 1]))\n", + " observation_count = np.concatenate((observation_count, obs_GC[:, 4]))\n", + " geos_posterior = np.concatenate((geos_posterior, obs_GC_posterior[:, 1]))\n", + "\n", " df = pd.DataFrame()\n", - " df['lat'] = lat\n", - " df['lon'] = lon\n", - " df['tropomi'] = tropomi\n", - " df['geos_prior'] = geos_prior\n", - " df['geos_posterior'] = geos_posterior\n", - " df['diff_tropomi_prior'] = geos_prior - tropomi\n", - " df['diff_tropomi_posterior'] = geos_posterior - tropomi\n", - " df['observation_count'] = observation_count\n", + " df[\"lat\"] = lat\n", + " df[\"lon\"] = lon\n", + " df[\"tropomi\"] = tropomi\n", + " df[\"geos_prior\"] = geos_prior\n", + " df[\"geos_posterior\"] = geos_posterior\n", + " df[\"diff_tropomi_prior\"] = geos_prior - tropomi\n", + " df[\"diff_tropomi_posterior\"] = geos_posterior - tropomi\n", + " df[\"observation_count\"] = observation_count\n", "\n", " return df\n", + "\n", + "\n", "superobs_df = aggregate_data(satdat_dir, posterior_dir)\n", "visualization_df = aggregate_data(visualization_dir, posterior_viz_dir)\n", - "n_obs = len(superobs_df['tropomi'])\n", + "n_obs = len(superobs_df[\"tropomi\"])\n", "\n", - "print(f'Found {n_obs} super-observations in the domain, representing {np.sum(superobs_df[\"observation_count\"]).round(0)} TROPOMI observations.')\n", + "print(\n", + " f'Found {n_obs} super-observations in the domain, representing {np.sum(superobs_df[\"observation_count\"]).round(0)} TROPOMI observations.'\n", + ")\n", "superobs_df.head()" ] }, @@ -436,10 +527,53 @@ "outputs": [], "source": [ "# Print some error statistics\n", - "print('Bias in prior :' , np.round(np.average(superobs_df['diff_tropomi_prior'], weights=superobs_df['observation_count']),2),'ppb')\n", - "print('Bias in posterior :' , np.round(np.average(superobs_df['diff_tropomi_posterior'], weights=superobs_df['observation_count']),2),'ppb')\n", - "print('RMSE prior :' , np.round(np.sqrt(np.average(superobs_df['diff_tropomi_prior']**2, weights=superobs_df['observation_count'])),2),'ppb')\n", - "print('RMSE posterior :' , np.round(np.sqrt(np.average(superobs_df['diff_tropomi_posterior']**2, weights=superobs_df['observation_count'])),2),'ppb')" + "print(\n", + " \"Bias in prior :\",\n", + " np.round(\n", + " np.average(\n", + " superobs_df[\"diff_tropomi_prior\"], weights=superobs_df[\"observation_count\"]\n", + " ),\n", + " 2,\n", + " ),\n", + " \"ppb\",\n", + ")\n", + "print(\n", + " \"Bias in posterior :\",\n", + " np.round(\n", + " np.average(\n", + " superobs_df[\"diff_tropomi_posterior\"],\n", + " weights=superobs_df[\"observation_count\"],\n", + " ),\n", + " 2,\n", + " ),\n", + " \"ppb\",\n", + ")\n", + "print(\n", + " \"RMSE prior :\",\n", + " np.round(\n", + " np.sqrt(\n", + " np.average(\n", + " superobs_df[\"diff_tropomi_prior\"] ** 2,\n", + " weights=superobs_df[\"observation_count\"],\n", + " )\n", + " ),\n", + " 2,\n", + " ),\n", + " \"ppb\",\n", + ")\n", + "print(\n", + " \"RMSE posterior :\",\n", + " np.round(\n", + " np.sqrt(\n", + " np.average(\n", + " superobs_df[\"diff_tropomi_posterior\"] ** 2,\n", + " weights=superobs_df[\"observation_count\"],\n", + " )\n", + " ),\n", + " 2,\n", + " ),\n", + " \"ppb\",\n", + ")" ] }, { @@ -456,7 +590,7 @@ "metadata": {}, "outputs": [], "source": [ - "print('Found',count_obs_in_mask(mask, superobs_df),'super-observations within the region of interest')" + "print(\"Found\", count_obs_in_mask(mask, superobs_df), \"super-observations within the region of interest\")" ] }, { @@ -482,10 +616,10 @@ "outputs": [], "source": [ "# Simple averaging scheme to grid the XCH4 data at 0.1 x 0.1 resolution\n", - "df_copy = visualization_df.copy() # save for later\n", - "visualization_df['lat'] = np.round(visualization_df['lat'],1)\n", - "visualization_df['lon'] = np.round(visualization_df['lon'],1)\n", - "visualization_df = visualization_df.groupby(['lat','lon']).mean()\n", + "df_copy = visualization_df.copy() # save for later\n", + "visualization_df[\"lat\"] = np.round(visualization_df[\"lat\"], 1)\n", + "visualization_df[\"lon\"] = np.round(visualization_df[\"lon\"], 1)\n", + "visualization_df = visualization_df.groupby([\"lat\", \"lon\"]).mean()\n", "ds = visualization_df.to_xarray()" ] }, @@ -496,13 +630,22 @@ "outputs": [], "source": [ "# Mean TROPOMI XCH4 columns on 0.1 x 0.1 grid\n", - "fig = plt.figure(figsize=(8,8))\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "plot_field(ax, ds['tropomi'], cmap='Spectral_r',\n", - " vmin=1800, vmax=1850, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='TROPOMI $X_{CH4}$', cbar_label='Column mixing ratio (ppb)', \n", - " mask=mask, only_ROI=False)" + "plot_field(\n", + " ax,\n", + " ds[\"tropomi\"],\n", + " cmap=\"Spectral_r\",\n", + " vmin=1800,\n", + " vmax=1850,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"TROPOMI $X_{CH4}$\",\n", + " cbar_label=\"Column mixing ratio (ppb)\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")" ] }, { @@ -512,18 +655,36 @@ "outputs": [], "source": [ "# Mean prior and posterior GEOS-Chem XCH4 columns on 0.1 x 0.1 grid\n", - "fig = plt.figure(figsize=(20,8))\n", - "ax1, ax2 = fig.subplots(1,2,subplot_kw={'projection': ccrs.PlateCarree()})\n", - "\n", - "plot_field(ax1, ds['geos_prior'], cmap='Spectral_r',\n", - " vmin=1800, vmax=1850, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='GEOS-Chem $X_{CH4}$ (prior simulation)', cbar_label='Dry column mixing ratio (ppb)', \n", - " mask=mask, only_ROI=False)\n", - "\n", - "plot_field(ax2, ds['geos_posterior'], cmap='Spectral_r',\n", - " vmin=1800, vmax=1850, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='GEOS-Chem $X_{CH4}$ (posterior simulation)', cbar_label='Dry column mixing ratio (ppb)', \n", - " mask=mask, only_ROI=False)" + "fig = plt.figure(figsize=(20, 8))\n", + "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", + "\n", + "plot_field(\n", + " ax1,\n", + " ds[\"geos_prior\"],\n", + " cmap=\"Spectral_r\",\n", + " vmin=1800,\n", + " vmax=1850,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"GEOS-Chem $X_{CH4}$ (prior simulation)\",\n", + " cbar_label=\"Dry column mixing ratio (ppb)\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")\n", + "\n", + "plot_field(\n", + " ax2,\n", + " ds[\"geos_posterior\"],\n", + " cmap=\"Spectral_r\",\n", + " vmin=1800,\n", + " vmax=1850,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"GEOS-Chem $X_{CH4}$ (posterior simulation)\",\n", + " cbar_label=\"Dry column mixing ratio (ppb)\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")" ] }, { @@ -533,18 +694,36 @@ "outputs": [], "source": [ "# Plot differences between GEOS-Chem and TROPOMI XCH4\n", - "fig = plt.figure(figsize=(20,8))\n", - "ax1, ax2 = fig.subplots(1,2,subplot_kw={'projection': ccrs.PlateCarree()})\n", - "\n", - "plot_field(ax1, ds['diff_tropomi_prior'], cmap='RdBu_r',\n", - " vmin=-40, vmax=40, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='Prior $-$ TROPOMI', cbar_label='ppb', \n", - " mask=mask, only_ROI=False)\n", - "\n", - "plot_field(ax2, ds['diff_tropomi_posterior'], cmap='RdBu_r',\n", - " vmin=-40, vmax=40, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='Posterior $-$ TROPOMI', cbar_label='ppb', \n", - " mask=mask, only_ROI=False)" + "fig = plt.figure(figsize=(20, 8))\n", + "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", + "\n", + "plot_field(\n", + " ax1,\n", + " ds[\"diff_tropomi_prior\"],\n", + " cmap=\"RdBu_r\",\n", + " vmin=-40,\n", + " vmax=40,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"Prior $-$ TROPOMI\",\n", + " cbar_label=\"ppb\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")\n", + "\n", + "plot_field(\n", + " ax2,\n", + " ds[\"diff_tropomi_posterior\"],\n", + " cmap=\"RdBu_r\",\n", + " vmin=-40,\n", + " vmax=40,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"Posterior $-$ TROPOMI\",\n", + " cbar_label=\"ppb\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")" ] }, { @@ -554,16 +733,24 @@ "outputs": [], "source": [ "# Plot differences between posterior and prior simulated XCH4\n", - "fig = plt.figure(figsize=(8,8))\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", - "diff = ds['geos_posterior']-ds['geos_prior']\n", + "diff = ds[\"geos_posterior\"] - ds[\"geos_prior\"]\n", "\n", - "plot_field(ax, diff, cmap='seismic',\n", - " vmin=-np.nanmax(diff),vmax=np.nanmax(diff),\n", - " lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='$\\Delta X_{CH4}$ (Posterior $-$ Prior)', cbar_label='ppb', \n", - " mask=mask, only_ROI=False)" + "plot_field(\n", + " ax,\n", + " diff,\n", + " cmap=\"seismic\",\n", + " vmin=-np.nanmax(diff),\n", + " vmax=np.nanmax(diff),\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"$\\Delta X_{CH4}$ (Posterior $-$ Prior)\",\n", + " cbar_label=\"ppb\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")" ] }, { @@ -581,11 +768,42 @@ "outputs": [], "source": [ "# Simple averaging scheme to grid the XCH4 data at 0.1 x 0.1 resolution\n", - "df_copy = superobs_df.copy() # save for later\n", - "superobs_df['lat'] = np.round(superobs_df['lat'],1)\n", - "superobs_df['lon'] = np.round(superobs_df['lon'],1)\n", - "superobs_df = superobs_df.groupby(['lat','lon']).mean()\n", - "ds = superobs_df.to_xarray()" + "df_copy = superobs_df.copy() # save for later\n", + "superobs_df[\"lat\"] = np.round(superobs_df[\"lat\"], 1)\n", + "superobs_df[\"lon\"] = np.round(superobs_df[\"lon\"], 1)\n", + "superobs_df = superobs_df.groupby([\"lat\", \"lon\"]).mean()\n", + "superobs_df = superobs_df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fill in any missing lat/lon coordinates at .1 degree spacing\n", + "new_lat_values = np.arange(ds[\"lat\"][0], ds[\"lat\"][-1] + 0.1, 0.1)\n", + "new_lon_values = np.arange(ds[\"lon\"][0], ds[\"lon\"][-1] + 0.1, 0.1)\n", + "\n", + "data = []\n", + "# Iterate through latitude and longitude combinations\n", + "for lat in new_lat_values:\n", + " for lon in new_lon_values:\n", + " data.append({\"lat\": lat, \"lon\": lon, \"value\": None})\n", + "\n", + "# Create a new DataFrame from the data\n", + "df_temp = pd.DataFrame(data)\n", + "\n", + "# ensure matching sigfigs with superobs_df\n", + "df_temp[\"lat\"] = np.round(df_temp[\"lat\"], 1)\n", + "df_temp[\"lon\"] = np.round(df_temp[\"lon\"], 1)\n", + "\n", + "# merge the two dataframes and drop the filler column\n", + "# then convert to xarray dataset\n", + "merged_df = pd.merge(df_temp, superobs_df, on=[\"lat\", \"lon\"], how=\"left\").drop(\n", + " columns=[\"value\"]\n", + ")\n", + "ds = xr.Dataset.from_dataframe(merged_df.set_index([\"lat\", \"lon\"]))" ] }, { @@ -597,8 +815,8 @@ "# calculate the grid bounds for .1x.1 grid\n", "lat_b = np.arange(ds[\"lat\"][0] - 0.05, ds[\"lat\"][-1] + 0.1, 0.1)\n", "lon_b = np.arange(ds[\"lon\"][0] - 0.05, ds[\"lon\"][-1] + 0.1, 0.1)\n", - "ds = ds.assign_coords(lon_b=('lon_b', lon_b))\n", - "ds = ds.assign_coords(lat_b=('lat_b', lat_b))\n", + "ds = ds.assign_coords(lon_b=(\"lon_b\", lon_b))\n", + "ds = ds.assign_coords(lat_b=(\"lat_b\", lat_b))\n", "ds[\"mask\"] = xr.where(~np.isnan(ds[\"tropomi\"]), 1, 0)" ] }, @@ -609,8 +827,8 @@ "outputs": [], "source": [ "# Global 0.25 x 0.3125 grid\n", - "reference_lat_grid = np.arange(-90 , 90+0.25 , 0.25)\n", - "reference_lon_grid = np.arange(-180, 180+0.3125, 0.3125)\n", + "reference_lat_grid = np.arange(-90, 90 + 0.25, 0.25)\n", + "reference_lon_grid = np.arange(-180, 180 + 0.3125, 0.3125)\n", "\n", "# Find closest reference coordinates to selected lat/lon bounds\n", "lat_min = reference_lat_grid[np.abs(reference_lat_grid - lat_bounds[0]).argmin()]\n", @@ -619,18 +837,20 @@ "lon_max = reference_lon_grid[np.abs(reference_lon_grid - lon_bounds[1]).argmin()]\n", "\n", "# Create an xESMF regridder object to resample the data on the grid HEMCO expects\n", - "new_lat_grid = np.arange(lat_min, lat_max+0.25, 0.25)\n", - "new_lon_grid = np.arange(lon_min, lon_max+0.3125, 0.3125)\n", - "new_lat_b = np.arange(new_lat_grid[0]-0.125, new_lat_grid[-1]+0.25, 0.25)\n", - "new_lon_b = np.arange(new_lon_grid[0]-0.15625, new_lon_grid[-1]+0.3125, 0.3125)\n", - "ds_out = xr.Dataset({'lat': (['lat'], new_lat_grid),\n", - " 'lon': (['lon'], new_lon_grid),\n", - " 'lat_b': (['lat_b'], new_lat_b),\n", - " 'lon_b': (['lon_b'], new_lon_b),\n", - " }\n", - " )\n", + "new_lat_grid = np.arange(lat_min, lat_max + 0.25, 0.25)\n", + "new_lon_grid = np.arange(lon_min, lon_max + 0.3125, 0.3125)\n", + "new_lat_b = np.arange(new_lat_grid[0] - 0.125, new_lat_grid[-1] + 0.25, 0.25)\n", + "new_lon_b = np.arange(new_lon_grid[0] - 0.15625, new_lon_grid[-1] + 0.3125, 0.3125)\n", + "ds_out = xr.Dataset(\n", + " {\n", + " \"lat\": ([\"lat\"], new_lat_grid),\n", + " \"lon\": ([\"lon\"], new_lon_grid),\n", + " \"lat_b\": ([\"lat_b\"], new_lat_b),\n", + " \"lon_b\": ([\"lon_b\"], new_lon_b),\n", + " }\n", + ")\n", "\n", - "regridder = xe.Regridder(ds, ds_out, 'conservative_normed')" + "regridder = xe.Regridder(ds, ds_out, \"conservative_normed\")" ] }, { @@ -640,7 +860,7 @@ "outputs": [], "source": [ "# Regrid the data\n", - "ds_regrid = regridder(ds)" + "ds_regrid = regridder(ds)\n" ] }, { @@ -650,18 +870,36 @@ "outputs": [], "source": [ "# Re-plot differences between GEOS-Chem and TROPOMI XCH4\n", - "fig = plt.figure(figsize=(20,8))\n", - "ax1, ax2 = fig.subplots(1,2,subplot_kw={'projection': ccrs.PlateCarree()})\n", - "\n", - "plot_field(ax1, ds_regrid['diff_tropomi_prior'], cmap='RdBu_r',\n", - " vmin=-25, vmax=25, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='Prior $-$ TROPOMI', cbar_label='ppb', \n", - " mask=mask, only_ROI=False)\n", - "\n", - "plot_field(ax2, ds_regrid['diff_tropomi_posterior'], cmap='RdBu_r',\n", - " vmin=-25, vmax=25, lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='Posterior $-$ TROPOMI', cbar_label='ppb', \n", - " mask=mask, only_ROI=False)" + "fig = plt.figure(figsize=(20, 8))\n", + "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", + "\n", + "plot_field(\n", + " ax1,\n", + " ds_regrid[\"diff_tropomi_prior\"],\n", + " cmap=\"RdBu_r\",\n", + " vmin=-25,\n", + " vmax=25,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"Prior $-$ TROPOMI\",\n", + " cbar_label=\"ppb\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")\n", + "\n", + "plot_field(\n", + " ax2,\n", + " ds_regrid[\"diff_tropomi_posterior\"],\n", + " cmap=\"RdBu_r\",\n", + " vmin=-25,\n", + " vmax=25,\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"Posterior $-$ TROPOMI\",\n", + " cbar_label=\"ppb\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")" ] }, { @@ -671,16 +909,24 @@ "outputs": [], "source": [ "# Re-plot differences between posterior and prior simulated XCH4\n", - "fig = plt.figure(figsize=(8,8))\n", - "ax = fig.subplots(1,1,subplot_kw={'projection': ccrs.PlateCarree()})\n", - "\n", - "diff_regrid = ds_regrid['geos_posterior']-ds_regrid['geos_prior']\n", - "\n", - "plot_field(ax, diff_regrid, cmap='seismic',\n", - " vmin=-np.nanmax(diff_regrid),vmax=np.nanmax(diff_regrid), \n", - " lon_bounds=lon_bounds, lat_bounds=lat_bounds,\n", - " title='Posterior $-$ Prior', cbar_label='ppb', \n", - " mask=mask, only_ROI=False)" + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", + "\n", + "diff_regrid = ds_regrid[\"geos_posterior\"] - ds_regrid[\"geos_prior\"]\n", + "\n", + "plot_field(\n", + " ax,\n", + " diff_regrid,\n", + " cmap=\"seismic\",\n", + " vmin=-np.nanmax(diff_regrid),\n", + " vmax=np.nanmax(diff_regrid),\n", + " lon_bounds=lon_bounds,\n", + " lat_bounds=lat_bounds,\n", + " title=\"Posterior $-$ Prior\",\n", + " cbar_label=\"ppb\",\n", + " mask=mask,\n", + " only_ROI=False,\n", + ")" ] }, {