diff --git a/README.md b/README.md
index 668ac8b..0029fee 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,7 @@ Options:
   -lnd, --land          Run land component diagnostics
   -ice, --seaice        Run sea ice component diagnostics
   -glc, --landice       Run land ice component diagnostics
+  -rof, --river-runoff  Run river runoff component diagnostics
   --config_path         Path to the YAML configuration file containing specifications for notebooks (default config.yml)
   -h, --help            Show this message and exit.
 ```
diff --git a/cupid/run.py b/cupid/run.py
index d1e578a..d791669 100755
--- a/cupid/run.py
+++ b/cupid/run.py
@@ -17,6 +17,7 @@
 -lnd, --land          Run land component diagnostics
 -ice, --seaice        Run sea ice component diagnostics
 -glc, --landice       Run land ice component diagnostics
+-rof, --river-runoff  Run river runoff component diagnostics
 -config_path          Path to the YAML configuration file containing specifications for notebooks (default: config.yml)
 -h, --help            Show this message and exit.
 """
@@ -46,6 +47,7 @@
 @click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics")
 @click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics")
 @click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics")
+@click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics")
 @click.argument("config_path", default="config.yml")
 def run(
     config_path,
@@ -57,6 +59,7 @@ def run(
     land=False,
     seaice=False,
     landice=False,
+    river_runoff=False,
 ):
     """
     Main engine to set up running all the notebooks.
@@ -81,11 +84,12 @@ def run(
         "lnd": land,
         "ice": seaice,
         "glc": landice,
+        "rof": river_runoff,
     }
 
     # Automatically run all if no components specified
 
-    if True not in [atmosphere, ocean, land, seaice, landice]:
+    if True not in [atmosphere, ocean, land, seaice, landice, river_runoff]:
         all = True
         for key in component_options.keys():
             component_options[key] = True
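With the new option wired in, a river-runoff-only run can be exercised in-process through click's test runner (a minimal sketch, not part of this diff; point `config.yml` at whichever configuration file you use):

```python
# Sketch: drive the CLI in-process with click's testing utilities.
from click.testing import CliRunner

from cupid.run import run

runner = CliRunner()
# "-rof" turns on only the river runoff diagnostics; with no component
# flags at all, the code above falls back to running every component.
result = runner.invoke(run, ["-rof", "config.yml"])
print(result.exit_code, result.output)
```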
diff --git a/examples/coupled_model/config.yml b/examples/coupled_model/config.yml
index bf91a81..ac3e83a 100644
--- a/examples/coupled_model/config.yml
+++ b/examples/coupled_model/config.yml
@@ -95,13 +95,21 @@ timeseries:
     end_years: [102]
     level: 'lev'
 
+  rof:
+    vars: []
+    derive_vars: []
+    hist_str: 'initial_hist'
+    start_years: [2]
+    end_years: [102]
+    level: 'lev'
+
 compute_notebooks:
 
   # This is where all the notebooks you want run and their
   # parameters are specified. Several examples of different
   # types of notebooks are provided.
-  # The first key (here simple_no_params_nb) is the name of the
+  # The second key (here adf_quick_run) is the name of the
   # notebook from nb_path_root, minus the .ipynb
 
   infrastructure:
diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml
index e559ee7..c33e268 100644
--- a/examples/key_metrics/config.yml
+++ b/examples/key_metrics/config.yml
@@ -129,6 +129,24 @@ compute_notebooks:
         obs_name: 'GrIS_MARv3.12_climo_1960_1999.nc'
         climo_nyears: 40
 
+  rof:
+    global_discharge_gauge_compare_obs:
+      parameter_groups:
+        none:
+          analysis_name: ""
+          grid_name: 'f09_f09_mosart' # ROF grid name
+          rof_start_date: '0091-01-01'
+          rof_end_date: '0101-01-01'
+          figureSave: False
+    global_discharge_ocean_compare_obs:
+      parameter_groups:
+        none:
+          analysis_name: ""
+          grid_name: 'f09_f09_mosart' # ROF grid name
+          rof_start_date: '0091-01-01'
+          rof_end_date: '0101-01-01'
+          figureSave: False
+
 #  ice:
 #    seaice:
 #      parameter_groups:
diff --git a/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb b/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb
new file mode 100644
index 0000000..dc64a3c
--- /dev/null
+++ b/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb
@@ -0,0 +1,1217 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## ROF global monthly, annual, seasonal flows analysis \n",
+    "\n",
+    "Use the following datasets\n",
+    "\n",
+    "1. reach-D19 gauge link ascii\n",
+    "2. D19 flow site geopackage\n",
+    "3. D19 discharge netCDF\n",
+    "4. history netCDF including river discharge\n",
+    "\n",
+    "[1. Setup](#setup)\n",
+    "\n",
+    "[2. Loading data](#load_data)\n",
+    "\n",
+    "- Read monthly history files from archive. \n",
+    "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n",
+    "\n",
+    "[3. Large 24 rivers analysis](#24_large_rivers)\n",
+    "\n",
+    "- Plotting time series (annual, seasonal cycle).\n",
+    "- If D19 reference data is available, scatter plots at the 24 selected large rivers against D19 reference data\n",
+    "\n",
+    "[4. Large 50 rivers analysis](#50_large_rivers)\n",
+    "\n",
+    "- Annual flow summary table at the 50 selected large rivers.\n",
+    "- If D19 reference data is available, scatter plot of annual flow against D19 reference data.\n",
+    "\n",
+    "[5. 922 rivers analysis](#922_rivers)\n",
+    "\n",
+    "- Run only if reference flow is available\n",
+    "- Error statistics (%bias, rmse, correlation) at all 922 river sites.\n",
+    "- Plot error statistics on the global map\n",
+    "- Plot boxplots including case(s) for each error metric\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "\n",
+    "import os, sys\n",
+    "import glob\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib as mpl\n",
+    "from matplotlib.ticker import FormatStrFormatter\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "import xarray as xr\n",
+    "import cartopy.crs as ccrs\n",
+    "import cartopy.feature as cfeature\n",
+    "from dask.distributed import Client, LocalCluster\n",
+    "\n",
+    "import scripts.colors as colors\n",
+    "import scripts.metrics as metric\n",
+    "from scripts.utility import load_yaml\n",
+    "from scripts.utility import no_time_variable\n",
+    "\n",
+    "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n",
+    "land = cfeature.LAND\n",
+    "\n",
+    "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n",
+    "print(xr.__name__, xr.__version__)\n",
+    "print(pd.__name__, pd.__version__)\n",
+    "print(gpd.__name__, gpd.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-------------------------\n",
+    "## 1. Setup "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# Parameter Defaults\n",
+    "# parameters are set in CUPiD's config.yml file\n",
+    "\n",
+    "CESM_output_dir = \"\"\n",
+    "case_name = None  # case name\n",
+    "base_case_name = None  # base case name\n",
+    "start_date = \"\"\n",
+    "end_date = \"\"\n",
+    "serial = False  # use dask LocalCluster\n",
+    "lc_kwargs = {}\n",
+    "\n",
+    "analysis_name = \"\"  # Used for Figure png names\n",
+    "if not analysis_name:\n",
+    "    analysis_name = case_name\n",
+    "rof_start_date = start_date  # specify if different starting yyyy-mm-dd is desired\n",
+    "rof_end_date = end_date  # specify if different ending yyyy-mm-dd is desired\n",
+    "grid_name = \"f09_f09_mosart\"  # ROF grid name used in case\n",
+    "base_grid_name = (\n",
+    "    grid_name  # specify ROF grid name for base_case in config.yml if different than case\n",
+    ")\n",
+    "figureSave = False"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-------------------------\n",
+    "ROF ancillary data specification "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "setup = load_yaml(\"./setup/setup.yaml\")\n",
+    "\n",
+    "ancillary_dir = setup[\n",
+    "    \"ancillary_dir\"\n",
+    "]  # ancillary directory including ROF domain, river network data etc.\n",
+    "ref_flow_dir = setup[\"ref_flow_dir\"]  # including observed or reference flow data\n",
+    "case_meta = setup[\"case_meta\"]  # Case metadata\n",
+    "reach_gpkg = setup[\"reach_gpkg\"]  # river reach geopackage meta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "case_list = [case for case in [case_name, base_case_name] if case is not None]\n",
+    "grid_list = [grid for grid in [grid_name, base_grid_name] if grid is not None]\n",
+    "time_period = slice(f\"{rof_start_date}\", f\"{rof_end_date}\")  # analysis time period\n",
+    "nyrs = int(rof_end_date[:4]) - int(rof_start_date[:4]) + 1  # number of years\n",
+    "nmons = nyrs * 12  # number of months\n",
+    "year_list = [\n",
+    "    \"{:04d}\".format(yr)\n",
+    "    for yr in np.arange(int(rof_start_date[0:4]), int(rof_end_date[0:4]) + 1)\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-----\n",
+    "### dask (optional)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Spin up cluster (if running in parallel)\n",
+    "client = None\n",
+    "if not serial:\n",
+    "    cluster = LocalCluster(**lc_kwargs)\n",
+    "    client = Client(cluster)\n",
+    "\n",
+    "client"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Loading data "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.1. Monthly/annual flow netCDFs\n",
+    "- month_data (xr dataset) - read from archive\n",
+    "- year_data (xr dataset) - computed from monthly\n",
+    "- seas_data (xr dataset) - computed from monthly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "reachID = {}\n",
+    "month_data = {}\n",
+    "year_data = {}\n",
+    "seas_data = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n",
+    "    model = case_meta[grid][\"model\"]\n",
+    "    domain = case_meta[grid][\"domain_nc\"]\n",
+    "    var_list = case_meta[grid][\"vars_read\"]\n",
+    "\n",
+    "    def preprocess(ds):\n",
+    "        return ds[var_list]\n",
+    "\n",
+    "    # monthly\n",
+    "    nc_list = []\n",
+    "    for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.h*.????-*.nc\")):\n",
+    "        for yr in year_list:\n",
+    "            if yr in os.path.basename(nc_path):\n",
+    "                nc_list.append(nc_path)\n",
+    "\n",
+    "    month_data[case] = (\n",
+    "        xr.open_mfdataset(\n",
+    "            nc_list,\n",
+    "            data_vars=\"minimal\",\n",
+    "            parallel=True,\n",
+    "            preprocess=preprocess,\n",
+    "        )\n",
+    "        .sel(time=time_period)\n",
+    "        .load()\n",
+    "    )\n",
+    "    # annual\n",
+    "    year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n",
+    "\n",
+    "    # seasonal (compute here instead of reading for consistent analysis period)\n",
+    "    seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n",
+    "    vars_no_time = no_time_variable(month_data[case])\n",
+    "    if vars_no_time:\n",
+    "        seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n",
+    "            month=0, drop=True\n",
+    "        )\n",
+    "    time = month_data[case][\"time\"]\n",
+    "    if domain == \"None\":\n",
+    "        reachID[case] = month_data[case][\"reachID\"].values\n",
+    "    else:\n",
+    "        reachID[case] = xr.open_dataset(f\"{ancillary_dir}/{domain}\")[\"reachID\"].values\n",
+    "    print(f\"Finished loading {case}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.2 Large river ID and name ascii\n",
+    "- big_river_50: dictionary {_river name_:_site_id_}\n",
+    "- big_river_24: dictionary {_river name_:_site_id_}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"./setup/large_river_50.txt\", index_col=\"river_name\")\n",
+    "big_river_50 = {key: values[\"site_id\"] for key, values in df.iterrows()}\n",
+    "big_river_24 = {\n",
+    "    key: values[\"site_id\"] for ix, (key, values) in enumerate(df.iterrows()) if ix < 24\n",
+    "}  # The first 24 are used for plots"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.3. reach-D19 gauge link csv\n",
+    "- gauge_reach_lnk (dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gauge_reach_lnk = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    gauge_reach_lnk[case] = pd.read_csv(\n",
+    "        \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[grid][\"network\"])\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.4 D19 flow site geopackage\n",
+    "- gauge_shp (dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "gauge_shp = gpd.read_file(\n",
+    "    os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n",
+    ")\n",
+    "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.5 D19 discharge data\n",
+    "- ds_q_obs_mon (xr datasets)\n",
+    "- ds_q_obs_yr (xr datasets)\n",
+    "- dr_q_obs_seasonal (xr datasets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# read monthly data\n",
+    "ds_q = xr.open_dataset(\n",
+    "    \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n",
+    "    decode_times=False,\n",
+    ")\n",
+    "ds_q[\"time\"] = xr.cftime_range(\n",
+    "    start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n",
+    ")\n",
+    "\n",
+    "# monthly - if time_period is outside observation period, use the entire obs period\n",
+    "obs_available = True\n",
+    "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n",
+    "    obs_available = False\n",
+    "    ds_q_obs_mon = ds_q[\"FLOW\"]\n",
+    "else:\n",
+    "    ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n",
+    "# compute annual flow from monthly\n",
+    "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n",
+    "# compute annual cycle at monthly scale\n",
+    "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")\n",
+    "# annual mean, max, and min flow during time_period\n",
+    "ds_q_obs_yr_min = ds_q_obs_yr.min(\"time\")\n",
+    "ds_q_obs_yr_max = ds_q_obs_yr.max(\"time\")\n",
+    "ds_q_obs_yr_mean = ds_q_obs_yr.mean(\"time\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.6 Get observation and simulation indices for each gauge (processing)\n",
+    "- gauge_plot (dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "gauge_plot = {}\n",
+    "for gname, site_id in big_river_50.items():\n",
+    "    gauge_plot[gname] = {}\n",
+    "    for case in case_list:\n",
+    "        gauge_ix = [\n",
+    "            i for i, gid in enumerate(ds_q.id.values) if gid == site_id\n",
+    "        ]  # go through obs Dataset and get index matching to river (gauge) name\n",
+    "        gauge_id = ds_q.id.values[gauge_ix][0]  # gauge ID\n",
+    "        seg_id = (\n",
+    "            gauge_reach_lnk[case]\n",
+    "            .loc[gauge_reach_lnk[case][\"gauge_id\"] == gauge_id][\"route_id\"]\n",
+    "            .values\n",
+    "        )  # matching reach ID in river network\n",
+    "        seg_ix = np.argwhere(\n",
+    "            reachID[case] == seg_id\n",
+    "        )  # matching reach index in 
river network\n", + " if len(seg_ix) == 0:\n", + " seg_ix = -999\n", + " else:\n", + " seg_ix = seg_ix[0]\n", + " gauge_plot[gname][case] = [gauge_ix, seg_ix, seg_id]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "------\n", + "## 3. Analysis for 24 large rivers " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Annual flow series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n", + "plt.subplots_adjust(\n", + " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n", + ") # create some space below the plots by increasing the bottom-value\n", + "\n", + "for ix, river_name in enumerate(big_river_24.keys()):\n", + " row = ix // 3\n", + " col = ix % 3\n", + " for case, grid in zip(case_list, grid_list):\n", + " net_idx = gauge_plot[river_name][case][1]\n", + " gaug_idx = gauge_plot[river_name][case][0]\n", + "\n", + " q_name = case_meta[grid][\"flow_name\"]\n", + "\n", + " if len(net_idx) == 1:\n", + " year_data[case][q_name][:, net_idx].plot(\n", + " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n", + " )\n", + " elif len(net_idx) == 2: # means 2d grid\n", + " year_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n", + " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n", + " )\n", + " if obs_available:\n", + " ds_q_obs_yr.loc[:, gaug_idx].plot(\n", + " ax=axes[row, col],\n", + " linestyle=\"None\",\n", + " marker=\"o\",\n", + " markersize=3,\n", + " c=\"k\",\n", + " label=\"D19\",\n", + " )\n", + " else:\n", + " qob_mean = ds_q_obs_yr_mean.loc[gaug_idx]\n", + " qob_max = ds_q_obs_yr_max.loc[gaug_idx]\n", + " qob_min = ds_q_obs_yr_min.loc[gaug_idx]\n", + " axes[row, col].axhline(\n", + " y=qob_mean, color=\"k\", linestyle=\"--\", lw=0.5, label=\"D19 mean (1900-2018)\"\n", + " )\n", + " axes[row, col].axhspan(\n", + " qob_min[0],\n", + " qob_max[0],\n", + " color=\"gray\",\n", + " alpha=0.1,\n", + " label=\"D19 min-max (1900-2018)\",\n", + " )\n", + "\n", + " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n", + "\n", + " axes[row, col].set_xlabel(\"\")\n", + " if row < 7:\n", + " axes[row, col].set_xticklabels(\"\")\n", + " if col == 0:\n", + " axes[row, col].set_ylabel(\"Annual flow [m$^3$/s]\", fontsize=8)\n", + " else:\n", + " axes[row, col].set_ylabel(\"\")\n", + " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", + "\n", + "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n", + "axes.flatten()[-2].legend(\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.125, -0.35, 0.75, 0.1),\n", + " ncol=5,\n", + " fontsize=\"x-small\",\n", + ")\n", + "\n", + "if figureSave:\n", + " plt.savefig(f\"./NB1_Fig1_big_river_annual_{analysis_name}.png\", dpi=200)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.2. 
Annual cycle at monthly step" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n", + "plt.subplots_adjust(\n", + " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n", + ") # create some space below the plots by increasing the bottom-value\n", + "\n", + "for ix, river_name in enumerate(big_river_24.keys()):\n", + " row = ix // 3\n", + " col = ix % 3\n", + " for case, grid in zip(case_list, grid_list):\n", + "\n", + " net_idx = gauge_plot[river_name][case][1]\n", + " gaug_idx = gauge_plot[river_name][case][0]\n", + "\n", + " q_name = case_meta[grid][\"flow_name\"]\n", + "\n", + " if len(net_idx) == 1: # means vector\n", + " seas_data[case][q_name][:, net_idx].plot(\n", + " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n", + " )\n", + " elif len(net_idx) == 2: # means 2d grid\n", + " seas_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n", + " ax=axes[row, col], linestyle=\"-\", lw=1.0, label=case\n", + " )\n", + "\n", + " if obs_available:\n", + " dr_q_obs_seasonal.loc[:, gaug_idx].plot(\n", + " ax=axes[row, col],\n", + " linestyle=\":\",\n", + " lw=0.5,\n", + " marker=\"o\",\n", + " markersize=2,\n", + " c=\"k\",\n", + " label=\"D19\",\n", + " )\n", + "\n", + " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n", + " axes[row, col].set_xlabel(\"\")\n", + " if row < 7:\n", + " axes[row, col].set_xticklabels(\"\")\n", + " if col == 0:\n", + " axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=8)\n", + " else:\n", + " axes[row, col].set_ylabel(\"\")\n", + " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", + "\n", + "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n", + "axes.flatten()[-2].legend(\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.10, -0.30, 0.75, 0.1),\n", + " ncol=5,\n", + " fontsize=\"x-small\",\n", + ")\n", + "\n", + "if figureSave:\n", + " plt.savefig(f\"./NB1_Fig2_big_river_season_{analysis_name}.png\", dpi=200)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.3. 
scatter plots of monthly flow - obs vs sim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "if obs_available:\n", + " # Monthly flow scatter plot\n", + " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", + " plt.subplots_adjust(\n", + " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", + " )\n", + "\n", + " for ix, river_name in enumerate(big_river_24.keys()):\n", + " row = ix // 3\n", + " col = ix % 3\n", + " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", + "\n", + " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n", + "\n", + " net_idx = gauge_plot[river_name][case][1]\n", + " gaug_idx = gauge_plot[river_name][case][0]\n", + "\n", + " q_name = case_meta[grid][\"flow_name\"]\n", + "\n", + " if len(net_idx) == 1: # means vector\n", + " ds_sim = month_data[case][q_name][:, net_idx].sel(time=time_period)\n", + " elif len(net_idx) == 2: # means 2d grid\n", + " ds_sim = (\n", + " month_data[case][q_name][:, net_idx[0], net_idx[1]]\n", + " .sel(time=time_period)\n", + " .squeeze()\n", + " )\n", + "\n", + " ds_obs = ds_q_obs_mon.sel(time=time_period).loc[:, gaug_idx]\n", + "\n", + " axes[row, col].plot(\n", + " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n", + " )\n", + " if jx == 0:\n", + " max_val = np.max(ds_obs)\n", + " min_val = np.min(ds_obs)\n", + " else:\n", + " max_val = np.max([np.max(ds_sim), max_val])\n", + " min_val = np.min([np.min(ds_sim), min_val])\n", + "\n", + " axes[row, col].plot(\n", + " [min_val * 0.98, max_val * 1.02],\n", + " [min_val * 0.98, max_val * 1.02],\n", + " \":k\",\n", + " linewidth=0.5,\n", + " )\n", + "\n", + " axes[row, col].annotate(\n", + " \"%d %s\" % (ix + 1, river_name),\n", + " xy=(0.05, 0.875),\n", + " xycoords=\"axes fraction\",\n", + " fontsize=8,\n", + " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", + " )\n", + " if row == 7 and col == 1:\n", + " axes[row, col].set_xlabel(\n", + " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n", + " )\n", + " else:\n", + " axes[row, col].set_xlabel(\"\")\n", + "\n", + " if col == 0 and row == 5:\n", + " axes[row, col].set_ylabel(\n", + " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n", + " )\n", + " else:\n", + " axes[row, col].set_ylabel(\"\")\n", + " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", + "\n", + " axes.flatten()[-2].legend(\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", + " ncol=5,\n", + " fontsize=\"x-small\",\n", + " )\n", + "\n", + " if figureSave:\n", + " plt.savefig(f\"./NB1_Fig3_big_river_month_scatter_{analysis_name}.png\", dpi=200)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.4. 
scatter plots of annual flow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "if obs_available:\n",
+    "    # annual flow scatter plot\n",
+    "    fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n",
+    "    plt.subplots_adjust(\n",
+    "        top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n",
+    "    )\n",
+    "\n",
+    "    for ix, river_name in enumerate(big_river_24.keys()):\n",
+    "        row = ix // 3\n",
+    "        col = ix % 3\n",
+    "        axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n",
+    "\n",
+    "        for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+    "\n",
+    "            net_idx = gauge_plot[river_name][case][1]\n",
+    "            gaug_idx = gauge_plot[river_name][case][0]\n",
+    "\n",
+    "            q_name = case_meta[grid][\"flow_name\"]\n",
+    "\n",
+    "            if len(net_idx) == 1:  # means vector\n",
+    "                ds_sim = year_data[case][q_name][:, net_idx].sel(time=time_period)\n",
+    "            elif len(net_idx) == 2:  # means 2d grid\n",
+    "                ds_sim = year_data[case][q_name][:, net_idx[0], net_idx[1]].sel(\n",
+    "                    time=time_period\n",
+    "                )\n",
+    "\n",
+    "            ds_obs = ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx]\n",
+    "\n",
+    "            axes[row, col].plot(\n",
+    "                ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n",
+    "            )\n",
+    "            if jx == 0:\n",
+    "                max_val = np.max(ds_obs)\n",
+    "                min_val = np.min(ds_obs)\n",
+    "            else:\n",
+    "                max_val = np.max([np.max(ds_sim), max_val])\n",
+    "                min_val = np.min([np.min(ds_sim), min_val])\n",
+    "\n",
+    "        axes[row, col].plot(\n",
+    "            [min_val * 0.98, max_val * 1.02],\n",
+    "            [min_val * 0.98, max_val * 1.02],\n",
+    "            \":k\",\n",
+    "            linewidth=0.5,\n",
+    "        )\n",
+    "\n",
+    "        axes[row, col].annotate(\n",
+    "            \"%d %s\" % (ix + 1, river_name),\n",
+    "            xy=(0.05, 0.875),\n",
+    "            xycoords=\"axes fraction\",\n",
+    "            fontsize=8,\n",
+    "            bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n",
+    "        )\n",
+    "        if row == 7 and col == 1:\n",
+    "            axes[row, col].set_xlabel(\n",
+    "                \"Annual reference discharge [m$^3$/s]\", fontsize=9\n",
+    "            )\n",
+    "        else:\n",
+    "            axes[row, col].set_xlabel(\"\")\n",
+    "\n",
+    "        if col == 0 and row == 5:\n",
+    "            axes[row, col].set_ylabel(\n",
+    "                \"Annual simulated discharge [m$^3$/s]\", fontsize=10\n",
+    "            )\n",
+    "        else:\n",
+    "            axes[row, col].set_ylabel(\"\")\n",
+    "        axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+    "\n",
+    "    axes.flatten()[-2].legend(\n",
+    "        loc=\"upper center\",\n",
+    "        bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n",
+    "        ncol=5,\n",
+    "        fontsize=\"x-small\",\n",
+    "    )\n",
+    "\n",
+    "    if figureSave:\n",
+    "        plt.savefig(f\"./NB1_Fig4_big_river_annual_scatter_{analysis_name}.png\", dpi=200)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Analysis for 50 large rivers "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.1 Summary tables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "with open(f\"large50rivers_{analysis_name}.txt\", \"w\") as f:\n",
+    "    # some meta\n",
+    "    f.write(\"# obs: Dai et al. (2019)\\n\")\n",
+    "    f.write(\"# vol: km^3/yr\\n\")\n",
+    "    f.write(\"# area: km^2\\n\")\n",
+    "\n",
+    "    # headers\n",
+    "    f.write(\"No, river_name,\")\n",
+    "    f.write(\"{0: >15}_vol,\".format(\"obs\"))\n",
+    "    for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+    "        f.write(\"{0: >15}_vol,\".format(grid))\n",
+    "    f.write(\"{0: >15}_area\".format(\"obs\"))\n",
+    "    for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+    "        f.write(\",\")\n",
+    "        f.write(\"{0: >15}_area\".format(grid))\n",
+    "    f.write(\"\\n\")\n",
+    "\n",
+    "    # data for each river\n",
+    "    for ix, river_name in enumerate(big_river_50.keys()):\n",
+    "\n",
+    "        f.write(\"%02d,\" % (ix + 1))\n",
+    "        f.write(\"{0: >20}\".format(river_name))\n",
+    "\n",
+    "        for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+    "            f.write(\",\")\n",
+    "            net_idx = gauge_plot[river_name][case][1]\n",
+    "            gaug_idx = gauge_plot[river_name][case][0]\n",
+    "\n",
+    "            q_name = case_meta[grid][\"flow_name\"]\n",
+    "\n",
+    "            if len(net_idx) == 1:  # means vector\n",
+    "                qsim = (\n",
+    "                    np.nanmean(\n",
+    "                        year_data[case][q_name][:, net_idx].sel(time=time_period).values\n",
+    "                    )\n",
+    "                    * 60\n",
+    "                    * 60\n",
+    "                    * 24\n",
+    "                    * 365\n",
+    "                    / 10**9\n",
+    "                )\n",
+    "            elif len(net_idx) == 2:  # means 2d grid\n",
+    "                qsim = (\n",
+    "                    np.nanmean(\n",
+    "                        year_data[case][q_name][:, net_idx[0], net_idx[1]]\n",
+    "                        .sel(time=time_period)\n",
+    "                        .values\n",
+    "                    )\n",
+    "                    * 60\n",
+    "                    * 60\n",
+    "                    * 24\n",
+    "                    * 365\n",
+    "                    / 10**9\n",
+    "                )\n",
+    "\n",
+    "            if jx == 0:\n",
+    "                qobs = (\n",
+    "                    ds_q_obs_yr_mean.loc[gaug_idx].values[0]\n",
+    "                    * 60\n",
+    "                    * 60\n",
+    "                    * 24\n",
+    "                    * 365\n",
+    "                    / 10**9\n",
+    "                )\n",
+    "                f.write(\"{:19.1f},\".format(qobs))\n",
+    "\n",
+    "            f.write(\"{:19.1f}\".format(qsim))\n",
+    "\n",
+    "        for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+    "            f.write(\",\")\n",
+    "            gaug_idx = gauge_plot[river_name][case][0]\n",
+    "\n",
+    "            # just get gauge_id from ds_q for now\n",
+    "            gauge_id = ds_q[\"id\"].loc[gaug_idx].values[0]\n",
+    "            network_area = gauge_reach_lnk[case][\n",
+    "                gauge_reach_lnk[case][\"gauge_id\"] == gauge_id\n",
+    "            ][\"route_area\"].values[0]\n",
+    "\n",
+    "            if jx == 0:\n",
+    "                area = ds_q[\"area_stn\"].loc[gaug_idx].values[0]\n",
+    "                f.write(\"{:20.0f},\".format(area))\n",
+    "\n",
+    "            f.write(\"{:20.0f}\".format(network_area))\n",
+    "\n",
+    "        f.write(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.2. 
scatter plot of annual mean flow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if obs_available:\n", + "\n", + " df_yr_vol = pd.read_csv(\n", + " f\"./large50rivers_{analysis_name}.txt\",\n", + " skiprows=3,\n", + " index_col=[\"No\"],\n", + " skipinitialspace=True,\n", + " )\n", + "\n", + " fig, axes = plt.subplots(1, figsize=(5.50, 5.50))\n", + " regressor = LinearRegression()\n", + " bias_text = \"\"\n", + "\n", + " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n", + "\n", + " # compute linear regression\n", + " df_reg = df_yr_vol[[\"obs_vol\", f\"{grid}_vol\"]].dropna()\n", + " regressor.fit(df_reg[[\"obs_vol\"]], df_reg[f\"{grid}_vol\"])\n", + " y_pred = regressor.predict(df_reg[[\"obs_vol\"]])\n", + "\n", + " # compute bias over 50 sites\n", + " diff = (df_yr_vol[f\"{grid}_vol\"] - df_yr_vol[\"obs_vol\"]).mean(\n", + " axis=0, skipna=True\n", + " )\n", + "\n", + " df_yr_vol.plot(\n", + " ax=axes,\n", + " kind=\"scatter\",\n", + " x=\"obs_vol\",\n", + " y=f\"{grid}_vol\",\n", + " s=30,\n", + " alpha=0.6,\n", + " label=grid,\n", + " )\n", + " # plt.plot(df_reg['obs_vol'], y_pred, color=color)\n", + " bias_text = bias_text + f\"\\n{grid}: {diff:.1f} \"\n", + "\n", + " plt.text(\n", + " 0.65, 0.30, \"bias [km3/yr]\", transform=axes.transAxes, verticalalignment=\"top\"\n", + " )\n", + " plt.text(\n", + " 0.65, 0.30, f\"{bias_text} \", transform=axes.transAxes, verticalalignment=\"top\"\n", + " )\n", + "\n", + " plt.axline((0, 0), slope=1, linestyle=\"--\", color=\"black\")\n", + " axes.set_xscale(\"log\")\n", + " axes.set_yscale(\"log\")\n", + " axes.set_xlabel(\"reference flow\")\n", + " axes.set_ylabel(\"Simulated flow\")\n", + " axes.set_title(\"River Flow at stations [km^3/yr]\")\n", + "\n", + " if figureSave:\n", + " plt.savefig(\n", + " f\"./NB1_Fig5_50big_river_annual_flow_scatter_{analysis_name}.png\", dpi=200\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "------\n", + "## 5. 
Analysis for all 922 sites "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.1 Compute metrics at all the sites (no plots or tables)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "if obs_available:\n",
+    "    # Merge gauge_reach lnk (dataframe) into gauge shapefile\n",
+    "    gauge_shp1 = {}\n",
+    "    for case, df in gauge_reach_lnk.items():\n",
+    "        # df = df.loc[(df['flag'] == 0)]\n",
+    "        df1 = df.drop(columns=[\"riv_name\"])\n",
+    "        gauge_shp1[case] = pd.merge(\n",
+    "            gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\"\n",
+    "        )\n",
+    "\n",
+    "    # compute %bias, correlation, and RMSE at each site based on monthly\n",
+    "    mon_pbias = {}\n",
+    "    mon_corr = {}\n",
+    "    mon_rmse = {}\n",
+    "\n",
+    "    for case, grid in zip(case_list, grid_list):\n",
+    "\n",
+    "        bias = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+    "        corr = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+    "        rmse = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+    "\n",
+    "        for ix, row in gauge_shp1[case].iterrows():\n",
+    "            q_obs = np.full(\n",
+    "                nmons, np.nan, dtype=float\n",
+    "            )  # dummy q_obs that will be replaced by actual data if it exists\n",
+    "            q_sim = np.full(\n",
+    "                nmons, np.nan, dtype=float\n",
+    "            )  # dummy q_sim that will be replaced by actual data if it exists\n",
+    "\n",
+    "            route_id = row[\"route_id\"]\n",
+    "            gauge_id = row[\"gauge_id\"]\n",
+    "\n",
+    "            q_name = case_meta[grid][\"flow_name\"]\n",
+    "\n",
+    "            gauge_ix = np.argwhere(ds_q.id.values == gauge_id)\n",
+    "            if len(gauge_ix) == 1:\n",
+    "                gauge_ix = gauge_ix[0]\n",
+    "                q_obs = ds_q_obs_mon[:, gauge_ix].squeeze().values\n",
+    "\n",
+    "            route_ix = np.argwhere(reachID[case] == route_id)\n",
+    "            if (\n",
+    "                len(route_ix) == 1\n",
+    "            ):  # meaning the flow site is in the network and a simulation exists at this site\n",
+    "                route_ix = route_ix[0]\n",
+    "                if len(route_ix) == 1:  # means vector\n",
+    "                    q_sim = month_data[case][q_name][:, route_ix].squeeze().values\n",
+    "                elif len(route_ix) == 2:  # means 2d grid\n",
+    "                    q_sim = (\n",
+    "                        month_data[case][q_name][:, route_ix[0], route_ix[1]]\n",
+    "                        .squeeze()\n",
+    "                        .values\n",
+    "                    )\n",
+    "\n",
+    "            # compute %bias, correlation, RMSE\n",
+    "            bias[ix] = metric.pbias(q_sim, q_obs)\n",
+    "            corr[ix] = np.corrcoef(q_sim, q_obs)[0, 1]\n",
+    "            rmse[ix] = metric.rmse(q_sim, q_obs)\n",
+    "\n",
+    "        mon_pbias[case] = bias\n",
+    "        mon_corr[case] = corr\n",
+    "        mon_rmse[case] = rmse\n",
+    "\n",
+    "        gauge_shp1[case][f\"bias_{grid}\"] = bias\n",
+    "        gauge_shp1[case][f\"corr_{grid}\"] = corr\n",
+    "        gauge_shp1[case][f\"rmse_{grid}\"] = rmse"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.2. Spatial metric map "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "if obs_available:\n",
+    "    # some local plot setups\n",
+    "    cbar_kwrgs = {\n",
+    "        \"bias\": {\n",
+    "            \"shrink\": 0.9,\n",
+    "            \"pad\": 0.02,\n",
+    "            \"orientation\": \"horizontal\",\n",
+    "            \"extend\": \"both\",\n",
+    "        },\n",
+    "        \"corr\": {\n",
+    "            \"shrink\": 0.9,\n",
+    "            \"pad\": 0.02,\n",
+    "            \"orientation\": \"horizontal\",\n",
+    "            \"extend\": \"min\",\n",
+    "        },\n",
+    "        \"rmse\": {\n",
+    "            \"shrink\": 0.9,\n",
+    "            \"pad\": 0.02,\n",
+    "            \"orientation\": \"horizontal\",\n",
+    "            \"extend\": \"max\",\n",
+    "        },\n",
+    "    }\n",
+    "\n",
+    "    meta = {\n",
+    "        \"bias\": {\n",
+    "            \"name\": \"%bias\",\n",
+    "            \"vmin\": -100,\n",
+    "            \"vmax\": 100,\n",
+    "            \"cm\": colors.cmap_RedGrayBlue,\n",
+    "        },\n",
+    "        \"corr\": {\"name\": \"correlation\", \"vmin\": 0.2, \"vmax\": 1, \"cm\": colors.cmap_corr},\n",
+    "        \"rmse\": {\"name\": \"RMSE\", \"vmin\": 0, \"vmax\": 1000, \"cm\": mpl.cm.turbo},\n",
+    "    }\n",
+    "\n",
+    "    for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n",
+    "        for case, grid in zip(case_list, grid_list):\n",
+    "            fig, ax = plt.subplots(\n",
+    "                1, figsize=(7.5, 4.0), subplot_kw={\"projection\": ccrs.PlateCarree()}\n",
+    "            )\n",
+    "\n",
+    "            ax.add_feature(\n",
+    "                rivers_50m, facecolor=\"None\", edgecolor=\"b\", lw=0.5, alpha=0.3\n",
+    "            )\n",
+    "            ax.add_feature(land, facecolor=\"white\", edgecolor=\"grey\")\n",
+    "\n",
+    "            gauge_shp1[case].plot(\n",
+    "                ax=ax,\n",
+    "                column=f\"{error_metric}_{grid}\",\n",
+    "                markersize=10,\n",
+    "                cmap=meta[error_metric][\"cm\"],\n",
+    "                vmin=meta[error_metric][\"vmin\"],\n",
+    "                vmax=meta[error_metric][\"vmax\"],\n",
+    "            )\n",
+    "\n",
+    "            ax.set_title(\"%s %s\" % (case, meta[error_metric][\"name\"]))\n",
+    "\n",
+    "            points = ax.collections[-1]\n",
+    "            plt.colorbar(points, ax=ax, **cbar_kwrgs[error_metric])\n",
+    "\n",
+    "            plt.tight_layout()\n",
+    "\n",
+    "            if figureSave:\n",
+    "                plt.savefig(f\"./NB1_Fig6_{error_metric}_{case}_map.png\", dpi=200)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.3 Boxplots of error metrics (RMSE, %bias, and correlation coefficient)\n",
+    "Boxplot distribution is based on metrics sampled at 922 sites.\n",
+    "\n",
+    "The box extends from the Q1 to Q3 quartile values of the data, with a line at the median (Q2). \n",
+    "The whiskers extend from the edges of the box to show the range of the data. \n",
+    "By default, they extend no more than 1.5 * IQR (IQR = Q3 - Q1) from the edges of the box, ending at the farthest data point within that interval. \n",
+    "Outliers are plotted as separate dots.\n",
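+    "\n",
+    "As a quick numeric check of the whisker rule (a minimal sketch on a toy sample, independent of the metrics above):\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "x = np.array([1.0, 2.0, 3.0, 4.0, 100.0])  # toy metric sample\n",
+    "q1, q3 = np.percentile(x, [25, 75])  # 2.0 and 4.0\n",
+    "iqr = q3 - q1  # 2.0\n",
+    "lo, hi = q1 - 1.5 * iqr, q3 + 1.5 * iqr  # whiskers end within [-1.0, 7.0]\n",
+    "# 100.0 falls outside, so it would be drawn as a separate outlier dot\n",
+    "```\n"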
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if obs_available:\n",
+    "    boxprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"blue\"}\n",
+    "    medianprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"red\"}\n",
+    "\n",
+    "    for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n",
+    "        column_stat = []\n",
+    "        gauge_shp_all_case = gauge_shp.copy(deep=True)\n",
+    "        for case, grid in zip(case_list, grid_list):\n",
+    "            gauge_shp_all_case = gauge_shp_all_case.merge(\n",
+    "                gauge_shp1[case][[\"id\", f\"{error_metric}_{grid}\"]],\n",
+    "                left_on=\"id\",\n",
+    "                right_on=\"id\",\n",
+    "            )\n",
+    "            column_stat.append(f\"{error_metric}_{grid}\")\n",
+    "        fig, ax = plt.subplots(1, figsize=(6.5, 4))\n",
+    "        gauge_shp_all_case.boxplot(\n",
+    "            ax=ax,\n",
+    "            column=column_stat,\n",
+    "            boxprops=boxprops,\n",
+    "            medianprops=medianprops,\n",
+    "            sym=\".\",\n",
+    "        )\n",
+    "\n",
+    "        xticklabels = [label[len(error_metric) + 1 :] for label in column_stat]\n",
+    "        ax.set_xticklabels(xticklabels)\n",
+    "\n",
+    "        if error_metric == \"rmse\":\n",
+    "            ax.set_ylim([0, 1000])\n",
+    "            ax.set_title(\"RMSE [m3/s]\")\n",
+    "        elif error_metric == \"bias\":\n",
+    "            ax.set_ylim([-150, 250])\n",
+    "            ax.set_title(\"%bias [%]\")\n",
+    "        elif error_metric == \"corr\":\n",
+    "            ax.set_ylim([-0.2, 1])\n",
+    "            ax.set_title(\"correlation\")\n",
+    "\n",
+    "        if figureSave:\n",
+    "            plt.savefig(f\"./NB1_Fig7_{error_metric}_boxplot.png\", dpi=150)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cupid-analysis",
+   "language": "python",
+   "name": "cupid-analysis"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb b/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb
new file mode 100644
index 0000000..5d09da6
--- /dev/null
+++ b/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb
@@ -0,0 +1,603 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## ROF monthly, annual, seasonal discharge at ocean outlets \n",
+    "\n",
+    "Use the following datasets\n",
+    "\n",
+    "1. reach-D19 gauge link ascii\n",
+    "2. D19 flow site geopackage\n",
+    "3. D19 discharge netCDF\n",
+    "4. monthly and yearly flow netCDF (history file)\n",
+    "\n",
+    "[1. Setup](#setup)\n",
+    "\n",
+    "[2. Loading discharge data](#load_discharge_data)\n",
+    "\n",
+    "- Read monthly history files from archive. \n",
+    "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n",
+    "\n",
+    "[3. Read river, catchment, gauge information](#read_ancillary)\n",
+    "\n",
+    "- catchment polygon (geopackage)\n",
+    "- gauge point (geopackage)\n",
+    "- gauge-catchment link (csv)\n",
+    "- outlet reach information (netCDF) including discharging ocean names\n",
+    "\n",
+    "[4. Ocean discharge line plots](#24_large_rivers)\n",
+    "\n",
+    "- total seasonal flow for oceans. \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "\n",
+    "import os, sys\n",
+    "import glob\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "import xarray as xr\n",
+    "import cartopy.feature as cfeature\n",
+    "from dask.distributed import Client, LocalCluster\n",
+    "\n",
+    "from scripts.utility import load_yaml\n",
+    "from scripts.utility import no_time_variable\n",
+    "from scripts.utility import read_shps\n",
+    "from scripts.utility import get_index_array\n",
+    "\n",
+    "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n",
+    "land = cfeature.LAND\n",
+    "\n",
+    "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n",
+    "print(xr.__name__, xr.__version__)\n",
+    "print(pd.__name__, pd.__version__)\n",
+    "print(gpd.__name__, gpd.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-------------------------\n",
+    "## 1. Setup "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# Parameter Defaults\n",
+    "# parameters are set in CUPiD's config.yml file\n",
+    "\n",
+    "CESM_output_dir = \"\"\n",
+    "case_name = None  # case name\n",
+    "base_case_name = None  # base case name\n",
+    "start_date = \"\"\n",
+    "end_date = \"\"\n",
+    "serial = False  # use dask LocalCluster\n",
+    "lc_kwargs = {}\n",
+    "\n",
+    "analysis_name = \"\"  # Used for Figure png names\n",
+    "if not analysis_name:\n",
+    "    analysis_name = case_name\n",
+    "rof_start_date = start_date  # specify if different starting yyyy-mm-dd is desired\n",
+    "rof_end_date = end_date  # specify if different ending yyyy-mm-dd is desired\n",
+    "grid_name = \"f09_f09_mosart\"  # ROF grid name used in case\n",
+    "base_grid_name = (\n",
+    "    grid_name  # specify ROF grid name for base_case in config.yml if different than case\n",
+    ")\n",
+    "figureSave = False"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-------------------------\n",
+    "ROF ancillary data specification "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "setup = load_yaml(\"./setup/setup.yaml\")\n",
+    "\n",
+    "domain_dir = setup[\n",
+    "    \"ancillary_dir\"\n",
+    "]  # ancillary directory including ROF domain, river network data, etc.\n",
+    "geospatial_dir = setup[\"geospatial_dir\"]  # including shapefiles or geopackages\n",
+    "ref_flow_dir = setup[\"ref_flow_dir\"]  # including observed or reference flow data\n",
+    "case_meta = setup[\"case_meta\"]  # ROF grid metadata\n",
+    "catch_gpkg = setup[\"catch_gpkg\"]  # catchment geopackage meta\n",
+    "reach_gpkg = setup[\"reach_gpkg\"]  # reach geopackage meta\n",
+    "network_nc = setup[\"river_network\"]  # river network meta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "oceans_list = [\n",
+    "    \"arctic\",\n",
+    "    \"atlantic\",\n",
+    "    \"indian\",\n",
+    "    \"mediterranean\",\n",
+    "    \"pacific\",\n",
+    "    \"south_china\",\n",
+    "    \"global\",\n",
+    "]\n",
+    "case_list = [case for case in [case_name, base_case_name] if case is not None]\n",
+    "grid_list = [grid for grid in [grid_name, base_grid_name] if grid is not None]\n",
+    "time_period = slice(f\"{rof_start_date}\", f\"{rof_end_date}\")  # analysis time period\n",
+    "nyrs = int(rof_end_date[:4]) - int(rof_start_date[:4]) + 1  # number of years\n",
+    "nmons = nyrs * 12  # number of months\n",
+    "year_list = [\n",
+    "    \"{:04d}\".format(yr)\n",
+    "    for yr in np.arange(int(rof_start_date[0:4]), int(rof_end_date[0:4]) + 1)\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-----\n",
+    "### dask (optional)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Spin up cluster (if running in parallel)\n",
+    "client = None\n",
+    "if not serial:\n",
+    "    cluster = LocalCluster(**lc_kwargs)\n",
+    "    client = Client(cluster)\n",
+    "\n",
+    "client"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Loading discharge data "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.1. Monthly/annual flow netCDFs\n",
+    "- month_data (xr dataset)\n",
+    "- year_data (xr dataset)\n",
+    "- seas_data (xr dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "reachID = {}\n",
+    "month_data = {}\n",
+    "year_data = {}\n",
+    "seas_data = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n",
+    "    model = case_meta[grid][\"model\"]\n",
+    "    domain = case_meta[grid][\"domain_nc\"]\n",
+    "    var_list = case_meta[grid][\"vars_read\"]\n",
+    "\n",
+    "    def preprocess(ds):\n",
+    "        return ds[var_list]\n",
+    "\n",
+    "    # monthly\n",
+    "    nc_list = []\n",
+    "    for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.h*.????-*.nc\")):\n",
+    "        for yr in year_list:\n",
+    "            if yr in os.path.basename(nc_path):\n",
+    "                nc_list.append(nc_path)\n",
+    "    month_data[case] = (\n",
+    "        xr.open_mfdataset(\n",
+    "            nc_list,\n",
+    "            data_vars=\"minimal\",\n",
+    "            parallel=True,\n",
+    "            preprocess=preprocess,\n",
+    "        )\n",
+    "        .sel(time=time_period)\n",
+    "        .load()\n",
+    "    )\n",
+    "    # annual\n",
+    "    year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n",
+    "\n",
+    "    # seasonal (compute here instead of reading for consistent analysis period)\n",
+    "    seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n",
+    "    vars_no_time = no_time_variable(month_data[case])\n",
+    "    if vars_no_time:\n",
+    "        seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n",
+    "            month=0, drop=True\n",
+    "        )\n",
+    "    time = month_data[case][\"time\"]\n",
+    "    if domain == \"None\":\n",
+    "        reachID[case] = month_data[case][\"reachID\"].values\n",
+    "    else:\n",
+    "        reachID[case] = (\n",
+    "            xr.open_dataset(f\"{domain_dir}/{domain}\")[\"reachID\"]\n",
+    "            .stack(seg=(\"lat\", \"lon\"))\n",
+    "            .values\n",
+    "        )\n",
+    "    print(case)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.2 D19 discharge data\n",
+    "- ds_q_obs_mon (xr datasets)\n",
+    "- ds_q_obs_yr (xr datasets)\n",
+    "- dr_q_obs_seasonal (xr datasets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# read monthly data\n",
+    "ds_q = xr.open_dataset(\n",
+    "    \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n",
+    "    decode_times=False,\n",
+    ")\n",
+    "ds_q[\"time\"] = xr.cftime_range(\n",
+    "    start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n",
+    ")\n",
+    "\n",
+    "# monthly - if time_period is outside observation period, use the entire obs period\n",
+    "obs_available = True\n",
+    "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n",
+    "    obs_available = False\n",
+    "    ds_q_obs_mon = ds_q[\"FLOW\"]\n",
+    "else:\n",
+    "    ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n",
+    "# compute annual flow from monthly\n",
+    "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n",
+    "# compute annual cycle at monthly scale\n",
+    "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Reading river, catchment, gauge information \n",
+    "\n",
+    "- catchment polygon (geopackage)\n",
+    "- gauge point (geopackage)\n",
+    "- gauge-catchment link (csv)\n",
+    "- outlet reach information (netCDF)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.1. reach-D19 gauge link csv\n",
+    "- gauge_reach_lnk (dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gauge_reach_lnk = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    gauge_reach_lnk[case] = pd.read_csv(\n",
+    "        \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[grid][\"network\"])\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.2 D19 flow site geopackage\n",
+    "- gauge_shp (dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "gauge_shp = gpd.read_file(\n",
+    "    os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n",
+    ")\n",
+    "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "ocean_shp = gpd.read_file(os.path.join(geospatial_dir, \"oceans.gpkg\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.3 Read river network information\n",
+    "- riv_ocean (dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "## read catchment geopackage\n",
+    "gdf_cat = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    cat_gpkg = os.path.join(\n",
+    "        geospatial_dir, catch_gpkg[grid][\"file_name\"]\n",
+    "    )  # geopackage name\n",
+    "    id_name_cat = catch_gpkg[grid][\"id_name\"]  # reach ID in geopackage\n",
+    "    var_list = [id_name_cat]\n",
+    "    if \"lk\" in grid:\n",
+    "        var_list.append(\"lake\")\n",
+    "    gdf_cat[case] = read_shps([cat_gpkg], var_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# read river outlet netcdf\n",
+    "riv_ocean = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    riv_ocean_file = os.path.join(\n",
+    "        domain_dir, network_nc[grid][\"file_name\"].replace(\".aug.nc\", \".outlet.nc\")\n",
+    "    )  # network netcdf name\n",
+    "    ds_rn_ocean = xr.open_dataset(riv_ocean_file).set_index(seg=\"seg_id\")\n",
+    "    df_tmp = ds_rn_ocean.to_dataframe()\n",
+    "    riv_ocean[case] = pd.merge(\n",
+    "        gdf_cat[case], df_tmp, left_on=catch_gpkg[grid][\"id_name\"], right_index=True\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.4 Merge gauge, outlet catchment dataframe\n",
+    "\n",
+    "- gauge_shp1 (dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# Merge gauge_reach lnk (dataframe) into gauge shapefile\n",
+    "gauge_shp1 = {}\n",
+    "for case, grid in zip(case_list, grid_list):\n",
+    "    df = gauge_reach_lnk[case]\n",
+    "\n",
+    "    # df = df.loc[(df['flag'] == 0)]\n",
+    "    df1 = df.drop(columns=[\"riv_name\"])\n",
+    "    df2 = pd.merge(gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\")\n",
+    "    gauge_shp1[case] = pd.merge(\n",
+    "        df2,\n",
+    "        riv_ocean[case],\n",
+    "        how=\"inner\",\n",
+    "        left_on=\"route_id\",\n",
+    "        right_on=catch_gpkg[grid][\"id_name\"],\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "------\n",
+    "## 4. Plot annual cycle for global oceans \n",
+    "\n",
+    "TODO: Reference flow plot should be independent of cases (network). Currently the last case plotted looks better matched with reference flow. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "nrows = 4\n",
+    "ncols = 2\n",
+    "fig, axes = plt.subplots(nrows, ncols, figsize=(7.25, 6.5))\n",
+    "plt.subplots_adjust(\n",
+    "    top=0.95, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+    ")  # create some space below the plots by increasing the bottom-value\n",
+    "\n",
+    "for ix, ocean_name in enumerate(oceans_list):\n",
+    "    row = ix // 2\n",
+    "    col = ix % 2\n",
+    "    for case, grid in zip(case_list, grid_list):\n",
+    "\n",
+    "        q_name = case_meta[grid][\"flow_name\"]\n",
+    "\n",
+    "        if case_meta[grid][\"network_type\"] == \"vector\":\n",
+    "            if ocean_name == \"global\":\n",
+    "                id_list = gauge_shp1[case][\"route_id\"].values\n",
+    "            else:\n",
+    "                id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+    "                    \"route_id\"\n",
+    "                ].values\n",
+    "            reach_index = get_index_array(reachID[case], id_list)\n",
+    "            dr_flow = seas_data[case][q_name].isel(seg=reach_index).sum(dim=\"seg\")\n",
+    "            dr_flow.plot(ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case)\n",
+    "\n",
+    "        elif case_meta[grid][\"network_type\"] == \"grid\":  # means 2d grid\n",
+    "            if ocean_name == \"global\":\n",
+    "                id_list = gauge_shp1[case][\"route_id\"].values\n",
+    "            else:\n",
+    "                id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+    "                    \"route_id\"\n",
+    "                ].values\n",
+    "\n",
+    "            reach_index = get_index_array(reachID[case], id_list)\n",
+    "            seas_data_vector = seas_data[case][q_name].stack(seg=(\"lat\", \"lon\"))\n",
+    "            dr_flow = seas_data_vector.isel(seg=reach_index).sum(dim=\"seg\")\n",
+    "            dr_flow.plot(ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case)\n",
+    "\n",
+    "        # reference data\n",
+    "        if obs_available:\n",
+    "            if ocean_name == \"global\":\n",
+    "                id_list = gauge_shp1[case][\"id\"].values\n",
+    "            else:\n",
+    "                id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+    "                    \"id\"\n",
+    "                ].values\n",
+    "            gauge_index = get_index_array(ds_q[\"id\"].values, id_list)\n",
+    "            dr_obs = dr_q_obs_seasonal.isel(station=gauge_index).sum(dim=\"station\")\n",
+    "            dr_obs.plot(\n",
+    "                ax=axes[row, col],\n",
+    "                linestyle=\"None\",\n",
+    "                marker=\"o\",\n",
+    "                markersize=2,\n",
+    "                c=\"k\",\n",
+    "                label=\"D19\",\n",
+    "            )\n",
+    "\n",
+    "    axes[row, col].set_title(\"%d %s\" % (ix + 1, ocean_name), fontsize=9)\n",
+    "    axes[row, col].set_xlabel(\"\")\n",
+    "    if row < nrows - 1:\n",
+    "        axes[row, col].set_xticklabels(\"\")\n",
+    "    if col == 0:\n",
+    "        axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=9)\n",
+    "    else:\n",
+    "        axes[row, col].set_ylabel(\"\")\n",
+    "    axes[row, col].tick_params(\"both\", labelsize=\"x-small\")\n",
+    "\n",
+    "# Legend - make space below the plot - raise bottom. There will be a label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+    "axes[row, col].legend(\n",
+    "    loc=\"center left\", bbox_to_anchor=(1.10, 0.40, 0.75, 0.1), ncol=1, fontsize=\"small\"\n",
+    ")\n",
+    "\n",
+    "for jx in range(ix + 1, nrows * ncols):\n",
+    "    row = jx // 2\n",
+    "    col = jx % 2\n",
+    "    fig.delaxes(axes[row][col])\n",
+    "\n",
+    "if figureSave:\n",
+    "    plt.savefig(f\"./NB2_Fig1_ocean_discharge_season_{analysis_name}.png\", dpi=200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cupid-analysis",
+   "language": "python",
+   "name": "cupid-analysis"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/nblibrary/rof/scripts/colors.py b/examples/nblibrary/rof/scripts/colors.py
new file mode 100644
index 0000000..71c5619
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/colors.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import matplotlib as mpl
+from matplotlib.colors import LinearSegmentedColormap
+
+# create colormaps
+# ---------------
+cmap_RedGrayBlue = LinearSegmentedColormap.from_list(
+    "custom blue",
+    [
+        (0, "xkcd:red"),
+        (0.05, "xkcd:orange"),
+        (0.50, "xkcd:light grey"),
+        (0.65, "xkcd:sky blue"),
+        (1, "xkcd:blue"),
+    ],
+    N=15,
+)
+
+# %bias
+vals_pbias = [-50, -40, -30, -20, -10, 10, 20, 30, 40, 50]
+cmap_pbias = LinearSegmentedColormap.from_list(
+    "custom1",
+    [(0.0, "xkcd:red"), (0.5, "xkcd:light grey"), (1.0, "xkcd:blue")],
+    N=11,
+)
+cmap_pbias.set_over("xkcd:dark blue")
+cmap_pbias.set_under("xkcd:dark red")
+norm_pbias = mpl.colors.BoundaryNorm(vals_pbias, cmap_pbias.N)
+
+
+# corr
+vals_corr = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+cmap_corr = LinearSegmentedColormap.from_list(
+    "custom1",
+    [(0.00, "xkcd:yellow"), (0.50, "xkcd:green"), (1.00, "xkcd:blue")],
+    N=9,
+)
+cmap_corr.set_over("xkcd:dark blue")
+cmap_corr.set_under("xkcd:brown")
+norm_corr = mpl.colors.BoundaryNorm(vals_corr, cmap_corr.N)
diff --git a/examples/nblibrary/rof/scripts/metrics.py b/examples/nblibrary/rof/scripts/metrics.py
new file mode 100644
index 0000000..263c28d
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/metrics.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import numpy as np
+
+# ----- time series error
+# There are many other error metrics (e.g., KGE, NSE, MAE, etc.) as well as
+# many flow metrics (flow duration curve, annual maximum flow and timing, etc.)
+# available.
+
+
+def remove_nan(qsim, qobs):
+    """
+    Compare sim array and obs array time series,
+    then remove both sim and obs at times where either or both have NaN.
+
+    Arguments
+    ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+        Observed time series array.
+
+    Returns
+    -------
+    qsim, qobs: tuple of array-like
+        Simulation and observation pair with NaNs removed.
+    """
+
+    sim_obs = np.stack((qsim, qobs), axis=1)
+    sim_obs = sim_obs[~np.isnan(sim_obs).any(axis=1), :]
+    return sim_obs[:, 0], sim_obs[:, 1]
+
+
+def pbias(qsim, qobs):
+    """
+    Calculates percentage bias between two flow arrays.
+
+    Arguments
+    ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+        Observed time series array.
+
+    Returns
+    -------
+    pbias: float
+        Percentage bias calculated between the two arrays.
+    """
+
+    qsim1, qobs1 = remove_nan(qsim, qobs)
+    return np.sum(qsim1 - qobs1) / np.sum(qobs1) * 100
+
+
+def rmse(qsim, qobs):
+    """
+    Calculates root mean squared error (rmse) between two time series arrays.
+
+    Arguments
+    ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+        Observed time series array.
+
+    Returns
+    -------
+    rmse: float
+        rmse calculated between the two arrays.
+    """
+    qsim1, qobs1 = remove_nan(qsim, qobs)
+    return np.sqrt(np.mean((qsim1 - qobs1) ** 2))
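A quick sanity check of these metrics on toy arrays (a minimal sketch, not part of the PR; it assumes you run from `examples/nblibrary/rof/` so that `scripts.metrics` is importable):

```python
import numpy as np

from scripts.metrics import pbias, rmse

q_sim = np.array([10.0, 12.0, np.nan, 9.0])
q_obs = np.array([11.0, np.nan, 8.0, 10.0])

# remove_nan() drops the two NaN-paired timesteps, leaving (10, 11) and (9, 10):
print(pbias(q_sim, q_obs))  # ~ -9.52 (%)
print(rmse(q_sim, q_obs))   # 1.0
```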
diff --git a/examples/nblibrary/rof/scripts/metrics.py b/examples/nblibrary/rof/scripts/metrics.py
new file mode 100644
index 0000000..263c28d
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/metrics.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import numpy as np
+
+# ----- time series error
+# Many other error metrics (e.g., KGE, NSE, MAE) and flow metrics (flow
+# duration curve, annual maximum flow and timing, etc.) are available.
+
+
+def remove_nan(qsim, qobs):
+    """
+    Compare sim and obs time series arrays,
+    then drop the time steps where either sim or obs (or both) is NaN.
+
+    Arguments
+    ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+        Observed time series array.
+
+    Returns
+    -------
+    qsim, qobs: array-like
+        Simulated and observed arrays with NaN pairs removed.
+    """
+
+    sim_obs = np.stack((qsim, qobs), axis=1)
+    sim_obs = sim_obs[~np.isnan(sim_obs).any(axis=1), :]
+    return sim_obs[:, 0], sim_obs[:, 1]
+
+
+def pbias(qsim, qobs):
+    """
+    Calculates percentage bias between two flow arrays.
+
+    Arguments
+    ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+        Observed time series array.
+
+    Returns
+    -------
+    pbias: float
+        Percentage bias calculated between the two arrays.
+    """
+
+    qsim1, qobs1 = remove_nan(qsim, qobs)
+    return np.sum(qsim1 - qobs1) / np.sum(qobs1) * 100
+
+
+def rmse(qsim, qobs):
+    """
+    Calculates root mean squared error (rmse) between two time series arrays.
+
+    Arguments
+    ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+        Observed time series array.
+
+    Returns
+    -------
+    rmse: float
+        rmse calculated between the two arrays.
+    """
+    qsim1, qobs1 = remove_nan(qsim, qobs)
+    return np.sqrt(np.mean((qsim1 - qobs1) ** 2))
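Both metrics first pass through `remove_nan`, so any time step with a missing value in either series is dropped before the error is computed. A worked example with arbitrary numbers (the import again assumes `scripts/` is importable):

```python
import numpy as np
from metrics import pbias, rmse  # assumes scripts/ is importable

qsim = np.array([10.0, 12.0, 9.0, 11.0])
qobs = np.array([11.0, np.nan, 10.0, 10.0])  # second time step is ignored

print(pbias(qsim, qobs))  # sum(qsim - qobs) / sum(qobs) * 100 = -1/31*100 ~ -3.23
print(rmse(qsim, qobs))   # sqrt(mean([1, 1, 1])) = 1.0
```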
diff --git a/examples/nblibrary/rof/scripts/utility.py b/examples/nblibrary/rof/scripts/utility.py
new file mode 100644
index 0000000..bb13d19
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/utility.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import yaml
+from pyogrio import read_dataframe
+
+
+# the tomli module may not be available; in that case, skip defining load_toml
+try:
+    import tomli
+
+    def load_toml(toml_file) -> dict:
+        """Load TOML data from file"""
+        with open(toml_file, "rb") as f:
+            return tomli.load(f)
+
+except ImportError:
+    pass  # optionally log that TOML support is unavailable
+
+
+def load_yaml(yaml_file) -> dict:
+    """Load yaml data from file"""
+    with open(yaml_file) as ymlfile:
+        return yaml.load(ymlfile, Loader=yaml.FullLoader)
+
+
+class AutoVivification(dict):
+    """Implementation of perl's autovivification feature."""
+
+    def __getitem__(self, item):
+        try:
+            return dict.__getitem__(self, item)
+        except KeyError:
+            value = self[item] = type(self)()
+            return value
+
+
+def read_shps(shp_list, usecols, **kwargs):
+    """Load shapefiles faster by reading only selected attributes into one dataframe"""
+    gdf_frame = []
+    for shp in shp_list:
+        gdf_frame.append(read_dataframe(shp, columns=usecols))
+        print("Finished reading %s" % shp.strip("\n"))
+    return pd.concat(gdf_frame)
+
+
+def get_index_array(a_array, b_array):
+    """
+    Get an index array where each index points to a location in a_array. The order of the index array corresponds to b_array.
+
+    e.g.,
+    a_array = [2, 4, 1, 8, 3, 10, 5, 9, 7, 6]
+    b_array = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    result = [2, 0, 4, 1, 6, 9, 8, 3, 7, 5]
+
+    https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
+    """
+    index = np.argsort(a_array)
+    sorted_a_array = a_array[index]
+    sorted_index = np.searchsorted(sorted_a_array, b_array)
+
+    yindex = np.take(index, sorted_index, mode="clip")
+    mask = a_array[yindex] != b_array
+
+    result = np.ma.array(yindex, mask=mask)
+
+    return result
+
+
+def reorder_index(ID_array_orig, ID_array_target):
+    x = ID_array_orig
+    # Find the indices of the reordered array. See link for more details:
+    # https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
+    index = np.argsort(x)
+    sorted_x = x[index]
+    sorted_index = np.searchsorted(sorted_x, ID_array_target)
+
+    return np.take(index, sorted_index, mode="clip")
+
+
+def no_time_variable(ds):
+    """Return names of non-coordinate variables without a time dimension"""
+    vars_without_time = []
+    for var in ds.variables:
+        if "time" not in ds[var].dims:
+            if var not in list(ds.coords):
+                vars_without_time.append(var)
+    return vars_without_time
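`get_index_array` is what the notebooks use to map gauge and outlet IDs onto positions in a case's reach-ID array; masked entries mark IDs that are absent from the model network. Reusing the arrays from its docstring:

```python
import numpy as np
from utility import get_index_array  # assumes scripts/ is importable

a_array = np.array([2, 4, 1, 8, 3, 10, 5, 9, 7, 6])
b_array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

idx = get_index_array(a_array, b_array)
print(idx)                       # [2 0 4 1 6 9 8 3 7 5]
print(a_array[np.asarray(idx)])  # recovers b_array
```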
diff --git a/examples/nblibrary/rof/setup/large_river_50.txt b/examples/nblibrary/rof/setup/large_river_50.txt
new file mode 100644
index 0000000..b2c777a
--- /dev/null
+++ b/examples/nblibrary/rof/setup/large_river_50.txt
@@ -0,0 +1,51 @@
+site_id,river_name
+81168,Amazon
+9894,Congo
+9910,Orinoco
+9306,Changjiang
+9270,Brahmaputra
+9598,Mississippi
+90973,Yenisey
+35021,Parana
+91050,Lena
+9903,Mekong
+81329,Tocantins
+91082,Ob
+9227,Ganges
+9900,Irrawaddy
+9550,St Lawrence
+90271,Amur
+10224,Mackenzie
+9301,Xijiang
+9560,Columbia
+9400,Magdalena
+9370,Uruguay
+10623,Yukon
+9763,Danube
+9052,Niger
+81217,Xingu
+9403,Atrato
+81192,Tapajos
+9038,Ogoou
+9360,Essequibo
+9504,Fraser
+91103,Pechora
+9483,Nelson
+95213,Khatanga
+9736,Sepik
+95067,Kolyma
+9114,Zambeze
+91128,Severnaya Dvina
+9275,Indus
+9079,Sanaga
+9226,Godavari
+10006,Rajang
+81748,Sao Francisco
+9626,Usumacinta
+9366,Maroni
+9909,Rhine
+9733,Purari
+9459,Caniapiscau
+9232,Mahanadi
+9558,Sacramento
+9789,Elbe
diff --git a/examples/nblibrary/rof/setup/setup.yaml b/examples/nblibrary/rof/setup/setup.yaml
new file mode 100644
index 0000000..32a0b46
--- /dev/null
+++ b/examples/nblibrary/rof/setup/setup.yaml
@@ -0,0 +1,145 @@
+## Configurations used for ctsm-mizuRoute
+
+# Directories
+geospatial_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/geospatial
+ancillary_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/ancillary_data
+ref_flow_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/obs
+
+# case meta dictionary data (TODO: consider removing color)
+# grid_name -> name of grid
+# network -> name of river network
+# model -> MOSART or mizuroute
+# network_type -> grid or vector
+# vars_read -> history variables read in
+# flow_name -> routed runoff
+# runoff_name -> instantaneous runoff from land model
+# color -> color used for plotting
+case_meta:
+  rHDMAlk:
+    network: HDMA_lake
+    model: mizuroute
+    network_type: vector
+    domain_nc: None
+    vars_read:
+      - DWroutedRunoff
+      - basRunoff
+    flow_name: DWroutedRunoff
+    runoff_name: basRunoff
+    color: 'xkcd:red'
+  rHDMAlk_h06:
+    network: HDMA_lake_h06
+    model: mizuroute
+    network_type: vector
+    domain_nc: None
+    vars_read:
+      - DWroutedRunoff
+      - basRunoff
+    flow_name: DWroutedRunoff
+    runoff_name: basRunoff
+    color: 'xkcd:green'
+  rHDMA:
+    network: HDMA
+    model: mizuroute
+    network_type: vector
+    domain_nc: None
+    vars_read:
+      - DWroutedRunoff
+      - basRunoff
+    flow_name: DWroutedRunoff
+    runoff_name: basRunoff
+    color: 'xkcd:blue'
+  rMERIT:
+    network: MERIT_Hydro
+    model: mizuroute
+    network_type: vector
+    domain_nc: None
+    vars_read:
+      - DWroutedRunoff
+      - basRunoff
+    flow_name: DWroutedRunoff
+    runoff_name: basRunoff
+    color: 'xkcd:green'
+  f09_f09:
+    network: mosart
+    model: mizuroute
+    network_type: vector
+    domain_nc: None
+    vars_read:
+      - DWroutedRunoff
+      - basRunoff
+    flow_name: DWroutedRunoff
+    runoff_name: basRunoff
+    color: 'xkcd:green'
+  f09_f09_mosart:
+    network: mosart
+    model: mosart
+    network_type: grid
+    domain_nc: mosart0.5_domain.nc
+    vars_read:
+      - RIVER_DISCHARGE_OVER_LAND_LIQ
+      - RIVER_DISCHARGE_OVER_LAND_ICE
+    flow_name: RIVER_DISCHARGE_OVER_LAND_LIQ
+    runoff_name: None
+    color: 'xkcd:grey'
+
+# river line geopackage metadata
+# sub-key is grid name
reach_gpkg:
+  rHDMA:
+    file_name: hdma_global_stream.gpkg
+    id_name: seg_id
+    down_id_name: Tosegment
+  rHDMAlk:
+    file_name: hdma_global_stream.gpkg
+    id_name: seg_id
+    down_id_name: Tosegment
+  rMERIT:
+    file_name: rivEndMERIT_simplified_0003.gpkg
+    id_name: COMID
+    down_id_name: NextDownID
+  f09_f09_mosart:
+    file_name: MOSART_routing_Global_0.5x0.5_c170601_flowline.gpkg
+    id_name: seg_id
+    down_id_name: Tosegment
+  f09_f09:
+    file_name: MOSART_routing_Global_0.5x0.5_c170601_flowline.gpkg
+    id_name: seg_id
+    down_id_name: Tosegment
+
+# catchment geopackage metadata
+# sub-key is grid name
+catch_gpkg:
+  rHDMA:
+    file_name: hdma_global_catch_v2_0.01.gpkg
+    id_name: hruid
+  rHDMAlk:
+    file_name: hdma_hydrolake_global_catch_v1_0.01.gpkg
+    id_name: ID
+  rMERIT:
+    file_name: catEndoMERIT.gpkg
+    id_name: hruid
+  f09_f09_mosart:
+    file_name: MOSART_routing_Global_0.5x0.5_c170601_hru.gpkg
+    id_name: hruid
+  f09_f09:
+    file_name: MOSART_routing_Global_0.5x0.5_c170601_hru.gpkg
+    id_name: hruid
+
+# river network data (mizuRoute format, augmented)
+# sub-key is grid name
+river_network:
+  rHDMA:
+    file_name: ntopo_HDMA.v2.aug.nc
+    reach_ID: seg_id
+  rHDMAlk:
+    file_name: Network_topology_HDMA_HydroLake_v3.aug.nc
+    reach_ID: seg_id
+  rMERIT:
+    file_name: ntopo_MERIT_Hydro_v1.aug.nc
+    reach_ID: seg_id
+  f09_f09_mosart:
+    file_name: mizuRoute_MOSART_routing_Global_0.5x0.5_c170601.aug.nc
+    reach_ID: seg_id
+  f09_f09:
+    file_name: mizuRoute_MOSART_routing_Global_0.5x0.5_c170601.aug.nc
+    reach_ID: seg_id
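The rof notebooks read this file with `load_yaml` from `scripts/utility.py` and then index each top-level section by grid name. A minimal sketch of that lookup pattern, assuming the working directory is `examples/nblibrary/rof/` and the `scripts` directory is importable:

```python
from scripts.utility import load_yaml  # assumes scripts/ is importable

setup = load_yaml("setup/setup.yaml")

grid = "f09_f09_mosart"
print(setup["case_meta"][grid]["flow_name"])  # RIVER_DISCHARGE_OVER_LAND_LIQ
print(setup["catch_gpkg"][grid]["id_name"])   # hruid
print(setup["ref_flow_dir"])                  # reference (observed) flow directory
```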