From 31b34d90dcb478da0594ee916e7d9d94b86428f4 Mon Sep 17 00:00:00 2001 From: Sarah Charlotte Johnson Date: Wed, 6 Dec 2023 13:02:48 -0800 Subject: [PATCH] Add code for nwm video (#40) --- .../make-timelapse-video.ipynb | 145 ++++++++++++++++++ national-water-model/xarray-water-model.py | 20 +-- 2 files changed, 152 insertions(+), 13 deletions(-) create mode 100644 national-water-model/make-timelapse-video.ipynb diff --git a/national-water-model/make-timelapse-video.ipynb b/national-water-model/make-timelapse-video.ipynb new file mode 100644 index 0000000..0ebe389 --- /dev/null +++ b/national-water-model/make-timelapse-video.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data Prep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import xarray as xr\n", + "import numpy as np\n", + "\n", + "# Read county shapefile, combo of state FIPS code and county FIPS code as multi-index\n", + "counties = gpd.read_file(\n", + " \"https://www2.census.gov/geo/tiger/GENZ2020/shp/cb_2020_us_county_20m.zip\"\n", + ").to_crs(\"EPSG:3395\")\n", + "counties[\"STATEFP\"] = counties.STATEFP.astype(int)\n", + "counties[\"COUNTYFP\"] = counties.COUNTYFP.astype(int)\n", + "continental = counties[\n", + " ~counties[\"STATEFP\"].isin([2, 15, 72])\n", + "] # drop Alaska, Hawaii, Puerto Rico\n", + "\n", + "# Read in saved data from xarray-water-model.py\n", + "ds = xr.open_dataset(\"mean_zwattablrt_nwm_1979_2020.nc\")\n", + "ds[\"week\"] = ds.time.dt.strftime(\"%Y-%U\")\n", + "ds = ds.groupby(\"week\").mean()\n", + "# Interpret county as combo of state FIPS code and county FIPS code\n", + "ds.coords[\"STATEFP\"] = (ds.county // 1000).astype(int)\n", + "ds.coords[\"COUNTYFP\"] = np.mod(ds.county, 1000).astype(int)\n", + "df = ds.to_dataframe().reset_index()\n", + "\n", + "# Join\n", + "merge_df = continental.merge(df, on=[\"STATEFP\", \"COUNTYFP\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make all the plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", + "from datetime import datetime\n", + "\n", + "day_0 = datetime.strptime(weeks[0] + \"-0\", \"%Y-%U-%w\")\n", + "weeks = merge_df.week.unique()\n", + "\n", + "for week in weeks:\n", + " fig, ax = plt.subplots(1, 1, figsize=(7.68, 4.32)) # for 3840x2160 resolution\n", + "\n", + " ax.set_axis_off()\n", + "\n", + " divider = make_axes_locatable(ax)\n", + " cax = divider.append_axes(\"bottom\", size=\"5%\", pad=0.1)\n", + "\n", + " cax.tick_params(labelsize=8)\n", + " cax.set_title(\"Depth (in meters) of the water table\", fontsize=8)\n", + "\n", + " merge_df[merge_df[\"week\"] == f\"{week}\"].plot(\n", + " column=\"zwattablrt\",\n", + " cmap=\"BrBG_r\",\n", + " vmin=0,\n", + " vmax=2,\n", + " legend=True,\n", + " ax=ax,\n", + " cax=cax,\n", + " legend_kwds={\n", + " \"orientation\": \"horizontal\",\n", + " \"ticks\": [0, 0.5, 1, 1.5, 2],\n", + " },\n", + " )\n", + "\n", + " # Add legends for memory, time, and cost\n", + " current_day = datetime.strptime(week + \"-0\", \"%Y-%U-%w\")\n", + " n_days = (current_day - day_0).days\n", + " memory = n_days * (16.88 * 1.07374) # daily memory converted to GB\n", + " cost = (n_days / 7) * 0.01124606 # weekly cost\n", + "\n", + " if memory >= 1000:\n", + " memory_string = f\"{memory/1000:.1f} TB processed, ~${cost:.2f} in cloud costs\"\n", + " else:\n", + " memory_string = f\"{memory:.0f} GB processed, ~${cost:.2f} in cloud costs\"\n", + "\n", + " plt.text(0, 1, memory_string, transform=ax.transAxes, size=9)\n", + " # convert Year - Week Number to Month - Year\n", + " date = datetime.strptime(week + \"-0\", \"%Y-%U-%w\").strftime(\"%Y %b\")\n", + " plt.text(0.85, 1, f\"{date}\", transform=ax.transAxes, size=10)\n", + " plt.savefig(f\"../../nwm-animation/3840x2160/{week}.png\", transparent=True, dpi=500)\n", + " plt.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use [ffmpeg](https://ffmpeg.org/) to stitch the images together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ffmpeg -pattern_type glob -i '3840x2160/*.png' -r 60 -crf 18 -pix_fmt yuv420p nwm-video.mp4" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nwm", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/national-water-model/xarray-water-model.py b/national-water-model/xarray-water-model.py index 2190a2e..ab188ed 100644 --- a/national-water-model/xarray-water-model.py +++ b/national-water-model/xarray-water-model.py @@ -16,35 +16,29 @@ cluster = coiled.Cluster( name="nwm-1979-2020", - region="us-east-1", # close to data + region="us-east-1", # close to data n_workers=10, - scheduler_vm_types="r7g.xlarge", # ARM instance + scheduler_vm_types="r7g.xlarge", # ARM instance worker_vm_types="r7g.2xlarge", - compute_purchase_option="spot_with_fallback" # use spot, replace with on-demand + spot_policy="spot_with_fallback", # use spot, replace with on-demand ) client = cluster.get_client() cluster.adapt(minimum=10, maximum=200) ds = xr.open_zarr( - fsspec.get_mapper( - "s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr", - anon=True - ), + fsspec.get_mapper("s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr", anon=True), consolidated=True, - chunks={"time": 896, "x": 350, "y": 350} + chunks={"time": 896, "x": 350, "y": 350}, ) -subset = ds.zwattablrt.sel( - time=slice("1979-02-01", "2020-12-31") -) +subset = ds.zwattablrt.sel(time=slice("1979-02-01", "2020-12-31")) fs = fsspec.filesystem("s3", requester_pays=True) with dask.annotate(retries=3): counties = rioxarray.open_rasterio( - fs.open("s3://nwm-250m-us-counties/Counties_on_250m_grid.tif"), - chunks="auto" + fs.open("s3://nwm-250m-us-counties/Counties_on_250m_grid.tif"), chunks="auto" ).squeeze() # remove any small floating point error in coordinate locations