From 15e832c0c56691b4488b9d22b48ec73c5f85df1a Mon Sep 17 00:00:00 2001 From: mgrover1 Date: Tue, 17 Dec 2024 13:40:28 -0600 Subject: [PATCH] ADD: Add io of data on pythia cloud in calc cloud top --- notebooks/Calc_cloud_base_and_top.ipynb | 14465 +++++++++++++++++++++- 1 file changed, 14392 insertions(+), 73 deletions(-) diff --git a/notebooks/Calc_cloud_base_and_top.ipynb b/notebooks/Calc_cloud_base_and_top.ipynb index 9d2861e..72d3e74 100644 --- a/notebooks/Calc_cloud_base_and_top.ipynb +++ b/notebooks/Calc_cloud_base_and_top.ipynb @@ -25,13 +25,14 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from datetime import datetime\n", "import numpy as np\n", "import xarray as xr\n", + "import fsspec\n", "import xwrf\n", "\n", "import matplotlib.pyplot as plt" @@ -53,19 +54,14278 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "path_shcu_root = \"/data/project/ARM_Summer_School_2024_Data/lasso_tutorial/ShCu/untar\" # on Jupyter\n", + "# Set the URL and path for the cloud\n", + "URL = 'https://js2.jetstream-cloud.org:8001/'\n", + "path = f'pythia/lasso-sgp'\n", "\n", + "# Configure the s3-like storage endpoint on jetstream\n", + "fs = fsspec.filesystem(\"s3\", anon=True, client_kwargs=dict(endpoint_url=URL))\n", + "\n", + "# Set the analysis date and simulation number\n", "case_date = datetime(2019, 4, 4)\n", "sim_id = 7\n", "\n", - "ds_stat = xr.open_dataset(f\"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id:04d}/raw_model/wrfstat_d01_{case_date:%Y-%m-%d_12:00:00}.nc\")\n", - "ds_stat\n", + "# Read the wrfstat files\n", + "wrfstat_pattern = f's3://{path}/sim000{sim_id}/raw_model/wrfstat*'\n", + "wrfstat_files = sorted(fs.glob(wrfstat_pattern))\n", + "\n", + "# Remotely read these into a list\n", + "wrfstat_file_list = [fs.open(file) for file in wrfstat_files]\n", + "wrfstat_file_list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load into an `xarray.Dataset`" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 72GB\n",
+       "Dimensions:         (Time: 91, bottom_top: 226, bottom_top_stag: 227,\n",
+       "                     south_north: 250, west_east: 250, west_east_stag: 251,\n",
+       "                     south_north_stag: 251)\n",
+       "Coordinates:\n",
+       "    XTIME           (Time) datetime64[ns] 728B dask.array<chunksize=(91,), meta=np.ndarray>\n",
+       "  * Time            (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-0...\n",
+       "Dimensions without coordinates: bottom_top, bottom_top_stag, south_north,\n",
+       "                                west_east, west_east_stag, south_north_stag\n",
+       "Data variables: (12/179)\n",
+       "    Times           (Time) |S19 2kB dask.array<chunksize=(1,), meta=np.ndarray>\n",
+       "    CST_CLDLOW      (Time) float32 364B dask.array<chunksize=(91,), meta=np.ndarray>\n",
+       "    CST_CLDTOT      (Time) float32 364B dask.array<chunksize=(91,), meta=np.ndarray>\n",
+       "    CST_LWP         (Time) float32 364B dask.array<chunksize=(91,), meta=np.ndarray>\n",
+       "    CST_IWP         (Time) float32 364B dask.array<chunksize=(91,), meta=np.ndarray>\n",
+       "    CST_PRECW       (Time) float32 364B dask.array<chunksize=(91,), meta=np.ndarray>\n",
+       "    ...              ...\n",
+       "    CSV_IWC         (Time, bottom_top, south_north, west_east) float32 5GB dask.array<chunksize=(1, 226, 125, 125), meta=np.ndarray>\n",
+       "    CSV_CLDFRAC     (Time, bottom_top, south_north, west_east) float32 5GB dask.array<chunksize=(1, 226, 125, 125), meta=np.ndarray>\n",
+       "    CSS_LWP         (Time, south_north, west_east) float32 23MB dask.array<chunksize=(1, 125, 125), meta=np.ndarray>\n",
+       "    CSS_IWP         (Time, south_north, west_east) float32 23MB dask.array<chunksize=(1, 125, 125), meta=np.ndarray>\n",
+       "    CSS_CLDTOT      (Time, south_north, west_east) float32 23MB dask.array<chunksize=(1, 125, 125), meta=np.ndarray>\n",
+       "    CSS_CLDLOW      (Time, south_north, west_east) float32 23MB dask.array<chunksize=(1, 125, 125), meta=np.ndarray>\n",
+       "Attributes: (12/96)\n",
+       "    TITLE:                                  OUTPUT FROM WRF V3.8.1 MODEL\n",
+       "    START_DATE:                            2019-04-04_12:00:00\n",
+       "    WEST-EAST_GRID_DIMENSION:              251\n",
+       "    SOUTH-NORTH_GRID_DIMENSION:            251\n",
+       "    BOTTOM-TOP_GRID_DIMENSION:             227\n",
+       "    DX:                                    100.0\n",
+       "    ...                                    ...\n",
+       "    config_aerosol:                        NA\n",
+       "    config_forecast_time:                  15.0 h\n",
+       "    config_boundary_method:                Periodic\n",
+       "    config_microphysics:                   Thompson (mp_physics=8)\n",
+       "    config_nickname:                       runlas20190404v1msda2d150km\n",
+       "    simulation_origin_host:                cumulus-login2.ccs.ornl.gov
" + ], + "text/plain": [ + " Size: 72GB\n", + "Dimensions: (Time: 91, bottom_top: 226, bottom_top_stag: 227,\n", + " south_north: 250, west_east: 250, west_east_stag: 251,\n", + " south_north_stag: 251)\n", + "Coordinates:\n", + " XTIME (Time) datetime64[ns] 728B dask.array\n", + " * Time (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-0...\n", + "Dimensions without coordinates: bottom_top, bottom_top_stag, south_north,\n", + " west_east, west_east_stag, south_north_stag\n", + "Data variables: (12/179)\n", + " Times (Time) |S19 2kB dask.array\n", + " CST_CLDLOW (Time) float32 364B dask.array\n", + " CST_CLDTOT (Time) float32 364B dask.array\n", + " CST_LWP (Time) float32 364B dask.array\n", + " CST_IWP (Time) float32 364B dask.array\n", + " CST_PRECW (Time) float32 364B dask.array\n", + " ... ...\n", + " CSV_IWC (Time, bottom_top, south_north, west_east) float32 5GB dask.array\n", + " CSV_CLDFRAC (Time, bottom_top, south_north, west_east) float32 5GB dask.array\n", + " CSS_LWP (Time, south_north, west_east) float32 23MB dask.array\n", + " CSS_IWP (Time, south_north, west_east) float32 23MB dask.array\n", + " CSS_CLDTOT (Time, south_north, west_east) float32 23MB dask.array\n", + " CSS_CLDLOW (Time, south_north, west_east) float32 23MB dask.array\n", + "Attributes: (12/96)\n", + " TITLE: OUTPUT FROM WRF V3.8.1 MODEL\n", + " START_DATE: 2019-04-04_12:00:00\n", + " WEST-EAST_GRID_DIMENSION: 251\n", + " SOUTH-NORTH_GRID_DIMENSION: 251\n", + " BOTTOM-TOP_GRID_DIMENSION: 227\n", + " DX: 100.0\n", + " ... ...\n", + " config_aerosol: NA\n", + " config_forecast_time: 15.0 h\n", + " config_boundary_method: Periodic\n", + " config_microphysics: Thompson (mp_physics=8)\n", + " config_nickname: runlas20190404v1msda2d150km\n", + " simulation_origin_host: cumulus-login2.ccs.ornl.gov" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_stat = xr.open_mfdataset(wrfstat_file_list, engine='h5netcdf')\n", "\n", - "ds_stat[\"Time\"] = ds_stat[\"XTIME\"]" + "# Rename time - in this case, we are not using xwrf to clean the dataset\n", + "ds_stat[\"Time\"] = ds_stat[\"XTIME\"]\n", + "ds_stat" ] }, { @@ -77,7 +14337,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -456,13 +14716,13 @@ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])\n", "Coordinates:\n", " XTIME (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n", - "Dimensions without coordinates: Time
    • Time
      PandasIndex
      PandasIndex(DatetimeIndex(['2019-04-04 12:00:00', '2019-04-04 12:10:00',\n",
      +       "               '2019-04-04 12:20:00', '2019-04-04 12:30:00',\n",
      +       "               '2019-04-04 12:40:00', '2019-04-04 12:50:00',\n",
      +       "               '2019-04-04 13:00:00', '2019-04-04 13:10:00',\n",
      +       "               '2019-04-04 13:20:00', '2019-04-04 13:30:00',\n",
      +       "               '2019-04-04 13:40:00', '2019-04-04 13:50:00',\n",
      +       "               '2019-04-04 14:00:00', '2019-04-04 14:10:00',\n",
      +       "               '2019-04-04 14:20:00', '2019-04-04 14:30:00',\n",
      +       "               '2019-04-04 14:40:00', '2019-04-04 14:50:00',\n",
      +       "               '2019-04-04 15:00:00', '2019-04-04 15:10:00',\n",
      +       "               '2019-04-04 15:20:00', '2019-04-04 15:30:00',\n",
      +       "               '2019-04-04 15:40:00', '2019-04-04 15:50:00',\n",
      +       "               '2019-04-04 16:00:00', '2019-04-04 16:10:00',\n",
      +       "               '2019-04-04 16:20:00', '2019-04-04 16:30:00',\n",
      +       "               '2019-04-04 16:40:00', '2019-04-04 16:50:00',\n",
      +       "               '2019-04-04 17:00:00', '2019-04-04 17:10:00',\n",
      +       "               '2019-04-04 17:20:00', '2019-04-04 17:30:00',\n",
      +       "               '2019-04-04 17:40:00', '2019-04-04 17:50:00',\n",
      +       "               '2019-04-04 18:00:00', '2019-04-04 18:10:00',\n",
      +       "               '2019-04-04 18:20:00', '2019-04-04 18:30:00',\n",
      +       "               '2019-04-04 18:40:00', '2019-04-04 18:50:00',\n",
      +       "               '2019-04-04 19:00:00', '2019-04-04 19:10:00',\n",
      +       "               '2019-04-04 19:20:00', '2019-04-04 19:30:00',\n",
      +       "               '2019-04-04 19:40:00', '2019-04-04 19:50:00',\n",
      +       "               '2019-04-04 20:00:00', '2019-04-04 20:10:00',\n",
      +       "               '2019-04-04 20:20:00', '2019-04-04 20:30:00',\n",
      +       "               '2019-04-04 20:40:00', '2019-04-04 20:50:00',\n",
      +       "               '2019-04-04 21:00:00', '2019-04-04 21:10:00',\n",
      +       "               '2019-04-04 21:20:00', '2019-04-04 21:30:00',\n",
      +       "               '2019-04-04 21:40:00', '2019-04-04 21:50:00',\n",
      +       "               '2019-04-04 22:00:00', '2019-04-04 22:10:00',\n",
      +       "               '2019-04-04 22:20:00', '2019-04-04 22:30:00',\n",
      +       "               '2019-04-04 22:40:00', '2019-04-04 22:50:00',\n",
      +       "               '2019-04-04 23:00:00', '2019-04-04 23:10:00',\n",
      +       "               '2019-04-04 23:20:00', '2019-04-04 23:30:00',\n",
      +       "               '2019-04-04 23:40:00', '2019-04-04 23:50:00',\n",
      +       "               '2019-04-05 00:00:00', '2019-04-05 00:10:00',\n",
      +       "               '2019-04-05 00:20:00', '2019-04-05 00:30:00',\n",
      +       "               '2019-04-05 00:40:00', '2019-04-05 00:50:00',\n",
      +       "               '2019-04-05 01:00:00', '2019-04-05 01:10:00',\n",
      +       "               '2019-04-05 01:20:00', '2019-04-05 01:30:00',\n",
      +       "               '2019-04-05 01:40:00', '2019-04-05 01:50:00',\n",
      +       "               '2019-04-05 02:00:00', '2019-04-05 02:10:00',\n",
      +       "               '2019-04-05 02:20:00', '2019-04-05 02:30:00',\n",
      +       "               '2019-04-05 02:40:00', '2019-04-05 02:50:00',\n",
      +       "               '2019-04-05 03:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='Time', freq=None))
  • " ], "text/plain": [ " Size: 728B\n", @@ -515,10 +14866,10 @@ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])\n", "Coordinates:\n", " XTIME (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n", - "Dimensions without coordinates: Time" + " * Time (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03..." ] }, - "execution_count": 3, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -527,7 +14878,7 @@ "ds_stat[\"bottom_top\"] = ds_stat.bottom_top\n", "ds_stat\n", "ki = ds_stat['CSP_THL'].idxmin(dim='bottom_top')\n", - "ki" + "ki.load()" ] }, { @@ -539,16 +14890,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 5, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, @@ -570,16 +14921,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" }, @@ -601,16 +14952,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 6, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" }, @@ -643,7 +14994,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -652,7 +15003,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -661,7 +15012,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -726,7 +15077,7 @@ " 14440.3545 , 14719.19 ], dtype=float32)" ] }, - "execution_count": 26, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -744,7 +15095,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -752,47 +15103,14 @@ "output_type": "stream", "text": [ " Size: 364B\n", - "array([ nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan,\n", - " nan, nan, nan, 742.4813 , 772.7549 ,\n", - " 772.7549 , 803.0155 , 833.27167, 863.53253, 893.79333,\n", - " 833.27167, 863.53253, 863.53253, 863.53253, 833.27167,\n", - " 651.6837 , 742.4813 , 772.7549 , 651.6837 , 469.91293,\n", - " 681.9462 , 893.79333, 803.0155 , 984.625 , 651.6837 ,\n", - " 954.3343 , 621.4132 , 1075.5452 , 1075.5452 , 1105.8611 ,\n", - " 1105.8611 , 1136.1669 , 1136.1669 , 1136.1669 , 1166.4724 ,\n", - " 1166.4724 , 1166.4724 , 1166.4724 , 1196.7739 , 1196.7739 ,\n", - " 1196.7739 , 1196.7739 , 1196.7739 , 1227.061 , 1257.3391 ,\n", - " 1227.061 , 1317.8835 , 1348.1545 , 1348.1545 , 1348.1545 ,\n", - " 1348.1545 , 1348.1545 , 1348.1545 , 1348.1545 , 1348.1545 ,\n", - " nan, nan, nan, nan, nan,\n", - " nan], dtype=float32)\n", + "dask.array\n", "Coordinates:\n", - " XTIME (Time) datetime64[ns] 728B ...\n", + " XTIME (Time) datetime64[ns] 728B dask.array\n", " * Time (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n", " Size: 364B\n", - "array([ nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, nan, nan, nan,\n", - " nan, nan, nan, 803.0155, 954.3343, 1075.5452,\n", - " 1227.061 , 1378.427 , 1499.5316, 1560.058 , 1620.6185, 1741.6282,\n", - " 1741.6282, 1802.084 , 1862.5199, 1892.7288, 1922.9358, 1922.9358,\n", - " 2074.0422, 2043.8057, 1983.3519, 1983.3519, 1953.1406, 1953.1406,\n", - " 1983.3519, 2013.5719, 2013.5719, 2013.5719, 1983.3519, 2013.5719,\n", - " 2013.5719, 1953.1406, 1983.3519, 1953.1406, 1953.1406, 1953.1406,\n", - " 2043.8057, 1953.1406, 1983.3519, 1862.5199, 1892.7288, 1862.5199,\n", - " 1832.3059, 1771.863 , 1741.6282, 1681.1194, 1681.1194, 1650.8711,\n", - " 1620.6185, 1560.058 , 1529.7927, 1469.2583, 1438.9777, 1408.7014,\n", - " 1378.427 , nan, nan, nan, nan, nan,\n", - " nan], dtype=float32)\n", + "dask.array\n", "Coordinates:\n", - " XTIME (Time) datetime64[ns] 728B ...\n", + " XTIME (Time) datetime64[ns] 728B dask.array\n", " * Time (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n" ] } @@ -809,7 +15127,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -834,7 +15152,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -862,7 +15180,7 @@ " nan, nan, nan, nan, nan,\n", " nan], dtype=float32)\n", "Coordinates:\n", - " XTIME (Time) datetime64[ns] 728B ...\n", + " XTIME (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n", " * Time (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n", " Size: 364B\n", "array([ nan, nan, nan, nan, nan, nan,\n", @@ -882,7 +15200,7 @@ " 1378.427 , nan, nan, nan, nan, nan,\n", " nan], dtype=float32)\n", "Coordinates:\n", - " XTIME (Time) datetime64[ns] 728B ...\n", + " XTIME (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n", " * Time (Time) datetime64[ns] 728B 2019-04-04T12:00:00 ... 2019-04-05T03...\n" ] } @@ -890,16 +15208,16 @@ "source": [ "ds_stat['cb_ql'] = (ds_stat['CSP_LWC']>0).idxmax(dim = 'bottom_top')\n", "ds_stat['cb_ql'] = ds_stat['cb_ql'].where(ds_stat['cb_ql']>ds_stat['bottom_top'][0])\n", - "print(ds_stat['cb_ql'])\n", + "print(ds_stat['cb_ql'].load())\n", "\n", "ds_stat['ct_ql'] = ((ds_stat['CSP_LWC'].isel(bottom_top = slice(None, None, -1)))>0).idxmax(dim='bottom_top')\n", "ds_stat['ct_ql'] = ds_stat['ct_ql'].where(ds_stat['ct_ql']