ADD: Hysplit reader in io module (#791)

* ADD: HYSPLIT reader * ADD: Hysplit to datastream name * STY: Pep8 and fix hard coded filename * FIX: Add HYSPLIT entry to sample_files * FIX: Ok, let's try this again.
ARM-DOE · Jan 23, 2024 · 14fde26 · 14fde26
1 parent d78fa96
commit 14fde26
Show file tree

Hide file tree

Showing 5 changed files with 148 additions and 1 deletion.
diff --git a/act/io/__init__.py b/act/io/__init__.py
@@ -7,7 +7,7 @@
 
 __getattr__, __dir__, __all__ = lazy.attach(
     __name__,
-    submodules=['arm', 'text', 'icartt', 'mpl', 'neon', 'noaagml', 'noaapsl', 'pysp2'],
+    submodules=['arm', 'text', 'icartt', 'mpl', 'neon', 'noaagml', 'noaapsl', 'pysp2', 'hysplit'],
     submod_attrs={
         'arm': [
             'WriteDataset',
@@ -39,5 +39,6 @@
         ],
         'pysp2': ['read_hk_file', 'read_sp2', 'read_sp2_dat'],
         'sodar': ['read_mfas_sodar'],
+        'hysplit': ['read_hysplit']
     },
 )
diff --git a/act/io/hysplit.py b/act/io/hysplit.py
@@ -0,0 +1,105 @@
+import os
+import xarray as xr
+import numpy as np
+import pandas as pd
+
+from datetime import datetime
+from .text import read_csv
+
+
+def read_hysplit(filename, base_year=2000):
+    """
+    Reads an input HYSPLIT trajectory for plotting in ACT.
+
+    Parameters
+    ----------
+    filename: str
+        The input file name.
+    base_year: int
+        The first year of the century in which the data are contained.
+
+    Returns
+    -------
+    ds: xarray Dataset
+        The ACT dataset containing the HYSPLIT trajectories
+    """
+
+    ds = xr.Dataset({})
+    num_lines = 0
+    with open(filename, 'r') as filebuf:
+        num_grids = int(filebuf.readline().split()[0])
+        num_lines += 1
+        grid_times = []
+        grid_names = []
+        forecast_hours = np.zeros(num_grids)
+        for i in range(num_grids):
+            data = filebuf.readline().split()
+            num_lines += 1
+            grid_names.append(data[0])
+            grid_times.append(
+                datetime(year=int(data[1]), month=int(data[2]), day=int(data[3]), hour=int(data[4])))
+            forecast_hours[i] = int(data[5])
+        ds["grid_forecast_hour"] = xr.DataArray(forecast_hours, dims=["num_grids"])
+        ds["grid_forecast_hour"].attrs["standard_name"] = "Grid forecast hour"
+        ds["grid_forecast_hour"].attrs["units"] = "Hour [UTC]"
+        ds["grid_times"] = xr.DataArray(np.array(grid_times), dims=["num_grids"])
+        data_line = filebuf.readline().split()
+        num_lines += 1
+        ds.attrs["trajectory_direction"] = data_line[1]
+        ds.attrs["vertical_motion_calculation_method"] = data_line[2]
+        num_traj = int(data_line[0])
+        traj_times = []
+        start_lats = np.zeros(num_traj)
+        start_lons = np.zeros(num_traj)
+        start_alt = np.zeros(num_traj)
+        for i in range(num_traj):
+            data = filebuf.readline().split()
+            num_lines += 1
+            traj_times.append(
+                datetime(year=(base_year + int(data[0])), month=int(data[1]),
+                         day=int(data[2]), hour=int(data[3])))
+            start_lats[i] = float(data[4])
+            start_lons[i] = float(data[5])
+            start_alt[i] = float(data[6])
+
+        ds["start_latitude"] = xr.DataArray(start_lats, dims=["num_trajectories"])
+        ds["start_latitude"].attrs["long_name"] = "Trajectory start latitude"
+        ds["start_latitude"].attrs["units"] = "degree"
+        ds["start_longitude"] = xr.DataArray(start_lats, dims=["num_trajectories"])
+        ds["start_longitude"].attrs["long_name"] = "Trajectory start longitude"
+        ds["start_longitude"].attrs["units"] = "degree"
+        ds["start_altitude"] = xr.DataArray(start_alt, dims=["num_trajectories"])
+        ds["start_altitude"].attrs["long_name"] = "Trajectory start altitude"
+        ds["start_altitude"].attrs["units"] = "degree"
+        data = filebuf.readline().split()
+        num_lines += 1
+        var_list = ["trajectory_number", "grid_number", "year", "month", "day",
+                    "hour", "minute", "forecast_hour", "age", "lat", "lon", "alt"]
+        for variable in data[1:]:
+            var_list.append(variable)
+    input_df = pd.read_csv(
+        filename, sep='\s+', index_col=False, names=var_list, skiprows=12)
+    input_df['year'] = base_year + input_df['year']
+    input_df['time'] = pd.to_datetime(input_df[["year", "month", "day", "hour", "minute"]],
+                                      format='%y%m%d%H%M')
+    input_df = input_df.set_index("time")
+    del input_df["year"]
+    del input_df["month"]
+    del input_df["day"]
+    del input_df["hour"]
+    del input_df["minute"]
+    ds = ds.merge(input_df.to_xarray())
+    ds.attrs['datastream'] = 'hysplit'
+    ds["trajectory_number"].attrs["standard_name"] = "Trajectory number"
+    ds["trajectory_number"].attrs["units"] = "1"
+    ds["grid_number"].attrs["standard_name"] = "Grid number"
+    ds["grid_number"].attrs["units"] = "1"
+    ds["age"].attrs["standard_name"] = "Grid number"
+    ds["age"].attrs["units"] = "1"
+    ds["lat"].attrs["standard_name"] = "Latitude"
+    ds["lat"].attrs["units"] = "degree"
+    ds["lon"].attrs["standard_name"] = "Longitude"
+    ds["lon"].attrs["units"] = "degree"
+    ds["alt"].attrs["standard_name"] = "Altitude"
+    ds["alt"].attrs["units"] = "meter"
+    return ds
diff --git a/act/tests/sample_files.py b/act/tests/sample_files.py
@@ -64,6 +64,7 @@
 EXAMPLE_OLD_QC = DATASETS.fetch('sgp30ecorE6.b1.20040705.000000.cdf')
 EXAMPLE_SONDE_WILDCARD = DATASETS.fetch('sgpsondewnpnC1.b1.20190101.053200.cdf')
 EXAMPLE_CEIL_WILDCARD = DATASETS.fetch('sgpceilC1.b1.20190101.000000.nc')
+EXAMPLE_HYSPLIT = DATASETS.fetch('houstonaug300.0summer2010080100')
 
 # Multiple files in a list
 dlppi_multi_list = ['sgpdlppiC1.b1.20191015.120023.cdf',

diff --git a/examples/io/plot_hysplit.py b/examples/io/plot_hysplit.py
@@ -0,0 +1,23 @@
+"""
+Read and plot a HYSPLIT trajectory file from a HYSPlIT run.
+-----------------------------------------------------------
+
+This example shows how to read and plot a backtrajectory calculated by the NOAA
+HYSPLIT model over Houston.
+
+Author: Robert Jackson
+"""
+
+import act
+import matplotlib.pyplot as plt
+
+from arm_test_data import DATASETS
+
+# Load the data
+filename = DATASETS.fetch('houstonaug300.0summer2010080100')
+ds = act.io.read_hysplit(filename)
+
+# Use the GeographicPlotDisplay object to make the plot
+disp = act.plotting.GeographicPlotDisplay(ds)
+disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND'])
+plt.show()
diff --git a/tests/io/test_hysplit.py b/tests/io/test_hysplit.py
@@ -0,0 +1,17 @@
+import act
+import matplotlib.pyplot as plt
+
+from act.tests import sample_files
+
+
+def test_read_hysplit():
+    filename = sample_files.EXAMPLE_HYSPLIT
+    ds = act.io.read_hysplit(filename)
+    assert 'lat' in ds.variables.keys()
+    assert 'lon' in ds.variables.keys()
+    assert 'alt' in ds.variables.keys()
+    assert 'PRESSURE' in ds.variables.keys()
+    assert ds.dims["num_grids"] == 8
+    assert ds.dims["num_trajectories"] == 1
+    assert ds.dims['time'] == 121
+    assert ds['age'].min() == -120