Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Module for interactive time series postprocessing in Python shell #22

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -69,6 +69,8 @@ benchmarks/__pycache__/*
grids/utilities/jigsaw/__pycache__/*
post_proc/py/geometry_lat_lon_2d_plot/__pycache__/*
post_proc/py/__pycache__/
post_proc/py/time_series_compare/__pycache__
post_proc/py/interactive_postprocessing/__pycache__

src/core_*/inc
.DS_Store
246 changes: 246 additions & 0 deletions post_proc/py/interactive_postprocessing/interactive_ts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module for interactively plotting time series from MPAS raw output files
at particular points. Usage example from Python shell:
########################################################################
import matplotlib.pyplot as plt; from interactive_ts import ts, plot_ts
dir='/path/to/data';
stream='history'; t0='2021-01-01 00:00:00'; tf='2021-01-02 18:00:00';
dt='10800'; var='swdnb'; lat='-52'; lon='-2'; vertlevel='0'
t1,y1,label1=ts(dir=dir,stream=stream,t0=t0,tf=tf,dt=dt,var=var,lat=lat,
lon=lon,vertlevel=vertlevel)
var='swupb'
t2,y2,label2=ts(dir=dir,stream=stream,t0=t0,tf=tf,dt=dt,var=var,lat=lat,
lon=lon,vertlevel=vertlevel)
y3=y1+y2
label3='total'
plot_ts(t1,y1,label1,ofile='y1.png')
plot_ts(t2,y2,label2,ofile='y1_y2.png')
plot_ts(t2,y3,label3,ofile='y1_y2_y3.png')
########################################################################
Guilherme Torres Mendonça ([email protected])
March 2024
"""
import numpy as np
import matplotlib.pyplot as plt
import argparse
from utils import (
create_datetime_range,
concat_mpas_output,
cs_string_to_list,
find_nCells_from_latlon
)
import os

def ts_vars_list(t0,tf,dt,lat,lon,vertlevel,var,
dir,stream='history',closest_value='euclidean',
verbose='y'):
'''
Plots time series for a list of variables at a
particular point (lon,lat,vertlevel) and time period.
Usage (optional input arguments may be given, see below):
python3 mpas_time_series_at_point.py --dir /path/to/data
--stream history --lat -2.12 --lon -59.00 --vertlevel 0 --t0 '2021-01-01 00:00:00' --tf '2021-01-02 18:00:00'
--dt 10800 --var swdnb,swupb
'''

# Create datetime range and transform it into a list of datetime strings
datetime_list = create_datetime_range(
t0=t0,
tf=tf,
dt=dt
)

# Select variable and concatenate files between t0 and tf
vars_list = cs_string_to_list(var)
cat_file = concat_mpas_output(
stream=stream,
datetime_list=datetime_list,
data_dir=dir,
vars_list=vars_list,
)

# Get lat, lon, vertlevel
lat = float(lat)
lon = float(lon)
vertlevel = int(vertlevel)

# Find grid cell
nCells, ds = find_nCells_from_latlon(cat_file,lon=lon,lat=lat)

# Plot
## Get again list of variables for plotting
vars_list = cs_string_to_list(var)
## Get again list of datetimes, but abbreviated
datetime_list = create_datetime_range(
t0=t0,
tf=tf,
dt=dt,
short_strings='y'
)

plt.figure('ts')
for var in vars_list:
# Try choosing nVertLevels
try:
y_series = ds[var].sel(nCells=nCells,nVertLevels=vertlevel).values
except:
y_series = ds[var].sel(nCells=nCells).values
plt.plot(datetime_list,y_series,linestyle='-',label=var)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xticks(np.arange(min(ds.Time), max(ds.Time)+1,1),
rotation=45, ha='right')
plt.grid(True)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.title(f"{stream} from {t0} to {tf}")
plt.tight_layout()
plt.savefig("time_series.png")
plt.show()

def ts(t0,tf,dt,lat,lon,vertlevel,var,
dir,stream='history',verbose='n',closest_value='haversine'):
'''
Reads MPAS raw $stream output files from $dir and returns a
list of dates and times $datetime_list, a time series of $var ($y_series)
at ($lat,$lon,$vertlevel) between $t0 and $tf with time step $dt, and
a label $var for plotting
INPUT: t0 (str) - initial time
tf (str) - final time
dt (str) - time step
lat (str) - latitude of the grid cell
lon (str) - longitude of the grid cell
vertlevel (str) - vertical level (if applicable)
var (str) - variable to be plotted
dir (str) - directory of MPAS output files
stream (str) - stream name
closest_value (str) - method to find grid cell
OUTPUT: datetime_list (list) - list of datetime strings (abbreviated for plotting)
y_series (array) - time series for var
var (str) - variable name (label)
'''

# Create datetime range and transform it into a list of datetime strings
datetime_list = create_datetime_range(
t0=t0,
tf=tf,
dt=dt
)

# Select variable and concatenate files between t0 and tf
var_list = cs_string_to_list(var)
cat_file, first_file = concat_mpas_output(
stream=stream,
datetime_list=datetime_list,
data_dir=dir,
vars_list=var_list,
)

# Get lat, lon, vertlevel
lat = float(lat)
lon = float(lon)
vertlevel = int(vertlevel)

# Find grid cell using only first file
nCells, first_file = find_nCells_from_latlon(ds=first_file,
lon=lon,lat=lat,
verbose=verbose,
method=closest_value)

# Define time series
try:
y_series = cat_file[var].sel(nCells=nCells,nVertLevels=vertlevel).values
except:
y_series = cat_file[var].sel(nCells=nCells).values

# Define list of dates
datetime_list = create_datetime_range(
t0=t0,
tf=tf,
dt=dt,
short_strings='y'
)

return datetime_list, y_series, var

def plot_ts(t,y,label=None,title=None,labelsize=12,ofile=None):
plt.figure('ts')
if label is not None:
plt.plot(t,y,linestyle='-',label=label)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
else:
plt.plot(t,y,linestyle='-')
if title is not None:
plt.title(title, fontsize=labelsize)
plt.tick_params(axis='both', which='major', labelsize=labelsize)
plt.xticks(rotation=45, ha='right')
plt.grid(True)
plt.tight_layout()
if ofile is not None:
plt.savefig(ofile)
plt.ion()
plt.show()

if __name__ == "__main__":
# Create ArgumentParser object
parser = argparse.ArgumentParser(description=
'plotting time series for a list of variables at a'
+'particular point and time period.')
# Input arguments
parser.add_argument('--dir', default=os.getcwd(), type=str,
help='data directory')
parser.add_argument('--stream', default='history', type=str,
help='stream')
parser.add_argument('--var', default='swdnb, swupb', type=str,
help='list of variables to plot (var1,var2,...)')
parser.add_argument('--lat', default='-2.124375557901979', type=str,
help='latitude')
parser.add_argument('--lon', default='-59.003420487096946', type=str,
help='longitude')
parser.add_argument('--vertlevel', default=0, type=str,
help='vertical level')
parser.add_argument('--t0', type=str,
help='initial datetime (YYYY-mm-dd HH:MM:SS)')
parser.add_argument('--tf', type=str,
help='final datetime (YYYY-mm-dd HH:MM:SS)')
parser.add_argument('--dt', type=str,
help='datetime step (seconds)')
parser.add_argument('--closest_value', type=str, default='euclidean',
help='method to find grid cell from (lat,lon)')

# Parse the command line arguments
args = parser.parse_args()

t0=args.t0
tf=args.tf
dt=args.dt
lat=args.lat
lon=args.lon
vertlevel=args.vertlevel
var=args.var
dir=args.dir
stream=args.stream
closest_value=args.closest_value

ts_vars_list(
dir=dir,
stream=stream,
t0=t0,
tf=tf,
dt=dt,
var=var,
lat=lat,
lon=lon,
vertlevel=vertlevel
)
176 changes: 176 additions & 0 deletions post_proc/py/interactive_postprocessing/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import xarray as xr
import numpy as np
import math
import pandas as pd
from geopy.distance import distance
from haversine import haversine

#TODO: utils.py should contain generally useful functions for postprocessing.
# Therefore, it should live under MPAS-BR/post_proc/py/.

derived_variables = {
'latCell': ['latitude'],
'lonCell': ['longitude'],
'latVertex': ['latitudeVertex'],
'lonVertex': ['longitudeVertex'],
'areaCell': ['area', 'resolution'],
}

def add_mpas_mesh_variables(ds, full=True, **kwargs):
for v in ds.data_vars:
if v not in derived_variables:
continue

newvs = derived_variables[v]

for newv in newvs:
if newv in ds:
#print(newv + ' already here')
continue

if 'lat' in v or 'lon' in v:
ds[newv] = xr.apply_ufunc(np.rad2deg, ds[v])
ds[newv] = ds[newv].where(ds[newv] <= 180.0, ds[newv] - 360.0)
ds[newv].attrs['units'] = 'degrees'

elif newv == 'area':
radius_circle = ds.attrs.get('sphere_radius', 1.0)
if radius_circle == 1:
# need to correct to earth radius
correction_rad_earth = 6371220.0
else:
correction_rad_earth = 1

ds[newv] = (ds[v] / 10 ** 6) * correction_rad_earth**2
ds[newv].attrs['units'] = 'km^2 (assuming areaCell in m^2)'
ds[newv].attrs['long_name'] = 'Area of the cell in km^2'

elif newv == 'resolution':
radius_circle = ds.attrs.get('sphere_radius', 1.0)
if radius_circle == 1.0:
#print('need to correct to earth radius!!')
correction_rad_earth = 6371220.0
else:
correction_rad_earth = 1

# km^2 (assuming areaCell in m^2)
area = (ds[v] / 10 ** 6) * correction_rad_earth**2

ds[newv] = 2 * (xr.apply_ufunc(np.sqrt, area / math.pi))
ds[newv].attrs['units'] = 'km'
ds[newv].attrs['long_name'] = 'Resolution of the cell (approx)'

return ds

def bash_array_to_list(bash_array):
return bash_array.split("|")[:-1]

def cs_string_to_list(cs_string):
return cs_string.split(",")

def create_datetime_range(t0,tf,dt,short_strings='n'):
'''
Creates a list of datetime strings between t0 and tf, with
time step dt.
INPUT: t0 (str) - initial datetime in format 'YYYY.mm.dd HH:MM:SS'
tf (str) - final datetime in format 'YYYY.mm.dd HH:MM:SS'
dt (str) - time step in seconds
OUTPUT: datetime_str_list (list) - list of datetime strings
'''
t0 = pd.Timestamp(t0)
tf = pd.Timestamp(tf)
datetime_range = pd.date_range(start=t0,end=tf,freq=f"{dt}S")
if short_strings == 'y':
datetime_str_list = [dt.strftime('%d/%m %Hh') for dt in datetime_range]
else:
datetime_str_list = [dt.strftime('%Y-%m-%d_%H.%M.%S') for dt in datetime_range]
return datetime_str_list

def concat_mpas_output(stream,datetime_list,data_dir,vars_list):
'''
Concatenates in time, for variables vars_list,
MPAS output files named as follows:
stream.YYYY.mm.dd_HH.MM.SS
INPUT: stream (str) - name of the stream
datetime_list (list) - list of datetime strings in format YYYY.mm.dd_HH.MM.SS
data_dir (str) - directory where output files are stored
vars_list (list) - list of variable names (strings)
OUTPUT: cat_file (Xarray dataset) - concatenated dataset
'''
# Set variable list to keep in file
vars_list.extend(['latCell','lonCell','latVertex','lonVertex','areaCell'])
# Read first file
filename1 = stream + '.' + datetime_list[0] + '.nc'
first_file = xr.open_dataset(f'{data_dir}/{filename1}', engine='netcdf4')[vars_list]
cat_file = xr.open_dataset(f'{data_dir}/{filename1}', engine='netcdf4')[vars_list]
# Read and concatenate that file to remaining files
for datetime in datetime_list[1:]:
filename = stream + '.' + datetime + '.nc'
ds_temp = xr.open_dataset(f'{data_dir}/{filename}', engine='netcdf4')[vars_list]
cat_file = xr.concat([cat_file, ds_temp], dim='Time')
return cat_file, first_file

def get_distance_haversine(lats, lons, lat_ref, lon_ref):
'''
Using the haversine formula (https://en.wikipedia.org/wiki/Haversine_formula),
returns the distance in km of each lat, lon point to (lat_ref,lon_ref).
from Marta G. Badarjí, slightly modified (based on
pypi haversine package: https://pypi.org/project/haversine/)
by G. Torres Mendonça
'''

radius = 6371.0088 # km
d_lat = np.radians(lats - lat_ref) # lat distance in radians
d_lon = np.radians(lons - lon_ref) # lon distance in radians

a = (np.sin(d_lat / 2.) * np.sin(d_lat / 2.) +
np.cos(np.radians(lat_ref)) * np.cos(np.radians(lats)) *
np.sin(d_lon / 2.) * np.sin(d_lon / 2.))
c = np.arcsin(np.sqrt(a))
d = 2 * radius * c

return d

def closest_value_haversine(ds,lon,lat):
d = get_distance_haversine(ds['latitude'].values, ds['longitude'].values,
lat_ref=lat, lon_ref=lon)
ds['distance'] = xr.DataArray(d, dims=['nCells'])
#print ('distance data array')
#print (ds[['distance','longitude','latitude']])
nCells_index = ds['distance'].argmin().item()
#print ('nCells_index')
#print (nCells_index)
#print ('distance min')
#print (ds['distance'].min())
#print ('ds[nCells]:')
#print (ds['nCells'])
nCells_value = ds['nCells'].isel(nCells=nCells_index)
distance_value = ds['distance'].isel(nCells=nCells_index)
return nCells_value, distance_value

def find_nCells_from_latlon(ds,lon,lat,method='haversine',verbose='n'):
ds = add_mpas_mesh_variables(ds)
if method == 'haversine':
index_closest, distance_value = closest_value_haversine(ds=ds,lon=lon,lat=lat)
else:
print (f"{method} method not supported.")
exit (-1)
if verbose == 'y':
# Print information on (lon,lat) point
closest_lon = ds['longitude'].sel(nCells=index_closest)
closest_lat = ds['latitude'].sel(nCells=index_closest)
print ("input (lon,lat):", (lon,lat))
print ("closest (lon,lat):", (float(closest_lon),float(closest_lat)))
print ("distance to input point (km):", distance_value.values)
print ("corresponding nCells value:",index_closest.values)
print ('max/min lat:', ds['latitude'].values.max(),ds['latitude'].values.min())
print ('max/min lon:', ds['longitude'].values.max(),ds['longitude'].values.min())
return index_closest, ds