From e545563712b9bafe17d813cb3bdb44a01cd85144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Mon, 1 Apr 2024 19:32:20 -0300 Subject: [PATCH 01/11] change original mpas_time_series filename, and import draft script --- .../mpas_time_series_at_cell.py | 174 ++++++++++++++++++ ...eries.py => mpas_time_series_benchmark.py} | 0 2 files changed, 174 insertions(+) create mode 100644 post_proc/py/time_series_compare/mpas_time_series_at_cell.py rename post_proc/py/time_series_compare/{mpas_time_series.py => mpas_time_series_benchmark.py} (100%) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_cell.py b/post_proc/py/time_series_compare/mpas_time_series_at_cell.py new file mode 100644 index 0000000000..ca40c359c4 --- /dev/null +++ b/post_proc/py/time_series_compare/mpas_time_series_at_cell.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Script to plot time series for a certain variable at a +particular point (lon,lat,vertlevel) and time period. + +Guilherme Torres Mendonça (guilherme.torresmendonca@ime.usp.br) +March 2024 +""" +import xarray as xr +import numpy as np +import matplotlib.pyplot as plt +import cfgrib +from haversine import haversine +from utils import add_mpas_mesh_variables, bash_array_to_list +import argparse + +def closest_value_haversine(ds,lon,lat): + df = ds.to_dataframe() + df['dist_norm'] = df.apply(lambda row: + haversine((row['latitude'],row['longitude']), + (lat,lon)), axis=1) + mask = df['dist_norm'] == df['dist_norm'].min() + nCells_value = df.loc[mask, :].index.get_level_values('nCells')[0] + df_masked = df.loc[mask,:] + return nCells_value + +def closest_value_euclidean(ds,lon,lat): + ds['dist_norm'] = np.sqrt((ds['longitude'] - lon)**2 + (ds['latitude'] - lat)**2) + mask = ds['dist_norm'] == ds['dist_norm'].min() + nCells_index = mask.argmax(dim='nCells') + nCells_value = ds['nCells'].isel(nCells=nCells_index.values.item()) + return nCells_value + +def closest_value_era5(ds,lon,lat): + ds['dist_norm'] = np.sqrt((ds['longitude'] - lon)**2 + (ds['latitude'] - lat)**2) + lon_min = ds['longitude'].where(ds['dist_norm'] == ds['dist_norm'].min()).values + # Select only that value where lon is not nan + lon_min = lon_min[~np.isnan(lon_min)] + lat_min = ds['latitude'].where(ds['dist_norm'] == ds['dist_norm'].min()).values + # Select only that value where lon is not nan + lat_min = lat_min[~np.isnan(lat_min)] + + return lon_min,lat_min + +def compute_wind_speed(data_dir,lat,lon,vertlevel): + + # Read dataset with concatenated data + #ds = xr.open_dataset(f"{data_dir}/cat.nc") + ds = xr.open_dataset(data_dir) + ds = add_mpas_mesh_variables(ds) + + #index_closest = closest_value_haversine(ds=ds.sel(Time=0),lon=lon,lat=lat) + index_closest = closest_value_euclidean(ds=ds.sel(Time=0),lon=lon,lat=lat) + + # Information on (lon,lat) point + closest_lon = ds['longitude'].sel(Time=0,nCells=index_closest) + closest_lat = ds['latitude'].sel(Time=0,nCells=index_closest) + print ("nCells value:",index_closest) + print ("ref (lon,lat):", (lon,lat)) + print ("closest (lon,lat):", (float(closest_lon),float(closest_lat))) + print("zgrid:") + print (ds['zgrid'].sel(Time=0,nCells=index_closest).values) + + # Obtain wind time series + wind_speed = np.zeros((len(vertlevel),len(ds.Time))) + for t in range(len(ds.Time)): + for l in range(len(vertlevel)): + wind_speed[l,t] = np.sqrt((ds['uReconstructZonal'].sel(nCells=index_closest, + nVertLevels=vertlevel[l], + Time=ds.Time[t]).values)**2 + 
+(ds['uReconstructMeridional'].sel(nCells=index_closest, + nVertLevels=vertlevel[l], + Time=ds.Time[t]).values)**2) + return ds.Time, wind_speed + +def compute_wind_speed_era5(data_dir_u,data_dir_v,lat,lon,vertlevel): + # see https://stackoverflow.com/questions/67963199/xarray-from-grib-file-to-dataset + # Import data + grib_data_u = cfgrib.open_datasets(data_dir_u)[0] + grib_data_v = cfgrib.open_datasets(data_dir_v)[0] + # Correct longitude values + grib_data_u['longitude'] = grib_data_u['longitude'].where(grib_data_u['longitude'] <= 180.0, grib_data_u['longitude'] - 360.0) + grib_data_v['longitude'] = grib_data_v['longitude'].where(grib_data_v['longitude'] <= 180.0, grib_data_v['longitude'] - 360.0) + # Find (lon_min,lat_min) that are closest to (lon,lat) given in input + lon_min_u,lat_min_u = closest_value_era5(ds=grib_data_u,lon=lon,lat=lat) + lon_min_v,lat_min_v = closest_value_era5(ds=grib_data_v,lon=lon,lat=lat) + # Check whether the same (lon_min,lat_min) was found for both u and v + if lon_min_u == lon_min_v: + if lat_min_u == lat_min_v: + print ("same (lon,lat) found for u and v in era5 data") + lat_min = lat_min_u + lon_min = lon_min_u + else: + raise Exception("lat_min_u differs from lat_min_v") + else: + raise Exception("lon_min_u differs from lon_min_v") + print ("lat_min", lat_min) + print ("lon_min", lon_min) + # Obtain wind time series + wind_speed = np.zeros((len(vertlevel),len(grib_data_u.time))) + for t in range(len(grib_data_u.time)): + for l in range(len(vertlevel)): + wind_speed[l,t] = np.sqrt((grib_data_u['u'].sel(longitude=lon_min, + latitude=lat_min, + isobaricInhPa=vertlevel[l], + time=grib_data_u.time[t]).values)**2 + +(grib_data_v['v'].sel(longitude=lon_min, + latitude=lat_min, + isobaricInhPa=vertlevel[l], + time=grib_data_u.time[t]).values)**2) + print (wind_speed) + return grib_data_u.time,wind_speed + +# Create ArgumentParser object +parser = argparse.ArgumentParser(description= + 'plotting wind speed for several experiments') +# Input arguments +parser.add_argument('--data_dir_array', type=str, help='data directories') +parser.add_argument('--fig_label_array', type=str, help='labels for figures') +parser.add_argument('--lat', type=str, help='latitude') +parser.add_argument('--lon', type=str, help='longitude') +parser.add_argument('--vertlevel', type=str, help='vertical level') + +# Parse the command line arguments +args = parser.parse_args() + +# Transform bash arrays into lists +data_dir_list = bash_array_to_list(args.data_dir_array) +fig_label_list = bash_array_to_list(args.fig_label_array) + +# Remove empty items +data_dir_list = list(filter(None, data_dir_list)) +fig_label_list = list(filter(None, fig_label_list)) + +# Get lat,lon,vertlevel +lat = float(args.lat) +lon = float(args.lon) +vertlevel = [int(args.vertlevel)] + +# Set colormap +cmap = plt.get_cmap('Blues') + +plt.figure("wind_speed_without_spinup") +plt.figure(figsize=(10, 6)) +j=0 +for data_dir_tmp in data_dir_list: + print ("plotting:", fig_label_list[j]) + t,wind_speed = compute_wind_speed(data_dir=f"{data_dir_tmp}/output/cat.nc", + lat=lat, + lon=lon, + vertlevel=vertlevel) + if len(data_dir_list) > 1: + color = cmap(j / (len(data_dir_list) - 1)) + else: + color = "k" #cmap(1) + plt.plot(t[0:25],wind_speed[0,5:30],linestyle='-', + color=color,label=fig_label_list[j]) + j=j+1 +plt.xlabel("Time [h]",fontsize=12) +plt.ylabel("Wind Speed [m/s]",fontsize=12) +plt.tick_params(axis='both', which='major', labelsize=12) +plt.xticks(np.arange(min(t), max(t)+1,1), + rotation=45, ha='center') 
+plt.grid(True) +#plt.legend(loc='lower left', bbox_to_anchor=(0., 1.02), borderaxespad=0) +#plt.legend(loc='best') +plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) +plt.xlim([0,24]) +plt.ylim([0,12]) +plt.title("wind speed at theta(1) = 390m; 2017-04-04") +#plt.show() +plt.tight_layout() +plt.savefig("windspeed.png") \ No newline at end of file diff --git a/post_proc/py/time_series_compare/mpas_time_series.py b/post_proc/py/time_series_compare/mpas_time_series_benchmark.py similarity index 100% rename from post_proc/py/time_series_compare/mpas_time_series.py rename to post_proc/py/time_series_compare/mpas_time_series_benchmark.py From b3fcdb190f9fa1b8275737196d71d1b82d51a263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 17:43:03 -0300 Subject: [PATCH 02/11] scripts tested --- .gitignore | 1 + .../mpas_time_series_at_cell.py | 174 ------------------ .../mpas_time_series_at_point.py | 111 +++++++++++ post_proc/py/time_series_compare/utils.py | 147 +++++++++++++++ 4 files changed, 259 insertions(+), 174 deletions(-) delete mode 100644 post_proc/py/time_series_compare/mpas_time_series_at_cell.py create mode 100644 post_proc/py/time_series_compare/mpas_time_series_at_point.py create mode 100644 post_proc/py/time_series_compare/utils.py diff --git a/.gitignore b/.gitignore index bc0c629553..c0fc95bd46 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,7 @@ benchmarks/__pycache__/* grids/utilities/jigsaw/__pycache__/* post_proc/py/geometry_lat_lon_2d_plot/__pycache__/* post_proc/py/__pycache__/ +post_proc/py/time_series_compare/__pycache__ src/core_*/inc .DS_Store diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_cell.py b/post_proc/py/time_series_compare/mpas_time_series_at_cell.py deleted file mode 100644 index ca40c359c4..0000000000 --- a/post_proc/py/time_series_compare/mpas_time_series_at_cell.py +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Script to plot time series for a certain variable at a -particular point (lon,lat,vertlevel) and time period. 
- -Guilherme Torres Mendonça (guilherme.torresmendonca@ime.usp.br) -March 2024 -""" -import xarray as xr -import numpy as np -import matplotlib.pyplot as plt -import cfgrib -from haversine import haversine -from utils import add_mpas_mesh_variables, bash_array_to_list -import argparse - -def closest_value_haversine(ds,lon,lat): - df = ds.to_dataframe() - df['dist_norm'] = df.apply(lambda row: - haversine((row['latitude'],row['longitude']), - (lat,lon)), axis=1) - mask = df['dist_norm'] == df['dist_norm'].min() - nCells_value = df.loc[mask, :].index.get_level_values('nCells')[0] - df_masked = df.loc[mask,:] - return nCells_value - -def closest_value_euclidean(ds,lon,lat): - ds['dist_norm'] = np.sqrt((ds['longitude'] - lon)**2 + (ds['latitude'] - lat)**2) - mask = ds['dist_norm'] == ds['dist_norm'].min() - nCells_index = mask.argmax(dim='nCells') - nCells_value = ds['nCells'].isel(nCells=nCells_index.values.item()) - return nCells_value - -def closest_value_era5(ds,lon,lat): - ds['dist_norm'] = np.sqrt((ds['longitude'] - lon)**2 + (ds['latitude'] - lat)**2) - lon_min = ds['longitude'].where(ds['dist_norm'] == ds['dist_norm'].min()).values - # Select only that value where lon is not nan - lon_min = lon_min[~np.isnan(lon_min)] - lat_min = ds['latitude'].where(ds['dist_norm'] == ds['dist_norm'].min()).values - # Select only that value where lon is not nan - lat_min = lat_min[~np.isnan(lat_min)] - - return lon_min,lat_min - -def compute_wind_speed(data_dir,lat,lon,vertlevel): - - # Read dataset with concatenated data - #ds = xr.open_dataset(f"{data_dir}/cat.nc") - ds = xr.open_dataset(data_dir) - ds = add_mpas_mesh_variables(ds) - - #index_closest = closest_value_haversine(ds=ds.sel(Time=0),lon=lon,lat=lat) - index_closest = closest_value_euclidean(ds=ds.sel(Time=0),lon=lon,lat=lat) - - # Information on (lon,lat) point - closest_lon = ds['longitude'].sel(Time=0,nCells=index_closest) - closest_lat = ds['latitude'].sel(Time=0,nCells=index_closest) - print ("nCells value:",index_closest) - print ("ref (lon,lat):", (lon,lat)) - print ("closest (lon,lat):", (float(closest_lon),float(closest_lat))) - print("zgrid:") - print (ds['zgrid'].sel(Time=0,nCells=index_closest).values) - - # Obtain wind time series - wind_speed = np.zeros((len(vertlevel),len(ds.Time))) - for t in range(len(ds.Time)): - for l in range(len(vertlevel)): - wind_speed[l,t] = np.sqrt((ds['uReconstructZonal'].sel(nCells=index_closest, - nVertLevels=vertlevel[l], - Time=ds.Time[t]).values)**2 - +(ds['uReconstructMeridional'].sel(nCells=index_closest, - nVertLevels=vertlevel[l], - Time=ds.Time[t]).values)**2) - return ds.Time, wind_speed - -def compute_wind_speed_era5(data_dir_u,data_dir_v,lat,lon,vertlevel): - # see https://stackoverflow.com/questions/67963199/xarray-from-grib-file-to-dataset - # Import data - grib_data_u = cfgrib.open_datasets(data_dir_u)[0] - grib_data_v = cfgrib.open_datasets(data_dir_v)[0] - # Correct longitude values - grib_data_u['longitude'] = grib_data_u['longitude'].where(grib_data_u['longitude'] <= 180.0, grib_data_u['longitude'] - 360.0) - grib_data_v['longitude'] = grib_data_v['longitude'].where(grib_data_v['longitude'] <= 180.0, grib_data_v['longitude'] - 360.0) - # Find (lon_min,lat_min) that are closest to (lon,lat) given in input - lon_min_u,lat_min_u = closest_value_era5(ds=grib_data_u,lon=lon,lat=lat) - lon_min_v,lat_min_v = closest_value_era5(ds=grib_data_v,lon=lon,lat=lat) - # Check whether the same (lon_min,lat_min) was found for both u and v - if lon_min_u == lon_min_v: - if lat_min_u == 
lat_min_v: - print ("same (lon,lat) found for u and v in era5 data") - lat_min = lat_min_u - lon_min = lon_min_u - else: - raise Exception("lat_min_u differs from lat_min_v") - else: - raise Exception("lon_min_u differs from lon_min_v") - print ("lat_min", lat_min) - print ("lon_min", lon_min) - # Obtain wind time series - wind_speed = np.zeros((len(vertlevel),len(grib_data_u.time))) - for t in range(len(grib_data_u.time)): - for l in range(len(vertlevel)): - wind_speed[l,t] = np.sqrt((grib_data_u['u'].sel(longitude=lon_min, - latitude=lat_min, - isobaricInhPa=vertlevel[l], - time=grib_data_u.time[t]).values)**2 - +(grib_data_v['v'].sel(longitude=lon_min, - latitude=lat_min, - isobaricInhPa=vertlevel[l], - time=grib_data_u.time[t]).values)**2) - print (wind_speed) - return grib_data_u.time,wind_speed - -# Create ArgumentParser object -parser = argparse.ArgumentParser(description= - 'plotting wind speed for several experiments') -# Input arguments -parser.add_argument('--data_dir_array', type=str, help='data directories') -parser.add_argument('--fig_label_array', type=str, help='labels for figures') -parser.add_argument('--lat', type=str, help='latitude') -parser.add_argument('--lon', type=str, help='longitude') -parser.add_argument('--vertlevel', type=str, help='vertical level') - -# Parse the command line arguments -args = parser.parse_args() - -# Transform bash arrays into lists -data_dir_list = bash_array_to_list(args.data_dir_array) -fig_label_list = bash_array_to_list(args.fig_label_array) - -# Remove empty items -data_dir_list = list(filter(None, data_dir_list)) -fig_label_list = list(filter(None, fig_label_list)) - -# Get lat,lon,vertlevel -lat = float(args.lat) -lon = float(args.lon) -vertlevel = [int(args.vertlevel)] - -# Set colormap -cmap = plt.get_cmap('Blues') - -plt.figure("wind_speed_without_spinup") -plt.figure(figsize=(10, 6)) -j=0 -for data_dir_tmp in data_dir_list: - print ("plotting:", fig_label_list[j]) - t,wind_speed = compute_wind_speed(data_dir=f"{data_dir_tmp}/output/cat.nc", - lat=lat, - lon=lon, - vertlevel=vertlevel) - if len(data_dir_list) > 1: - color = cmap(j / (len(data_dir_list) - 1)) - else: - color = "k" #cmap(1) - plt.plot(t[0:25],wind_speed[0,5:30],linestyle='-', - color=color,label=fig_label_list[j]) - j=j+1 -plt.xlabel("Time [h]",fontsize=12) -plt.ylabel("Wind Speed [m/s]",fontsize=12) -plt.tick_params(axis='both', which='major', labelsize=12) -plt.xticks(np.arange(min(t), max(t)+1,1), - rotation=45, ha='center') -plt.grid(True) -#plt.legend(loc='lower left', bbox_to_anchor=(0., 1.02), borderaxespad=0) -#plt.legend(loc='best') -plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) -plt.xlim([0,24]) -plt.ylim([0,12]) -plt.title("wind speed at theta(1) = 390m; 2017-04-04") -#plt.show() -plt.tight_layout() -plt.savefig("windspeed.png") \ No newline at end of file diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py new file mode 100644 index 0000000000..53e33b8205 --- /dev/null +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Script to plot time series for a certain variable at a +particular point (lon,lat,vertlevel) and time period. 
+ +Usage: +python3 mpas_time_series_at_point.py --dir /path/to/data +--stream history --t0 '2021-01-01 00:00:00' --tf '2021-01-02 18:00:00' +--dt 10800 --var swdnb,swupb + +Guilherme Torres Mendonça (guilherme.torresmendonca@ime.usp.br) +March 2024 +""" +import xarray as xr +import numpy as np +import matplotlib.pyplot as plt +import cfgrib +from haversine import haversine +import argparse +import pandas as pd +from utils import ( + add_mpas_mesh_variables, + create_datetime_range, + concat_mpas_output, + cs_string_to_list, + find_nCells_from_latlon +) +import os + +# Create ArgumentParser object +parser = argparse.ArgumentParser(description= + 'plotting time series for a certain variable at a' + +'particular point and time period.') +# Input arguments +parser.add_argument('--dir', default=os.getcwd(), type=str, + help='data directory') +parser.add_argument('--stream', default='history', type=str, + help='stream') +parser.add_argument('--var', default='swdnb, swupb', type=str, + help='list of variables to plot (var1,var2,...)') +parser.add_argument('--lat', default='-2.124375557901979', type=str, + help='latitude') +parser.add_argument('--lon', default='-59.003420487096946', type=str, + help='longitude') +parser.add_argument('--vertlevel', default=0, type=str, + help='vertical level') +parser.add_argument('--t0', type=str, + help='initial datetime (YYYY-mm-dd HH:MM:SS)') +parser.add_argument('--tf', type=str, + help='final datetime (YYYY-mm-dd HH:MM:SS)') +parser.add_argument('--dt', type=str, + help='datetime step (seconds)') +parser.add_argument('--closest_value', type=str, default='euclidean', + help='method to find grid cell from (lat,lon)') + +# Parse the command line arguments +args = parser.parse_args() + +# Create datetime range and transform it into a list of datetime strings +datetime_list = create_datetime_range( + t0=args.t0, + tf=args.tf, + dt=args.dt +) + +# Select variable and concatenate files between t0 and tf +vars_list = cs_string_to_list(args.var) +cat_file = concat_mpas_output( + stream=args.stream, + datetime_list=datetime_list, + data_dir=args.dir, + vars_list=vars_list, + ) + +# Get lat, lon, vertlevel +lat = float(args.lat) +lon = float(args.lon) +vertlevel = int(args.vertlevel) + +# Find grid cell +nCells, ds = find_nCells_from_latlon(cat_file,lon=lon,lat=lat) + +# Plot +## Get again list of variables for plotting +vars_list = cs_string_to_list(args.var) +## Get again list of datetimes, but abbreviated +datetime_list = create_datetime_range( + t0=args.t0, + tf=args.tf, + dt=args.dt, + short_strings='y' +) + +plt.figure() +for var in vars_list: + # Try choosing nVertLevels + #try: + # y_series = ds[var].sel(nCells=nCells,nVertLevels=vertlevel).values + #except: + y_series = ds[var].sel(nCells=nCells).values + plt.plot(datetime_list,y_series,linestyle='-',label=var) +plt.tick_params(axis='both', which='major', labelsize=12) +plt.xticks(np.arange(min(ds.Time), max(ds.Time)+1,1), + rotation=45, ha='right') +plt.grid(True) +plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) +plt.title(f"{args.stream} from {args.t0} to {args.tf}") +plt.tight_layout() +plt.savefig("time_series.png") +plt.show() \ No newline at end of file diff --git a/post_proc/py/time_series_compare/utils.py b/post_proc/py/time_series_compare/utils.py new file mode 100644 index 0000000000..2200aa1d33 --- /dev/null +++ b/post_proc/py/time_series_compare/utils.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import xarray as xr +import numpy as np +import math +import pandas as 
pd + +#TODO: utils.py should contain generally useful functions for postprocessing. +# Therefore, it should live under MPAS-BR/post_proc/py/. + +derived_variables = { + 'latCell': ['latitude'], + 'lonCell': ['longitude'], + 'latVertex': ['latitudeVertex'], + 'lonVertex': ['longitudeVertex'], + 'areaCell': ['area', 'resolution'], +} + +def add_mpas_mesh_variables(ds, full=True, **kwargs): + for v in ds.data_vars: + if v not in derived_variables: + continue + + newvs = derived_variables[v] + + for newv in newvs: + if newv in ds: + #print(newv + ' already here') + continue + + if 'lat' in v or 'lon' in v: + ds[newv] = xr.apply_ufunc(np.rad2deg, ds[v]) + ds[newv] = ds[newv].where(ds[newv] <= 180.0, ds[newv] - 360.0) + ds[newv].attrs['units'] = 'degrees' + + elif newv == 'area': + radius_circle = ds.attrs.get('sphere_radius', 1.0) + if radius_circle == 1: + # need to correct to earth radius + correction_rad_earth = 6371220.0 + else: + correction_rad_earth = 1 + + ds[newv] = (ds[v] / 10 ** 6) * correction_rad_earth**2 + ds[newv].attrs['units'] = 'km^2 (assuming areaCell in m^2)' + ds[newv].attrs['long_name'] = 'Area of the cell in km^2' + + elif newv == 'resolution': + radius_circle = ds.attrs.get('sphere_radius', 1.0) + if radius_circle == 1.0: + #print('need to correct to earth radius!!') + correction_rad_earth = 6371220.0 + else: + correction_rad_earth = 1 + + # km^2 (assuming areaCell in m^2) + area = (ds[v] / 10 ** 6) * correction_rad_earth**2 + + ds[newv] = 2 * (xr.apply_ufunc(np.sqrt, area / math.pi)) + ds[newv].attrs['units'] = 'km' + ds[newv].attrs['long_name'] = 'Resolution of the cell (approx)' + + return ds + +def bash_array_to_list(bash_array): + return bash_array.split("|")[:-1] + +def cs_string_to_list(cs_string): + return cs_string.split(",") + +def create_datetime_range(t0,tf,dt,short_strings='n'): + ''' + Creates a list of datetime strings between t0 and tf, with + time step dt. + + INPUT: t0 (str) - initial datetime in format 'YYYY.mm.dd HH:MM:SS' + tf (str) - final datetime in format 'YYYY.mm.dd HH:MM:SS' + dt (str) - time step in seconds + + OUTPUT: datetime_str_list (list) - list of datetime strings + ''' + t0 = pd.Timestamp(t0) + tf = pd.Timestamp(tf) + datetime_range = pd.date_range(start=t0,end=tf,freq=f"{dt}S") + if short_strings == 'y': + datetime_str_list = [dt.strftime('%d/%m %Hh') for dt in datetime_range] + else: + datetime_str_list = [dt.strftime('%Y-%m-%d_%H.%M.%S') for dt in datetime_range] + return datetime_str_list + +def concat_mpas_output(stream,datetime_list,data_dir,vars_list): + ''' + Concatenates in time, for variables vars_list, + MPAS output files named as follows: + + stream.YYYY.mm.dd_HH.MM.SS + + INPUT: stream (str) - name of the stream + datetime_list (list) - list of datetime strings in format YYYY.mm.dd_HH.MM.SS + data_dir (str) - directory where output files are stored + vars_list (list) - list of variable names (strings) + + OUTPUT: cat_file (Xarray dataset) - concatenated dataset + ''' + # Set variable list to keep in file + vars_list.extend(['latCell','lonCell','latVertex','lonVertex','areaCell']) + # Read first file + filename1 = stream + '.' + datetime_list[0] + '.nc' + cat_file = xr.open_dataset(f'{data_dir}/{filename1}', engine='netcdf4')[vars_list] + # Read and concatenate that file to remaining files + for datetime in datetime_list[1:]: + filename = stream + '.' 
+ datetime + '.nc' + ds_temp = xr.open_dataset(f'{data_dir}/{filename}', engine='netcdf4')[vars_list] + cat_file = xr.concat([cat_file, ds_temp], dim='Time') + return cat_file + +def closest_value_haversine(ds,lon,lat): + df = ds.to_dataframe() + df['dist_norm'] = df.apply(lambda row: + haversine((row['latitude'],row['longitude']), + (lat,lon)), axis=1) + mask = df['dist_norm'] == df['dist_norm'].min() + nCells_value = df.loc[mask, :].index.get_level_values('nCells')[0] + df_masked = df.loc[mask,:] + return nCells_value + +def closest_value_euclidean(ds,lon,lat): + ds['dist_norm'] = np.sqrt((ds['longitude'] - lon)**2 + (ds['latitude'] - lat)**2) + mask = ds['dist_norm'] == ds['dist_norm'].min() + nCells_index = mask.argmax(dim='nCells') + nCells_value = ds['nCells'].isel(nCells=nCells_index.values.item()) + return nCells_value + +def find_nCells_from_latlon(ds,lon,lat,method='euclidean',verbose='y'): + ds = add_mpas_mesh_variables(ds) + if method == 'euclidean': + index_closest = closest_value_euclidean(ds=ds.sel(Time=0),lon=lon,lat=lat) + elif method == 'haversine': + index_closest = closest_value_haversine(ds=ds.sel(Time=0),lon=lon,lat=lat) + if verbose == 'y': + # Print information on (lon,lat) point + closest_lon = ds['longitude'].sel(Time=0,nCells=index_closest) + closest_lat = ds['latitude'].sel(Time=0,nCells=index_closest) + print ("input (lon,lat):", (lon,lat)) + print ("closest (lon,lat):", (float(closest_lon),float(closest_lat))) + print ("corresponding nCells value:",index_closest.values) + return index_closest, ds From 2bb7dbcd420c845d1a07b264b6c6ffa80691b4ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 17:57:12 -0300 Subject: [PATCH 03/11] comments --- post_proc/py/time_series_compare/mpas_time_series_at_point.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 53e33b8205..4eef2b6e07 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Script to plot time series for a certain variable at a +Script to plot time series for a list of variables at a particular point (lon,lat,vertlevel) and time period. 
Usage: From 78542b78fad7236b7efae723a464deedab31ef57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 17:58:15 -0300 Subject: [PATCH 04/11] remove unnecessary imports --- .../py/time_series_compare/mpas_time_series_at_point.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 4eef2b6e07..8c39521009 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -12,15 +12,10 @@ Guilherme Torres Mendonça (guilherme.torresmendonca@ime.usp.br) March 2024 """ -import xarray as xr import numpy as np import matplotlib.pyplot as plt -import cfgrib -from haversine import haversine import argparse -import pandas as pd -from utils import ( - add_mpas_mesh_variables, +from utils import ( create_datetime_range, concat_mpas_output, cs_string_to_list, From 5c66456ed2d8e5befd699d70f6f0548fcb624ce7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 17:59:39 -0300 Subject: [PATCH 05/11] coorrect help msg --- post_proc/py/time_series_compare/mpas_time_series_at_point.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 8c39521009..3a90a98c1a 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -25,7 +25,7 @@ # Create ArgumentParser object parser = argparse.ArgumentParser(description= - 'plotting time series for a certain variable at a' + 'plotting time series for a list of variables at a' +'particular point and time period.') # Input arguments parser.add_argument('--dir', default=os.getcwd(), type=str, From d202d971dc4dc781c4601e9a80408ddc29a06476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 18:05:03 -0300 Subject: [PATCH 06/11] update comments --- post_proc/py/time_series_compare/mpas_time_series_at_point.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 3a90a98c1a..b8cbf10002 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -4,9 +4,9 @@ Script to plot time series for a list of variables at a particular point (lon,lat,vertlevel) and time period. 
-Usage: +Usage (optional input arguments may be given, see below): python3 mpas_time_series_at_point.py --dir /path/to/data ---stream history --t0 '2021-01-01 00:00:00' --tf '2021-01-02 18:00:00' +--stream history --lat -2.12 --lon -59.00 --t0 '2021-01-01 00:00:00' --tf '2021-01-02 18:00:00' --dt 10800 --var swdnb,swupb Guilherme Torres Mendonça (guilherme.torresmendonca@ime.usp.br) From ed9df81cf7615da3182b0d82b31ba43955109adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 18:08:13 -0300 Subject: [PATCH 07/11] account for case where nVertLevels is not dimension --- .../py/time_series_compare/mpas_time_series_at_point.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index b8cbf10002..7cc64a9e17 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -90,10 +90,10 @@ plt.figure() for var in vars_list: # Try choosing nVertLevels - #try: - # y_series = ds[var].sel(nCells=nCells,nVertLevels=vertlevel).values - #except: - y_series = ds[var].sel(nCells=nCells).values + try: + y_series = ds[var].sel(nCells=nCells,nVertLevels=vertlevel).values + except: + y_series = ds[var].sel(nCells=nCells).values plt.plot(datetime_list,y_series,linestyle='-',label=var) plt.tick_params(axis='both', which='major', labelsize=12) plt.xticks(np.arange(min(ds.Time), max(ds.Time)+1,1), From b3784e999f34f55eb1aed83764aa653364822a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Wed, 3 Apr 2024 09:46:33 -0300 Subject: [PATCH 08/11] add vertlevel to usage --- post_proc/py/time_series_compare/mpas_time_series_at_point.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 7cc64a9e17..5ed555801c 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -6,7 +6,7 @@ Usage (optional input arguments may be given, see below): python3 mpas_time_series_at_point.py --dir /path/to/data ---stream history --lat -2.12 --lon -59.00 --t0 '2021-01-01 00:00:00' --tf '2021-01-02 18:00:00' +--stream history --lat -2.12 --lon -59.00 --vertlevel 0 --t0 '2021-01-01 00:00:00' --tf '2021-01-02 18:00:00' --dt 10800 --var swdnb,swupb Guilherme Torres Mendonça (guilherme.torresmendonca@ime.usp.br) @@ -103,4 +103,4 @@ plt.title(f"{args.stream} from {args.t0} to {args.tf}") plt.tight_layout() plt.savefig("time_series.png") -plt.show() \ No newline at end of file +plt.show() From 68b63af9ccc46882b02f974cfd1ff6dacffae0e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Wed, 3 Apr 2024 11:19:29 -0300 Subject: [PATCH 09/11] small changes --- post_proc/py/time_series_compare/mpas_time_series_at_point.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 5ed555801c..630a336205 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -103,4 +103,4 @@ plt.title(f"{args.stream} from {args.t0} to {args.tf}") plt.tight_layout() 
plt.savefig("time_series.png") -plt.show() +plt.show() \ No newline at end of file From 474e8c9deb936dd6ec2f85ced632b7f6efbbe8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Thu, 4 Apr 2024 10:52:42 -0300 Subject: [PATCH 10/11] corrections utils --- .../mpas_time_series_at_point.py | 5 +- post_proc/py/time_series_compare/utils.py | 70 +++++++++++++------ 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/post_proc/py/time_series_compare/mpas_time_series_at_point.py b/post_proc/py/time_series_compare/mpas_time_series_at_point.py index 630a336205..75c50382c4 100644 --- a/post_proc/py/time_series_compare/mpas_time_series_at_point.py +++ b/post_proc/py/time_series_compare/mpas_time_series_at_point.py @@ -46,7 +46,7 @@ help='final datetime (YYYY-mm-dd HH:MM:SS)') parser.add_argument('--dt', type=str, help='datetime step (seconds)') -parser.add_argument('--closest_value', type=str, default='euclidean', +parser.add_argument('--closest_value', type=str, default='haversine', help='method to find grid cell from (lat,lon)') # Parse the command line arguments @@ -74,7 +74,8 @@ vertlevel = int(args.vertlevel) # Find grid cell -nCells, ds = find_nCells_from_latlon(cat_file,lon=lon,lat=lat) +nCells, ds = find_nCells_from_latlon(cat_file,lon=lon,lat=lat, + method=args.closest_value) # Plot ## Get again list of variables for plotting diff --git a/post_proc/py/time_series_compare/utils.py b/post_proc/py/time_series_compare/utils.py index 2200aa1d33..9966d794a6 100644 --- a/post_proc/py/time_series_compare/utils.py +++ b/post_proc/py/time_series_compare/utils.py @@ -4,6 +4,8 @@ import numpy as np import math import pandas as pd +from geopy.distance import distance +from haversine import haversine #TODO: utils.py should contain generally useful functions for postprocessing. # Therefore, it should live under MPAS-BR/post_proc/py/. @@ -114,34 +116,60 @@ def concat_mpas_output(stream,datetime_list,data_dir,vars_list): cat_file = xr.concat([cat_file, ds_temp], dim='Time') return cat_file +def get_distance_haversine(lats, lons, lat_ref, lon_ref): + ''' + Using the haversine formula (https://en.wikipedia.org/wiki/Haversine_formula), + returns the distance in km of each lat, lon point to (lat_ref,lon_ref). + + from Marta G. Badarjí, slightly modified (based on + pypi haversine package: https://pypi.org/project/haversine/) + by G. Torres Mendonça + ''' + + radius = 6371.0088 # km + d_lat = np.radians(lats - lat_ref) # lat distance in radians + d_lon = np.radians(lons - lon_ref) # lon distance in radians + + a = (np.sin(d_lat / 2.) * np.sin(d_lat / 2.) + + np.cos(np.radians(lat_ref)) * np.cos(np.radians(lats)) * + np.sin(d_lon / 2.) 
* np.sin(d_lon / 2.)) + c = np.arcsin(np.sqrt(a)) + d = 2 * radius * c + + return d + def closest_value_haversine(ds,lon,lat): - df = ds.to_dataframe() - df['dist_norm'] = df.apply(lambda row: - haversine((row['latitude'],row['longitude']), - (lat,lon)), axis=1) - mask = df['dist_norm'] == df['dist_norm'].min() - nCells_value = df.loc[mask, :].index.get_level_values('nCells')[0] - df_masked = df.loc[mask,:] - return nCells_value - -def closest_value_euclidean(ds,lon,lat): - ds['dist_norm'] = np.sqrt((ds['longitude'] - lon)**2 + (ds['latitude'] - lat)**2) - mask = ds['dist_norm'] == ds['dist_norm'].min() - nCells_index = mask.argmax(dim='nCells') - nCells_value = ds['nCells'].isel(nCells=nCells_index.values.item()) - return nCells_value - -def find_nCells_from_latlon(ds,lon,lat,method='euclidean',verbose='y'): + d = get_distance_haversine(ds['latitude'].values, ds['longitude'].values, + lat_ref=lat, lon_ref=lon) + ds['distance'] = xr.DataArray(d, dims=['nCells']) + #print ('distance data array') + #print (ds[['distance','longitude','latitude']]) + nCells_index = ds['distance'].argmin().item() + #print ('nCells_index') + #print (nCells_index) + #print ('distance min') + #print (ds['distance'].min()) + #print ('ds[nCells]:') + #print (ds['nCells']) + nCells_value = ds['nCells'].isel(nCells=nCells_index) + distance_value = ds['distance'].isel(nCells=nCells_index) + return nCells_value, distance_value + +def find_nCells_from_latlon(ds,lon,lat,method='haversine',verbose='y'): ds = add_mpas_mesh_variables(ds) - if method == 'euclidean': - index_closest = closest_value_euclidean(ds=ds.sel(Time=0),lon=lon,lat=lat) - elif method == 'haversine': - index_closest = closest_value_haversine(ds=ds.sel(Time=0),lon=lon,lat=lat) + if method == 'haversine': + index_closest, distance_value = closest_value_haversine(ds=ds.sel(Time=0),lon=lon,lat=lat) + else: + print (f"{method} method not supported.") + exit (-1) if verbose == 'y': # Print information on (lon,lat) point closest_lon = ds['longitude'].sel(Time=0,nCells=index_closest) closest_lat = ds['latitude'].sel(Time=0,nCells=index_closest) print ("input (lon,lat):", (lon,lat)) print ("closest (lon,lat):", (float(closest_lon),float(closest_lat))) + print ("distance to input point (km):", distance_value.values) print ("corresponding nCells value:",index_closest.values) + print ('max/min lat:', ds['latitude'].values.max(),ds['latitude'].values.min()) + print ('max/min lon:', ds['longitude'].values.max(),ds['longitude'].values.min()) return index_closest, ds From 750508c0613e7b84b91537e0b08ce5cb129bfd39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Torres=20Mendon=C3=A7a?= Date: Thu, 4 Apr 2024 10:59:26 -0300 Subject: [PATCH 11/11] remove unnecessary imports --- post_proc/py/time_series_compare/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/post_proc/py/time_series_compare/utils.py b/post_proc/py/time_series_compare/utils.py index 9966d794a6..7e4c7ec482 100644 --- a/post_proc/py/time_series_compare/utils.py +++ b/post_proc/py/time_series_compare/utils.py @@ -4,8 +4,6 @@ import numpy as np import math import pandas as pd -from geopy.distance import distance -from haversine import haversine #TODO: utils.py should contain generally useful functions for postprocessing. # Therefore, it should live under MPAS-BR/post_proc/py/. 
@@ -172,4 +170,4 @@ def find_nCells_from_latlon(ds,lon,lat,method='haversine',verbose='y'): print ("corresponding nCells value:",index_closest.values) print ('max/min lat:', ds['latitude'].values.max(),ds['latitude'].values.min()) print ('max/min lon:', ds['longitude'].values.max(),ds['longitude'].values.min()) - return index_closest, ds + return index_closest, ds \ No newline at end of file
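
A minimal standalone sketch of the nearest-cell lookup introduced in PATCH 10/11, for quick sanity checks of the haversine distance: it assumes only NumPy, the cell-centre coordinates below are made-up placeholders, and only the distance formula and the script's default reference point are taken from the patches above.

import numpy as np

def haversine_km(lats, lons, lat_ref, lon_ref):
    # Same formula as get_distance_haversine() in utils.py: great-circle
    # distance in km from each (lat, lon) pair to (lat_ref, lon_ref).
    radius = 6371.0088  # mean Earth radius, km
    d_lat = np.radians(lats - lat_ref)
    d_lon = np.radians(lons - lon_ref)
    a = (np.sin(d_lat / 2.0) ** 2
         + np.cos(np.radians(lat_ref)) * np.cos(np.radians(lats))
         * np.sin(d_lon / 2.0) ** 2)
    return 2.0 * radius * np.arcsin(np.sqrt(a))

# Placeholder cell centres (not MPAS output); the reference point is the
# default (lat, lon) from mpas_time_series_at_point.py.
cell_lats = np.array([-2.0, -2.5, -3.1])
cell_lons = np.array([-59.0, -59.4, -60.2])
dist = haversine_km(cell_lats, cell_lons,
                    lat_ref=-2.124375557901979, lon_ref=-59.003420487096946)
print("distances (km):", np.round(dist, 1))
print("closest cell index:", int(dist.argmin()))

In closest_value_haversine() the same argmin index is mapped back onto the dataset via ds['nCells'].isel(nCells=nCells_index), so the sketch mirrors only the distance computation and index selection, not the xarray bookkeeping.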