From 7409cce85c1b301316c1f683d88bb79a8ff6997e Mon Sep 17 00:00:00 2001 From: mgrover1 Date: Tue, 16 Jan 2024 10:28:10 -0600 Subject: [PATCH] ENH: Rename the csv module to prevent errors --- act/io/__init__.py | 9 +- act/io/csv.py | 103 ----- act/io/neon.py | 22 +- act/io/noaagml.py | 297 ++++++++---- act/io/noaapsl.py | 595 +++++++++++++++++++------ tests/io/{test_csv.py => test_text.py} | 4 +- 6 files changed, 678 insertions(+), 352 deletions(-) delete mode 100644 act/io/csv.py rename tests/io/{test_csv.py => test_text.py} (86%) diff --git a/act/io/__init__.py b/act/io/__init__.py index 8227cf89da..d1792bbf30 100644 --- a/act/io/__init__.py +++ b/act/io/__init__.py @@ -7,7 +7,6 @@ __getattr__, __dir__, __all__ = lazy.attach( __name__, - submodules=['arm', 'csv', 'icartt', 'mpl', 'neon', 'noaagml', 'noaapsl', 'pysp2'], submod_attrs={ 'arm': [ @@ -18,7 +17,7 @@ 'check_if_tar_gz_file', 'read_arm_mmcr', ], - 'csv': ['read_csv'], + 'text': ['read_csv'], 'icartt': ['read_icartt'], 'mpl': ['proc_sigma_mplv5_read', 'read_sigma_mplv5'], 'neon': ['read_neon_csv'], @@ -38,9 +37,7 @@ 'read_psl_radar_fmcw_moment', 'read_psl_surface_met', ], - 'pysp2': ['read_hk_file', 'read_sp2', 'read_sp2_dat' - ], - 'sodar' : [ - 'read_mfas_sodar'] + 'pysp2': ['read_hk_file', 'read_sp2', 'read_sp2_dat'], + 'sodar': ['read_mfas_sodar'], }, ) diff --git a/act/io/csv.py b/act/io/csv.py deleted file mode 100644 index 0aaa595329..0000000000 --- a/act/io/csv.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -This module contains I/O operations for loading csv files. - -""" - -import pathlib -import pandas as pd -from act.io.arm import check_arm_standards - - -def read_csv(filename, sep=',', engine='python', column_names=None, skipfooter=0, ignore_index=True, **kwargs): - - """ - Returns an `xarray.Dataset` with stored data and metadata from user-defined - query of CSV files. - - Parameters - ---------- - filenames : str or list - Name of file(s) to read. - sep : str - The separator between columns in the csv file. - column_names : list or None - The list of column names in the csv file. - verbose : bool - If true, will print if a file is not found. - ignore_index : bool - Keyword for pandas concat function. If True, do not use the index - values along the concatenation axis. The resulting axis will be labeled - 0, …, n - 1. This is useful if you are concatenating datasets where the - concatenation axis does not have meaningful indexing information. Note - the index values on the other axes are still respected in the join. - - Additional keyword arguments will be passed into pandas.read_csv. - - Returns - ------- - ds : xarray.Dataset - ACT Xarray dataset. Will be None if the file is not found. - - Examples - -------- - This example will load the example sounding data used for unit testing: - - .. code-block:: python - - import act - - ds = act.io.csv.read(act.tests.sample_files.EXAMPLE_CSV_WILDCARD) - - """ - - # Convert to string if filename is a pathlib or not a list - if isinstance(filename, (pathlib.PurePath, str)): - filename = [str(filename)] - - if isinstance(filename, list) and isinstance(filename[0], pathlib.PurePath): - filename = [str(ii) for ii in filename] - - # Read data using pandas read_csv one file at a time and append to - # list. Then concatinate the list into one pandas dataframe. - li = [] - for fl in filename: - df = pd.read_csv( - fl, sep=sep, names=column_names, skipfooter=skipfooter, engine=engine, **kwargs - ) - li.append(df) - - if len(li) == 1: - df = li[0] - else: - df = pd.concat(li, axis=0, ignore_index=ignore_index) - - # Set Coordinates if there's a variable date_time - if 'date_time' in df: - df.date_time = df.date_time.astype('datetime64[ns]') - df.time = df.date_time - df = df.set_index('time') - - # Convert to xarray DataSet - ds = df.to_xarray() - - # Set additional variables - # Since we cannot assume a standard naming convention setting - # file_date and file_time to the first time in the file - x_coord = ds.coords.to_index().values[0] - if isinstance(x_coord, str): - x_coord_dt = pd.to_datetime(x_coord) - ds.attrs['_file_dates'] = x_coord_dt.strftime('%Y%m%d') - ds.attrs['_file_times'] = x_coord_dt.strftime('%H%M%S') - - # Check for standard ARM datastream name, if none, assume the file is ARM - # standard format. - is_arm_file_flag = check_arm_standards(ds) - if is_arm_file_flag == 0: - - ds.attrs['_datastream'] = '.'.join(filename[0].split('/')[-1].split('.')[0:2]) - - # Add additional attributes, site, standards flag, etc... - ds.attrs['_site'] = str(ds.attrs['_datastream'])[0:3] - ds.attrs['_arm_standards_flag'] = is_arm_file_flag - - return ds diff --git a/act/io/neon.py b/act/io/neon.py index 69a8d8d96a..c6bec73ae0 100644 --- a/act/io/neon.py +++ b/act/io/neon.py @@ -2,11 +2,13 @@ Modules for reading in NOAA PSL data. """ -import pandas as pd -import xarray as xr import datetime as dt + import numpy as np -from act.io.csv import read_csv +import pandas as pd +import xarray as xr + +from act.io.text import read_csv def read_neon_csv(files, variable_files=None, position_files=None): @@ -85,8 +87,18 @@ def read_neon_csv(files, variable_files=None, position_files=None): ds['lat'] = xr.DataArray(data=float(loc_df['referenceLatitude'].values[idx])) ds['lon'] = xr.DataArray(data=float(loc_df['referenceLongitude'].values[idx])) ds['alt'] = xr.DataArray(data=float(loc_df['referenceElevation'].values[idx])) - variables = ['xOffset', 'yOffset', 'zOffset', 'eastOffset', 'northOffset', - 'pitch', 'roll', 'azimuth', 'xAzimuth', 'yAzimuth'] + variables = [ + 'xOffset', + 'yOffset', + 'zOffset', + 'eastOffset', + 'northOffset', + 'pitch', + 'roll', + 'azimuth', + 'xAzimuth', + 'yAzimuth', + ] for v in variables: ds[v] = xr.DataArray(data=float(loc_df[v].values[idx])) multi_ds.append(ds) diff --git a/act/io/noaagml.py b/act/io/noaagml.py index a8ad75df7e..6e552ebcf4 100644 --- a/act/io/noaagml.py +++ b/act/io/noaagml.py @@ -6,8 +6,8 @@ from datetime import datetime from pathlib import Path -import pandas as pd import numpy as np +import pandas as pd import xarray as xr import act @@ -47,8 +47,7 @@ def read_gml(filename, datatype=None, remove_time_vars=True, convert_missing=Tru if datatype is not None: if datatype.upper() == 'MET': - return read_gml_met( - filename, convert_missing=convert_missing, **kwargs) + return read_gml_met(filename, convert_missing=convert_missing, **kwargs) elif datatype.upper() == 'RADIATION': return read_gml_radiation( filename, @@ -59,8 +58,7 @@ def read_gml(filename, datatype=None, remove_time_vars=True, convert_missing=Tru elif datatype.upper() == 'OZONE': return read_gml_ozone(filename, **kwargs) elif datatype.upper() == 'CO2': - return read_gml_co2( - filename, convert_missing=convert_missing, **kwargs) + return read_gml_co2(filename, convert_missing=convert_missing, **kwargs) elif datatype.upper() == 'HALO': return read_gml_halo(filename, **kwargs) else: @@ -74,12 +72,10 @@ def read_gml(filename, datatype=None, remove_time_vars=True, convert_missing=Tru test_filename = str(Path(test_filename).name) if test_filename.startswith('met_') and test_filename.endswith('.txt'): - return read_gml_met( - filename, convert_missing=convert_missing, **kwargs) + return read_gml_met(filename, convert_missing=convert_missing, **kwargs) if test_filename.startswith('co2_') and test_filename.endswith('.txt'): - return read_gml_co2( - filename, convert_missing=convert_missing, **kwargs) + return read_gml_co2(filename, convert_missing=convert_missing, **kwargs) result = re.match(r'([a-z]{3})([\d]{5}).dat', test_filename) if result is not None: @@ -241,9 +237,9 @@ def read_gml_halo(filename, **kwargs): break header += 1 - ds = act.io.csv.read_csv( - filename, sep=r'\s+', header=header, - na_values=['Nan', 'NaN', 'nan', 'NAN'], **kwargs) + ds = act.io.text.read_csv( + filename, sep=r'\s+', header=header, na_values=['Nan', 'NaN', 'nan', 'NAN'], **kwargs + ) var_names = list(ds.data_vars) year_name, month_name, day_name, hour_name, min_name = None, None, None, None, None for var_name in var_names: @@ -258,7 +254,7 @@ def read_gml_halo(filename, **kwargs): elif var_name.endswith('min'): min_name = var_name - timestamp = np.full(ds[var_names[0]].size, np.nan, dtype="datetime64[ns]") + timestamp = np.full(ds[var_names[0]].size, np.nan, dtype='datetime64[ns]') for ii in range(0, len(timestamp)): if min_name is not None: ts = datetime( @@ -282,10 +278,9 @@ def read_gml_halo(filename, **kwargs): ds[day_name].values[ii], ) else: - ts = datetime( - ds[year_name].values[ii], ds[month_name].values[ii], 1) + ts = datetime(ds[year_name].values[ii], ds[month_name].values[ii], 1) - timestamp[ii] = np.datetime64(ts, "ns") + timestamp[ii] = np.datetime64(ts, 'ns') for var_name in [year_name, month_name, day_name, hour_name, min_name]: try: @@ -418,10 +413,9 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs): with open(test_filename) as fc: skiprows = int(fc.readline().strip().split()[-1]) - 1 - ds = act.io.csv.read_csv( - filename, sep=r'\s+', skiprows=skiprows, **kwargs) + ds = act.io.text.read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs) - timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]") + timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[ns]') for ii in range(0, len(timestamp)): ts = datetime( ds['year'].values[ii], @@ -431,7 +425,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs): ds['minute'].values[ii], ds['second'].values[ii], ) - timestamp[ii] = np.datetime64(ts, "ns") + timestamp[ii] = np.datetime64(ts, 'ns') ds = ds.rename({'index': 'time'}) ds = ds.assign_coords(time=timestamp) @@ -538,11 +532,10 @@ def read_gml_ozone(filename=None, **kwargs): pass skiprows += 1 - ds = act.io.csv.read_csv( - filename, sep=r'\s+', skiprows=skiprows, **kwargs) + ds = act.io.text.read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs) ds.attrs['station'] = str(ds['STN'].values[0]).lower() - timestamp = np.full(ds['YEAR'].size, np.nan, dtype="datetime64[ns]") + timestamp = np.full(ds['YEAR'].size, np.nan, dtype='datetime64[ns]') for ii in range(0, len(timestamp)): ts = datetime( ds['YEAR'].values[ii], @@ -550,7 +543,7 @@ def read_gml_ozone(filename=None, **kwargs): ds['DAY'].values[ii], ds['HR'].values[ii], ) - timestamp[ii] = np.datetime64(ts, "ns") + timestamp[ii] = np.datetime64(ts, 'ns') ds = ds.rename({'index': 'time'}) ds = ds.assign_coords(time=timestamp) @@ -569,8 +562,7 @@ def read_gml_ozone(filename=None, **kwargs): return ds -def read_gml_radiation(filename=None, convert_missing=True, - remove_time_vars=True, **kwargs): +def read_gml_radiation(filename=None, convert_missing=True, remove_time_vars=True, **kwargs): """ Function to read radiation data from NOAA GML. @@ -739,19 +731,18 @@ def read_gml_radiation(filename=None, convert_missing=True, } # Add additinal column names for NOAA SPASH campaign - if str(Path(filename).name).startswith('cbc') or \ - str(Path(filename).name).startswith('ckp'): + if str(Path(filename).name).startswith('cbc') or str(Path(filename).name).startswith('ckp'): column_names['SPN1_total'] = { 'units': 'W/m^2', 'long_name': 'SPN1 total average', '_FillValue': -9999.9, - '__type': np.float32 + '__type': np.float32, } column_names['SPN1_diffuse'] = { 'units': 'W/m^2', 'long_name': 'SPN1 diffuse average', '_FillValue': -9999.9, - '__type': np.float32 + '__type': np.float32, } names = list(column_names.keys()) @@ -772,7 +763,9 @@ def read_gml_radiation(filename=None, convert_missing=True, names.insert(ii + num, 'qc_' + name) num += 1 - ds = act.io.csv.read_csv(filename, sep=r'\s+', header=None, skiprows=2, column_names=names, **kwargs) + ds = act.io.text.read_csv( + filename, sep=r'\s+', header=None, skiprows=2, column_names=names, **kwargs + ) if isinstance(filename, (list, tuple)): filename = filename[0] @@ -820,7 +813,7 @@ def read_gml_radiation(filename=None, convert_missing=True, ) ds.attrs['location'] = station - timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]") + timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[ns]') for ii in range(0, len(timestamp)): ts = datetime( ds['year'].values[ii], @@ -829,7 +822,7 @@ def read_gml_radiation(filename=None, convert_missing=True, ds['hour'].values[ii], ds['minute'].values[ii], ) - timestamp[ii] = np.datetime64(ts, "ns") + timestamp[ii] = np.datetime64(ts, 'ns') ds = ds.rename({'index': 'time'}) ds = ds.assign_coords(time=timestamp) @@ -994,12 +987,12 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs): minutes = False del column_names['minute'] - ds = act.io.csv.read_csv( - filename, sep=r'\s+', header=None, - column_names=column_names.keys(), **kwargs) + ds = act.io.text.read_csv( + filename, sep=r'\s+', header=None, column_names=column_names.keys(), **kwargs + ) if ds is not None: - timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]") + timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[ns]') for ii in range(0, len(timestamp)): if minutes: ts = datetime( @@ -1017,13 +1010,12 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs): ds['hour'].values[ii], ) - timestamp[ii] = np.datetime64(ts, "ns") + timestamp[ii] = np.datetime64(ts, 'ns') ds = ds.rename({'index': 'time'}) ds = ds.assign_coords(time=timestamp) ds['time'].attrs['long_name'] = 'Time' for var_name, value in column_names.items(): - if value is None: del ds[var_name] else: @@ -1068,17 +1060,56 @@ def read_surfrad(filename, **kwargs): """ - names = ['year', 'jday', 'month', 'day', 'hour', 'minute', 'dec_time', - 'solar_zenith_angle', 'downwelling_global', 'qc_downwelling_global', - 'upwelling_global', 'qc_upwelling_global', 'direct_normal', 'qc_direct_normal', - 'downwelling_diffuse', 'qc_downwelling_diffuse', 'downwelling_ir', 'qc_downwelling_ir', - 'downwelling_ir_casetemp', 'qc_downwelling_ir_casetemp', 'downwelling_ir_dometemp', - 'qc_downwelling_ir_dometemp', 'upwelling_ir', 'qc_upwelling_ir', 'upwelling_ir_casetemp', - 'qc_upwelling_ir_casetemp', 'upwelling_ir_dometemp', 'qc_upwelling_ir_dometemp', - 'global_uvb', 'qc_global_uvb', 'par', 'qc_par', 'net_radiation', 'qc_net_radiation', - 'net_ir', 'qc_net_ir', 'total_net', 'qc_total_net', 'temperature', 'qc_temperature', - 'relative_humidity', 'qc_relative_humidity', 'wind_speed', 'qc_wind_speed', 'wind_direction', - 'qc_wind_direction', 'pressure', 'qc_pressure'] + names = [ + 'year', + 'jday', + 'month', + 'day', + 'hour', + 'minute', + 'dec_time', + 'solar_zenith_angle', + 'downwelling_global', + 'qc_downwelling_global', + 'upwelling_global', + 'qc_upwelling_global', + 'direct_normal', + 'qc_direct_normal', + 'downwelling_diffuse', + 'qc_downwelling_diffuse', + 'downwelling_ir', + 'qc_downwelling_ir', + 'downwelling_ir_casetemp', + 'qc_downwelling_ir_casetemp', + 'downwelling_ir_dometemp', + 'qc_downwelling_ir_dometemp', + 'upwelling_ir', + 'qc_upwelling_ir', + 'upwelling_ir_casetemp', + 'qc_upwelling_ir_casetemp', + 'upwelling_ir_dometemp', + 'qc_upwelling_ir_dometemp', + 'global_uvb', + 'qc_global_uvb', + 'par', + 'qc_par', + 'net_radiation', + 'qc_net_radiation', + 'net_ir', + 'qc_net_ir', + 'total_net', + 'qc_total_net', + 'temperature', + 'qc_temperature', + 'relative_humidity', + 'qc_relative_humidity', + 'wind_speed', + 'qc_wind_speed', + 'wind_direction', + 'qc_wind_direction', + 'pressure', + 'qc_pressure', + ] for i, f in enumerate(filename): new_df = pd.read_csv(f, names=names, skiprows=2, delimiter=r'\s+', header=None) if i == 0: @@ -1107,36 +1138,93 @@ def read_surfrad(filename, **kwargs): 'minute': {'long_name': 'Minutes', 'units': 'unitless'}, 'dec_time': {'long_name': 'Decimal time', 'units': 'unitless'}, 'solar_zenith_angle': {'long_name': 'Solar zenith angle', 'units': 'deg'}, - 'downwelling_global': {'long_name': 'Downwelling global solar', 'units': 'W m^-2', - 'standard_name': 'surface_downwelling_shortwave_flux_in_air'}, - 'upwelling_global': {'long_name': 'Upwelling global solar', 'units': 'W m^-2', - 'standard_name': 'surface_upwelling_shortwave_flux_in_air'}, - 'direct_normal': {'long_name': 'Direct normal solar', 'units': 'W m^-2', - 'standard_name': 'surface_direct_downwelling_shortwave_flux_in_air'}, - 'downwelling_diffuse': {'long_name': 'Downwelling diffuse solar', 'units': 'W m^-2', - 'standard_name': 'diffuse_downwelling_shortwave_flux_in_air'}, - 'downwelling_ir': {'long_name': 'Downwelling thermal infrared', 'units': 'W m^-2', - 'standard_name': 'net_downward_longwave_flux_in_air'}, - 'downwelling_ir_casetemp': {'long_name': 'Downwelling thermal infrared case temperature', 'units': 'K'}, - 'downwelling_ir_dometemp': {'long_name': 'Downwelling thermal infrared dome temperature', 'units': 'K'}, - 'upwelling_ir': {'long_name': 'Upwelling thermal infrared', 'units': 'W m^-2', - 'standard_name': 'net_upward_longwave_flux_in_air'}, - 'upwelling_ir_casetemp': {'long_name': 'Upwelling thermal infrared case temperature', 'units': 'K'}, - 'upwelling_ir_dometemp': {'long_name': 'Upwelling thermal infrared dome temperature', 'units': 'K'}, + 'downwelling_global': { + 'long_name': 'Downwelling global solar', + 'units': 'W m^-2', + 'standard_name': 'surface_downwelling_shortwave_flux_in_air', + }, + 'upwelling_global': { + 'long_name': 'Upwelling global solar', + 'units': 'W m^-2', + 'standard_name': 'surface_upwelling_shortwave_flux_in_air', + }, + 'direct_normal': { + 'long_name': 'Direct normal solar', + 'units': 'W m^-2', + 'standard_name': 'surface_direct_downwelling_shortwave_flux_in_air', + }, + 'downwelling_diffuse': { + 'long_name': 'Downwelling diffuse solar', + 'units': 'W m^-2', + 'standard_name': 'diffuse_downwelling_shortwave_flux_in_air', + }, + 'downwelling_ir': { + 'long_name': 'Downwelling thermal infrared', + 'units': 'W m^-2', + 'standard_name': 'net_downward_longwave_flux_in_air', + }, + 'downwelling_ir_casetemp': { + 'long_name': 'Downwelling thermal infrared case temperature', + 'units': 'K', + }, + 'downwelling_ir_dometemp': { + 'long_name': 'Downwelling thermal infrared dome temperature', + 'units': 'K', + }, + 'upwelling_ir': { + 'long_name': 'Upwelling thermal infrared', + 'units': 'W m^-2', + 'standard_name': 'net_upward_longwave_flux_in_air', + }, + 'upwelling_ir_casetemp': { + 'long_name': 'Upwelling thermal infrared case temperature', + 'units': 'K', + }, + 'upwelling_ir_dometemp': { + 'long_name': 'Upwelling thermal infrared dome temperature', + 'units': 'K', + }, 'global_uvb': {'long_name': 'Global UVB', 'units': 'milliWatts m^-2'}, - 'par': {'long_name': 'Photosynthetically active radiation', 'units': 'W m^-2', - 'standard_name': 'surface_downwelling_photosynthetic_radiative_flux_in_air'}, - 'net_radiation': {'long_name': 'Net solar (downwelling_global-upwelling_global)', 'units': 'W m^-2', - 'standard_name': 'surface_net_downward_shortwave_flux'}, - 'net_ir': {'long_name': 'Net infrared (downwelling_ir-upwelling_ir)', 'units': 'W m^-2', - 'standard_name': 'surface_net_downward_longwave_flux'}, - 'total_net': {'long_name': 'Total Net radiation (net_radiation + net_ir)', 'units': 'W m^-2'}, - 'temperature': {'long_name': '10-meter air temperature', 'units': 'degC', 'standard_name': 'air_temperature'}, - 'relative_humidity': {'long_name': 'Relative humidity', 'units': '%', 'standard_name': 'relative_humidity'}, + 'par': { + 'long_name': 'Photosynthetically active radiation', + 'units': 'W m^-2', + 'standard_name': 'surface_downwelling_photosynthetic_radiative_flux_in_air', + }, + 'net_radiation': { + 'long_name': 'Net solar (downwelling_global-upwelling_global)', + 'units': 'W m^-2', + 'standard_name': 'surface_net_downward_shortwave_flux', + }, + 'net_ir': { + 'long_name': 'Net infrared (downwelling_ir-upwelling_ir)', + 'units': 'W m^-2', + 'standard_name': 'surface_net_downward_longwave_flux', + }, + 'total_net': { + 'long_name': 'Total Net radiation (net_radiation + net_ir)', + 'units': 'W m^-2', + }, + 'temperature': { + 'long_name': '10-meter air temperature', + 'units': 'degC', + 'standard_name': 'air_temperature', + }, + 'relative_humidity': { + 'long_name': 'Relative humidity', + 'units': '%', + 'standard_name': 'relative_humidity', + }, 'wind_speed': {'long_name': 'Wind speed', 'units': 'ms^-1', 'standard_name': 'wind_speed'}, - 'wind_direction': {'long_name': 'Wind direction, clockwise from North', 'units': 'deg', - 'standard_name': 'wind_from_direction'}, - 'pressure': {'long_name': 'Station pressure', 'units': 'mb', 'standard_name': 'air_pressure'}, + 'wind_direction': { + 'long_name': 'Wind direction, clockwise from North', + 'units': 'deg', + 'standard_name': 'wind_from_direction', + }, + 'pressure': { + 'long_name': 'Station pressure', + 'units': 'mb', + 'standard_name': 'air_pressure', + }, } for v in ds: @@ -1144,23 +1232,46 @@ def read_surfrad(filename, **kwargs): ds[v].attrs = attrs[v] # Add attributes to all QC variables - qc_vars = ['downwelling_global', 'upwelling_global', 'direct_normal', 'downwelling_diffuse', - 'downwelling_ir', 'downwelling_ir_casetemp', 'downwelling_ir_dometemp', - 'upwelling_ir', 'upwelling_ir_casetemp', 'upwelling_ir_dometemp', 'global_uvb', - 'par', 'net_radiation', 'net_ir', 'total_net', 'temperature', 'relative_humidity', - 'wind_speed', 'wind_direction', 'pressure'] + qc_vars = [ + 'downwelling_global', + 'upwelling_global', + 'direct_normal', + 'downwelling_diffuse', + 'downwelling_ir', + 'downwelling_ir_casetemp', + 'downwelling_ir_dometemp', + 'upwelling_ir', + 'upwelling_ir_casetemp', + 'upwelling_ir_dometemp', + 'global_uvb', + 'par', + 'net_radiation', + 'net_ir', + 'total_net', + 'temperature', + 'relative_humidity', + 'wind_speed', + 'wind_direction', + 'pressure', + ] for v in qc_vars: - atts = {'long_name': 'Quality check results on variable: ' + v, - 'units': '1', - 'description': ''.join(['A QC flag of zero indicates that the corresponding data point is good,', - ' having passed all QC checks. A value greater than 0 indicates that', - ' the data failed one level of QC. For example, a QC value of 1 means', - ' that the recorded value is beyond a physically possible range, or it has', - ' been affected adversely in some manner to produce a knowingly bad value.', - ' A value of 2 indicates that the data value failed the second level QC check,', - ' indicating that the data value may be physically possible but should be used', - ' with scrutiny, and so on.'])} + atts = { + 'long_name': 'Quality check results on variable: ' + v, + 'units': '1', + 'description': ''.join( + [ + 'A QC flag of zero indicates that the corresponding data point is good,', + ' having passed all QC checks. A value greater than 0 indicates that', + ' the data failed one level of QC. For example, a QC value of 1 means', + ' that the recorded value is beyond a physically possible range, or it has', + ' been affected adversely in some manner to produce a knowingly bad value.', + ' A value of 2 indicates that the data value failed the second level QC check,', + ' indicating that the data value may be physically possible but should be used', + ' with scrutiny, and so on.', + ] + ), + } ds['qc_' + v].attrs = atts ds.attrs['datastream'] = 'SURFRAD Site: ' + filename[0].split('/')[-1][0:3] diff --git a/act/io/noaapsl.py b/act/io/noaapsl.py index e1640490c9..12a0412456 100644 --- a/act/io/noaapsl.py +++ b/act/io/noaapsl.py @@ -2,18 +2,19 @@ Modules for reading in NOAA PSL data. """ +import datetime as dt +import re from datetime import datetime, timedelta -from os import path as ospath from itertools import groupby +from os import path as ospath + import fsspec -import yaml -import re import numpy as np import pandas as pd import xarray as xr -import datetime as dt +import yaml -from act.io.csv import read_csv +from act.io.text import read_csv def read_psl_wind_profiler(filepath, transpose=True): @@ -52,17 +53,14 @@ def read_psl_wind_profiler(filepath, transpose=True): for section in sections_of_file: if section[0] != '$': list_of_datasets.append( - _parse_psl_wind_lines( - filepath, section, line_offset=start_line) + _parse_psl_wind_lines(filepath, section, line_offset=start_line) ) start_line += len(section) # Return two datasets for each mode and the merge of datasets of the # same mode. - mode_one_ds = xr.concat( - list_of_datasets[0::2], dim='time') - mode_two_ds = xr.concat( - list_of_datasets[1::2], dim='time') + mode_one_ds = xr.concat(list_of_datasets[0::2], dim='time') + mode_two_ds = xr.concat(list_of_datasets[1::2], dim='time') if transpose: mode_one_ds = mode_one_ds.transpose('HT', 'time') mode_two_ds = mode_two_ds.transpose('HT', 'time') @@ -101,8 +99,7 @@ def read_psl_wind_profiler_temperature(filepath, transpose=True): for section in sections_of_file: if section[0] != '$': list_of_datasets.append( - _parse_psl_temperature_lines( - filepath, section, line_offset=start_line) + _parse_psl_temperature_lines(filepath, section, line_offset=start_line) ) start_line += len(section) @@ -139,8 +136,7 @@ def _parse_psl_wind_lines(filepath, lines, line_offset=0): datatype, _, version = filter_list(lines[1].split(' ')) # 3 - station lat, lon, elevation - latitude, longitude, elevation = filter_list( - lines[2].split(' ')).astype(float) + latitude, longitude, elevation = filter_list(lines[2].split(' ')).astype(float) # 4 - year, month, day, hour, minute, second, utc time = parse_date_line(lines[3]) @@ -154,34 +150,42 @@ def _parse_psl_wind_lines(filepath, lines, line_offset=0): # pulse width, inner pulse period' # Values duplicate as oblique and vertical values ( - number_coherent_integrations_obl, number_coherent_integrations_vert, - number_spectral_averages_obl, number_spectral_averages_vert, - pulse_width_obl, pulse_width_vert, inner_pulse_period_obl, - inner_pulse_period_vert + number_coherent_integrations_obl, + number_coherent_integrations_vert, + number_spectral_averages_obl, + number_spectral_averages_vert, + pulse_width_obl, + pulse_width_vert, + inner_pulse_period_obl, + inner_pulse_period_vert, ) = filter_list(lines[6].split(' ')).astype(int) # 8 - full-scale doppler value, delay to first gate, number of gates, # spacing of gates. Values duplicate as oblique and vertical values. ( - full_scale_doppler_obl, full_scale_doppler_vert, + full_scale_doppler_obl, + full_scale_doppler_vert, beam_vertical_correction, - delay_first_gate_obl, delay_first_gate_vert, - number_of_gates_obl, number_of_gates_vert, - spacing_of_gates_obl, spacing_of_gates_vert + delay_first_gate_obl, + delay_first_gate_vert, + number_of_gates_obl, + number_of_gates_vert, + spacing_of_gates_obl, + spacing_of_gates_vert, ) = filter_list(lines[7].split(' ')).astype(float) # 9 - beam azimuth (degrees clockwise from north) ( - beam_azimuth1, beam_elevation1, - beam_azimuth2, beam_elevation2, - beam_azimuth3, beam_elevation3 + beam_azimuth1, + beam_elevation1, + beam_azimuth2, + beam_elevation2, + beam_azimuth3, + beam_elevation3, ) = filter_list(lines[8].split(' ')).astype(float) - beam_azimuth = np.array( - [beam_azimuth1, beam_azimuth2, beam_azimuth3], dtype='float32') - beam_elevation = np.array( - [beam_elevation1, beam_elevation2, beam_elevation3], - dtype='float32') + beam_azimuth = np.array([beam_azimuth1, beam_azimuth2, beam_azimuth3], dtype='float32') + beam_elevation = np.array([beam_elevation1, beam_elevation2, beam_elevation3], dtype='float32') # Read in the data table section using pandas df = pd.read_csv(filepath, skiprows=line_offset + 10, delim_whitespace=True) @@ -245,26 +249,21 @@ def _parse_psl_wind_lines(filepath, lines, line_offset=0): 'data_description' ] = 'https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5855&OperationalID=2371' ds.attrs['consensus_average_time'] = consensus_average_time - ds.attrs['oblique-beam_vertical_correction'] = int( - beam_vertical_correction) + ds.attrs['oblique-beam_vertical_correction'] = int(beam_vertical_correction) ds.attrs['number_of_beams'] = int(number_of_beams) ds.attrs['number_of_range_gates'] = int(number_of_range_gates) # Handle oblique and vertical attributes. ds.attrs['number_of_gates_oblique'] = int(number_of_gates_obl) ds.attrs['number_of_gates_vertical'] = int(number_of_gates_vert) - ds.attrs['number_spectral_averages_oblique'] = int( - number_spectral_averages_obl) - ds.attrs['number_spectral_averages_vertical'] = int( - number_spectral_averages_vert) + ds.attrs['number_spectral_averages_oblique'] = int(number_spectral_averages_obl) + ds.attrs['number_spectral_averages_vertical'] = int(number_spectral_averages_vert) ds.attrs['pulse_width_oblique'] = int(pulse_width_obl) ds.attrs['pulse_width_vertical'] = int(pulse_width_vert) ds.attrs['inner_pulse_period_oblique'] = int(inner_pulse_period_obl) ds.attrs['inner_pulse_period_vertical'] = int(inner_pulse_period_vert) - ds.attrs['full_scale_doppler_value_oblique'] = float( - full_scale_doppler_obl) - ds.attrs['full_scale_doppler_value_vertical'] = float( - full_scale_doppler_vert) + ds.attrs['full_scale_doppler_value_oblique'] = float(full_scale_doppler_obl) + ds.attrs['full_scale_doppler_value_vertical'] = float(full_scale_doppler_vert) ds.attrs['delay_to_first_gate_oblique'] = int(delay_first_gate_obl) ds.attrs['delay_to_first_gate_vertical'] = int(delay_first_gate_vert) ds.attrs['spacing_of_gates_oblique'] = int(spacing_of_gates_obl) @@ -298,8 +297,7 @@ def _parse_psl_temperature_lines(filepath, lines, line_offset=0): datatype, _, version = filter_list(lines[1].split(' ')) # 3 - station lat, lon, elevation - latitude, longitude, elevation = filter_list( - lines[2].split(' ')).astype(float) + latitude, longitude, elevation = filter_list(lines[2].split(' ')).astype(float) # 4 - year, month, day, hour, minute, second, utc time = parse_date_line(lines[3]) @@ -325,8 +323,7 @@ def _parse_psl_temperature_lines(filepath, lines, line_offset=0): ).astype(float) # 9 - beam azimuth (degrees clockwise from north) - beam_azimuth, beam_elevation = filter_list( - lines[8].split(' ')).astype(float) + beam_azimuth, beam_elevation = filter_list(lines[8].split(' ')).astype(float) # Read in the data table section using pandas df = pd.read_csv(filepath, skiprows=line_offset + 10, delim_whitespace=True) @@ -460,7 +457,7 @@ def read_psl_surface_met(filenames, conf_file=None): conf_file = ospath.join(ospath.dirname(__file__), 'conf', 'noaapsl_SurfaceMet.yaml') # Read configuration YAML file - with open(conf_file, "r") as fp: + with open(conf_file) as fp: try: result = yaml.load(fp, Loader=yaml.FullLoader) except AttributeError: @@ -470,18 +467,20 @@ def read_psl_surface_met(filenames, conf_file=None): try: result = result[site] except KeyError: - raise RuntimeError(f"Configuration for site '{site}' currently not available. " - "You can manually add the site configuration to a copy of " - "noaapsl_SurfaceMet.yaml and set conf_file= name of copied file " - "until the site is added.") + raise RuntimeError( + f"Configuration for site '{site}' currently not available. " + 'You can manually add the site configuration to a copy of ' + 'noaapsl_SurfaceMet.yaml and set conf_file= name of copied file ' + 'until the site is added.' + ) # Extract date and time from filename to use in extracting format from YAML file. - search_result = re.match(r"[a-z]{3}(\d{2})(\d{3})\.(\d{2})m", ospath.basename(filenames[0])) + search_result = re.match(r'[a-z]{3}(\d{2})(\d{3})\.(\d{2})m', ospath.basename(filenames[0])) yy, doy, hh = search_result.groups() if yy > '70': - yy = f"19{yy}" + yy = f'19{yy}' else: - yy = f"20{yy}" + yy = f'20{yy}' # Extract location information from configuration file. try: @@ -490,12 +489,18 @@ def read_psl_surface_met(filenames, conf_file=None): location_info = None # Loop through each date range for the site to extract the correct file format from conf file. - file_datetime = datetime.strptime(f'{yy}-01-01', '%Y-%m-%d') + timedelta(int(doy) - 1) + timedelta(hours=int(hh)) + file_datetime = ( + datetime.strptime(f'{yy}-01-01', '%Y-%m-%d') + + timedelta(int(doy) - 1) + + timedelta(hours=int(hh)) + ) for ii in result.keys(): if ii == 'info': continue - date_range = [datetime.strptime(jj, '%Y-%m-%d %H:%M:%S') for jj in result[ii]['_date_range']] + date_range = [ + datetime.strptime(jj, '%Y-%m-%d %H:%M:%S') for jj in result[ii]['_date_range'] + ] if file_datetime >= date_range[0] and file_datetime <= date_range[1]: result = result[ii] del result['_date_range'] @@ -506,14 +511,14 @@ def read_psl_surface_met(filenames, conf_file=None): # Calculate numpy datetime64 values from first 4 columns of the data file. time = np.array(ds['Year'].values - 1970, dtype='datetime64[Y]') - day = np.array(np.array(ds['J_day'].values - 1 , dtype='timedelta64[D]')) + day = np.array(np.array(ds['J_day'].values - 1, dtype='timedelta64[D]')) hourmin = ds['HoursMinutes'].values + 10000 hour = [int(str(ii)[1:3]) for ii in hourmin] hour = np.array(hour, dtype='timedelta64[h]') minute = [int(str(ii)[3:]) for ii in hourmin] minute = np.array(minute, dtype='timedelta64[m]') time = time + day + hour + minute - time = time.astype("datetime64[ns]") + time = time.astype('datetime64[ns]') # Update Dataset to use "time" coordinate and assigned calculated times ds = ds.assign_coords(index=time) ds = ds.rename(index='time') @@ -568,23 +573,136 @@ def read_psl_parsivel(files): """ # Define the names for the variables - names = ['time', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12', - 'B13', 'B14', 'B15', 'B16', 'B17', 'B18', 'B19', 'B20', 'B21', 'B22', 'B23', 'B24', - 'B25', 'B26', 'B27', 'B28', 'B29', 'B30', 'B31', 'B32', 'blackout', 'good', 'bad', - 'number_detected_particles', 'precip_rate', 'precip_amount', 'precip_accumulation', - 'equivalent_radar_reflectivity', 'number_in_error', 'dirty', 'very_dirty', 'damaged', - 'laserband_amplitude', 'laserband_amplitude_stdev', 'sensor_temperature', 'sensor_temperature_stdev', - 'sensor_voltage', 'sensor_voltage_stdev', 'heating_current', 'heating_current_stdev', 'number_rain_particles', - 'number_non_rain_particles', 'number_ambiguous_particles', 'precip_type'] + names = [ + 'time', + 'B1', + 'B2', + 'B3', + 'B4', + 'B5', + 'B6', + 'B7', + 'B8', + 'B9', + 'B10', + 'B11', + 'B12', + 'B13', + 'B14', + 'B15', + 'B16', + 'B17', + 'B18', + 'B19', + 'B20', + 'B21', + 'B22', + 'B23', + 'B24', + 'B25', + 'B26', + 'B27', + 'B28', + 'B29', + 'B30', + 'B31', + 'B32', + 'blackout', + 'good', + 'bad', + 'number_detected_particles', + 'precip_rate', + 'precip_amount', + 'precip_accumulation', + 'equivalent_radar_reflectivity', + 'number_in_error', + 'dirty', + 'very_dirty', + 'damaged', + 'laserband_amplitude', + 'laserband_amplitude_stdev', + 'sensor_temperature', + 'sensor_temperature_stdev', + 'sensor_voltage', + 'sensor_voltage_stdev', + 'heating_current', + 'heating_current_stdev', + 'number_rain_particles', + 'number_non_rain_particles', + 'number_ambiguous_particles', + 'precip_type', + ] # Define the particle sizes and class width sizes based on # https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5890 - vol_equiv_diam = [0.062, 0.187, 0.312, 0.437, 0.562, 0.687, 0.812, 0.937, 1.062, 1.187, 1.375, - 1.625, 1.875, 2.125, 2.375, 2.75, 3.25, 3.75, 4.25, 4.75, 5.5, 6.5, 7.5, 8.5, - 9.5, 11.0, 13.0, 15.0, 17.0, 19.0, 21.5, 24.5] - class_size_width = [0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, - 0.250, 0.250, 0.250, 0.250, 0.250, 0.5, 0.5, 0.5, 0.5, 0.5, - 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0] + vol_equiv_diam = [ + 0.062, + 0.187, + 0.312, + 0.437, + 0.562, + 0.687, + 0.812, + 0.937, + 1.062, + 1.187, + 1.375, + 1.625, + 1.875, + 2.125, + 2.375, + 2.75, + 3.25, + 3.75, + 4.25, + 4.75, + 5.5, + 6.5, + 7.5, + 8.5, + 9.5, + 11.0, + 13.0, + 15.0, + 17.0, + 19.0, + 21.5, + 24.5, + ] + class_size_width = [ + 0.125, + 0.125, + 0.125, + 0.125, + 0.125, + 0.125, + 0.125, + 0.125, + 0.125, + 0.125, + 0.250, + 0.250, + 0.250, + 0.250, + 0.250, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 2.0, + 2.0, + 2.0, + 2.0, + 2.0, + 3.0, + 3.0, + ] if not isinstance(files, list): files = [files] @@ -619,11 +737,17 @@ def read_psl_parsivel(files): ds = ds.rename({'index': 'time'}) long_name = 'Drop Size Distribution' attrs = {'long_name': long_name, 'units': 'count'} - da = xr.DataArray(np.transpose(dsd), dims=['time', 'particle_size'], coords=[ds['time'].values, vol_equiv_diam]) + da = xr.DataArray( + np.transpose(dsd), + dims=['time', 'particle_size'], + coords=[ds['time'].values, vol_equiv_diam], + ) ds['number_density_drops'] = da attrs = {'long_name': 'Particle class size average', 'units': 'mm'} - da = xr.DataArray(class_size_width, dims=['particle_size'], coords=[vol_equiv_diam], attrs=attrs) + da = xr.DataArray( + class_size_width, dims=['particle_size'], coords=[vol_equiv_diam], attrs=attrs + ) ds['class_size_width'] = da attrs = {'long_name': 'Class size width', 'units': 'mm'} @@ -635,31 +759,75 @@ def read_psl_parsivel(files): ds['interval_end_time'] = da # Define the attribuets and metadata and add into the DataSet - attrs = {'blackout': {'long_name': 'Number of samples excluded during PC clock sync', 'units': 'count'}, - 'good': {'long_name': 'Number of samples that passed QC checks', 'units': 'count'}, - 'bad': {'long_name': 'Number of samples that failed QC checks', 'units': 'count'}, - 'number_detected_particles': {'long_name': 'Total number of detected particles', 'units': 'count'}, - 'precip_rate': {'long_name': 'Precipitation rate', 'units': 'mm/hr'}, - 'precip_amount': {'long_name': 'Interval accumulation', 'units': 'mm'}, - 'precip_accumulation': {'long_name': 'Event accumulation', 'units': 'mm'}, - 'equivalent_radar_reflectivity': {'long_name': 'Radar Reflectivity', 'units': 'dB'}, - 'number_in_error': {'long_name': 'Number of samples that were reported dirt, very dirty, or damaged', 'units': 'count'}, - 'dirty': {'long_name': 'Laser glass is dirty but measurement is still possible', 'units': 'unitless'}, - 'very_dirty': {'long_name': 'Laser glass is dirty, partially covered no further measurements are possible', 'units': 'unitless'}, - 'damaged': {'long_name': 'Laser damaged', 'units': 'unitless'}, - 'laserband_amplitude': {'long_name': 'Average signal amplitude of the laser strip', 'units': 'unitless'}, - 'laserband_amplitude_stdev': {'long_name': 'Standard deviation of the signal amplitude of the laser strip', 'units': 'unitless'}, - 'sensor_temperature': {'long_name': 'Average sensor temperature', 'units': 'degC'}, - 'sensor_temperature_stdev': {'long_name': 'Standard deviation of sensor temperature', 'units': 'degC'}, - 'sensor_voltage': {'long_name': 'Sensor power supply voltage', 'units': 'V'}, - 'sensor_voltage_stdev': {'long_name': 'Standard deviation of the sensor power supply voltage', 'units': 'V'}, - 'heating_current': {'long_name': 'Average heating system current', 'units': 'A'}, - 'heating_current_stdev': {'long_name': 'Standard deviation of heating system current', 'units': 'A'}, - 'number_rain_particles': {'long_name': 'Number of particles detected as rain', 'units': 'unitless'}, - 'number_non_rain_particles': {'long_name': 'Number of particles detected not as rain', 'units': 'unitless'}, - 'number_ambiguous_particles': {'long_name': 'Number of particles detected as ambiguous', 'units': 'unitless'}, - 'precip_type': {'long_name': 'Precipitation type (1=rain; 2=mixed; 3=snow)', 'units': 'unitless'}, - 'number_density_drops': {'long_name': 'Drop Size Distribution', 'units': 'count'}} + attrs = { + 'blackout': { + 'long_name': 'Number of samples excluded during PC clock sync', + 'units': 'count', + }, + 'good': {'long_name': 'Number of samples that passed QC checks', 'units': 'count'}, + 'bad': {'long_name': 'Number of samples that failed QC checks', 'units': 'count'}, + 'number_detected_particles': { + 'long_name': 'Total number of detected particles', + 'units': 'count', + }, + 'precip_rate': {'long_name': 'Precipitation rate', 'units': 'mm/hr'}, + 'precip_amount': {'long_name': 'Interval accumulation', 'units': 'mm'}, + 'precip_accumulation': {'long_name': 'Event accumulation', 'units': 'mm'}, + 'equivalent_radar_reflectivity': {'long_name': 'Radar Reflectivity', 'units': 'dB'}, + 'number_in_error': { + 'long_name': 'Number of samples that were reported dirt, very dirty, or damaged', + 'units': 'count', + }, + 'dirty': { + 'long_name': 'Laser glass is dirty but measurement is still possible', + 'units': 'unitless', + }, + 'very_dirty': { + 'long_name': 'Laser glass is dirty, partially covered no further measurements are possible', + 'units': 'unitless', + }, + 'damaged': {'long_name': 'Laser damaged', 'units': 'unitless'}, + 'laserband_amplitude': { + 'long_name': 'Average signal amplitude of the laser strip', + 'units': 'unitless', + }, + 'laserband_amplitude_stdev': { + 'long_name': 'Standard deviation of the signal amplitude of the laser strip', + 'units': 'unitless', + }, + 'sensor_temperature': {'long_name': 'Average sensor temperature', 'units': 'degC'}, + 'sensor_temperature_stdev': { + 'long_name': 'Standard deviation of sensor temperature', + 'units': 'degC', + }, + 'sensor_voltage': {'long_name': 'Sensor power supply voltage', 'units': 'V'}, + 'sensor_voltage_stdev': { + 'long_name': 'Standard deviation of the sensor power supply voltage', + 'units': 'V', + }, + 'heating_current': {'long_name': 'Average heating system current', 'units': 'A'}, + 'heating_current_stdev': { + 'long_name': 'Standard deviation of heating system current', + 'units': 'A', + }, + 'number_rain_particles': { + 'long_name': 'Number of particles detected as rain', + 'units': 'unitless', + }, + 'number_non_rain_particles': { + 'long_name': 'Number of particles detected not as rain', + 'units': 'unitless', + }, + 'number_ambiguous_particles': { + 'long_name': 'Number of particles detected as ambiguous', + 'units': 'unitless', + }, + 'precip_type': { + 'long_name': 'Precipitation type (1=rain; 2=mixed; 3=snow)', + 'units': 'unitless', + }, + 'number_density_drops': {'long_name': 'Drop Size Distribution', 'units': 'count'}, + } for v in ds: if v in attrs: @@ -739,47 +907,169 @@ def _parse_psl_radar_moments(files): # Set the initial dictionary to convert to xarray dataset data = { 'site': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'NOAA site code'}}, - 'lat': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'North Latitude', 'units': 'degree_N'}}, - 'lon': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'East Longitude', 'units': 'degree_E'}}, - 'alt': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'Altitude above mean sea level', 'units': 'm'}}, - 'freq': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'Operating Frequency; Ignore for FMCW', 'units': 'Hz'}}, - 'azimuth': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Azimuth angle', 'units': 'deg'}}, - 'elevation': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Elevation angle', 'units': 'deg'}}, - 'beam_direction_code': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Beam direction code', 'units': ''}}, + 'lat': { + 'dims': ['file'], + 'data': [], + 'attrs': {'long_name': 'North Latitude', 'units': 'degree_N'}, + }, + 'lon': { + 'dims': ['file'], + 'data': [], + 'attrs': {'long_name': 'East Longitude', 'units': 'degree_E'}, + }, + 'alt': { + 'dims': ['file'], + 'data': [], + 'attrs': {'long_name': 'Altitude above mean sea level', 'units': 'm'}, + }, + 'freq': { + 'dims': ['file'], + 'data': [], + 'attrs': {'long_name': 'Operating Frequency; Ignore for FMCW', 'units': 'Hz'}, + }, + 'azimuth': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Azimuth angle', 'units': 'deg'}, + }, + 'elevation': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Elevation angle', 'units': 'deg'}, + }, + 'beam_direction_code': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Beam direction code', 'units': ''}, + }, 'year': {'dims': ['time'], 'data': [], 'attrs': {'long_name': '2-digit year', 'units': ''}}, - 'day_of_year': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Day of the year', 'units': ''}}, - 'hour': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Hour of the day', 'units': ''}}, + 'day_of_year': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Day of the year', 'units': ''}, + }, + 'hour': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Hour of the day', 'units': ''}, + }, 'minute': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Minutes', 'units': ''}}, 'second': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Seconds', 'units': ''}}, - 'interpulse_period': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Interpulse Period', 'units': 'ms'}}, - 'pulse_width': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Pulse width', 'units': 'ns'}}, - 'first_range_gate': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Range to first range gate', 'units': 'm'}}, - 'range_between_gates': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Distance between range gates', 'units': 'm'}}, - 'n_gates': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Number of range gates', 'units': 'count'}}, - 'n_coherent_integration': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Number of cohrent integration', 'units': 'count'}}, - 'n_averaged_spectra': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Number of average spectra', 'units': 'count'}}, - 'n_points_spectrum': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Number of points in spectra', 'units': 'count'}}, - 'n_code_bits': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Number of code bits', 'units': 'count'}}, - 'radial_velocity': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'Radial velocity', 'units': 'm/s'}}, - 'snr': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'Signal-to-noise ratio - not range corrected', 'units': 'dB'}}, - 'signal_power': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'Signal Power - not range corrected', 'units': 'dB'}}, - 'spectral_width': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'Spectral width', 'units': 'm/s'}}, - 'noise_amplitude': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'noise_amplitude', 'units': 'dB'}}, - 'qc_variable': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'QC Value - not used', 'units': ''}}, + 'interpulse_period': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Interpulse Period', 'units': 'ms'}, + }, + 'pulse_width': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Pulse width', 'units': 'ns'}, + }, + 'first_range_gate': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Range to first range gate', 'units': 'm'}, + }, + 'range_between_gates': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Distance between range gates', 'units': 'm'}, + }, + 'n_gates': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Number of range gates', 'units': 'count'}, + }, + 'n_coherent_integration': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Number of cohrent integration', 'units': 'count'}, + }, + 'n_averaged_spectra': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Number of average spectra', 'units': 'count'}, + }, + 'n_points_spectrum': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Number of points in spectra', 'units': 'count'}, + }, + 'n_code_bits': { + 'dims': ['time'], + 'data': [], + 'attrs': {'long_name': 'Number of code bits', 'units': 'count'}, + }, + 'radial_velocity': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'Radial velocity', 'units': 'm/s'}, + }, + 'snr': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'Signal-to-noise ratio - not range corrected', 'units': 'dB'}, + }, + 'signal_power': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'Signal Power - not range corrected', 'units': 'dB'}, + }, + 'spectral_width': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'Spectral width', 'units': 'm/s'}, + }, + 'noise_amplitude': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'noise_amplitude', 'units': 'dB'}, + }, + 'qc_variable': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'QC Value - not used', 'units': ''}, + }, 'time': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Datetime', 'units': ''}}, 'range': {'dims': ['range'], 'data': [], 'attrs': {'long_name': 'Range', 'units': 'm'}}, - 'reflectivity_uncalibrated': {'dims': ['time', 'range'], 'data': [], 'attrs': {'long_name': 'Range', 'units': 'dB'}}, + 'reflectivity_uncalibrated': { + 'dims': ['time', 'range'], + 'data': [], + 'attrs': {'long_name': 'Range', 'units': 'dB'}, + }, } # Separate out the names as they will be accessed in different parts of the code h1_names = ['site', 'lat', 'lon', 'alt', 'freq'] - h2_names = ['azimuth', 'elevation', 'beam_direction_code', 'year', 'day_of_year', 'hour', 'minute', - 'second'] - h3_names = ['interpulse_period', 'pulse_width', 'first_range_gate', 'range_between_gates', - 'n_gates', 'n_coherent_integration', 'n_averaged_spectra', 'n_points_spectrum', - 'n_code_bits'] - names = {'radial_velocity': 0, 'snr': 1, 'signal_power': 2, 'spectral_width': 3, - 'noise_amplitude': 4, 'qc_variable': 5} + h2_names = [ + 'azimuth', + 'elevation', + 'beam_direction_code', + 'year', + 'day_of_year', + 'hour', + 'minute', + 'second', + ] + h3_names = [ + 'interpulse_period', + 'pulse_width', + 'first_range_gate', + 'range_between_gates', + 'n_gates', + 'n_coherent_integration', + 'n_averaged_spectra', + 'n_points_spectrum', + 'n_code_bits', + ] + names = { + 'radial_velocity': 0, + 'snr': 1, + 'signal_power': 2, + 'spectral_width': 3, + 'noise_amplitude': 4, + 'qc_variable': 5, + } # If file is a string, convert to list for handling. if not isinstance(files, list): @@ -793,7 +1083,7 @@ def _parse_psl_radar_moments(files): for d in df: if len(d) > 0: if d == 'lat' or d == 'lon': - data[h1_names[ctr]]['data'].append(float(d) / 100.) + data[h1_names[ctr]]['data'].append(float(d) / 100.0) else: data[h1_names[ctr]]['data'].append(d) ctr += 1 @@ -821,27 +1111,41 @@ def _parse_psl_radar_moments(files): data[h3_names[ctr]]['data'].append(d) ctr += 1 # Read in the data based on number of gates - df = pd.read_csv(f, skiprows=[0, 1, 2], nrows=int(data['n_gates']['data'][-1]) - 1, delim_whitespace=True, - names=list(names.keys())) + df = pd.read_csv( + f, + skiprows=[0, 1, 2], + nrows=int(data['n_gates']['data'][-1]) - 1, + delim_whitespace=True, + names=list(names.keys()), + ) index2 = 0 else: # Set indices for parsing data, reading 2 headers and then the columns of data index1 = ct * int(data['n_gates']['data'][-1]) index2 = index1 + int(data['n_gates']['data'][-1]) + 2 * ct + 4 - df = str(pd.read_table(f, nrows=0, skiprows=list(range(index2 - 1))).columns[0]).split(' ') + df = str( + pd.read_table(f, nrows=0, skiprows=list(range(index2 - 1))).columns[0] + ).split(' ') ctr = 0 for d in df: if len(d) > 0: data[h2_names[ctr]]['data'].append(d) ctr += 1 - df = str(pd.read_table(f, nrows=0, skiprows=list(range(index2))).columns[0]).split(' ') + df = str( + pd.read_table(f, nrows=0, skiprows=list(range(index2))).columns[0] + ).split(' ') ctr = 0 for d in df: if len(d) > 0: data[h3_names[ctr]]['data'].append(d) ctr += 1 - df = pd.read_csv(f, skiprows=list(range(index2 + 1)), nrows=int(data['n_gates']['data'][-1]) - 1, delim_whitespace=True, - names=list(names.keys())) + df = pd.read_csv( + f, + skiprows=list(range(index2 + 1)), + nrows=int(data['n_gates']['data'][-1]) - 1, + delim_whitespace=True, + names=list(names.keys()), + ) # Add data from the columns to the dictionary for n in names: @@ -849,19 +1153,24 @@ def _parse_psl_radar_moments(files): # Calculate the range based on number of gates, range to first gate and range between gates if len(data['range']['data']) == 0: - ranges = float(data['first_range_gate']['data'][-1]) + np.array(range(int(data['n_gates']['data'][-1]) - 1)) * \ - float(data['range_between_gates']['data'][-1]) + ranges = float(data['first_range_gate']['data'][-1]) + np.array( + range(int(data['n_gates']['data'][-1]) - 1) + ) * float(data['range_between_gates']['data'][-1]) data['range']['data'] = ranges # Calculate a time time = dt.datetime( - int('20' + data['year']['data'][-1]), 1, 1, int(data['hour']['data'][-1]), - int(data['minute']['data'][-1]), int(data['second']['data'][-1])) + \ - dt.timedelta(days=int(data['day_of_year']['data'][-1]) - 1) + int('20' + data['year']['data'][-1]), + 1, + 1, + int(data['hour']['data'][-1]), + int(data['minute']['data'][-1]), + int(data['second']['data'][-1]), + ) + dt.timedelta(days=int(data['day_of_year']['data'][-1]) - 1) data['time']['data'].append(time) # Range correct the snr which converts it essentially to an uncalibrated reflectivity - snr_rc = data['snr']['data'][-1] - 20. * np.log10(1. / (ranges / 1000.) ** 2) + snr_rc = data['snr']['data'][-1] - 20.0 * np.log10(1.0 / (ranges / 1000.0) ** 2) data['reflectivity_uncalibrated']['data'].append(snr_rc) except Exception as e: # Handle errors, if end of file then continue on, if something else diff --git a/tests/io/test_csv.py b/tests/io/test_text.py similarity index 86% rename from tests/io/test_csv.py rename to tests/io/test_text.py index aff5315112..db49962fd5 100644 --- a/tests/io/test_csv.py +++ b/tests/io/test_text.py @@ -30,13 +30,13 @@ def test_io_csv(): 'temp_soil_100cm', 'temp_soil_10ft', ] - anl_ds = act.io.csv.read_csv(act.tests.EXAMPLE_ANL_CSV, sep=r'\s+', column_names=headers) + anl_ds = act.io.text.read_csv(act.tests.EXAMPLE_ANL_CSV, sep=r'\s+', column_names=headers) assert 'temp_60m' in anl_ds.variables.keys() assert 'rh' in anl_ds.variables.keys() assert anl_ds['temp_60m'].values[10] == -1.7 anl_ds.close() files = glob.glob(act.tests.EXAMPLE_MET_CSV) - ds = act.io.csv.read_csv(files[0]) + ds = act.io.text.read_csv(files[0]) assert 'date_time' in ds assert '_datastream' in ds.attrs