diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..d15c975eac --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + groups: + actions: + patterns: + - "*" diff --git a/.github/workflows/antivirus.yml b/.github/workflows/antivirus.yml index 25b1d9bf84..b2a306e249 100644 --- a/.github/workflows/antivirus.yml +++ b/.github/workflows/antivirus.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest name: AV scan steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Git AV Scan uses: djdefi/gitavscan@main with: diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index c4329ad51c..1a63a7bdd3 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -20,7 +20,7 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Install dependencies - name: Setup Conda Environment @@ -44,7 +44,7 @@ jobs: AIRNOW_API: ${{ secrets.AIRNOW_API }} # Push the book's HTML to github-pages - name: GitHub Pages action - uses: peaceiris/actions-gh-pages@v3.8.0 + uses: peaceiris/actions-gh-pages@v4.0.0 if: github.ref == 'refs/heads/main' with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b478c72225..68c2d8e77f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,9 @@ name: Run Unit Tests CI on: + schedule: + # Runs at 09Z (2am CDT) + - cron: "0 9 * * *" push: branches: [ main ] pull_request: @@ -38,7 +41,7 @@ jobs: platform: [x64] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Conda Environment uses: mamba-org/setup-micromamba@v1 @@ -64,10 +67,12 @@ jobs: python -m pytest -v --mpl --cov=./ --cov-report=xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + 
uses: codecov/codecov-action@v4 with: file: ./coverage.xml flags: unittests env_vars: OS,PYTHON name: codecov-umbrella fail_ci_if_error: false + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c2fb38c867..77cd91b23a 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -42,11 +42,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -57,7 +57,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -71,4 +71,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000000..cd7d332a71 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,33 @@ +name: linting + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + pre-job: + runs-on: ubuntu-latest + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@master + with: + concurrent_skipping: 'same_content' + skip_after_successful_duplicate: 'false' + do_not_skip: '["workflow_dispatch", "schedule"]' + linting: + needs: pre-job + runs-on: ubuntu-latest + if: ${{ needs.pre-job.outputs.should_skip != 'true' }} + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.12.1 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index a427a4b024..9f45e5d358 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -9,10 +9,10 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'ARM-DOE/ACT' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.11 @@ -36,7 +36,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist @@ -45,11 +45,11 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: "3.x" - - uses: 
actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist @@ -62,12 +62,12 @@ jobs: if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.10 + uses: pypa/gh-action-pypi-publish@v1.9.0 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16784b30d8..a0f1f3dc22 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,6 @@ repos: - id: check-docstring-first - id: check-json - id: check-yaml - - id: double-quote-string-fixer - id: debug-statements - id: mixed-line-ending @@ -24,12 +23,8 @@ repos: - id: black - id: black-jupyter - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: 'v0.0.239' hooks: - - id: flake8 - - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort + - id: ruff + args: [ "--fix" ] diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 1c131a7c9d..c0dd6b1ead 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -247,39 +247,22 @@ An example: Code Style ---------- -ACT follows PEP8 coding standards. To make sure your code follows the -PEP8 style, you can use a variety of tools that can check for you. Two -popular PEP8 check modules are flake8 and pylint. (Note: ACT's continuous -integration uses flake8). +ACT uses pre-commit for linting, which applies a variety of pep8 and other +code style rules. For more on pep8 style: - https://www.python.org/dev/peps/pep-0008/ -To install flake8:: +To install pre-commit hooks for the ACT repo:: - conda install -c conda-forge flake8 + pre-commit install -To use flake8:: +Afterwards, pre-commit will run on every commit to the repository. It will +re-format files as necessary.
- flake8 path/to/code/to/check.py - -To install pylint:: - - conda install pylint - -To use pylint:: - - pylint path/to/code/to/check.py - -Both of these tools are highly configurable to suit a user's taste. Refer to -the tools documentation for details on this process. - -- https://flake8.pycqa.org/en/latest/ -- https://www.pylint.org/ - -Naming Convenction ----------------------------------------- +Naming Convention +----------------- Discovery ~~~~~~~~~ diff --git a/MANIFEST.in b/MANIFEST.in index df46685ecd..0dbcb3a227 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,9 +7,9 @@ include LICENSE.txt recursive-exclude * __pycache__ recursive-exclude * *.py[co] -recursive-include act/plotting *.txt -recursive-include act/tests * +recursive-include act/plotting *.txt +recursive-include tests *.py *.png recursive-include docs *.rst conf.py Makefile make.bat include versioneer.py diff --git a/README.rst b/README.rst index d828e20b3c..aaa6e33a2c 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Atmospheric data Community Toolkit (ACT) |AnacondaCloud| |CodeCovStatus| |Build| |Docs| -|CondaDownloads| |Zenodo| |ARM| +|CondaDownloads| |PyPiDownloads| |Zenodo| |ARM| .. |AnacondaCloud| image:: https://anaconda.org/conda-forge/act-atmos/badges/version.svg :target: https://anaconda.org/conda-forge/act-atmos @@ -12,6 +12,9 @@ Atmospheric data Community Toolkit (ACT) .. |CondaDownloads| image:: https://anaconda.org/conda-forge/act-atmos/badges/downloads.svg :target: https://anaconda.org/conda-forge/act-atmos/files +.. |PyPiDownloads| image:: https://img.shields.io/pypi/dm/act_atmos.svg + :target: https://pypi.org/project/act-atmos/ + .. |Zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3855537.svg :target: https://doi.org/10.5281/zenodo.3855537 @@ -38,16 +41,14 @@ Please report any issues or feature requests by sumitting an `Issue `_ has been provided that explains the changes and how to work with the new syntax. 
-To test out the release candidate 2.0.0-rc.0 of ACT, use:: - - pip install git+https://github.com/ARM-DOE/ACT.git@v2.0.0-rc.0 +The new release is available on both PyPI and conda-forge. -Please report any bugs of the release candidate to the Issue Tracker mentioned in +Please report any bugs of the 2.0 release to the Issue Tracker mentioned in the Important Links section below. Important Links @@ -144,18 +145,14 @@ the source code or use git to checkout the repository:: git clone https://github.com/ARM-DOE/ACT.git -To install in your home directory, use:: - - python setup.py install --user +Once you have the directory locally, you can install ACT in +development mode using:: -To install for all users on Unix/Linux:: - - python setup.py build - sudo python setup.py install + pip install -e . -Development install using pip from within the ACT directory:: +If you want to install the repository directly, you can use:: - pip install -e . + pip install git+https://github.com/ARM-DOE/ACT.git Contributing ~~~~~~~~~~~~ diff --git a/act/__init__.py b/act/__init__.py index 81ae36fd04..56ee9cb5be 100644 --- a/act/__init__.py +++ b/act/__init__.py @@ -14,7 +14,7 @@ register_matplotlib_converters() # Import early so these classes are available to the object -from .qc import QCFilter, QCTests, clean +from .qc import QCFilter, QCTests, clean # noqa # Import the lazy loaded modules submodules = [ diff --git a/act/corrections/mpl.py b/act/corrections/mpl.py index 73e2616df1..e0601817f3 100644 --- a/act/corrections/mpl.py +++ b/act/corrections/mpl.py @@ -141,8 +141,8 @@ def correct_mpl( x_data = x_data - x_ap # R-Squared Correction - co_data = co_data * height ** 2 - x_data = x_data * height ** 2 + co_data = co_data * height**2 + x_data = x_data * height**2 # Overlap Correction for j in range(ds[range_bins_var_name].size): diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py index c0a5fab10d..8159f8f4d3 100644 --- a/act/discovery/__init__.py +++ 
b/act/discovery/__init__.py @@ -8,14 +8,15 @@ __getattr__, __dir__, __all__ = lazy.attach( __name__, - submodules=['arm', 'cropscape', 'airnow', 'noaapsl', 'neon', 'surfrad'], + submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad'], submod_attrs={ 'arm': ['download_arm_data', 'get_arm_doi'], 'asos': ['get_asos_data'], 'airnow': ['get_airnow_bounded_obs', 'get_airnow_obs', 'get_airnow_forecast'], 'cropscape': ['get_crop_type'], + 'improve': ['get_improve_data'], 'noaapsl': ['download_noaa_psl_data'], 'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'], - 'surfrad': ['download_surfrad_data'] + 'surfrad': ['download_surfrad_data'], }, ) diff --git a/act/discovery/airnow.py b/act/discovery/airnow.py index 7b458e780c..97ec5f1cca 100644 --- a/act/discovery/airnow.py +++ b/act/discovery/airnow.py @@ -38,7 +38,7 @@ def get_airnow_forecast(token, date, zipcode=None, latlon=None, distance=25): """ # default beginning of the query url - query_url = ('https://airnowapi.org/aq/forecast/') + query_url = 'https://www.airnowapi.org/aq/forecast/' # checking is either a zipcode or latlon coordinate is defined # if neither is defined then error is raised @@ -46,17 +46,34 @@ def get_airnow_forecast(token, date, zipcode=None, latlon=None, distance=25): raise NameError("Zipcode or latlon must be defined") if zipcode: - url = (query_url + ('zipcode/?' + 'format=text/csv' + '&zipCode=' - + str(zipcode) + '&date=' + str(date) - + '&distance=' + str(distance) - + '&API_KEY=' + str(token))) + url = query_url + ( + 'zipCode/?' + + 'format=text/csv' + + '&zipCode=' + + str(zipcode) + + '&date=' + + str(date) + + '&distance=' + + str(distance) + + '&API_KEY=' + + str(token) + ) if latlon: - url = (query_url + ('latLong/?' 
+ 'format=text/csv' - + '&latitude=' + str(latlon[0]) + '&longitude=' - + str(latlon[1]) + '&date=' + str(date) - + '&distance=' + str(distance) - + '&API_KEY=' + str(token))) + url = query_url + ( + 'latLong/?' + + 'format=text/csv' + + '&latitude=' + + str(latlon[0]) + + '&longitude=' + + str(latlon[1]) + + '&date=' + + str(date) + + '&distance=' + + str(distance) + + '&API_KEY=' + + str(token) + ) df = pd.read_csv(url) @@ -103,7 +120,7 @@ def get_airnow_obs(token, date=None, zipcode=None, latlon=None, distance=25): """ # default beginning of the query url - query_url = ('https://www.airnowapi.org/aq/observation/') + query_url = 'https://www.airnowapi.org/aq/observation/' # checking is either a zipcode or latlon coordinate is defined # if neither is defined then error is raised @@ -114,26 +131,67 @@ def get_airnow_obs(token, date=None, zipcode=None, latlon=None, distance=25): if date is None: obs_type = 'current' if zipcode: - url = (query_url + ('zipCode/' + str(obs_type) + '/?' + 'format=text/csv' - + '&zipCode=' + str(zipcode) + '&distance=' + str(distance) - + '&API_KEY=' + str(token))) + url = query_url + ( + 'zipCode/' + + str(obs_type) + + '/?' + + 'format=text/csv' + + '&zipCode=' + + str(zipcode) + + '&distance=' + + str(distance) + + '&API_KEY=' + + str(token) + ) if latlon: - url = (query_url + ('latLong/' + str(obs_type) + '/?' + 'format=text/csv' - + '&latitude=' + str(latlon[0]) - + '&longitude=' + str(latlon[1]) + '&distance=' - + str(distance) + '&API_KEY=' + str(token))) + url = query_url + ( + 'latLong/' + + str(obs_type) + + '/?' + + 'format=text/csv' + + '&latitude=' + + str(latlon[0]) + + '&longitude=' + + str(latlon[1]) + + '&distance=' + + str(distance) + + '&API_KEY=' + + str(token) + ) else: obs_type = 'historical' if zipcode: - url = (query_url + ('zipCode/' + str(obs_type) + '/?' 
+ 'format=text/csv' - + '&zipCode=' + str(zipcode) + '&date=' + str(date) - + 'T00-0000&distance=' + str(distance) + '&API_KEY=' + str(token))) + url = query_url + ( + 'zipCode/' + + str(obs_type) + + '/?' + + 'format=text/csv' + + '&zipCode=' + + str(zipcode) + + '&date=' + + str(date) + + 'T00-0000&distance=' + + str(distance) + + '&API_KEY=' + + str(token) + ) if latlon: - url = (query_url + ('latLong/' + str(obs_type) + '/?' + 'format=text/csv' - + '&latitude=' + str(latlon[0]) - + '&longitude=' + str(latlon[1]) + '&date=' - + str(date) + 'T00-0000&distance=' + str(distance) - + '&API_KEY=' + str(token))) + url = query_url + ( + 'latLong/' + + str(obs_type) + + '/?' + + 'format=text/csv' + + '&latitude=' + + str(latlon[0]) + + '&longitude=' + + str(latlon[1]) + + '&date=' + + str(date) + + 'T00-0000&distance=' + + str(distance) + + '&API_KEY=' + + str(token) + ) df = pd.read_csv(url) @@ -143,8 +201,9 @@ def get_airnow_obs(token, date=None, zipcode=None, latlon=None, distance=25): return ds -def get_airnow_bounded_obs(token, start_date, end_date, latlon_bnds, parameters='OZONE,PM25', data_type='B', - mon_type=0): +def get_airnow_bounded_obs( + token, start_date, end_date, latlon_bnds, parameters='OZONE,PM25', data_type='B', mon_type=0 +): """ Get AQI values or data concentrations for a specific date and time range and set of parameters within a geographic area of intrest @@ -184,16 +243,44 @@ def get_airnow_bounded_obs(token, start_date, end_date, latlon_bnds, parameters= verbose = 1 inc_raw_con = 1 - url = ('https://www.airnowapi.org/aq/data/?startDate=' + str(start_date) - + '&endDate=' + str(end_date) + '&parameters=' + str(parameters) - + '&BBOX=' + str(latlon_bnds) + '&dataType=' + str(data_type) - + '&format=text/csv' + '&verbose=' + str(verbose) - + '&monitorType=' + str(mon_type) + '&includerawconcentrations=' - + str(inc_raw_con) + '&API_KEY=' + str(token)) + url = ( + 'https://www.airnowapi.org/aq/data/?startDate=' + + str(start_date) + + '&endDate=' + +
str(end_date) + + '&parameters=' + + str(parameters) + + '&BBOX=' + + str(latlon_bnds) + + '&dataType=' + + str(data_type) + + '&format=text/csv' + + '&verbose=' + + str(verbose) + + '&monitorType=' + + str(mon_type) + + '&includerawconcentrations=' + + str(inc_raw_con) + + '&API_KEY=' + + str(token) + ) # Set Column names - names = ['latitude', 'longitude', 'time', 'parameter', 'concentration', 'unit', - 'raw_concentration', 'AQI', 'category', 'site_name', 'site_agency', 'aqs_id', 'full_aqs_id'] + names = [ + 'latitude', + 'longitude', + 'time', + 'parameter', + 'concentration', + 'unit', + 'raw_concentration', + 'AQI', + 'category', + 'site_name', + 'site_agency', + 'aqs_id', + 'full_aqs_id', + ] # Read data into CSV df = pd.read_csv(url, names=names) @@ -211,12 +298,9 @@ def get_airnow_bounded_obs(token, start_date, end_date, latlon_bnds, parameters= data_vars={ 'latitude': (['sites'], latitude), 'longitude': (['sites'], longitude), - 'aqs_id': (['sites'], aqs_id) + 'aqs_id': (['sites'], aqs_id), }, - coords={ - 'time': (['time'], times), - 'sites': (['sites'], sites) - } + coords={'time': (['time'], times), 'sites': (['sites'], sites)}, ) # Set up emtpy data with nans @@ -233,7 +317,11 @@ def get_airnow_bounded_obs(token, start_date, end_date, latlon_bnds, parameters= data[v, t, s] = list(result[variables[v]])[0] atts = {'units': ''} else: - result = df.loc[(df['time'] == times[t]) & (df['site_name'] == sites[s]) & (df['parameter'] == variables[v])] + result = df.loc[ + (df['time'] == times[t]) + & (df['site_name'] == sites[s]) + & (df['parameter'] == variables[v]) + ] if len(result['concentration']) > 0: data[v, t, s] = list(result['concentration'])[0] atts = {'units': list(result['unit'])[0]} diff --git a/act/discovery/arm.py b/act/discovery/arm.py index ab831771b0..1fda756913 100644 --- a/act/discovery/arm.py +++ b/act/discovery/arm.py @@ -3,14 +3,11 @@ """ -import argparse import json import os -import sys from datetime import timedelta import requests import
textwrap -import warnings try: from urllib.request import urlopen @@ -163,7 +160,9 @@ def download_arm_data(username, token, datastream, startdate, enddate, time=None open_bytes_file.write(data) file_names.append(output_file) # Get ARM DOI and print it out - doi = get_arm_doi(datastream, start_datetime.strftime('%Y-%m-%d'), end_datetime.strftime('%Y-%m-%d')) + doi = get_arm_doi( + datastream, start_datetime.strftime('%Y-%m-%d'), end_datetime.strftime('%Y-%m-%d') + ) print('\nIf you use these data to prepare a publication, please cite:\n') print(textwrap.fill(doi, width=80)) print('') @@ -197,7 +196,11 @@ def get_arm_doi(datastream, startdate, enddate): """ # Get the DOI information - doi_url = 'https://adc.arm.gov/citationservice/citation/datastream?id=' + datastream + '&citationType=apa' + doi_url = ( + 'https://adc.arm.gov/citationservice/citation/datastream?id=' + + datastream + + '&citationType=apa' + ) doi_url += '&startDate=' + startdate doi_url += '&endDate=' + enddate try: diff --git a/act/discovery/asos.py b/act/discovery/asos.py index 5f5ae27534..0d37e18d8b 100644 --- a/act/discovery/asos.py +++ b/act/discovery/asos.py @@ -10,8 +10,7 @@ import numpy as np import pandas as pd -import xarray as xr -from six import StringIO +from io import StringIO try: from urllib.request import urlopen diff --git a/act/discovery/cropscape.py b/act/discovery/cropscape.py index 07308929f9..e50b4ff52b 100644 --- a/act/discovery/cropscape.py +++ b/act/discovery/cropscape.py @@ -89,10 +89,9 @@ def get_crop_type(lat=None, lon=None, year=None): # Add year, lat, and lon as parameters params = {'year': str(year), 'x': str(x), 'y': str(y)} - # Perform the request. Note, verify set to False until - # server SSL errors can be worked out + # Perform the request. 
try: - req = requests.get(url, params=params, verify=False, timeout=1) + req = requests.get(url, params=params, timeout=1) except Exception: return diff --git a/act/discovery/improve.py b/act/discovery/improve.py new file mode 100644 index 0000000000..677f51df38 --- /dev/null +++ b/act/discovery/improve.py @@ -0,0 +1,404 @@ +""" +Script for downloading data from the IMPROVE network + +""" + +import pandas as pd +import numpy as np +import xarray as xr + + +def get_improve_data(site_id=None, parameter_id=None, start_date=None, end_date=None): + """ + Retrieve IMPROVE data for the given site and variable ids and store it in an + xarray dataset. Documentation on the IMPROVE data can be found at + https://vista.cira.colostate.edu/Improve/data-user-guide/ + + Also adds in metadata from the site summary page to the global attributes + https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=244 + + Parameters + ---------- + site_id : str + Site id number which can be retrieved from the IMPROVE page for each site such as + https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=244 + parameter_id : list + List of parameter id values to retrieve from the API. 
+ start_date : str + Start date formatted as M/D/YEAR such as 1/31/2022 + end_date : str + End date formatted as M/D/YEAR such as 1/31/2022 + + Returns + ------- + ds : xarray.Dataset + Returns an Xarray dataset object + + Example + ------- + act.discovery.get_improve_data(site_id='244') + + """ + + # Build URL + base_url = 'https://views.cira.colostate.edu/fed/svc/DataSvc.aspx?action=getqueryresults&cmdfileid=ServiceSqlCommandFile&cmdid=BasicDataQuery1_Codes' + + if site_id is None: + raise ValueError('Please provide a site_id') + else: + base_url += '&dsidse=10001&siidse=' + str(site_id) + + if parameter_id is None: + base_url += '&paidse=101,136,907,900,102,104,105,115,116,117,114,3778,142,143,144,145,3016,146,3699,141,3779,3217,108,109,112,113,301,304,303,3716,3717,3718,3719,3720,3721,3722,3730,3731,3732,3733,3734,3735,3736,3694,121,3723,3724,3725,3726,3727,3728,3729,3737,3738,3739,3740,3741,3742,3743,118,148,128,130,132,941,127,903,910,3744,3745,3746,3747,3748,3749,3750,3751,3752,3753,3754,3755,3756,3757,131,138,139,133,3704,3705,3706,3707,3708,3709,3710,3711,3712,3713,3714,3715,147,124,150,3695,3014,153,154,134,911,158,156,151,202,159,160,162,163' + else: + base_url += '&paidse=' + ','.join(parameter_id) + + if start_date is None: + raise ValueError('Please provide a start date') + else: + base_url += '&sd=' + start_date + if end_date is None: + raise ValueError('Please provide an end date') + else: + base_url += '&ed=' + end_date + + # Read data and get variables + df = pd.read_html(base_url)[0] + variables = np.unique(df.Param) + + # Print out proper acknowledgement + print("Please use the following acknowledgment when using IMPROVE data:\n") + + print( + "IMPROVE is a collaborative association of state, tribal, and federal agencies, and international partners. US Environmental Protection Agency is the primary funding source, with contracting and research support from the National Park Service. 
The Air Quality Group at the University of California, Davis is the central analytical laboratory, with ion analysis provided by Research Triangle Institute, and carbon analysis provided by Desert Research Institute." + ) + + # Creat mapping of variable names to metadata + mapping = { + 'ALf': {'name': 'aluminum_fine', 'long_name': 'Aluminum (Fine)', 'epa_code': '88104'}, + 'ASf': {'name': 'arsenic_fine', 'long_name': 'Arsenic (Fine)', 'epa_code': '88103'}, + 'BRf': {'name': 'bromine_fine', 'long_name': 'Bromine (Fine)', 'epa_code': '88109'}, + 'CAf': {'name': 'calcium_fine', 'long_name': 'Calcium (Fine)', 'epa_code': '88111'}, + 'CLf': {'name': 'chlorine_fine', 'long_name': 'Chlorine (Fine)', 'epa_code': '88115'}, + 'CRf': {'name': 'chromium_fine', 'long_name': 'Chromium (Fine)', 'epa_code': '88112'}, + 'CUf': {'name': 'copper_fine', 'long_name': 'Copper (Fine)', 'epa_code': '88114'}, + 'FEf': {'name': 'iron_fine', 'long_name': 'Iron (Fine)', 'epa_code': '88126'}, + 'PBf': {'name': 'lead_fine', 'long_name': 'Lead (Fine)', 'epa_code': '88128'}, + 'MGf': {'name': 'magnesium_fine', 'long_name': 'Magnesium (Fine)', 'epa_code': '88140'}, + 'MNf': {'name': 'manganese_fine', 'long_name': 'Manganese (Fine)', 'epa_code': '88132'}, + 'NIf': {'name': 'nickel_fine', 'long_name': 'Nickel (Fine)', 'epa_code': '88136'}, + 'Pf': {'name': 'phosphorus_fine', 'long_name': 'Phosphorus (Fine)', 'epa_code': '88152'}, + 'Kf': {'name': 'potassium_fine', 'long_name': 'Potassium (Fine)', 'epa_code': '88180'}, + 'RBf': {'name': 'rubidium_fine', 'long_name': 'Rubidium (Fine)', 'epa_code': '88176'}, + 'SEf': {'name': 'selenium_fine', 'long_name': 'Selenium (Fine)', 'epa_code': '88154'}, + 'SIf': {'name': 'silicon_fine', 'long_name': 'Silicon (Fine)', 'epa_code': '88165'}, + 'NAf': {'name': 'sodium_fine', 'long_name': 'Sodium (Fine)', 'epa_code': '88184'}, + 'SRf': {'name': 'strontium_fine', 'long_name': 'Strontium (Fine)', 'epa_code': '88168'}, + 'Sf': {'name': 'sulfur_fine', 'long_name': 
'Sulfur (Fine)', 'epa_code': '88169'}, + 'TIf': {'name': 'titanium_fine', 'long_name': 'Titanium (Fine)', 'epa_code': '88161'}, + 'Vf': {'name': 'vanadium_fine', 'long_name': 'Vanadium (Fine)', 'epa_code': '88164'}, + 'ZNf': {'name': 'zinc_fine', 'long_name': 'Zinc (Fine)', 'epa_code': '88167'}, + 'ZRf': {'name': 'zirconium_fine', 'long_name': 'Zirconium (Fine)', 'epa_code': '88185'}, + 'CHLf': {'name': 'chloride_fine', 'long_name': 'Chloride (Fine)', 'epa_code': '88203'}, + 'NO3f': {'name': 'nitrate_fine', 'long_name': 'Nitrate (Fine)', 'epa_code': '88306'}, + 'N2f': {'name': 'nitrite_fine', 'long_name': 'Nitrite (Fine)', 'epa_code': '88338'}, + 'SO4f': {'name': 'sulfate_fine', 'long_name': 'Sulfate (Fine)', 'epa_code': '88403'}, + 'OC1f': { + 'name': 'carbon_organic_fraction_1_fine', + 'long_name': 'Carbon, Organic Fraction 1 (Fine)', + 'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 140 °C', + 'epa_code': '88324', + }, + 'OC2f': { + 'name': 'carbon_organic_fraction_2_fine', + 'long_name': 'Carbon, Organic Fraction 2 (Fine)', + 'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 280 °C', + 'epa_code': '88325', + }, + 'OC3f': { + 'name': 'carbon_organic_fraction_3_fine', + 'long_name': 'Carbon, Organic Fraction 3 (Fine)', + 'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 480 °C', + 'epa_code': '88326', + }, + 'OC4f': { + 'name': 'carbon_organic_fraction_4_fine', + 'long_name': 'Carbon, Organic Fraction 4 (Fine)', + 'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 580 °C', + 'epa_code': '88327', + }, + 'OPf': { + 'name': 'carbon_organic_reflectance_fine', + 'long_name': 'Carbon, Organic Pyrolized (Fine) by Reflectance', + 'comments': 'TOR, carbon that is measured after the introduction of helium/oxygen atmosphere at °550 C but beforereflectance returns to initial value', + 'epa_code': '88328', + }, + 'OPTf': { + 'name': 'carbon_organic_transmittance_fine', + 'long_name': 'Carbon, 
Organic Pyrolized (Fine) by Transmittance', + 'comments': 'TOR, carbon that is measured after the introduction of helium/oxygen atmosphere at °550 C but beforetransmittance returns to initial value', + 'epa_code': '88336', + }, + 'OCf': { + 'name': 'carbon_organic_total_fine', + 'long_name': 'Carbon, Organic Total (Fine)', + 'comments': 'Organic carbon from TOR carbon fractions (OC1f+OC2f+OC3f+OC4f+OPf)', + 'epa_code': '88320', + }, + 'EC1f': { + 'name': 'carbon_elemental_fraction_1_fine', + 'long_name': 'Carbon, Elemental Fraction 1 (Fine)', + 'comments': 'TOR, 98% helium, 2% oxygen atmosphere, temperature (T) = 580° C.', + 'epa_code': '88329', + }, + 'EC2f': { + 'name': 'carbon_elemental_fraction_2_fine', + 'long_name': 'Carbon, Elemental Fraction 2 (Fine)', + 'comments': 'TOR, 98% helium, 2% oxygen atmosphere, temperature (T) = 740° C.', + 'epa_code': '88380', + }, + 'EC3f': { + 'name': 'carbon_elemental_fraction_3_fine', + 'long_name': 'Carbon, Elemental Fraction 3 (Fine)', + 'comments': 'TOR, 98% helium, 2% oxygen atmosphere, temperature (T) = 840° C.', + 'epa_code': '88331', + }, + 'ECf': { + 'name': 'carbon_elemental_total_fine', + 'long_name': 'Carbon, Elemental Total (Fine)', + 'comments': 'Elemental carbon from TOR carbon fractions (E1+E2+E3-OP)', + 'epa_code': '88321', + }, + 'fAbs': { + 'name': 'filter_absorption_coeff', + 'long_name': 'Filter Absorption Coefficient', + 'comments': 'A calibrated absorption coefficient measured from a Teflon filter using a hybrid integrating plate and sphere (HIPS) method', + 'epa_code': '63102', + }, + 'FlowRate': { + 'name': 'flow_rate', + 'long_name': 'Flow Rate', + 'comments': 'The rate of air flow through an air sampling instrument', + 'epa_code': '63102', + }, + 'MF': { + 'name': 'mass_pm2_5', + 'long_name': 'Mass, PM2.5 (Fine)', + 'comments': 'Gravimetric mass measurement for particles with aerodynamic diameters less than 2.5 um', + 'epa_code': '88101', + }, + 'MT': { + 'name': 'mass_pm10', + 'long_name': 'Mass, 
PM10 (Total)', + 'comments': 'Gravimetric mass measurement for particles with aerodynamic diameters less than 10 um', + 'epa_code': '85101', + }, + 'SampDur': { + 'name': 'sample_duration', + 'long_name': 'Sampling Duration', + 'comments': 'The duration of a given sampling period in minutes', + }, + 'ammNO3f': { + 'name': 'ammonium_nitrate_fine', + 'long_name': 'Ammonium Nitrate (Fine)', + 'comments': '1.29 x NO3f', + }, + 'ammSO4f': { + 'name': 'ammonium_sulfate_fine', + 'long_name': 'Ammonium Sulfate (Fine)', + 'comments': '1.375 x SO4f', + }, + 'OMCf': { + 'name': 'carbon_organic_mass_fine', + 'long_name': 'Carbon, Organic Mass (fine)(1.8*OC)', + 'comments': '1.8 X OCf', + }, + 'TCf': { + 'name': 'carbon_total_fine', + 'long_name': 'Carbon, Total (fine)', + 'comments': 'From TOR carbon fractions (OCf+ECf)', + }, + 'CM_calculated': { + 'name': 'CM_calculated', + 'long_name': 'Mass, PM10-PM2.5 (Coarse)', + 'comments': 'MT-MF', + }, + 'SeaSaltf': { + 'name': 'sea_salt_fine', + 'long_name': 'Sea Salt (Fine)', + 'comments': '1.8XCHLf', + }, + 'SOILf': { + 'name': 'soil_fine', + 'long_name': 'Soil (Fine)', + 'comments': '2.2 × ALf + 2.49 × SIf + 1.63 × CAf + 2.42 × FEf + 1.94 × TIf', + }, + 'RCFM': { + 'name': 'mass_pm2_5_reconstructed', + 'long_name': 'Mass, PM2.5 Reconstructed (Fine)', + 'comments': 'Sum of ammSO4f, ammNO3f, OMCf, ECf, soilf, and seasaltf.', + }, + 'RCTM': { + 'name': 'mass_pmi10_reconstructed', + 'long_name': 'Mass, PM10 Reconstructed (Total)', + 'comments': 'Sum of ammSO4f, ammNO3f, OMCf, ECf, soilf, seasaltf, and CM_calculated.', + }, + } + laser_vars = { + 'RefF': { + 'units': 'ratio', + 'comments': 'Final laser reflectance at ', + 'name': 'final_laser_reflectance_', + }, + 'TransF': { + 'units': 'ratio', + 'comments': 'Final laser transmittance at ', + 'name': 'final_laser_transmittance_', + }, + 'RefI': { + 'units': 'ratio', + 'comments': 'Initial laser reflectance at ', + 'name': 'initial_laser_reflectance_', + }, + 'TransI': { + 'units': 
'ratio', + 'comments': 'Initial laser transmittance at ', + 'name': 'initial_laser_transmittance_', + }, + 'RefM': { + 'units': 'ratio', + 'comments': 'Minimum laser reflectance at ', + 'name': 'min_laser_reflectance_', + }, + 'TransM': { + 'units': 'ratio', + 'comments': ' Minimum laser transmittance at ', + 'name': 'min_laser_transmittance_', + }, + 'OP_TR': { + 'units': 'ug m-3', + 'comments': 'Organic Pyrolyzed Carbon by Reflectance at ', + 'name': 'organic_pyrolyzed_carbon_reflectance_', + }, + 'OP_TT': { + 'units': 'ug m-3', + 'comments': 'Organic Pyrolyzed Carbon by Transmittance at ', + 'name': 'organic_pyrolyzed_carbon_transmittance_', + }, + } + laser_wl = ['405', '445', '532', '635', '780', '808', '980'] + for v in laser_vars: + for wl in laser_wl: + name = laser_vars[v]['name'] + wl + if 'OP' not in v: + mapping['_'.join([v, wl])] = { + 'units': laser_vars[v]['units'], + 'name': name, + 'long_name': ' '.join([laser_vars[v]['comments'], wl]), + } + else: + var_name = wl.join(v.split('_')) + mapping[var_name] = { + 'units': laser_vars[v]['units'], + 'name': name, + 'long_name': ' '.join([laser_vars[v]['comments'], wl]), + } + + # Run through each variable in the dataframe and add it to a dataset + # along with the appropriate metadata + ct = 0 + site = np.unique(df.Site)[0] + attrs = {'url': base_url, 'datastream': site + ' IMPROVE'} + for v in variables: + # Find data for just the variable in question + poc_attrs = {'units': '1', 'long_name': 'Parameter Occurrence Code for ' + v} + df2 = df[df.Param == v] + + # Get metadata + unit = np.unique(df2.UnitAbbr) + if len(unit) > 1: + raise ValueError('Multiple types of units detected, using first one') + + sites = np.unique(df2.Site) + if len(sites) > 1: + raise ValueError('Multiple sites detected, please use only one') + + # Get time, POC, and data + time = pd.to_datetime(df2.FactDate) + poc = df2.POC + + data = df2.FactValue + + # Set up attributes + var_attrs = {'units': unit[0], 'long_name': 
mapping[v]['long_name'], '_FillValue': -999.0} + if 'comments' in mapping[v]: + var_attrs['comments'] = mapping[v]['comments'] + if 'epa_code' in mapping[v]: + var_attrs['epa_code'] = mapping[v]['epa_code'] + + # If the first variable, create the dataset and then add variables to it + if ct == 0: + ds = xr.Dataset( + data_vars={mapping[v]['name']: (['time'], data, var_attrs)}, + coords={'time': time}, + attrs=attrs, + ) + ds['poc_' + mapping[v]['name']] = xr.DataArray( + data=poc, dims=['time'], coords={'time': time}, attrs=poc_attrs + ) + ct += 1 + else: + ds[mapping[v]['name']] = xr.DataArray( + data=data, dims=['time'], coords={'time': time}, attrs=var_attrs + ) + ds['poc_' + mapping[v]['name']] = xr.DataArray( + data=poc, dims=['time'], coords={'time': time}, attrs=poc_attrs + ) + + # Add in metadata from site summary page + url = 'https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=' + str( + site_id + ) + df = pd.read_html(url) + for i in df[0].index: + # Add lat/lon as variables + if df[0][0][i] == 'Latitude': + attrs = { + 'long_name': 'North latitude', + 'units': 'degree_N', + 'valid_min': -90.0, + 'valid_max': 90.0, + 'standard_name': 'latitude', + } + ds['lat'] = xr.DataArray( + data=float(df[0][1][i]), + dims=['time'], + coords={'time': ds['time'].values}, + attrs=attrs, + ) + elif df[0][0][i] == 'Longitude': + attrs = { + 'long_name': 'East longitude', + 'units': 'degree_E', + 'valid_min': -180.0, + 'valid_max': 180.0, + 'standard_name': 'longitude', + } + ds['lon'] = xr.DataArray( + data=float(df[0][1][i]), + dims=['time'], + coords={'time': ds['time'].values}, + attrs=attrs, + ) + else: + ds.attrs[df[0][0][i]] = df[0][1][i] + + # Add in problem information from the site summary page + problem = '' + for i in df[-1].index: + problem += '_'.join( + [df[-1]['EventDate'][i], df[-1]['EventType'][i], df[-1]['Notes'][i], '\n'] + ) + ds.attrs['site_problems'] = problem + + return ds diff --git a/act/discovery/neon.py 
b/act/discovery/neon.py index cfe3eff2c1..13251bd888 100644 --- a/act/discovery/neon.py +++ b/act/discovery/neon.py @@ -7,7 +7,6 @@ """ -import json import requests import os import shutil diff --git a/act/discovery/noaapsl.py b/act/discovery/noaapsl.py index 30b55ff2c2..3239731bf4 100644 --- a/act/discovery/noaapsl.py +++ b/act/discovery/noaapsl.py @@ -2,10 +2,8 @@ Function for downloading data from NOAA PSL Profiler Network """ -import json from datetime import datetime import pandas as pd -import numpy as np import os try: @@ -14,8 +12,9 @@ from urllib import urlopen -def download_noaa_psl_data(site=None, instrument=None, startdate=None, enddate=None, - hour=None, output=None): +def download_noaa_psl_data( + site=None, instrument=None, startdate=None, enddate=None, hour=None, output=None +): """ Function to download data from the NOAA PSL Profiler Network Data Library https://psl.noaa.gov/data/obs/datadisplay/ @@ -76,9 +75,18 @@ def download_noaa_psl_data(site=None, instrument=None, startdate=None, enddate=N url = 'https://downloads.psl.noaa.gov/psd2/data/realtime/' # Set list of strings that all point to the surface meteorology dataset - met_ds = ['Pressure', 'Datalogger', 'Net Radiation', 'Temp/RH', - 'Solar Radiation', 'Tipping Bucket', 'TBRG', 'Wind Speed', - 'Wind Direction', 'Wind Speed and Direction'] + met_ds = [ + 'Pressure', + 'Datalogger', + 'Net Radiation', + 'Temp/RH', + 'Solar Radiation', + 'Tipping Bucket', + 'TBRG', + 'Wind Speed', + 'Wind Direction', + 'Wind Speed and Direction', + ] # Add to the url depending on which instrument is requested if 'Parsivel' in instrument: @@ -153,8 +161,9 @@ def download_noaa_psl_data(site=None, instrument=None, startdate=None, enddate=N # Write each file out to a file with same name as online for f in files: if hour is not None: - if (str(doy).zfill(3) + str(hour)) not in f and\ - (str(doy).zfill(3) + '.' + str(hour)) not in f: + if (str(doy).zfill(3) + str(hour)) not in f and ( + str(doy).zfill(3) + '.' 
"""
This module contains I/O operations for the U.S. Department of Energy
AmeriFlux program (https://ameriflux.lbl.gov/).
"""

import numpy as np
import pandas as pd
import warnings


def _vertical_index(profile, depth):
    """Return the 1-based index of the entry in `profile` closest to `depth`."""
    return int(np.argmin(np.abs(profile - depth))) + 1


def convert_to_ameriflux(
    ds,
    variable_mapping=None,
    soil_mapping=None,
    depth_profile=[2.5, 5, 10, 15, 20, 30, 35, 50, 75, 100],
    include_missing_variables=False,
    **kwargs,
):
    """
    Converts a Dataset into a pandas DataFrame that uses AmeriFlux variable
    names, with TIMESTAMP_START/TIMESTAMP_END columns and -9999 as the
    missing value.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset of data to convert to AmeriFlux format
    variable_mapping : dict
        Dictionary of variables mappings. The key should be the name of the variable
        in the Dataset with the values being dictionaries of the AmeriFlux name and units.
        For example:
            var_mapping = {
                'co2_flux': {'name': 'FC', 'units': 'umol/(m^2 s)'},
            }
        If None, a default ARM mapping is used and a warning is issued.
    soil_mapping : dict
        Dictionary of soil variables mappings following the same formatting as variable_mapping.
        It is understood that the AmeriFlux name may be the same for some variables. This
        script attempts to automatically name these measurements. If a variable is not dimensioned
        by a depth nor has a sensor_height attribute, it will automatically assume that it's
        at the first depth in the depth_profile variable.
        If None, a default ARM mapping is used and a warning is issued.
    depth_profile : list
        List of depths that the variables will be mapped to. If a depth is not in this list,
        the index chosen will be the one closest to the depth value. The list is only
        read, never modified.
    include_missing_variables : boolean
        If there are variables that are completely missing (-9999), choose whether or not
        to include them in the DataFrame.
    **kwargs : keywords
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    df : pandas.DataFrame
        Returns a pandas dataframe for easy writing to csv

    """
    # Use ARM variable mappings if none provided, otherwise honour the
    # caller's mapping (previously a supplied mapping led to a NameError).
    if variable_mapping is None:
        warnings.warn('Variable mapping was not provided, using default ARM mapping')
        # Define variable mapping and units
        # The key is the variable name in the data and the name in the dictionary
        # is the AmeriFlux Name
        var_mapping = {
            'co2_flux': {'name': 'FC', 'units': 'umol/(m^2 s)'},
            'co2_molar_fraction': {'name': 'CO2', 'units': 'nmol/mol'},
            'co2_mixing_ratio': {'name': 'CO2_MIXING_RATIO', 'units': 'umol/mol'},
            'h2o_mole_fraction': {'name': 'H2O', 'units': 'mmol/mol'},
            'h2o_mixing_ratio': {'name': 'H2O_MIXING_RATIO', 'units': 'mmol/mol'},
            'ch4_mole_fraction': {'name': 'CH4', 'units': 'nmol/mol'},
            'ch4_mixing_ratio': {'name': 'CH4_MIXING_RATIO', 'units': 'nmol/mol'},
            'momentum_flux': {'name': 'TAU', 'units': 'kg/(m s^2)'},
            'sensible_heat_flux': {'name': 'H', 'units': 'W/m^2'},
            'latent_flux': {'name': 'LE', 'units': 'W/m^2'},
            'air_temperature': {'name': 'TA', 'units': 'deg C'},
            'air_pressure': {'name': 'PA', 'units': 'kPa'},
            'relative_humidity': {'name': 'RH', 'units': '%'},
            'sonic_temperature': {'name': 'T_SONIC', 'units': 'deg C'},
            'water_vapor_pressure_defecit': {'name': 'VPD', 'units': 'hPa'},
            'Monin_Obukhov_length': {'name': 'MO_LENGTH', 'units': 'm'},
            'Monin_Obukhov_stability_parameter': {'name': 'ZL', 'units': ''},
            'mean_wind': {'name': 'WS', 'units': 'm/s'},
            'wind_direction_from_north': {'name': 'WD', 'units': 'deg'},
            'friction_velocity': {'name': 'USTAR', 'units': 'm/s'},
            'maximum_instantaneous_wind_speed': {'name': 'WS_MAX', 'units': 'm/s'},
            'down_short_hemisp': {'name': 'SW_IN', 'units': 'W/m^2'},
            'up_short_hemisp': {'name': 'SW_OUT', 'units': 'W/m^2'},
            'down_long': {'name': 'LW_IN', 'units': 'W/m^2'},
            'up_long': {'name': 'LW_OUT', 'units': 'W/m^2'},
            'albedo': {'name': 'ALB', 'units': '%'},
            'net_radiation': {'name': 'NETRAD', 'units': 'W/m^2'},
            'par_inc': {'name': 'PPFD_IN', 'units': 'umol/(m^2 s)'},
            'par_ref': {'name': 'PPFD_OUT', 'units': 'umol/(m^2 s)'},
            'precip': {'name': 'P', 'units': 'mm'},
        }
    else:
        var_mapping = variable_mapping

    # Use ARM variable mappings if none provided
    # Similar to the above. This has only been tested on the ARM
    # ECOR, SEBS, STAMP, and AMC combined. The automated naming may
    # not work for all cases
    if soil_mapping is None:
        warnings.warn('Soil variable mapping was not provided, using default ARM mapping')
        soil_mapping = {
            'surface_soil_heat_flux': {'name': 'G', 'units': 'W/m^2'},
            'soil_temp': {'name': 'TS', 'units': 'deg C'},
            'temp': {'name': 'TS', 'units': 'deg C'},
            'soil_moisture': {'name': 'SWC', 'units': '%'},
            'soil_specific_water_content': {'name': 'SWC', 'units': '%'},
            'vwc': {'name': 'SWC', 'units': '%'},
        }

    # Loop through variables and update units to the AmeriFlux standard
    for v in ds:
        if v in var_mapping:
            ds = ds.utils.change_units(variables=v, desired_unit=var_mapping[v]['units'])

    # Get start/end time stamps. Every record is treated as a 30-minute
    # averaging period, so the end stamp is the start plus 30 minutes.
    ts_start = ds['time'].dt.strftime('%Y%m%d%H%M').values
    ts_end = [
        pd.to_datetime(t + np.timedelta64(30, 'm')).strftime('%Y%m%d%H%M')
        for t in ds['time'].values
    ]
    data = {}
    data['TIMESTAMP_START'] = ts_start
    data['TIMESTAMP_END'] = ts_end

    # Loop through the variables in the var mapping dictionary and add data to dictionary
    for v in var_mapping:
        out_name = var_mapping[v]['name']
        if v in ds:
            # Note: this writes the attribute onto the variable of `ds` itself.
            if 'missing_value' not in ds[v].attrs:
                ds[v].attrs['missing_value'] = -9999
            # All-null variables are only kept when explicitly requested
            if include_missing_variables or not np.all(ds[v].isnull()):
                data[out_name] = ds[v].values
        elif include_missing_variables:
            # Variable absent from the Dataset: emit a column of fill values
            data[out_name] = np.full(ds['time'].shape, -9999)

    # Automated naming for the soil variables
    # Again, this may not work for other cases. Careful review is needed.
    profile = np.asarray(depth_profile, dtype=float)
    prev_var = ''
    for var in soil_mapping:
        soil_name = soil_mapping[var]['name']
        # Restart the horizontal/replicate counters whenever the AmeriFlux
        # base name changes; variables sharing a name keep counting up.
        if soil_name != prev_var:
            h = 1
            r = 1
            prev_var = soil_name
        soil_vars = [
            v2
            for v2 in list(ds)
            if v2.startswith(var) and 'std' not in v2 and 'qc' not in v2 and 'net' not in v2
        ]
        for svar in soil_vars:
            # Default to the first depth when no depth information is found
            vert = 1
            if 'avg' in svar or 'average' in svar:
                continue
            soil_data = ds[svar].values
            if len(soil_data.shape) > 1:
                # The last coordinate of the variable is taken as the depth axis
                coords = ds[svar].coords
                depth_name = list(coords)[-1]
                depth_values = ds[depth_name].values
                for depth_ind in range(len(depth_values)):
                    soil_data_depth = soil_data[:, depth_ind]
                    # Closest entry in depth_profile; identical to the exact
                    # match when one exists, and no IndexError when it does not.
                    vert = _vertical_index(profile, depth_values[depth_ind])
                    new_name = '_'.join([soil_name, str(h), str(vert), str(r)])
                    data[new_name] = soil_data_depth
            else:
                if 'sensor_height' in ds[svar].attrs:
                    # Expected form is '<value> <units>', e.g. '-5 cm'
                    sensor_ht = ds[svar].attrs['sensor_height'].split(' ')
                    depth = abs(float(sensor_ht[0]))
                    units = sensor_ht[1]
                    # Only centimetre heights are mapped; any other unit
                    # leaves the variable at the first depth.
                    if units == 'cm':
                        vert = _vertical_index(profile, depth)
                new_name = '_'.join([soil_name, str(h), str(vert), str(r)])
                data[new_name] = soil_data
            h += 1

    # Convert dictionary to dataframe and return
    df = pd.DataFrame(data)
    df = df.fillna(-9999.0)

    return df
import xarray as xr
import numpy as np
import pandas as pd

from datetime import datetime


def read_hysplit(filename, base_year=2000, skiprows=1):
    """
    Reads an input HYSPLIT trajectory for plotting in ACT.

    The header is parsed line by line (grids, trajectory start points and
    the list of extra output variables); the remaining rows are read as a
    whitespace-delimited table and indexed by time.

    Parameters
    ----------
    filename: str
        The input file name.
    base_year: int
        The first year of the century in which the data are contained.
        It is added to the two-digit years stored in the file.
    skiprows: int
        Skip this number of rows after the header when reading.

    Returns
    -------
    ds: xarray Dataset
        The ACT dataset containing the HYSPLIT trajectories
    """

    ds = xr.Dataset({})
    with open(filename) as filebuf:
        # First line: number of meteorological grids
        num_grids = int(filebuf.readline().split()[0])
        grid_times = []
        grid_names = []
        forecast_hours = np.zeros(num_grids)
        for i in range(num_grids):
            # One line per grid: name, year, month, day, hour, forecast hour
            data = filebuf.readline().split()
            grid_names.append(data[0])
            grid_times.append(
                datetime(
                    year=(int(data[1]) + base_year),
                    month=int(data[2]),
                    day=int(data[3]),
                    hour=int(data[4]),
                )
            )
            forecast_hours[i] = int(data[5])
        ds["grid_forecast_hour"] = xr.DataArray(forecast_hours, dims=["num_grids"])
        ds["grid_forecast_hour"].attrs["standard_name"] = "Grid forecast hour"
        ds["grid_forecast_hour"].attrs["units"] = "Hour [UTC]"
        ds["grid_times"] = xr.DataArray(np.array(grid_times), dims=["num_grids"])

        # Next line: number of trajectories, direction, vertical motion method
        data_line = filebuf.readline().split()
        ds.attrs["trajectory_direction"] = data_line[1]
        ds.attrs["vertical_motion_calculation_method"] = data_line[2]
        num_traj = int(data_line[0])
        # Start times are parsed (so malformed dates still raise) but are
        # not currently stored in the returned Dataset.
        traj_times = []
        start_lats = np.zeros(num_traj)
        start_lons = np.zeros(num_traj)
        start_alt = np.zeros(num_traj)
        for i in range(num_traj):
            # One line per trajectory: year, month, day, hour, lat, lon, altitude
            data = filebuf.readline().split()
            traj_times.append(
                datetime(
                    year=(base_year + int(data[0])),
                    month=int(data[1]),
                    day=int(data[2]),
                    hour=int(data[3]),
                )
            )
            start_lats[i] = float(data[4])
            start_lons[i] = float(data[5])
            start_alt[i] = float(data[6])

        ds["start_latitude"] = xr.DataArray(start_lats, dims=["num_trajectories"])
        ds["start_latitude"].attrs["long_name"] = "Trajectory start latitude"
        ds["start_latitude"].attrs["units"] = "degree"
        # Fixed: this variable used to be filled with the start latitudes
        ds["start_longitude"] = xr.DataArray(start_lons, dims=["num_trajectories"])
        ds["start_longitude"].attrs["long_name"] = "Trajectory start longitude"
        ds["start_longitude"].attrs["units"] = "degree"
        ds["start_altitude"] = xr.DataArray(start_alt, dims=["num_trajectories"])
        ds["start_altitude"].attrs["long_name"] = "Trajectory start altitude"
        # Fixed: was "degree"; made consistent with the "alt" variable below
        ds["start_altitude"].attrs["units"] = "meter"

        # Last header line: a count followed by the extra output variable names
        data = filebuf.readline().split()
        var_list = [
            "trajectory_number",
            "grid_number",
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "forecast_hour",
            "age",
            "lat",
            "lon",
            "alt",
        ]
        var_list.extend(data[1:])

        # The rest of the file is the whitespace-delimited trajectory table
        input_df = pd.read_csv(
            filebuf, sep=r'\s+', index_col=False, names=var_list, skiprows=skiprows
        )

    time_parts = ["year", "month", "day", "hour", "minute"]
    input_df['year'] = base_year + input_df['year']
    for part in time_parts:
        input_df[part] = input_df[part].astype(int)
    # NOTE(review): the frame holds four-digit years at this point; confirm
    # whether `format` has any effect when assembling from component columns.
    input_df['time'] = pd.to_datetime(input_df[time_parts], format='%y%m%d%H%M')
    input_df = input_df.set_index("time")
    for part in time_parts:
        del input_df[part]

    ds = ds.merge(input_df.to_xarray())
    ds.attrs['datastream'] = 'hysplit'
    ds["trajectory_number"].attrs["standard_name"] = "Trajectory number"
    ds["trajectory_number"].attrs["units"] = "1"
    ds["grid_number"].attrs["standard_name"] = "Grid number"
    ds["grid_number"].attrs["units"] = "1"
    # Fixed: the name was copy-pasted from grid_number.
    # NOTE(review): units left as "1"; confirm the unit of the age column.
    ds["age"].attrs["standard_name"] = "Age"
    ds["age"].attrs["units"] = "1"
    ds["lat"].attrs["standard_name"] = "Latitude"
    ds["lat"].attrs["units"] = "degree"
    ds["lon"].attrs["standard_name"] = "Longitude"
    ds["lon"].attrs["units"] = "degree"
    ds["alt"].attrs["standard_name"] = "Altitude"
    ds["alt"].attrs["units"] = "meter"
    return ds
@@ -110,9 +107,7 @@ def read_icartt(filename, format=_format, key2 = 'quality_flag' else: key2 = key - da = xr.DataArray(ict.data[key], - coords=dict(time=ict.times), - name=key2, dims=['time']) + da = xr.DataArray(ict.data[key], coords=dict(time=ict.times), name=key2, dims=['time']) # Assume if Uncertainity does not match the number of variables, # values were not set within the file. Needs to be string! if len(uncertainty) != len(ict.variables): diff --git a/act/io/neon.py b/act/io/neon.py index 86d9bbd3eb..a5d1befa54 100644 --- a/act/io/neon.py +++ b/act/io/neon.py @@ -2,9 +2,7 @@ Modules for reading in NOAA PSL data. """ -import datetime as dt -import numpy as np import pandas as pd import xarray as xr @@ -84,9 +82,18 @@ def read_neon_csv(files, variable_files=None, position_files=None): dloc = loc_df.loc[loc_df['HOR.VER'] == hor_loc + '.' + ver_loc] idx = dloc.index.values if len(idx) > 0: - ds['lat'] = xr.DataArray(data=float(loc_df['referenceLatitude'].values[idx])) - ds['lon'] = xr.DataArray(data=float(loc_df['referenceLongitude'].values[idx])) - ds['alt'] = xr.DataArray(data=float(loc_df['referenceElevation'].values[idx])) + if len(loc_df['referenceLatitude'].values) > 1: + ds['lat'] = xr.DataArray(data=float(loc_df['referenceLatitude'].values[idx][0])) + ds['lon'] = xr.DataArray( + data=float(loc_df['referenceLongitude'].values[idx][0]) + ) + ds['alt'] = xr.DataArray( + data=float(loc_df['referenceElevation'].values[idx][0]) + ) + else: + ds['lat'] = xr.DataArray(data=float(loc_df['referenceLatitude'].values[idx])) + ds['lon'] = xr.DataArray(data=float(loc_df['referenceLongitude'].values[idx])) + ds['alt'] = xr.DataArray(data=float(loc_df['referenceElevation'].values[idx])) variables = [ 'xOffset', 'yOffset', @@ -100,7 +107,10 @@ def read_neon_csv(files, variable_files=None, position_files=None): 'yAzimuth', ] for v in variables: - ds[v] = xr.DataArray(data=float(loc_df[v].values[idx])) + if len(loc_df[v].values) > 1: + ds[v] = 
xr.DataArray(data=float(loc_df[v].values[idx][0])) + else: + ds[v] = xr.DataArray(data=float(loc_df[v].values[idx])) multi_ds.append(ds) ds = xr.merge(multi_ds) diff --git a/act/io/noaapsl.py b/act/io/noaapsl.py index a5e2da5e46..8fb4099514 100644 --- a/act/io/noaapsl.py +++ b/act/io/noaapsl.py @@ -188,7 +188,7 @@ def _parse_psl_wind_lines(filepath, lines, line_offset=0): beam_elevation = np.array([beam_elevation1, beam_elevation2, beam_elevation3], dtype='float32') # Read in the data table section using pandas - df = pd.read_csv(filepath, skiprows=line_offset + 10, delim_whitespace=True) + df = pd.read_csv(filepath, skiprows=line_offset + 10, sep=r'\s+') # Only read in the number of rows for a given set of gates df = df.iloc[: int(number_of_range_gates)] @@ -326,7 +326,7 @@ def _parse_psl_temperature_lines(filepath, lines, line_offset=0): beam_azimuth, beam_elevation = filter_list(lines[8].split(' ')).astype(float) # Read in the data table section using pandas - df = pd.read_csv(filepath, skiprows=line_offset + 10, delim_whitespace=True) + df = pd.read_csv(filepath, skiprows=line_offset + 10, sep=r'\s+') # Only read in the number of rows for a given set of gates df = df.iloc[: int(number_of_gates)] @@ -1115,7 +1115,7 @@ def _parse_psl_radar_moments(files): f, skiprows=[0, 1, 2], nrows=int(data['n_gates']['data'][-1]) - 1, - delim_whitespace=True, + sep=r'\s+', names=list(names.keys()), ) index2 = 0 @@ -1143,7 +1143,7 @@ def _parse_psl_radar_moments(files): f, skiprows=list(range(index2 + 1)), nrows=int(data['n_gates']['data'][-1]) - 1, - delim_whitespace=True, + sep=r'\s+', names=list(names.keys()), ) diff --git a/act/io/sodar.py b/act/io/sodar.py index 15dc238f45..ec5c83f70c 100644 --- a/act/io/sodar.py +++ b/act/io/sodar.py @@ -74,11 +74,9 @@ def read_mfas_sodar(filepath): # Parse data to a dataframe skipping rows that aren't data. # tmp_columns is used to removed '#' column that causes # columns to move over by one. 
- df = pd.read_table(filepath, - sep=r'\s+', - skiprows=skip_full_ind, - names=tmp_columns, - usecols=columns) + df = pd.read_table( + filepath, sep=r'\s+', skiprows=skip_full_ind, names=tmp_columns, usecols=columns + ) df = df[~df['W'].isin(['dir'])].reset_index(drop=True) @@ -158,7 +156,7 @@ def _metadata_retrieval(lines): file_type_ind = np.argwhere(line_array == '# file type')[0][0] # Index the section of file information. - file_def = line_array[file_info_ind + 2:file_type_ind - 1] + file_def = line_array[file_info_ind + 2 : file_type_ind - 1] # Create a dictionary of file information to be plugged in later to the xarray # dataset attributes. @@ -179,7 +177,7 @@ def _metadata_retrieval(lines): data_ind = np.argwhere(line_array == '# beginning of data block')[0][0] # Index the section of variable information. - variable_def = line_array[variable_info_ind + 2 :data_ind - 1] + variable_def = line_array[variable_info_ind + 2 : data_ind - 1] # Create a dictionary of variable information to be plugged in later to the xarray # variable attributes. Skipping error code as it does not have metadata similar to diff --git a/act/plotting/__init__.py b/act/plotting/__init__.py index 68547a9a3c..1e8bad1732 100644 --- a/act/plotting/__init__.py +++ b/act/plotting/__init__.py @@ -17,10 +17,10 @@ import lazy_loader as lazy # Load colormaps -import cmweather +import cmweather # noqa # Eagerly load in common -from . import common +from . 
import common # noqa __getattr__, __dir__, __all__ = lazy.attach( __name__, @@ -45,6 +45,6 @@ 'timeseriesdisplay': ['TimeSeriesDisplay'], 'windrosedisplay': ['WindRoseDisplay'], 'xsectiondisplay': ['XSectionDisplay'], - 'distributiondisplay' : ['DistributionDisplay'], + 'distributiondisplay': ['DistributionDisplay'], }, ) diff --git a/act/plotting/distributiondisplay.py b/act/plotting/distributiondisplay.py index 0dec535c43..5cb967b265 100644 --- a/act/plotting/distributiondisplay.py +++ b/act/plotting/distributiondisplay.py @@ -5,7 +5,7 @@ import xarray as xr import pandas as pd -from ..utils import datetime_utils as dt_utils +from ..utils import datetime_utils as dt_utils, calculate_percentages from .plot import Display @@ -184,7 +184,8 @@ def plot_stacked_bar( ydata.values.flatten(), density=density, bins=[bins, sortby_bins], - **hist_kwargs) + **hist_kwargs, + ) x_inds = (x_bins[:-1] + x_bins[1:]) / 2.0 self.axes[subplot_index].bar( x_inds, @@ -202,8 +203,9 @@ def plot_stacked_bar( ) self.axes[subplot_index].legend() else: - my_hist, bins = np.histogram(xdata.values.flatten(), bins=bins, - density=density, **hist_kwargs) + my_hist, bins = np.histogram( + xdata.values.flatten(), bins=bins, density=density, **hist_kwargs + ) x_inds = (bins[:-1] + bins[1:]) / 2.0 self.axes[subplot_index].bar(x_inds, my_hist) @@ -322,7 +324,9 @@ def plot_size_distribution( ) if time is not None: t = pd.Timestamp(time) - set_title += ''.join([' at ', ':'.join([str(t.hour), str(t.minute), str(t.second)])]) + set_title += ''.join( + [' at ', ':'.join([str(t.hour), str(t.minute), str(t.second)])] + ) self.axes[subplot_index].set_title(set_title) self.axes[subplot_index].step(bins.values, xdata.values, **kwargs) self.axes[subplot_index].set_xlabel(xtitle) @@ -425,7 +429,7 @@ def plot_stairstep( ydata.values.flatten(), density=density, bins=[bins, sortby_bins], - **hist_kwargs + **hist_kwargs, ) x_inds = (x_bins[:-1] + x_bins[1:]) / 2.0 self.axes[subplot_index].step( @@ -443,8 +447,9 @@ 
def plot_stairstep( ) self.axes[subplot_index].legend() else: - my_hist, bins = np.histogram(xdata.values.flatten(), bins=bins, - density=density, **hist_kwargs) + my_hist, bins = np.histogram( + xdata.values.flatten(), bins=bins, density=density, **hist_kwargs + ) x_inds = (bins[:-1] + bins[1:]) / 2.0 self.axes[subplot_index].step(x_inds, my_hist, **kwargs) @@ -575,15 +580,15 @@ def plot_heatmap( if x_bins is None: my_hist, x_bins, y_bins = np.histogram2d( - xdata.values.flatten(), ydata.values.flatten(), density=density, - **hist_kwargs) + xdata.values.flatten(), ydata.values.flatten(), density=density, **hist_kwargs + ) else: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density, bins=[x_bins, y_bins], - **hist_kwargs + **hist_kwargs, ) # Adding in the ability to threshold the heatmaps if threshold is not None: @@ -616,7 +621,7 @@ def plot_heatmap( return return_dict - def set_ratio_line(self, subplot_index=(0, )): + def set_ratio_line(self, subplot_index=(0,)): """ Sets the 1:1 ratio line. @@ -633,16 +638,17 @@ def set_ratio_line(self, subplot_index=(0, )): ratio = np.linspace(xlims, xlims[-1]) self.axes[subplot_index].plot(ratio, ratio, 'k--') - def plot_scatter(self, - x_field, - y_field, - m_field=None, - dsname=None, - cbar_label=None, - set_title=None, - subplot_index=(0,), - **kwargs, - ): + def plot_scatter( + self, + x_field, + y_field, + m_field=None, + dsname=None, + cbar_label=None, + set_title=None, + subplot_index=(0,), + **kwargs, + ): """ This procedure will produce a scatter plot from 2 variables. 
def plot_pie_chart(
    self,
    fields,
    time=None,
    time_slice=None,
    threshold=None,
    fill_value=0.0,
    dsname=None,
    subplot_index=(0,),
    set_title=None,
    autopct='%1.1f%%',
    **kwargs,
):
    """
    Draws a pie chart of the relative contribution of each selected field.

    Parameters
    ----------
    fields : list
        Names of the fields whose percentages make up the wedges.
    time : datetime
        Single datetime forwarded to act.utils.calculate_percentages.
        Default is None, meaning all data are used.
    time_slice : tuple
        Pair of datetimes forwarded to act.utils.calculate_percentages so
        that only data between them are used. Default is None, meaning all
        data are used.
    threshold : float
        Values below this threshold are treated as invalid.
        Default is None.
    fill_value : float
        Replacement for invalid data. Only used if a threshold is provided.
    dsname : str or None
        Name of the datastream holding the fields. Leave as None to let
        ACT pick it when the display holds a single dataset.
    subplot_index : tuple
        Index of the subplot to draw into.
    set_title : str
        Title of the plot. If None, one is built from the datastream name
        and the first time stamp.
    autopct : str
        Format string for the wedge percentages. Default shows one decimal
        place. Set to None to hide the percentage text.
    **kwargs : keywords
        Extra keywords handed to the axes' pie call.

    Returns
    -------
    ax : matplotlib axis handle
        The matplotlib axis handle of the plot

    """
    # Resolve the datastream: only unambiguous when one dataset is present
    if dsname is None:
        available = list(self._ds.keys())
        if len(available) > 1:
            raise ValueError(
                'You must choose a datastream when there are 2 or more datasets '
                'in the DistributionDisplay object.'
            )
        dsname = available[0]

    # Make sure a figure and an axis exist before drawing
    if self.fig is None:
        self.fig = plt.figure()
    if self.axes is None:
        self.axes = np.array([plt.axes()])
        self.fig.add_axes(self.axes[0])

    target_ax = self.axes[subplot_index]

    # Default title: "<datastream> on <date of first sample>"
    if set_title is None:
        first_sample = self._ds[dsname].time.values[0]
        set_title = ' '.join([dsname, 'on', dt_utils.numpy_to_arm_date(first_sample)])
    target_ax.set_title(set_title)

    # Percentages are computed after the title is set, as before
    percentages = calculate_percentages(
        self._ds[dsname],
        fields,
        time=time,
        time_slice=time_slice,
        threshold=threshold,
        fill_value=fill_value,
    )

    wedge_names = percentages.keys()
    wedge_sizes = [percentages[name] for name in wedge_names]
    target_ax.pie(wedge_sizes, labels=wedge_names, autopct=autopct, **kwargs)

    return self.axes[subplot_index]
matplotlib import matplotlib.pyplot as plt @@ -122,6 +121,11 @@ def geoplot( is made. See the matplotlib documentation for further details on what keyword arguments are available. + Returns + ------- + ax : matplotlib axis handle + The matplotlib axis handle of the plot. + """ if dsname is None and len(self._ds.keys()) > 1: raise ValueError( diff --git a/act/plotting/plot.py b/act/plotting/plot.py index 0d5a31eca1..9dfc15c5de 100644 --- a/act/plotting/plot.py +++ b/act/plotting/plot.py @@ -71,8 +71,7 @@ class with this set to None will create a new figure handle. See the """ - def __init__(self, ds, subplot_shape=(1,), ds_name=None, subplot_kw=None, - **kwargs): + def __init__(self, ds, subplot_shape=(1,), ds_name=None, subplot_kw=None, **kwargs): if isinstance(ds, xr.Dataset): if 'datastream' in ds.attrs.keys() is not None: self._ds = {ds.attrs['datastream']: ds} @@ -122,8 +121,7 @@ def __init__(self, ds, subplot_shape=(1,), ds_name=None, subplot_kw=None, if subplot_shape is not None: self.add_subplots(subplot_shape, subplot_kw=subplot_kw, **kwargs) - def add_subplots(self, subplot_shape=(1,), secondary_y=False, subplot_kw=None, - **kwargs): + def add_subplots(self, subplot_shape=(1,), secondary_y=False, subplot_kw=None, **kwargs): """ Adds subplots to the Display object. The current figure in the object will be deleted and overwritten. @@ -235,8 +233,9 @@ def assign_to_figure_axis(self, fig, ax): self.fig = fig self.axes = np.array([ax]) - def add_colorbar(self, mappable, title=None, subplot_index=(0,), pad=None, - width=None, **kwargs): + def add_colorbar( + self, mappable, title=None, subplot_index=(0,), pad=None, width=None, **kwargs + ): """ Adds a colorbar to the plot. 
@@ -301,7 +300,7 @@ def group_by(self, units): return DisplayGroupby(self, units) -class DisplayGroupby(object): +class DisplayGroupby: def __init__(self, display, units): """ @@ -348,8 +347,7 @@ def plot_group(self, func_name, dsname=None, **kwargs): func = getattr(self.display, func_name) if not callable(func): - raise RuntimeError("The specified string is not a function of " - "the Display object.") + raise RuntimeError("The specified string is not a function of " "the Display object.") subplot_shape = self.display.axes.shape i = 0 @@ -382,18 +380,25 @@ def plot_group(self, func_name, dsname=None, **kwargs): days_in_year = 365 year_diff = ds1.time.dt.year - first_year time_diff = np.array( - [np.timedelta64(x * days_in_year, 'D') for x in year_diff.values]) + [np.timedelta64(x * days_in_year, 'D') for x in year_diff.values] + ) ds1['time'] = ds1.time - time_diff self.display._ds[key + '%d_%d' % (k, yr)] = ds1 func(dsname=key + '%d_%d' % (k, yr), label=str(yr), **kwargs) self.mapping[key + '%d_%d' % (k, yr)] = subplot_index - self.xlims[key + '%d_%d' % (k, yr)] = (ds1.time.values.min(), ds1.time.values.max()) + self.xlims[key + '%d_%d' % (k, yr)] = ( + ds1.time.values.min(), + ds1.time.values.max(), + ) del self.display._ds[key + '_%d' % k] else: func(dsname=key + '_%d' % k, **kwargs) self.mapping[key + '_%d' % k] = subplot_index if self.isTimeSeriesDisplay: - self.xlims[key + '_%d' % k] = (ds.time.values.min(), ds.time.values.max()) + self.xlims[key + '_%d' % k] = ( + ds.time.values.min(), + ds.time.values.max(), + ) i = i + 1 if wrap_around is False and i < np.prod(subplot_shape): diff --git a/act/plotting/skewtdisplay.py b/act/plotting/skewtdisplay.py index 36190cf457..6fab52ce42 100644 --- a/act/plotting/skewtdisplay.py +++ b/act/plotting/skewtdisplay.py @@ -3,7 +3,6 @@ """ -import warnings from copy import deepcopy import matplotlib.pyplot as plt @@ -56,8 +55,7 @@ def __init__(self, ds, subplot_shape=(1,), subplot=None, ds_name=None, set_fig=N # We want to 
use our routine to handle subplot adding, not the main # one new_kwargs = kwargs.copy() - super().__init__(ds, None, ds_name, subplot_kw=dict(projection='skewx'), - **new_kwargs) + super().__init__(ds, None, ds_name, subplot_kw=dict(projection='skewx'), **new_kwargs) # Make a SkewT object for each subplot self.add_subplots(subplot_shape, set_fig=set_fig, subplot=subplot, **kwargs) @@ -354,7 +352,9 @@ def plot_from_u_and_v( if not all(p[i] <= p[i + 1] for i in range(len(p) - 1)): if 'time' in self._ds: self._ds[dsname][p_field] = ( - self._ds[dsname][p_field].rolling(time=smooth_p, min_periods=1, center=True).mean() + self._ds[dsname][p_field] + .rolling(time=smooth_p, min_periods=1, center=True) + .mean() ) p = self._ds[dsname][p_field] @@ -450,7 +450,9 @@ def plot_from_u_and_v( self.SkewT[subplot_index].plot_dry_adiabats(pressure=plp, t0=t0, **dry_adiabats_kwargs) if plot_moist_adiabats: - self.SkewT[subplot_index].plot_moist_adiabats(t0=t0, pressure=plp, **moist_adiabats_kwargs) + self.SkewT[subplot_index].plot_moist_adiabats( + t0=t0, pressure=plp, **moist_adiabats_kwargs + ) if plot_mixing_lines: self.SkewT[subplot_index].plot_mixing_lines(pressure=plp, **mixing_lines_kwargs) @@ -458,7 +460,7 @@ def plot_from_u_and_v( # Set Title if set_title is None: if 'time' in self._ds[dsname]: - title_time = dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]), + title_time = (dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]),) elif '_file_dates' in self._ds[dsname].attrs: title_time = self._ds[dsname].attrs['_file_dates'][0] else: diff --git a/act/plotting/timeseriesdisplay.py b/act/plotting/timeseriesdisplay.py index 70aee57dce..45f321aa08 100644 --- a/act/plotting/timeseriesdisplay.py +++ b/act/plotting/timeseriesdisplay.py @@ -151,7 +151,9 @@ def day_night_background(self, dsname=None, subplot_index=(0,)): for value, name in zip(lat_lon_list, ['Latitude', 'Longitude']): if not np.isfinite(value): - warnings.warn(f"{name} value in dataset equal to 
'{value}' is not finite. ", RuntimeWarning) + warnings.warn( + f"{name} value in dataset equal to '{value}' is not finite. ", RuntimeWarning + ) return lat = lat_lon_list[0] @@ -196,7 +198,7 @@ def day_night_background(self, dsname=None, subplot_index=(0,)): for ii in noon: ax.axvline(x=ii, linestyle='--', color='y', zorder=1) - def set_xrng(self, xrng, subplot_index=(0, 0)): + def set_xrng(self, xrng, subplot_index=(0,)): """ Sets the x range of the plot. @@ -215,15 +217,20 @@ def set_xrng(self, xrng, subplot_index=(0, 0)): # This is to catch that and expand the range so we avoid the warning. if xrng[0] == xrng[1]: if isinstance(xrng[0], np.datetime64): - print(f'\nAttempting to set xlim range to single value {xrng[0]}. ' - 'Expanding range by 2 seconds.\n') + print( + f'\nAttempting to set xlim range to single value {xrng[0]}. ' + 'Expanding range by 2 seconds.\n' + ) xrng[0] -= np.timedelta64(1, 's') xrng[1] += np.timedelta64(1, 's') elif isinstance(xrng[0], dt.datetime): - print(f'\nAttempting to set xlim range to single value {xrng[0]}. ' - 'Expanding range by 2 seconds.\n') + print( + f'\nAttempting to set xlim range to single value {xrng[0]}. ' + 'Expanding range by 2 seconds.\n' + ) xrng[0] -= dt.timedelta(seconds=1) xrng[1] += dt.timedelta(seconds=1) + self.axes[subplot_index].set_xlim(xrng) # Make sure that the xrng value is a numpy array not pandas @@ -312,7 +319,7 @@ def plot( cbar_h_adjust=None, y_axis_flag_meanings=False, colorbar_labels=None, - cb_friendly=False, + cvd_friendly=False, match_line_label_color=False, **kwargs, ): @@ -405,8 +412,8 @@ def plot( 1: {'text': 'Liquid', 'color': 'green'}, 2: {'text': 'Ice', 'color': 'blue'}, 3: {'text': 'Mixed phase', 'color': 'purple'}} - cb_friendly : boolean - Set to true if you want to use the integrated colorblind friendly + cvd_friendly : boolean + Set to true if you want to use the integrated color vision deficiency (CVD) friendly colors for green/red based on the Homeyer colormap. 
match_line_label_color : boolean Will set the y label to match the line color in the plot. This @@ -433,12 +440,24 @@ def plot( if y_axis_flag_meanings: kwargs['linestyle'] = '' - if cb_friendly: + if cvd_friendly: cmap = 'HomeyerRainbow' - assessment_overplot_category_color['Bad'] = (0.9285714285714286, 0.7130901016453677, 0.7130901016453677) - assessment_overplot_category_color['Incorrect'] = (0.9285714285714286, 0.7130901016453677, 0.7130901016453677) - assessment_overplot_category_color['Not Failing'] = (0.0, 0.4240129715562796, 0.4240129715562796), - assessment_overplot_category_color['Acceptable'] = (0.0, 0.4240129715562796, 0.4240129715562796), + assessment_overplot_category_color['Bad'] = ( + 0.9285714285714286, + 0.7130901016453677, + 0.7130901016453677, + ) + assessment_overplot_category_color['Incorrect'] = ( + 0.9285714285714286, + 0.7130901016453677, + 0.7130901016453677, + ) + assessment_overplot_category_color['Not Failing'] = ( + (0.0, 0.4240129715562796, 0.4240129715562796), + ) + assessment_overplot_category_color['Acceptable'] = ( + (0.0, 0.4240129715562796, 0.4240129715562796), + ) # Get data and dimensions data = self._ds[dsname][field] @@ -633,9 +652,7 @@ def plot( ] ) else: - date_result = search( - r'\d{4}-\d{1,2}-\d{1,2}', self._ds[dsname].time.attrs['units'] - ) + date_result = search(r'\d{4}-\d{1,2}-\d{1,2}', self._ds[dsname].time.attrs['units']) if date_result is not None: set_title = ' '.join([dsname, field, 'on', date_result.group(0)]) else: @@ -833,6 +850,8 @@ def plot_barbs_from_u_v( invert_y_axis=True, num_barbs_x=20, num_barbs_y=20, + barb_step_x=None, + barb_step_y=None, use_var_for_y=None, **kwargs, ): @@ -866,6 +885,12 @@ def plot_barbs_from_u_v( The number of wind barbs to plot in the x axis. num_barbs_y : int The number of wind barbs to plot in the y axis. + barb_step_x : int + Step between each wind barb to plot. If set, will override + values given for num_barbs_x + barb_step_y : int + Step between each wind barb to plot. 
If set, will override + values given for num_barbs_y cmap : matplotlib.colors.LinearSegmentedColormap A color map to use with wind barbs. If this is set the plt.barbs routine will be passed the C parameter scaled as sqrt of sum of the @@ -902,8 +927,9 @@ def plot_barbs_from_u_v( v = self._ds[dsname][v_field].values dim = list(self._ds[dsname][u_field].dims) xdata = self._ds[dsname][dim[0]].values - num_x = xdata.shape[-1] - barb_step_x = round(num_x / num_barbs_x) + if barb_step_x is None: + num_x = xdata.shape[-1] + barb_step_x = round(num_x / num_barbs_x) if barb_step_x == 0: barb_step_x = 1 @@ -920,8 +946,9 @@ def plot_barbs_from_u_v( else: units = '' ytitle = ''.join(['(', units, ')']) - num_y = ydata.shape[0] - barb_step_y = round(num_y / num_barbs_y) + if barb_step_y is None: + num_y = ydata.shape[0] + barb_step_y = round(num_y / num_barbs_y) if barb_step_y == 0: barb_step_y = 1 @@ -1183,9 +1210,7 @@ def plot_time_height_xsection_from_1d_data( ax = self.axes[subplot_index] - mesh = ax.pcolormesh( - x_times, y_levels, np.transpose(data), shading=set_shading, **kwargs - ) + mesh = ax.pcolormesh(x_times, y_levels, np.transpose(data), shading=set_shading, **kwargs) if day_night_background is True: self.day_night_background(subplot_index=subplot_index, dsname=dsname) @@ -1263,7 +1288,7 @@ def time_height_scatter( cb_label=None, subplot_index=(0,), plot_alt_field=False, - cb_friendly=False, + cvd_friendly=False, day_night_background=False, set_title=None, **kwargs, @@ -1294,7 +1319,7 @@ def time_height_scatter( The index of the subplot to set the x range of. 
plot_alt_field : boolean Set to true to plot the altitude field on the secondary y-axis - cb_friendly : boolean + cvd_friendly : boolean If set to True will use the Homeyer colormap day_night_background : boolean If set to True will plot the day_night_background @@ -1324,7 +1349,7 @@ def time_height_scatter( self.axes = np.array([plt.axes()]) self.fig.add_axes(self.axes[0]) - if cb_friendly: + if cvd_friendly: cmap = 'HomeyerRainbow' ax = self.axes[subplot_index] @@ -1366,9 +1391,7 @@ def time_height_scatter( ] ) else: - date_result = search( - r'\d{4}-\d{1,2}-\d{1,2}', self._ds[dsname].time.attrs['units'] - ) + date_result = search(r'\d{4}-\d{1,2}-\d{1,2}', self._ds[dsname].time.attrs['units']) if date_result is not None: set_title = ' '.join([dsname, data_field, 'on', date_result.group(0)]) else: @@ -1423,7 +1446,7 @@ def qc_flag_block_plot( assessment_color=None, edgecolor='face', set_shading='auto', - cb_friendly=False, + cvd_friendly=False, **kwargs, ): """ @@ -1452,8 +1475,8 @@ def qc_flag_block_plot( set_shading : string Option to to set the matplotlib.pcolormesh shading parameter. Default to 'auto' - cb_friendly : boolean - Set to true if you want to use the integrated colorblind friendly + cvd_friendly : boolean + Set to true if you want to use the integrated color vision deficiency (CVD) friendly colors for green/red based on the Homeyer colormap **kwargs : keyword arguments The keyword arguments for :func:`plt.broken_barh`. 
@@ -1469,7 +1492,7 @@ def qc_flag_block_plot( 'Not Failing': 'green', 'Acceptable': 'green', } - if cb_friendly: + if cvd_friendly: color_lookup['Bad'] = (0.9285714285714286, 0.7130901016453677, 0.7130901016453677) color_lookup['Incorrect'] = (0.9285714285714286, 0.7130901016453677, 0.7130901016453677) color_lookup['Not Failing'] = (0.0, 0.4240129715562796, 0.4240129715562796) @@ -1594,7 +1617,6 @@ def qc_flag_block_plot( yvalues = self._ds[dsname][dims[1]].values cMap = mplcolors.ListedColormap(plot_colors) - print(plot_colors) mesh = ax.pcolormesh( xvalues, yvalues, @@ -1646,7 +1668,6 @@ def qc_flag_block_plot( ) else: - test_nums = [] for ii, assess in enumerate(flag_assessments): if assess not in color_lookup: @@ -1664,9 +1685,7 @@ def qc_flag_block_plot( # Get test number from flag_mask bitpacked number test_nums.append(parse_bit(flag_masks[ii])) # Get masked array data to use mask for finding if/where test is set - data = self._ds[dsname].qcfilter.get_masked_data( - data_field, rm_tests=test_nums[-1] - ) + data = self._ds[dsname].qcfilter.get_masked_data(data_field, rm_tests=test_nums[-1]) if np.any(data.mask): # Get time ranges from time and masked data barh_list = reduce_time_ranges( diff --git a/act/plotting/windrosedisplay.py b/act/plotting/windrosedisplay.py index ef128c2ccc..fe99f41c62 100644 --- a/act/plotting/windrosedisplay.py +++ b/act/plotting/windrosedisplay.py @@ -36,8 +36,7 @@ class and has therefore has the same attributes as that class. 
""" def __init__(self, ds, subplot_shape=(1,), ds_name=None, **kwargs): - super().__init__(ds, subplot_shape, ds_name, subplot_kw=dict(projection='polar'), - **kwargs) + super().__init__(ds, subplot_shape, ds_name, subplot_kw=dict(projection='polar'), **kwargs) def set_thetarng(self, trng=(0.0, 360.0), subplot_index=(0,)): """ @@ -223,18 +222,14 @@ def plot( **kwargs, ) ) - ax.legend( - loc=legend_loc, bbox_to_anchor=legend_bbox, title=legend_title - ) + ax.legend(loc=legend_loc, bbox_to_anchor=legend_bbox, title=legend_title) ax.set_theta_zero_location('N') ax.set_theta_direction(-1) # Add an annulus with text stating % of time calm pct_calm = np.sum(spd_data <= calm_threshold) / len(spd_data) * 100 ax.set_rorigin(-2.5) - ax.annotate( - '%3.2f%%\n calm' % pct_calm, xy=(0, -2.5), ha='center', va='center' - ) + ax.annotate('%3.2f%%\n calm' % pct_calm, xy=(0, -2.5), ha='center', va='center') # Set the ticks to be nice numbers tick_max = tick_interval * round(np.nanmax(np.cumsum(wind_hist, axis=1)) / tick_interval) @@ -353,10 +348,10 @@ def plot_data( for i, d in enumerate(dir_bins_mid): if i < len(dir_bins_mid) - 1: idx = np.where((dir_data > d) & (dir_data <= dir_bins_mid[i + 1]))[0] - bins.append(d + (dir_bins_mid[i + 1] - d) / 2.) + bins.append(d + (dir_bins_mid[i + 1] - d) / 2.0) else: - idx = np.where((dir_data > d) & (dir_data <= 360.))[0] - bins.append(d + (360. - d) / 2.) 
+ idx = np.where((dir_data > d) & (dir_data <= 360.0))[0] + bins.append(d + (360.0 - d) / 2.0) if plot_type == 'line': if line_plot_calc == 'mean': @@ -398,8 +393,12 @@ def plot_data( ) hist = np.insert(hist, -1, hist[0], axis=0) cplot = self.axes[subplot_index].contourf( - np.deg2rad(xedges), yedges[0:-1], np.transpose(hist), - cmap=cmap, levels=clevels, **kwargs + np.deg2rad(xedges), + yedges[0:-1], + np.transpose(hist), + cmap=cmap, + levels=clevels, + **kwargs, ) plot_type_str = 'Heatmap of' cbar = self.fig.colorbar(cplot, ax=self.axes[subplot_index]) @@ -447,8 +446,13 @@ def plot_data( clevels = np.linspace(vmin, vmax, clevels) cplot = self.axes[subplot_index].contourf( - np.deg2rad(bins), spd_bins, np.transpose(mean_data), - cmap=cmap, levels=clevels, extend='both', **kwargs + np.deg2rad(bins), + spd_bins, + np.transpose(mean_data), + cmap=cmap, + levels=clevels, + extend='both', + **kwargs, ) plot_type_str = 'Mean of' cbar = self.fig.colorbar(cplot, ax=self.axes[subplot_index]) @@ -461,8 +465,8 @@ def plot_data( self.axes[subplot_index].set_theta_direction(-1) # Set Title - sdate = dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]), - edate = dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[-1]), + sdate = (dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]),) + edate = (dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[-1]),) if sdate == edate: date_str = 'on ' + sdate[0] @@ -474,13 +478,7 @@ def plot_data( units = '' if set_title is None: set_title = ' '.join( - [ - plot_type_str, - data_field + ' (' + units + ')', - 'by\n', - dir_field, - date_str - ] + [plot_type_str, data_field + ' (' + units + ')', 'by\n', dir_field, date_str] ) self.axes[subplot_index].set_title(set_title) plt.tight_layout(h_pad=1.05) diff --git a/act/plotting/xsectiondisplay.py b/act/plotting/xsectiondisplay.py index d989594c0c..0538e8d08d 100644 --- a/act/plotting/xsectiondisplay.py +++ b/act/plotting/xsectiondisplay.py @@ -75,6 +75,7 @@ def 
__init__(self, ds, subplot_shape=(1,), ds_name=None, **kwargs): super().__init__(ds, subplot_shape, ds_name, **kwargs) def set_subplot_to_map(self, subplot_index): + self.fig.delaxes(self.axes[subplot_index]) total_num_plots = self.axes.shape if len(total_num_plots) == 2: @@ -235,9 +236,9 @@ def plot_xsection( yc = y if x is None: - ax = my_dataarray.plot(ax=self.axes[subplot_index], **kwargs) + my_dataarray.plot(ax=self.axes[subplot_index], **kwargs) else: - ax = my_dataarray.plot(ax=self.axes[subplot_index], x=xc, y=yc, **kwargs) + my_dataarray.plot(ax=self.axes[subplot_index], x=xc, y=yc, **kwargs) the_coords = [the_keys for the_keys in my_dataarray.coords.keys()] if x is None: @@ -255,7 +256,7 @@ def plot_xsection( yrng = self.axes[subplot_index].get_ylim() self.set_yrng(yrng, subplot_index) del temp_ds - return ax + return self.axes[subplot_index] def plot_xsection_map( self, dsname, varname, subplot_index=(0,), coastlines=True, background=False, **kwargs @@ -290,7 +291,6 @@ def plot_xsection_map( raise ImportError( 'Cartopy needs to be installed in order to plot ' + 'cross sections on maps!' 
) - self.set_subplot_to_map(subplot_index) self.plot_xsection(dsname, varname, subplot_index=subplot_index, **kwargs) xlims = self.xrng[subplot_index].flatten() diff --git a/act/qc/__init__.py b/act/qc/__init__.py index df8a866305..2b043494fd 100644 --- a/act/qc/__init__.py +++ b/act/qc/__init__.py @@ -12,8 +12,10 @@ 'add_supplemental_qc', 'arm', 'bsrn_tests', + 'clean', 'comparison_tests', 'qcfilter', + 'qc_summary', 'qctests', 'radiometer_tests', 'sp2', @@ -26,9 +28,11 @@ 'set_bit', 'unset_bit', ], + 'qc_summary': ['QCSummary'], 'qctests': [ 'QCTests', ], + 'clean': ['CleanDataset'], 'radiometer_tests': [ 'fft_shading_test', 'fft_shading_test_process', diff --git a/act/qc/add_supplemental_qc.py b/act/qc/add_supplemental_qc.py index 2d94f15a2d..21e463bc94 100644 --- a/act/qc/add_supplemental_qc.py +++ b/act/qc/add_supplemental_qc.py @@ -2,7 +2,6 @@ import numpy as np from pathlib import Path from dateutil import parser -from os import environ # Example of the YAML file and how to construct. # The times are set as inclusive start to inclusive end time. @@ -66,9 +65,8 @@ def read_yaml_supplemental_qc( datetime64=True, time_delim=(';', ',', '|', r'\t'), none_if_empty=True, - quiet=False + quiet=False, ): - """ Returns a dictionary converstion of YAML file for flagging data. The dictionary will contain variable names as first key, assessents as second keys containing @@ -136,7 +134,8 @@ def read_yaml_supplemental_qc( except KeyError: raise RuntimeError( 'Unable to determine datastream name from Dataset. Need to set global attribute ' - '_datastream in Dataset or provided full path to flag file.') + '_datastream in Dataset or provided full path to flag file.' 
+ ) flag_file = list(Path(fullpath).glob(f'{datastream}.yml')) flag_file.extend(list(Path(fullpath).glob(f'{datastream}.yaml'))) @@ -164,7 +163,7 @@ def read_yaml_supplemental_qc( assessments = [ii.capitalize() for ii in assessments] # Read YAML file - with open(flag_file, "r") as fp: + with open(flag_file) as fp: try: data_dict = yaml.load(fp, Loader=yaml.FullLoader) except AttributeError: @@ -230,9 +229,8 @@ def apply_supplemental_qc( assessments=None, apply_all=True, exclude_all_variables=None, - quiet=False + quiet=False, ): - """ Apply flagging from supplemental QC file by adding new QC tests. @@ -284,7 +282,8 @@ def apply_supplemental_qc( exclude_vars.extend(exclude_all_variables) flag_dict = read_yaml_supplemental_qc( - ds, fullpath, variables=variables, assessments=assessments, quiet=quiet) + ds, fullpath, variables=variables, assessments=assessments, quiet=quiet + ) if flag_dict is None: return @@ -301,7 +300,8 @@ def apply_supplemental_qc( indexes = np.array([], dtype=np.int32) for vals in times: ind = np.argwhere( - (ds['time'].values >= vals[0]) & (ds['time'].values <= vals[1])) + (ds['time'].values >= vals[0]) & (ds['time'].values <= vals[1]) + ) if len(ind) > 0: indexes = np.append(indexes, ind) @@ -311,7 +311,8 @@ def apply_supplemental_qc( var_name, index=indexes, test_meaning=description, - test_assessment=asses_name) + test_assessment=asses_name, + ) var_name = '_all' if apply_all and var_name in flag_dict.keys(): @@ -325,7 +326,8 @@ def apply_supplemental_qc( indexes = np.array([], dtype=np.int32) for vals in times: ind = np.argwhere( - (ds['time'].values >= vals[0]) & (ds['time'].values <= vals[1])) + (ds['time'].values >= vals[0]) & (ds['time'].values <= vals[1]) + ) if ind.size > 0: indexes = np.append(indexes, np.ndarray.flatten(ind)) @@ -347,4 +349,5 @@ def apply_supplemental_qc( all_var_name, index=indexes, test_meaning=description, - test_assessment=asses_name) + test_assessment=asses_name, + ) diff --git a/act/qc/arm.py b/act/qc/arm.py 
index 0fb84597d4..bf323e77ac 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -93,8 +93,10 @@ def add_dqr_to_qc( raise ValueError('Dataset does not have datastream attribute') if datastream == DEFAULT_DATASTREAM_NAME: - raise ValueError("'datastream' name required for DQR service set to default value " - f"{datastream}. Unable to perform DQR service query.") + raise ValueError( + "'datastream' name required for DQR service set to default value " + f"{datastream}. Unable to perform DQR service query." + ) # Clean up QC to conform to CF conventions if cleanup_qc: @@ -163,8 +165,10 @@ def add_dqr_to_qc( } if dqr_link: - print(f"{dqr_number} - {quality_category.lower().capitalize()}: " - f"https://adc.arm.gov/ArchiveServices/DQRService?dqrid={dqr_number}") + print( + f"{dqr_number} - {quality_category.lower().capitalize()}: " + f"https://adc.arm.gov/ArchiveServices/DQRService?dqrid={dqr_number}" + ) # Check to ensure variable is list if variable and not isinstance(variable, (list, tuple)): @@ -173,7 +177,6 @@ def add_dqr_to_qc( loc_vars = ['lat', 'lon', 'alt', 'latitude', 'longitude', 'altitude'] for key, value in dqr_results.items(): for var_name in value['variables']: - # Do not process on location variables if skip_location_vars and var_name in loc_vars: continue @@ -182,12 +185,21 @@ def add_dqr_to_qc( if variable is not None and var_name not in variable: continue + # Do not process quality control variables as this will create a new + # quality control variable for the quality control varible. 
+ try: + if ds[var_name].attrs['standard_name'] == 'quality_flag': + continue + except KeyError: + pass + try: ds.qcfilter.add_test( var_name, index=np.unique(value['index']), test_meaning=value['test_meaning'], - test_assessment=value['test_assessment']) + test_assessment=value['test_assessment'], + ) except KeyError: # Variable name not in Dataset continue diff --git a/act/qc/bsrn_tests.py b/act/qc/bsrn_tests.py index c585d01770..1e3c5e4ef4 100644 --- a/act/qc/bsrn_tests.py +++ b/act/qc/bsrn_tests.py @@ -45,11 +45,12 @@ def _calculate_solar_parameters(ds, lat_name, lon_name, solar_constant): # Calculate solar parameters elevation, _, solar_distance = get_solar_azimuth_elevation( - latitude=latitude, longitude=longitude, time=ds['time'].values) + latitude=latitude, longitude=longitude, time=ds['time'].values + ) solar_distance = np.nanmean(solar_distance) Sa = solar_constant / solar_distance**2 - sza = 90. - elevation + sza = 90.0 - elevation return (sza, Sa) @@ -117,9 +118,8 @@ def bsrn_limits_test( solar_constant=1366, lat_name='lat', lon_name='lon', - use_dask=False + use_dask=False, ): - """ Method to apply BSRN limits test and add results to ancillary quality control variable. Need to provide variable name for each measurement for the test to be performed. If no @@ -188,71 +188,88 @@ def bsrn_limits_test( test = test.lower() test_names = [ii.lower() for ii in test_names_org] if test not in test_names: - raise ValueError(f"Value of '{test}' in keyword 'test' not recognized. " - f"Must a single value in options {test_names_org}") + raise ValueError( + f"Value of '{test}' in keyword 'test' not recognized. " + f"Must a single value in options {test_names_org}" + ) sza, Sa = _calculate_solar_parameters(self._ds, lat_name, lon_name, solar_constant) if test == test_names[0]: if sw_min_limit is None: - sw_min_limit = -4. + sw_min_limit = -4.0 if lw_min_dn_limit is None: - lw_min_dn_limit = 40. 
+ lw_min_dn_limit = 40.0 if lw_min_up_limit is None: - lw_min_up_limit = 40. + lw_min_up_limit = 40.0 if lw_max_dn_limit is None: - lw_max_dn_limit = 700. + lw_max_dn_limit = 700.0 if lw_max_up_limit is None: - lw_max_up_limit = 900. + lw_max_up_limit = 900.0 elif test == test_names[1]: if sw_min_limit is None: - sw_min_limit = -2. + sw_min_limit = -2.0 if lw_min_dn_limit is None: - lw_min_dn_limit = 60. + lw_min_dn_limit = 60.0 if lw_min_up_limit is None: - lw_min_up_limit = 60. + lw_min_up_limit = 60.0 if lw_max_dn_limit is None: - lw_max_dn_limit = 500. + lw_max_dn_limit = 500.0 if lw_max_up_limit is None: - lw_max_up_limit = 700. + lw_max_up_limit = 700.0 # Global Shortwave downwelling min and max tests if gbl_SW_dn_name is not None: cos_sza = np.cos(np.radians(sza)) - cos_sza[sza > 90.] = 0. + cos_sza[sza > 90.0] = 0.0 if test == test_names[0]: - sw_max_limit = Sa * 1.5 * cos_sza**1.2 + 100. + sw_max_limit = Sa * 1.5 * cos_sza**1.2 + 100.0 elif test == test_names[1]: - sw_max_limit = Sa * 1.2 * cos_sza**1.2 + 50. 
+ sw_max_limit = Sa * 1.2 * cos_sza**1.2 + 50.0 - index_min, index_max = _find_indexes(self._ds, gbl_SW_dn_name, sw_min_limit, sw_max_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, gbl_SW_dn_name, sw_min_limit, sw_max_limit, use_dask + ) self._ds.qcfilter.add_test( - gbl_SW_dn_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2") + gbl_SW_dn_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2", + ) self._ds.qcfilter.add_test( - gbl_SW_dn_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit") + gbl_SW_dn_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit", + ) # Diffuse Shortwave downwelling min and max tests if glb_diffuse_SW_dn_name is not None: with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) if test == test_names[0]: - sw_max_limit = Sa * 0.95 * np.cos(np.radians(sza))**1.2 + 50. + sw_max_limit = Sa * 0.95 * np.cos(np.radians(sza)) ** 1.2 + 50.0 elif test == test_names[1]: - sw_max_limit = Sa * 0.75 * np.cos(np.radians(sza))**1.2 + 30. 
+ sw_max_limit = Sa * 0.75 * np.cos(np.radians(sza)) ** 1.2 + 30.0 - index_min, index_max = _find_indexes(self._ds, glb_diffuse_SW_dn_name, sw_min_limit, - sw_max_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, glb_diffuse_SW_dn_name, sw_min_limit, sw_max_limit, use_dask + ) self._ds.qcfilter.add_test( - glb_diffuse_SW_dn_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2") + glb_diffuse_SW_dn_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2", + ) self._ds.qcfilter.add_test( - glb_diffuse_SW_dn_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit") + glb_diffuse_SW_dn_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit", + ) # Direct Normal Shortwave downwelling min and max tests if direct_normal_SW_dn_name is not None: @@ -261,17 +278,24 @@ def bsrn_limits_test( if test == test_names[0]: sw_max_limit = Sa elif test == test_names[1]: - sw_max_limit = Sa * 0.95 * np.cos(np.radians(sza))**0.2 + 10. 
+ sw_max_limit = Sa * 0.95 * np.cos(np.radians(sza)) ** 0.2 + 10.0 - index_min, index_max = _find_indexes(self._ds, direct_normal_SW_dn_name, - sw_min_limit, sw_max_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, direct_normal_SW_dn_name, sw_min_limit, sw_max_limit, use_dask + ) self._ds.qcfilter.add_test( - direct_normal_SW_dn_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2") + direct_normal_SW_dn_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2", + ) self._ds.qcfilter.add_test( - direct_normal_SW_dn_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit") + direct_normal_SW_dn_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit", + ) # Direct Shortwave downwelling min and max tests if direct_SW_dn_name is not None: @@ -280,64 +304,92 @@ def bsrn_limits_test( if test == test_names[0]: sw_max_limit = Sa * np.cos(np.radians(sza)) elif test == test_names[1]: - sw_max_limit = Sa * 0.95 * np.cos(np.radians(sza))**1.2 + 10 + sw_max_limit = Sa * 0.95 * np.cos(np.radians(sza)) ** 1.2 + 10 - index_min, index_max = _find_indexes(self._ds, direct_SW_dn_name, - sw_min_limit, sw_max_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, direct_SW_dn_name, sw_min_limit, sw_max_limit, use_dask + ) self._ds.qcfilter.add_test( - direct_SW_dn_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2") + direct_SW_dn_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2", + ) self._ds.qcfilter.add_test( - direct_SW_dn_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit") + 
direct_SW_dn_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit", + ) # Shortwave up welling min and max tests if glb_SW_up_name is not None: with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) if test == test_names[0]: - sw_max_limit = Sa * 1.2 * np.cos(np.radians(sza))**1.2 + 50 + sw_max_limit = Sa * 1.2 * np.cos(np.radians(sza)) ** 1.2 + 50 elif test == test_names[1]: - sw_max_limit = Sa * np.cos(np.radians(sza))**1.2 + 50 + sw_max_limit = Sa * np.cos(np.radians(sza)) ** 1.2 + 50 - index_min, index_max = _find_indexes(self._ds, glb_SW_up_name, - sw_min_limit, sw_max_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, glb_SW_up_name, sw_min_limit, sw_max_limit, use_dask + ) self._ds.qcfilter.add_test( - glb_SW_up_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2") + glb_SW_up_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of {sw_min_limit} W/m^2", + ) self._ds.qcfilter.add_test( - glb_SW_up_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit") + glb_SW_up_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit", + ) # Longwave downwelling min and max tests if glb_LW_dn_name is not None: - index_min, index_max = _find_indexes(self._ds, glb_LW_dn_name, - lw_min_dn_limit, lw_max_dn_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, glb_LW_dn_name, lw_min_dn_limit, lw_max_dn_limit, use_dask + ) self._ds.qcfilter.add_test( - glb_LW_dn_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {lw_min_dn_limit} W/m^2") + glb_LW_dn_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of 
{lw_min_dn_limit} W/m^2", + ) self._ds.qcfilter.add_test( - glb_LW_dn_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit of {lw_max_dn_limit} W/m^2") + glb_LW_dn_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit of {lw_max_dn_limit} W/m^2", + ) # Longwave upwelling min and max tests if glb_LW_up_name is not None: - index_min, index_max = _find_indexes(self._ds, glb_LW_up_name, - lw_min_up_limit, lw_max_up_limit, use_dask) + index_min, index_max = _find_indexes( + self._ds, glb_LW_up_name, lw_min_up_limit, lw_max_up_limit, use_dask + ) self._ds.qcfilter.add_test( - glb_LW_up_name, index=index_min, test_assessment='Bad', - test_meaning=f"Value less than BSRN {test.lower()} limit of {lw_min_up_limit} W/m^2") + glb_LW_up_name, + index=index_min, + test_assessment='Bad', + test_meaning=f"Value less than BSRN {test.lower()} limit of {lw_min_up_limit} W/m^2", + ) self._ds.qcfilter.add_test( - glb_LW_up_name, index=index_max, test_assessment='Bad', - test_meaning=f"Value greater than BSRN {test.lower()} limit of {lw_max_up_limit} W/m^2") + glb_LW_up_name, + index=index_max, + test_assessment='Bad', + test_meaning=f"Value greater than BSRN {test.lower()} limit of {lw_max_up_limit} W/m^2", + ) def bsrn_comparison_tests( self, @@ -352,9 +404,9 @@ def bsrn_comparison_tests( test_assessment='Indeterminate', lat_name='lat', lon_name='lon', - LWdn_lt_LWup_component=25., - LWdn_gt_LWup_component=300., - use_dask=False + LWdn_lt_LWup_component=25.0, + LWdn_gt_LWup_component=300.0, + use_dask=False, ): """ Method to apply BSRN comparison tests and add results to ancillary quality control variable. 
@@ -418,23 +470,36 @@ def bsrn_comparison_tests( if isinstance(test, str): test = [test] - test_options = ['Global over Sum SW Ratio', 'Diffuse Ratio', 'SW up', 'LW down to air temp', - 'LW up to air temp', 'LW down to LW up'] + test_options = [ + 'Global over Sum SW Ratio', + 'Diffuse Ratio', + 'SW up', + 'LW down to air temp', + 'LW up to air temp', + 'LW down to LW up', + ] solar_constant = 1360.8 sza, Sa = _calculate_solar_parameters(self._ds, lat_name, lon_name, solar_constant) # Ratio of Global over Sum SW if test_options[0] in test: - if gbl_SW_dn_name is None or glb_diffuse_SW_dn_name is None or direct_normal_SW_dn_name is None: - raise ValueError('Must set keywords gbl_SW_dn_name, glb_diffuse_SW_dn_name, ' - f'direct_normal_SW_dn_name for {test_options[0]} test.') + if ( + gbl_SW_dn_name is None + or glb_diffuse_SW_dn_name is None + or direct_normal_SW_dn_name is None + ): + raise ValueError( + 'Must set keywords gbl_SW_dn_name, glb_diffuse_SW_dn_name, ' + f'direct_normal_SW_dn_name for {test_options[0]} test.' 
+ ) with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) if use_dask and isinstance(self._ds[glb_diffuse_SW_dn_name].data, da.Array): - sum_sw_down = (self._ds[glb_diffuse_SW_dn_name].data - + self._ds[direct_normal_SW_dn_name].data * np.cos(np.radians(sza))) + sum_sw_down = self._ds[glb_diffuse_SW_dn_name].data + self._ds[ + direct_normal_SW_dn_name + ].data * np.cos(np.radians(sza)) sum_sw_down[sum_sw_down < 50] = np.nan ratio = self._ds[gbl_SW_dn_name].data / sum_sw_down index_a = sza < 75 @@ -445,8 +510,9 @@ def bsrn_comparison_tests( index_4 = da.where((ratio < 0.85) & index_b, True, False) index = (index_1 | index_2 | index_3 | index_4).compute() else: - sum_sw_down = (self._ds[glb_diffuse_SW_dn_name].values - + self._ds[direct_normal_SW_dn_name].values * np.cos(np.radians(sza))) + sum_sw_down = self._ds[glb_diffuse_SW_dn_name].values + self._ds[ + direct_normal_SW_dn_name + ].values * np.cos(np.radians(sza)) sum_sw_down[sum_sw_down < 50] = np.nan ratio = self._ds[gbl_SW_dn_name].values / sum_sw_down index_a = sza < 75 @@ -458,18 +524,32 @@ def bsrn_comparison_tests( index = index_1 | index_2 | index_3 | index_4 test_meaning = "Ratio of Global over Sum shortwave larger than expected" - self._ds.qcfilter.add_test(gbl_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) - self._ds.qcfilter.add_test(glb_diffuse_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) - self._ds.qcfilter.add_test(direct_normal_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) + self._ds.qcfilter.add_test( + gbl_SW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) + self._ds.qcfilter.add_test( + glb_diffuse_SW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) + self._ds.qcfilter.add_test( + direct_normal_SW_dn_name, + index=index, + 
test_assessment=test_assessment, + test_meaning=test_meaning, + ) # Diffuse Ratio if test_options[1] in test: if gbl_SW_dn_name is None or glb_diffuse_SW_dn_name is None: - raise ValueError('Must set keywords gbl_SW_dn_name, glb_diffuse_SW_dn_name ' - f'for {test_options[1]} test.') + raise ValueError( + 'Must set keywords gbl_SW_dn_name, glb_diffuse_SW_dn_name ' + f'for {test_options[1]} test.' + ) with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) @@ -482,7 +562,9 @@ def bsrn_comparison_tests( index_2 = da.where((ratio >= 1.10) & index_b, True, False) index = (index_1 | index_2).compute() else: - ratio = self._ds[glb_diffuse_SW_dn_name].values / self._ds[gbl_SW_dn_name].values + ratio = ( + self._ds[glb_diffuse_SW_dn_name].values / self._ds[gbl_SW_dn_name].values + ) ratio[self._ds[gbl_SW_dn_name].values < 50] = np.nan index_a = sza < 75 index_1 = (ratio >= 1.05) & index_a @@ -491,104 +573,180 @@ def bsrn_comparison_tests( index = index_1 | index_2 test_meaning = "Ratio of Diffuse Shortwave over Global Shortwave larger than expected" - self._ds.qcfilter.add_test(gbl_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) - self._ds.qcfilter.add_test(glb_diffuse_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) + self._ds.qcfilter.add_test( + gbl_SW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) + self._ds.qcfilter.add_test( + glb_diffuse_SW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) # Shortwave up comparison if test_options[2] in test: - if glb_SW_up_name is None or glb_diffuse_SW_dn_name is None or direct_normal_SW_dn_name is None: - raise ValueError('Must set keywords glb_SW_up_name, glb_diffuse_SW_dn_name, ' - f'direct_normal_SW_dn_name for {test_options[2]} test.') + if ( + glb_SW_up_name is None + or glb_diffuse_SW_dn_name is None + or direct_normal_SW_dn_name 
is None + ): + raise ValueError( + 'Must set keywords glb_SW_up_name, glb_diffuse_SW_dn_name, ' + f'direct_normal_SW_dn_name for {test_options[2]} test.' + ) with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) if use_dask and isinstance(self._ds[glb_diffuse_SW_dn_name].data, da.Array): - sum_sw_down = (self._ds[glb_diffuse_SW_dn_name].data - + self._ds[direct_normal_SW_dn_name].data * np.cos(np.radians(sza))) + sum_sw_down = self._ds[glb_diffuse_SW_dn_name].data + self._ds[ + direct_normal_SW_dn_name + ].data * np.cos(np.radians(sza)) sum_sw_down[sum_sw_down < 50] = np.nan - index = da.where(self._ds[glb_SW_up_name].data > sum_sw_down, True, False).compute() + index = da.where( + self._ds[glb_SW_up_name].data > sum_sw_down, True, False + ).compute() else: - sum_sw_down = (self._ds[glb_diffuse_SW_dn_name].values - + self._ds[direct_normal_SW_dn_name].values * np.cos(np.radians(sza))) + sum_sw_down = self._ds[glb_diffuse_SW_dn_name].values + self._ds[ + direct_normal_SW_dn_name + ].values * np.cos(np.radians(sza)) sum_sw_down[sum_sw_down < 50] = np.nan index = self._ds[glb_SW_up_name].values > sum_sw_down test_meaning = "Ratio of Shortwave Upwelling greater than Shortwave Sum" - self._ds.qcfilter.add_test(glb_SW_up_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) - self._ds.qcfilter.add_test(glb_diffuse_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) - self._ds.qcfilter.add_test(direct_normal_SW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) + self._ds.qcfilter.add_test( + glb_SW_up_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) + self._ds.qcfilter.add_test( + glb_diffuse_SW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) + self._ds.qcfilter.add_test( + direct_normal_SW_dn_name, + index=index, + test_assessment=test_assessment, + 
test_meaning=test_meaning, + ) # Longwave down to air temperature comparison if test_options[3] in test: if glb_LW_dn_name is None or air_temp_name is None: - raise ValueError('Must set keywords glb_LW_dn_name, air_temp_name ' - f' for {test_options[3]} test.') - - air_temp = convert_units(self._ds[air_temp_name].values, - self._ds[air_temp_name].attrs['units'], 'degK') + raise ValueError( + 'Must set keywords glb_LW_dn_name, air_temp_name ' + f' for {test_options[3]} test.' + ) + + air_temp = convert_units( + self._ds[air_temp_name].values, self._ds[air_temp_name].attrs['units'], 'degK' + ) if use_dask and isinstance(self._ds[glb_LW_dn_name].data, da.Array): air_temp = da.array(air_temp) conversion = da.array(Stefan_Boltzmann * air_temp**4) index_1 = (0.4 * conversion) > self._ds[glb_LW_dn_name].data - index_2 = (conversion + 25.) < self._ds[glb_LW_dn_name].data + index_2 = (conversion + 25.0) < self._ds[glb_LW_dn_name].data index = (index_1 | index_2).compute() else: conversion = Stefan_Boltzmann * air_temp**4 index_1 = (0.4 * conversion) > self._ds[glb_LW_dn_name].values - index_2 = (conversion + 25.) 
< self._ds[glb_LW_dn_name].values + index_2 = (conversion + 25.0) < self._ds[glb_LW_dn_name].values index = index_1 | index_2 - test_meaning = "Longwave downwelling comparison to air temperature out side of expected range" - self._ds.qcfilter.add_test(glb_LW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) + test_meaning = ( + "Longwave downwelling comparison to air temperature out side of expected range" + ) + self._ds.qcfilter.add_test( + glb_LW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) # Longwave up to air temperature comparison if test_options[4] in test: if glb_LW_up_name is None or air_temp_name is None: - raise ValueError('Must set keywords glb_LW_up_name, air_temp_name ' - f'for {test_options[3]} test.') - - air_temp = convert_units(self._ds[air_temp_name].values, - self._ds[air_temp_name].attrs['units'], 'degK') + raise ValueError( + 'Must set keywords glb_LW_up_name, air_temp_name ' + f'for {test_options[3]} test.' 
+ ) + + air_temp = convert_units( + self._ds[air_temp_name].values, self._ds[air_temp_name].attrs['units'], 'degK' + ) if use_dask and isinstance(self._ds[glb_LW_up_name].data, da.Array): air_temp = da.array(air_temp) - index_1 = (Stefan_Boltzmann * (air_temp - 15)**4) > self._ds[glb_LW_up_name].data - index_2 = (Stefan_Boltzmann * (air_temp + 25)**4) < self._ds[glb_LW_up_name].data + index_1 = (Stefan_Boltzmann * (air_temp - 15) ** 4) > self._ds[glb_LW_up_name].data + index_2 = (Stefan_Boltzmann * (air_temp + 25) ** 4) < self._ds[glb_LW_up_name].data index = (index_1 | index_2).compute() else: - index_1 = (Stefan_Boltzmann * (air_temp - 15)**4) > self._ds[glb_LW_up_name].values - index_2 = (Stefan_Boltzmann * (air_temp + 25)**4) < self._ds[glb_LW_up_name].values + index_1 = (Stefan_Boltzmann * (air_temp - 15) ** 4) > self._ds[ + glb_LW_up_name + ].values + index_2 = (Stefan_Boltzmann * (air_temp + 25) ** 4) < self._ds[ + glb_LW_up_name + ].values index = index_1 | index_2 - test_meaning = "Longwave upwelling comparison to air temperature out side of expected range" - self._ds.qcfilter.add_test(glb_LW_up_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) + test_meaning = ( + "Longwave upwelling comparison to air temperature out side of expected range" + ) + self._ds.qcfilter.add_test( + glb_LW_up_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) # Lonwave down to longwave up comparison if test_options[5] in test: if glb_LW_dn_name is None or glb_LW_up_name is None: - raise ValueError('Must set keywords glb_LW_dn_name, glb_LW_up_name ' - f'for {test_options[3]} test.') + raise ValueError( + 'Must set keywords glb_LW_dn_name, glb_LW_up_name ' + f'for {test_options[3]} test.' 
+ ) if use_dask and isinstance(self._ds[glb_LW_dn_name].data, da.Array): - index_1 = da.where(self._ds[glb_LW_dn_name].data - > (self._ds[glb_LW_up_name].data + LWdn_lt_LWup_component), True, False) - index_2 = da.where(self._ds[glb_LW_dn_name].data - < (self._ds[glb_LW_up_name].data - LWdn_gt_LWup_component), True, False) + index_1 = da.where( + self._ds[glb_LW_dn_name].data + > (self._ds[glb_LW_up_name].data + LWdn_lt_LWup_component), + True, + False, + ) + index_2 = da.where( + self._ds[glb_LW_dn_name].data + < (self._ds[glb_LW_up_name].data - LWdn_gt_LWup_component), + True, + False, + ) index = (index_1 | index_2).compute() else: - index_1 = self._ds[glb_LW_dn_name].values > (self._ds[glb_LW_up_name].values + LWdn_lt_LWup_component) - index_2 = self._ds[glb_LW_dn_name].values < (self._ds[glb_LW_up_name].values - LWdn_gt_LWup_component) + index_1 = self._ds[glb_LW_dn_name].values > ( + self._ds[glb_LW_up_name].values + LWdn_lt_LWup_component + ) + index_2 = self._ds[glb_LW_dn_name].values < ( + self._ds[glb_LW_up_name].values - LWdn_gt_LWup_component + ) index = index_1 | index_2 - test_meaning = "Lonwave downwelling compared to longwave upwelling outside of expected range" - self._ds.qcfilter.add_test(glb_LW_dn_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) - self._ds.qcfilter.add_test(glb_LW_up_name, index=index, test_assessment=test_assessment, - test_meaning=test_meaning) + test_meaning = ( + "Lonwave downwelling compared to longwave upwelling outside of expected range" + ) + self._ds.qcfilter.add_test( + glb_LW_dn_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) + self._ds.qcfilter.add_test( + glb_LW_up_name, + index=index, + test_assessment=test_assessment, + test_meaning=test_meaning, + ) diff --git a/act/qc/clean.py b/act/qc/clean.py index c84a30efef..2c0bb7baa7 100644 --- a/act/qc/clean.py +++ b/act/qc/clean.py @@ -91,6 +91,7 @@ def cleanup( link_qc_variables=True, 
normalize_assessment=False, cleanup_cf_qc=True, + cleanup_incorrect_qc_attributes=True, **kwargs, ): """ @@ -118,6 +119,9 @@ def cleanup( Option to clean up assessments to use the same terminology. Set to False for default because should only be an issue after adding DQRs and the function to add DQRs calls this method. + cleanup_incorrect_qc_attributes : bool + Fix incorrectly named quality control variable attributes before + converting to standardized QC. **kwargs : keywords Keyword arguments passed through to clean.clean_arm_qc method. @@ -131,6 +135,12 @@ def cleanup( ds.clean.cleanup() """ + # There are some QC variables with incorrect bit_#_description attribute names. + # This will check for the incorrect attribute names and correct to allow next + # process to work correctly + if cleanup_incorrect_qc_attributes: + self._ds.clean.fix_incorrect_variable_bit_description_attributes() + # Convert ARM QC to be more like CF state fields if cleanup_arm_qc: self._ds.clean.clean_arm_qc(**kwargs) @@ -198,7 +208,6 @@ def handle_missing_values(self, default_missing_value=np.int32(-9999)): np.dtype('float32'), np.dtype('float64'), ]: - # Look at units variable to see if this is the stupid way some # ARM products mix data and state variables. If the units are not # in the normal list of unitless type assume this is a data variable @@ -437,7 +446,7 @@ def get_attr_info(self, variable=None, flag=False): 'Value is equal to missing_value.', 'Value is less than the valid_min.', 'Value is greater than the valid_max.', - 'Difference between current and previous values exceeds valid_delta.' 
+ 'Difference between current and previous values exceeds valid_delta.', ] return_dict['flag_tests'] = [1, 2, 3, 4] return_dict['flag_masks'] = [1, 2, 4, 8] @@ -452,7 +461,7 @@ def get_attr_info(self, variable=None, flag=False): 'bit_3_description', 'bit_3_assessment', 'bit_4_description', - 'bit_4_assessment' + 'bit_4_assessment', ] return return_dict @@ -493,10 +502,8 @@ def clean_arm_state_variables( for var in variables: flag_info = self.get_attr_info(variable=var, flag=integer_flag) if flag_info is not None: - # Add new attributes to variable for attr in ['flag_values', 'flag_meanings', 'flag_masks']: - if len(flag_info[attr]) > 0: # Only add if attribute does not exist. if attr in self._ds[var].attrs.keys() is False: @@ -582,10 +589,7 @@ def correct_valid_minmax(self, qc_variable): def link_variables(self): """ - Add some attributes to link and explain data - to QC data relationship. Will use non-CF standard_name - of quality_flag. Hopefully this will be added to the - standard_name table in the future. + Add some attributes to link and explain data to QC data relationship. """ for var in self._ds.data_vars: aa = re.match(r'^qc_(.+)', var) @@ -594,9 +598,10 @@ def link_variables(self): qc_variable = var except AttributeError: continue + # Skip data quality fields. try: - if not ('Quality check results on field:' in self._ds[var].attrs['long_name']): + if not self._ds[var].attrs["long_name"].startswith("Quality check results on"): continue except KeyError: pass @@ -610,7 +615,11 @@ def link_variables(self): # If the QC variable is not in ancillary_variables add if qc_variable not in ancillary_variables: ancillary_variables = qc_variable - self._ds[variable].attrs['ancillary_variables'] = copy.copy(ancillary_variables) + + try: + self._ds[variable].attrs['ancillary_variables'] = copy.copy(ancillary_variables) + except KeyError: + pass # Check if QC variable has correct standard_name and iff not fix it. 
correct_standard_name = 'quality_flag' @@ -626,7 +635,7 @@ def clean_arm_qc( clean_units_string=True, correct_valid_min_max=True, remove_unset_global_tests=True, - **kwargs + **kwargs, ): """ Method to clean up Xarray dataset QC variables. @@ -651,8 +660,8 @@ def clean_arm_qc( """ global_qc = self.get_attr_info() + qc_attributes = None for qc_var in self.matched_qc_variables: - # Clean up units attribute from unitless to udunits '1' try: if clean_units_string and self._ds[qc_var].attrs['units'] == 'unitless': @@ -719,7 +728,6 @@ def clean_arm_qc( flag_masks = self._ds[qc_var_name].attrs['flag_masks'] tests_to_remove = [] for ii, flag_meaning in enumerate(flag_meanings): - # Loop over usual test attribute names looking to see if they # are listed in test description. If so use that name for look up. test_attribute_limit_name = None @@ -754,13 +762,38 @@ def clean_arm_qc( qc_var_name=qc_var_name, test_number=test_to_remove ) + # If the QC was not cleaned up because it is not correctly formatted with SERI QC + # call the SERI QC method. + if global_qc is None and qc_attributes is None: + try: + DQMS = self._ds.attrs['qc_method'] == 'DQMS' + self._ds.attrs['comment'] + except KeyError: + try: + DQMS = 'sirs_seriqc' in self._ds.attrs['Command_Line'] + except KeyError: + DQMS = False + + if DQMS: + self._ds.clean.clean_seri_qc() + + # If the QC was not cleaned up because it is not correctly formatted with + # SWATS global attributes call the SWATS QC method. + try: + text = 'SWATS QC checks (bit values)' + SWATS_QC = text in self._ds.attrs['Mentor_QC_Field_Information'] + except KeyError: + SWATS_QC = False + + if SWATS_QC and global_qc is None and qc_attributes is None: + self._ds.clean.clean_swats_qc() + def normalize_assessment( self, variables=None, exclude_variables=None, qc_lookup={'Incorrect': 'Bad', 'Suspect': 'Indeterminate'}, ): - """ Method to clean up assessment terms used to be consistent between embedded QC and DQRs. 
@@ -902,3 +935,205 @@ def clean_cf_qc(self, variables=None, sep='__', **kwargs): except KeyError: pass + + def fix_incorrect_variable_bit_description_attributes(self): + """ + Method to correct incorrectly defined quality control variable attributes. + There are some datastreams with the attribute names incorrectly having 'qc_' + prepended to the attribute name. This will fix those attributes so the cleanqc + method can correctly read the attributes. + + If the variable long_name starts with the string "Quality check results on" + and a variable attribute follows the pattern qc_bit_#_description the 'qc_' part of + the variable attribute will be removed. + + """ + + attr_description_pattern = r'^qc_bit_([0-9]+)_description$' + attr_assessment_pattern = r'^qc_bit_([0-9]+)_assessment$' + + for var_name in self._ds.data_vars: + try: + if not self._ds[var_name].attrs['long_name'].startswith("Quality check results on"): + continue + except KeyError: + continue + + for attr, value in self._ds[var_name].attrs.copy().items(): + for pattern in [attr_description_pattern, attr_assessment_pattern]: + description = re.match(pattern, attr) + if description is not None: + new_attr = attr[3:] + self._ds[var_name].attrs[new_attr] = self._ds[var_name].attrs.pop(attr) + + def clean_seri_qc(self): + """ + Method to apply SERI QC to the quality control variables. The definition of the QC + is listed in a single global attribute and not easily parsable. This method will update + the quality control variable to correctly set the test descriptions for each of the + SERI QC tests defined in the global attributes. 
+ + """ + for var_name in self._ds.data_vars: + if not self._ds[var_name].attrs['long_name'].startswith("Quality check results on"): + continue + + qc_var_name = var_name + var_name = var_name.replace('qc_', '') + qc_data = self._ds[qc_var_name].values.copy() + self._ds[qc_var_name] = xr.zeros_like(self._ds[qc_var_name], dtype=np.int32) + + if qc_var_name in [ + "qc_down_short_diffuse", + "qc_short_direct_normal", + "qc_down_short_hemisp", + ]: + value_number = [1, 2, 3, 6, 7, 8, 9, 94, 95, 96, 97] + test_number = list(range(2, len(value_number) + 2)) + test_description = [ + 'Passed 1-component test; data fall within max-min limits of Kt,Kn, or Kd', + 'Passed 2-component test; data fall within 0.03 of the Gompertz boundaries', + 'Passed 3-component test; data come within +/- 0.03 of satifying Kt=Kn+Kd', + 'Value estimated; passes all pertinent SERI QC tests', + 'Failed 1-component test; lower than allowed minimum', + 'Falied 1-component test; higher than allowed maximum', + 'Passed 3-component test but failed 2-component test by >0.05', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.05 to 0.10.', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.10 to 0.15.', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.15 to 0.20.', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of >= 0.20.', + ] + test_assessment = [ + 'Not failing', + 'Not failing', + 'Not failing', + 'Not failing', + 'Bad', + 'Bad', + 'Indeterminate', + 'Bad', + 'Bad', + 'Bad', + 'Bad', + ] + elif qc_var_name in ["qc_up_long_hemisp", "qc_down_long_hemisp_shaded"]: + value_number = [1, 2, 7, 8, 31] + test_number = list(range(2, len(value_number) + 2)) + test_description = [ + 'Passed 1-component test; data fall within max-min limits of up_long_hemisp and down_long_hemisp_shaded, but short_direct_normal and down_short_hemisp or down_short_diffuse fail the SERI 
QC tests.', + 'Passed 2-component test; data fall within max-min limits of up_long_hemisp and down_long_hemisp_shaded, and short_direct_normal, or down_short_hemisp and down_short_diffuse pass the SERI QC tests while the difference between down_short_hemisp and down_short_diffuse is greater than 20 W/m2.', + 'Failed 1-component test; lower than allowed minimum', + 'Failed 1-component test; higher than allowed maximum', + 'Failed 2-component test', + ] + test_assessment = [ + 'Not failing', + 'Not failing', + 'Bad', + 'Bad', + 'Bad', + ] + elif qc_var_name in ["qc_up_short_hemisp"]: + value_number = [1, 2, 7, 8, 31] + test_number = list(range(2, len(value_number) + 2)) + test_description = [ + 'Passed 1-component test', + 'Passed 2-component test', + 'Failed 1-component test; lower than allowed minimum', + 'Failed 1-component test; higher than allowed maximum', + 'Failed 2-component test; solar zenith angle is less than 80 degrees and down_short_hemisp is 0 or missing', + ] + test_assessment = [ + 'Not failing', + 'Not failing', + 'Bad', + 'Bad', + 'Bad', + ] + + self._ds[var_name].attrs['ancillary_variables'] = qc_var_name + self._ds[qc_var_name].attrs['standard_name'] = 'quality_flag' + self._ds[qc_var_name].attrs['flag_masks'] = [] + self._ds[qc_var_name].attrs['flag_meanings'] = [] + self._ds[qc_var_name].attrs['flag_assessments'] = [] + + self._ds.qcfilter.add_missing_value_test(var_name) + + for ii, _ in enumerate(value_number): + index = qc_data == value_number[ii] + self._ds.qcfilter.add_test( + var_name, + index=index, + test_number=test_number[ii], + test_meaning=test_description[ii], + test_assessment=test_assessment[ii], + ) + + if qc_var_name in [ + "qc_down_short_diffuse", + "qc_short_direct_normal", + "qc_down_short_hemisp", + ]: + calculation = ((qc_data + 2) / 4.0) % 4 + calculation = calculation.astype(np.int16) + value_number = [0, 1, 2, 3] + test_description = [ + 'Parameter too low by 3-component test (Kt=Kn+Kd)', + 'Parameter too high by 
3-component test (Kt=Kn+Kd)', + 'Parameter too low by 2-component test (Gompertz boundary)', + 'Parameter too high by 2-component test (Gompertz boundary)', + ] + test_assessment = ['Bad', 'Bad', 'Bad', 'Bad'] + for ii, _ in enumerate(value_number): + index = (qc_data >= 10) & (qc_data <= 93) & (calculation == value_number[ii]) + self._ds.qcfilter.add_test( + var_name, + index=index, + test_meaning=test_description[ii], + test_assessment=test_assessment[ii], + ) + + def clean_swats_qc(self, fix_data_units=True): + """ + Method to apply SWATS global attribute quality control definition to the + quality control variables. + + Parameters + ---------- + fix_data_units : bool + The units string for some data variables incorrectly defines degrees Celsius + as 'C' instead of the udunits 'degC'. When set to true those units strings + are updated. + + """ + + for var_name in self._ds.data_vars: + if fix_data_units: + try: + unit = self._ds[var_name].attrs['units'] + if unit == 'C': + self._ds[var_name].attrs['units'] = 'degC' + except KeyError: + pass + + if not self._ds[var_name].attrs['long_name'].startswith("Quality check results on"): + continue + + qc_var_name = var_name + self._ds[qc_var_name].attrs['flag_masks'] = [1, 2, 4, 8] + self._ds[qc_var_name].attrs['flag_meanings'] = [ + 'Value is set to missing_value.', + 'Data value less than valid_min.', + 'Data value greater than valid_max.', + 'Difference between current and previous values exceeds valid_delta.', + ] + self._ds[qc_var_name].attrs['flag_assessments'] = [ + 'Bad', + 'Bad', + 'Bad', + 'Indeterminate', + ] + + self._ds.clean.correct_valid_minmax(qc_var_name) + + del self._ds.attrs['Mentor_QC_Field_Information'] diff --git a/act/qc/comparison_tests.py b/act/qc/comparison_tests.py index 731eae0230..0ac88c3b34 100644 --- a/act/qc/comparison_tests.py +++ b/act/qc/comparison_tests.py @@ -3,7 +3,6 @@ """ import copy -import warnings import numpy as np import xarray as xr @@ -86,9 +85,9 @@ def 
compare_time_series_trends( sum_diff = np.array([], dtype=float) time_diff = np.array([], dtype=np.int32) for tm_shift in range(-1 * time_shift, time_shift + int(time_step), int(time_step)): - self_da_shifted = self_da.assign_coords( - time=self_da.time.values.astype('datetime64[s]') + tm_shift - ) + time = self_da.time.values.astype('datetime64[s]') + tm_shift + time = time.astype('datetime64[ns]') + self_da_shifted = self_da.assign_coords(time=time) data_matched, comp_data_matched = xr.align(self_da, comp_da) self_da_shifted = self_da_shifted.reindex( diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py new file mode 100644 index 0000000000..ff6518dde0 --- /dev/null +++ b/act/qc/qc_summary.py @@ -0,0 +1,124 @@ +""" +Method for creating Quality Control Summary variables from the embedded +quality control variables. The summary variable is a simplified version of +quality control that uses flag integers instead of bit-packed masks. The +number of descriptions is simplified to consolidate all categories into one +description. + +""" + +import datetime + + +class QCSummary: + """ + This is a Mixins class used to allow using qcfilter class that is already + registered to the Xarray dataset. All the methods in this class will be added + to the qcfilter class. Doing this to make the code spread across more files + so it is more manageable and readable. + + """ + + def __init__(self, ds): + """initialize""" + self._ds = ds + + def create_qc_summary(self, cleanup_qc=False): + """ + Method to convert embedded quality control to summary QC that utilizes + flag values instead of flag masks and summarizes the assessments to only + a few states. Lowest level of quality control will be listed first with most + severe having higher integer numbers. Dataset is updated in place. + + cleanup_qc : boolean + Call clean.cleanup() method to convert to standardized ancillary quality control + variables. 
The quality control summary requires the current embedded quality + control variables to use ACT standards. + + Returns + ------- + return_ds : Xarray.dataset + ACT Xarray dataset with quality control variables converted to summary flag values. + + """ + + standard_assessments = [ + 'Suspect', + 'Indeterminate', + 'Incorrect', + 'Bad', + ] + standard_meanings = [ + "Data suspect, further analysis recommended", + "Data suspect, further analysis recommended", + "Data incorrect, use not recommended", + "Data incorrect, use not recommended", + ] + + if cleanup_qc: + self._ds.clean.cleanup() + + return_ds = self._ds.copy() + + added = False + for var_name in list(self._ds.data_vars): + qc_var_name = self.check_for_ancillary_qc(var_name, add_if_missing=False, cleanup=False) + + if qc_var_name is None: + continue + + added = True + + assessments = list(set(self._ds[qc_var_name].attrs['flag_assessments'])) + + import xarray as xr + + result = xr.zeros_like(return_ds[qc_var_name]) + for attr in ['flag_masks', 'flag_meanings', 'flag_assessments', 'flag_values']: + try: + del result.attrs[attr] + except KeyError: + pass + + return_ds[qc_var_name] = result + + return_ds.qcfilter.add_test( + var_name, + index=None, + test_number=0, + test_meaning='Not failing quality control tests', + test_assessment='Not failing', + flag_value=True, + ) + + for ii, assessment in enumerate(standard_assessments): + if assessment not in assessments: + continue + + qc_mask = self.get_masked_data( + var_name, rm_assessments=assessment, return_mask_only=True + ) + + # Do not really know how to handle scalars yet. 
+ if qc_mask.ndim == 0: + continue + + return_ds.qcfilter.add_test( + var_name, + index=qc_mask, + test_meaning=standard_meanings[ii], + test_assessment=assessment, + flag_value=True, + ) + + self._ds.update({qc_var_name: return_ds[qc_var_name]}) + + if added: + history = return_ds.attrs['history'] + history += ( + " ; Quality control summary implemented by ACT at " + f"{datetime.datetime.utcnow().isoformat()} UTC." + ) + return_ds.attrs['history'] = history + + return return_ds diff --git a/act/qc/qcfilter.py b/act/qc/qcfilter.py index 6137db9f92..c8a64ca5e4 100644 --- a/act/qc/qcfilter.py +++ b/act/qc/qcfilter.py @@ -9,11 +9,11 @@ import numpy as np import xarray as xr -from act.qc import comparison_tests, qctests, bsrn_tests +from act.qc import comparison_tests, qctests, bsrn_tests, qc_summary @xr.register_dataset_accessor('qcfilter') -class QCFilter(qctests.QCTests, comparison_tests.QCTests, bsrn_tests.QCTests): +class QCFilter(qctests.QCTests, comparison_tests.QCTests, bsrn_tests.QCTests, qc_summary.QCSummary): """ A class for building quality control variables containing arrays for filtering data based on a set of test condition typically based on the @@ -26,13 +26,7 @@ def __init__(self, ds): """initialize""" self._ds = ds - def check_for_ancillary_qc( - self, - var_name, - add_if_missing=True, - cleanup=False, - flag_type=False - ): + def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, flag_type=False): """ Method to check if a quality control variable exist in the dataset and return the quality control varible name. @@ -119,10 +113,7 @@ def check_for_ancillary_qc( return qc_var_name def create_qc_variable( - self, var_name, - flag_type=False, - flag_values_set_value=0, - qc_var_name=None + self, var_name, flag_type=False, flag_values_set_value=0, qc_var_name=None ): """ Method to create a quality control variable in the dataset. 
@@ -206,9 +197,7 @@ def create_qc_variable( # Update if using flag_values and don't want 0 to be default value. if flag_type and flag_values_set_value != 0: - self._ds[qc_var_name].values = self._ds[qc_var_name].values + int( - flag_values_set_value - ) + self._ds[qc_var_name].values = self._ds[qc_var_name].values + int(flag_values_set_value) # Add requried variable attributes. if flag_type: @@ -260,7 +249,6 @@ def update_ancillary_variable(self, var_name, qc_var_name=None): try: ancillary_variables = self._ds[var_name].attrs['ancillary_variables'] if qc_var_name not in ancillary_variables: - ancillary_variables = ' '.join([ancillary_variables, qc_var_name]) except KeyError: ancillary_variables = qc_var_name @@ -551,7 +539,10 @@ def set_test(self, var_name, index=None, test_number=None, flag_value=False): if index is not None: if flag_value: - qc_variable[index] = test_number + if len(qc_variable.shape) == 0: + qc_variable = test_number + else: + qc_variable[index] = test_number else: if bool(np.shape(index)): qc_variable[index] = set_bit(qc_variable[index], test_number) @@ -804,8 +795,8 @@ def get_masked_data( return_nan_array=False, ma_fill_value=None, return_inverse=False, + return_mask_only=False, ): - """ Returns a numpy masked array containing data and mask or a numpy float array with masked values set to NaN. @@ -831,6 +822,8 @@ def get_masked_data( Invert the masked array mask or return data array where mask is set to False instead of True set to NaN. Useful for overplotting where failing. + return_mask_only : boolean + Return the boolean mask only as a numpy array. Returns ------- @@ -915,9 +908,21 @@ def get_masked_data( if variable.dtype in (np.float64, np.int64): nan_dtype = np.float64 - mask = np.zeros(variable.shape, dtype=bool) + try: + # Get shape of mask from QC variable since there is a chance it will + # be a different shape than data variable. 
+ mask = np.zeros(self._ds[qc_var_name].shape, dtype=bool) + except KeyError: + # If there is no QC variable make mask from shape of data variable. + mask = np.zeros(self._ds[var_name].shape, dtype=bool) + for test in test_numbers: - mask = mask | self._ds.qcfilter.get_qc_test_mask(var_name, test, flag_value=flag_value) + qc_test_mask = self._ds.qcfilter.get_qc_test_mask(var_name, test, flag_value=flag_value) + mask = mask | qc_test_mask + + # If requested only return the mask. + if return_mask_only: + return mask # Convert data numpy array into masked array try: @@ -1030,25 +1035,25 @@ def datafilter( except KeyError: pass - print(f'No quality control variable for {var_name} found ' - f'in call to .qcfilter.datafilter()') + print( + f'No quality control variable for {var_name} found ' + f'in call to .qcfilter.datafilter()' + ) continue # Need to return data as Numpy array with NaN values. Setting the Dask array # to Numpy masked array does not work with other tools. data = self.get_masked_data( - var_name, - rm_assessments=rm_assessments, - rm_tests=rm_tests, - return_nan_array=True + var_name, rm_assessments=rm_assessments, rm_tests=rm_tests, return_nan_array=True ) # If data was orginally stored as Dask array return values to Dataset as Dask array # else set as Numpy array. try: self._ds[var_name].data = dask.array.from_array( - data, chunks=self._ds[var_name].data.chunksize) + data, chunks=self._ds[var_name].data.chunksize + ) except AttributeError: self._ds[var_name].values = data diff --git a/act/qc/qctests.py b/act/qc/qctests.py index 45bfb7d179..3b9916d1f8 100644 --- a/act/qc/qctests.py +++ b/act/qc/qctests.py @@ -1353,8 +1353,7 @@ def add_iqr_test( from scikit_posthocs import outliers_iqr except ImportError: raise ImportError( - 'scikit_posthocs needs to be installed on your system to ' - 'run add_iqr_test.' + 'scikit_posthocs needs to be installed on your system to ' 'run add_iqr_test.' 
) if test_meaning is None: @@ -1452,8 +1451,7 @@ def add_gesd_test( from scikit_posthocs import outliers_gesd except ImportError: raise ImportError( - 'scikit_posthocs needs to be installed on your system to ' - 'run add_gesd_test.' + 'scikit_posthocs needs to be installed on your system to ' 'run add_gesd_test.' ) if test_meaning is None: @@ -1510,7 +1508,7 @@ def add_atmospheric_pressure_test( test_number=None, flag_value=False, prepend_text=None, - use_dask=False + use_dask=False, ): """ Method to perform a limit test on atmospheric pressure data using @@ -1592,8 +1590,10 @@ def add_atmospheric_pressure_test( upper_limit = upper_limit.magnitude if test_meaning is None: - test_meaning = ('Value outside of atmospheric pressure range test range: ' - f'{round(lower_limit, 2)} to {round(upper_limit, 2)} {data_units}') + test_meaning = ( + 'Value outside of atmospheric pressure range test range: ' + f'{round(lower_limit, 2)} to {round(upper_limit, 2)} {data_units}' + ) if prepend_text is not None: test_meaning = ': '.join((prepend_text, test_meaning)) @@ -1605,7 +1605,9 @@ def add_atmospheric_pressure_test( index2 = da.where(self._ds[var_name].data > upper_limit, True, False) index = (index1 | index2).compute() else: - index = (self._ds[var_name].values > upper_limit) | (self._ds[var_name].values < lower_limit) + index = (self._ds[var_name].values > upper_limit) | ( + self._ds[var_name].values < lower_limit + ) result = self._ds.qcfilter.add_test( var_name, diff --git a/act/qc/radiometer_tests.py b/act/qc/radiometer_tests.py index 34ba7687c2..6429aef15b 100644 --- a/act/qc/radiometer_tests.py +++ b/act/qc/radiometer_tests.py @@ -3,7 +3,6 @@ """ -import datetime import warnings import dask @@ -13,7 +12,7 @@ from scipy.fftpack import rfft, rfftfreq from act.utils.datetime_utils import determine_time_delta -from act.utils.geo_utils import get_sunrise_sunset_noon, is_sun_visible +from act.utils.geo_utils import is_sun_visible def fft_shading_test( diff --git 
a/act/qc/sp2.py b/act/qc/sp2.py index 9a67123e32..526405408e 100644 --- a/act/qc/sp2.py +++ b/act/qc/sp2.py @@ -119,7 +119,8 @@ def __init__(self): 'Attempting to use SP2ParticleCriteria without' 'PySP2 installed. SP2ParticleCriteria will' 'not have any functionality besides this' - 'warning message.', RuntimeWarning + 'warning message.', + RuntimeWarning, ) diff --git a/act/retrievals/cbh.py b/act/retrievals/cbh.py index dd7aa0c04f..f68f47e52c 100644 --- a/act/retrievals/cbh.py +++ b/act/retrievals/cbh.py @@ -16,7 +16,7 @@ def generic_sobel_cbh( fill_na=None, return_thresh=False, filter_type='uniform', - edge_thresh=5., + edge_thresh=5.0, ): """ Function for calculating cloud base height from lidar/radar data diff --git a/act/retrievals/doppler_lidar.py b/act/retrievals/doppler_lidar.py index 0e5dd85154..0998fa821f 100644 --- a/act/retrievals/doppler_lidar.py +++ b/act/retrievals/doppler_lidar.py @@ -103,6 +103,7 @@ def compute_winds_from_ppi( else: try: snr = ds[snr_name].values + var_name = snr_name except KeyError: intensity = ds['intensity'].values snr = intensity - 1 @@ -132,9 +133,16 @@ def compute_winds_from_ppi( task.append( dask.delayed(process_ppi_winds)( - time[scan_index], elevation[scan_index], azimuth[scan_index], snr[scan_index, :], - doppler[scan_index, :], rng, condition_limit, snr_threshold, remove_all_missing, - height_units + time[scan_index], + elevation[scan_index], + azimuth[scan_index], + snr[scan_index, :], + doppler[scan_index, :], + rng, + condition_limit, + snr_threshold, + remove_all_missing, + height_units, ) ) @@ -144,7 +152,9 @@ def compute_winds_from_ppi( results = [results[ii] for ii, value in enumerate(is_Dataset) if value is True] new_ds = xr.concat(results, 'time') - if isinstance(return_ds, xr.core.dataset.Dataset) and isinstance(new_ds, xr.core.dataset.Dataset): + if isinstance(return_ds, xr.core.dataset.Dataset) and isinstance( + new_ds, xr.core.dataset.Dataset + ): return_ds = xr.concat([return_ds, new_ds], dim='time') 
else: return_ds = new_ds @@ -152,8 +162,18 @@ def compute_winds_from_ppi( return return_ds -def process_ppi_winds(time, elevation, azimuth, snr, doppler, rng, condition_limit, - snr_threshold, remove_all_missing, height_units): +def process_ppi_winds( + time, + elevation, + azimuth, + snr, + doppler, + rng, + condition_limit, + snr_threshold, + remove_all_missing, + height_units, +): """ This function is for processing the winds using dask from the compute_winds_from_ppi function. This should not be used standalone. @@ -235,9 +255,7 @@ def process_ppi_winds(time, elevation, azimuth, snr, doppler, rng, condition_lim wdir = np.degrees(np.arctan2(u_wind, v_wind) + np.pi) wspd_err = np.sqrt((u_wind * u_err) ** 2 + (v_wind * v_err) ** 2) / wspd - wdir_err = np.degrees( - np.sqrt((u_wind * v_err) ** 2 + (v_wind * u_err) ** 2) / wspd**2 - ) + wdir_err = np.degrees(np.sqrt((u_wind * v_err) ** 2 + (v_wind * u_err) ** 2) / wspd**2) if remove_all_missing and np.isnan(wspd).all(): return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan diff --git a/act/retrievals/radiation.py b/act/retrievals/radiation.py index 4d1964ebdd..09c5d1ade3 100644 --- a/act/retrievals/radiation.py +++ b/act/retrievals/radiation.py @@ -7,7 +7,6 @@ import xarray as xr from scipy.constants import Stefan_Boltzmann -from act.utils.datetime_utils import datetime64_to_datetime from act.utils.geo_utils import get_solar_azimuth_elevation @@ -59,7 +58,7 @@ def calculate_dsh_from_dsdh_sdn( attrs={ 'long_name': 'Derived Downwelling Shortwave Hemispheric Irradiance', 'units': 'W/m^2', - } + }, ) return ds @@ -144,7 +143,6 @@ def calculate_net_radiation( dlhs='down_long_hemisp_shaded', smooth=None, ): - """ Function to calculate the net radiation from upwelling short and long-wave irradiance and @@ -207,7 +205,6 @@ def calculate_longwave_radiation( emiss_a=0.61, emiss_b=0.06, ): - """ Function to calculate longwave radiation during clear and cloudy sky conditions @@ -249,7 +246,6 @@ def 
calculate_longwave_radiation( """ if met_ds is not None: - T = met_ds[temperature_var] + 273.15 # C to K e = met_ds[vapor_pressure_var] * 10.0 # kpa to hpa else: diff --git a/act/retrievals/sonde.py b/act/retrievals/sonde.py index 242b7f9abb..01cf3ce045 100644 --- a/act/retrievals/sonde.py +++ b/act/retrievals/sonde.py @@ -3,9 +3,7 @@ """ -import warnings import numpy as np -import pandas as pd import xarray as xr from operator import itemgetter from itertools import groupby @@ -174,9 +172,7 @@ def calculate_stability_indicies( ds['parcel_temperature'].attrs['units'] = t_profile.units # Calculate CAPE, CIN, LCL - sbcape, sbcin = mpcalc.surface_based_cape_cin(p_sorted, - t_sorted, - td_sorted) + sbcape, sbcin = mpcalc.surface_based_cape_cin(p_sorted, t_sorted, td_sorted) lcl = mpcalc.lcl(p_sorted[0], t_sorted[0], td_sorted[0]) try: @@ -277,8 +273,14 @@ def calculate_pbl_liu_liang( """ # Preprocess the sonde data to ensure the same methods across all retrievals - ds2 = preprocess_sonde_data(ds, temperature=temperature, pressure=pressure, - height=height, smooth_height=smooth_height, base=5.) 
+ ds2 = preprocess_sonde_data( + ds, + temperature=temperature, + pressure=pressure, + height=height, + smooth_height=smooth_height, + base=5.0, + ) pres = ds2[pressure].values wspd = ds2[windspeed].values @@ -417,7 +419,7 @@ def calculate_pbl_heffter( pressure='pres', height='alt', smooth_height=3, - base=5., + base=5.0, ): """ Function for calculating the PBL height from a radiosonde profile @@ -460,8 +462,14 @@ def calculate_pbl_heffter( """ # Preprocess the sonde data to ensure the same methods across all retrievals - ds2 = preprocess_sonde_data(ds, temperature=temperature, pressure=pressure, - height=height, smooth_height=smooth_height, base=base) + ds2 = preprocess_sonde_data( + ds, + temperature=temperature, + pressure=pressure, + height=height, + smooth_height=smooth_height, + base=base, + ) # Get data pres = ds2[pressure].values @@ -496,25 +504,25 @@ def calculate_pbl_heffter( # For each layer, calculate the difference in theta from # top and bottom of the layer. The lowest layer where the # difference is > 2 K is set as the PBL. - pbl = 0. + pbl = 0.0 theta_diff_layer = [] bottom_inversion = [] top_inversion = [] for r in ranges: - if agl[r[1]] > 4000.: + if agl[r[1]] > 4000.0: continue theta_diff = theta[r[1]] - theta[r[0]] theta_diff_layer.append(theta_diff) bottom_inversion.append(alt[r[0]]) top_inversion.append(alt[r[1]]) - if pbl == 0. and theta_diff > 2.0: + if pbl == 0.0 and theta_diff > 2.0: pbl = alt[r[0]] if len(theta_diff_layer) == 0: - pbl = -9999. 
+ pbl = -9999.0 # If PBL is not set, set it to the layer with the max theta diff - if pbl == 0.: + if pbl == 0.0: idx = np.argmax(theta_diff_layer) pbl = bottom_inversion[idx] @@ -536,11 +544,21 @@ def calculate_pbl_heffter( ds['alt_ss'] = da atts = {'units': 'm', 'long_name': 'Bottom height of inversion layers'} - da = xr.DataArray(bottom_inversion, coords={'layers': list(range(len(bottom_inversion)))}, dims=['layers'], attrs=atts) + da = xr.DataArray( + bottom_inversion, + coords={'layers': list(range(len(bottom_inversion)))}, + dims=['layers'], + attrs=atts, + ) ds['bottom_inversion'] = da atts = {'units': 'm', 'long_name': 'Top height of inversion layers'} - da = xr.DataArray(top_inversion, coords={'layers': list(range(len(top_inversion)))}, dims=['layers'], attrs=atts) + da = xr.DataArray( + top_inversion, + coords={'layers': list(range(len(top_inversion)))}, + dims=['layers'], + attrs=atts, + ) ds['top_inversion'] = da return ds @@ -552,7 +570,7 @@ def preprocess_sonde_data( pressure='pres', height='alt', smooth_height=3, - base=5., + base=5.0, ): """ Function for processing the SONDE data for the PBL calculations. 
@@ -628,7 +646,7 @@ def preprocess_sonde_data( temp = ds2[temperature].values # Perform Pre-processing checks - if len(temp) == 0.: + if len(temp) == 0.0: raise ValueError('No data in profile') if np.nanmax(alt) < 1000.0: diff --git a/act/tests/__init__.py b/act/tests/__init__.py index 9ae16bd6a3..c41d4b7d83 100644 --- a/act/tests/__init__.py +++ b/act/tests/__init__.py @@ -55,12 +55,14 @@ 'EXAMPLE_INI', 'EXAMPLE_SP2B', 'EXAMPLE_MET_YAML', - 'EXAMPLE_CLOUDPHASE' - 'EXAMPLE_ECOR', + 'EXAMPLE_CLOUDPHASE' 'EXAMPLE_ECOR', 'EXAMPLE_SEBS', 'EXAMPLE_ENA_MET', 'EXAMPLE_CCN', 'EXAMPLE_OLD_QC', + 'EXAMPLE_AOSACSM', + 'EXAMPLE_SIRS_SIRI_QC', + 'EXAMPLE_SWATS', ] }, ) diff --git a/act/tests/sample_files.py b/act/tests/sample_files.py index 06adc4a4a2..47704a63e2 100644 --- a/act/tests/sample_files.py +++ b/act/tests/sample_files.py @@ -4,11 +4,11 @@ """ -import os from arm_test_data import DATASETS # Single files +EXAMPLE_MPEG = DATASETS.fetch('nsacamskyradmovieC1.a1.20240401.100300.mpg') EXAMPLE_MET1 = DATASETS.fetch('sgpmetE13.b1.20190101.000000.cdf') EXAMPLE_MET_SAIL = DATASETS.fetch('gucmetM1.b1.20230301.000000.cdf') EXAMPLE_MET_CSV = DATASETS.fetch('sgpmetE13.b1.20210401.000000.csv') @@ -27,13 +27,13 @@ EXAMPLE_AERI = DATASETS.fetch('sgpaerich1C1.b1.20190501.000342.nc') EXAMPLE_IRTSST = DATASETS.fetch('marirtsstM1.b1.20190320.000000.nc') EXAMPLE_MFRSR = DATASETS.fetch('sgpmfrsr7nchE11.b1.20210329.070000.nc') -EXAMPLE_SURFSPECALB1MLAWER = DATASETS.fetch( - 'nsasurfspecalb1mlawerC1.c1.20160609.080000.nc' -) +EXAMPLE_SURFSPECALB1MLAWER = DATASETS.fetch('nsasurfspecalb1mlawerC1.c1.20160609.080000.nc') EXAMPLE_SIGMA_MPLV5 = DATASETS.fetch('201509021500.bi') EXAMPLE_RL1 = DATASETS.fetch('sgprlC1.a0.20160131.000000.nc') EXAMPLE_CO2FLX4M = DATASETS.fetch('sgpco2flx4mC1.b1.20201007.001500.nc') +EXAMPLE_SWATS = DATASETS.fetch('sgpswatsE8.b1.20071229.000700.cdf') EXAMPLE_SIRS = DATASETS.fetch('sgpsirsE13.b1.20190101.000000.cdf') +EXAMPLE_SIRS_SIRI_QC = 
DATASETS.fetch('sgpsirsC1.b1.20040101.000000.cdf') EXAMPLE_GML_RADIATION = DATASETS.fetch('brw21001.dat') EXAMPLE_GML_MET = DATASETS.fetch('met_brw_insitu_1_obop_hour_2020.txt') EXAMPLE_GML_OZONE = DATASETS.fetch('brw_12_2020_hour.dat') @@ -49,88 +49,106 @@ EXAMPLE_MET_YAML = DATASETS.fetch('sgpmetE13.b1.yaml') EXAMPLE_CLOUDPHASE = DATASETS.fetch('nsacloudphaseC1.c1.20180601.000000.nc') EXAMPLE_AAF_ICARTT = DATASETS.fetch('AAFNAV_COR_20181104_R0.ict') -EXAMPLE_NEON = DATASETS.fetch('NEON.D18.BARR.DP1.00002.001.000.010.001.SAAT_1min.2022-10.expanded.20221107T205629Z.csv') +EXAMPLE_NEON = DATASETS.fetch( + 'NEON.D18.BARR.DP1.00002.001.000.010.001.SAAT_1min.2022-10.expanded.20221107T205629Z.csv' +) EXAMPLE_NEON_VARIABLE = DATASETS.fetch('NEON.D18.BARR.DP1.00002.001.variables.20221201T110553Z.csv') -EXAMPLE_NEON_POSITION = DATASETS.fetch('NEON.D18.BARR.DP1.00002.001.sensor_positions.20221107T205629Z.csv') +EXAMPLE_NEON_POSITION = DATASETS.fetch( + 'NEON.D18.BARR.DP1.00002.001.sensor_positions.20221107T205629Z.csv' +) EXAMPLE_DOD = DATASETS.fetch('vdis.b1') EXAMPLE_EBBR1 = DATASETS.fetch('sgp30ebbrE32.b1.20191125.000000.nc') EXAMPLE_EBBR2 = DATASETS.fetch('sgp30ebbrE32.b1.20191130.000000.nc') EXAMPLE_EBBR3 = DATASETS.fetch('sgp30ebbrE13.b1.20190601.000000.nc') EXAMPLE_ECOR = DATASETS.fetch('sgp30ecorE14.b1.20190601.000000.cdf') EXAMPLE_SEBS = DATASETS.fetch('sgpsebsE14.b1.20190601.000000.cdf') +EXAMPLE_SEBS_E39 = DATASETS.fetch('sgpsebsE39.b1.20230601.000000.cdf') +EXAMPLE_ECORSF_E39 = DATASETS.fetch('sgpecorsfE39.b1.20230601.000000.nc') +EXAMPLE_STAMP_E39 = DATASETS.fetch('sgpstampE39.b1.20230601.000000.nc') +EXAMPLE_STAMPPCP_E39 = DATASETS.fetch('sgpstamppcpE39.b1.20230601.000000.nc') +EXAMPLE_AMC_E39 = DATASETS.fetch('sgpamcE39.b1.20230601.000000.nc') EXAMPLE_MFAS_SODAR = DATASETS.fetch('sodar.20230404.mnd') EXAMPLE_ENA_MET = DATASETS.fetch('enametC1.b1.20221109.000000.cdf') EXAMPLE_CCN = DATASETS.fetch('sgpaosccn2colaE13.b1.20170903.000000.nc') EXAMPLE_OLD_QC = 
DATASETS.fetch('sgp30ecorE6.b1.20040705.000000.cdf') EXAMPLE_SONDE_WILDCARD = DATASETS.fetch('sgpsondewnpnC1.b1.20190101.053200.cdf') EXAMPLE_CEIL_WILDCARD = DATASETS.fetch('sgpceilC1.b1.20190101.000000.nc') +EXAMPLE_HYSPLIT = DATASETS.fetch('houstonaug300.0summer2010080100') +EXAMPLE_AOSACSM = DATASETS.fetch('sgpaosacsmE13.b2.20230420.000109.nc') # Multiple files in a list -dlppi_multi_list = ['sgpdlppiC1.b1.20191015.120023.cdf', - 'sgpdlppiC1.b1.20191015.121506.cdf'] +dlppi_multi_list = ['sgpdlppiC1.b1.20191015.120023.cdf', 'sgpdlppiC1.b1.20191015.121506.cdf'] EXAMPLE_DLPPI_MULTI = [DATASETS.fetch(file) for file in dlppi_multi_list] -noaa_psl_list = ['ayp22199.21m', - 'ayp22200.00m'] +noaa_psl_list = ['ayp22199.21m', 'ayp22200.00m'] EXAMPLE_NOAA_PSL_SURFACEMET = [DATASETS.fetch(file) for file in noaa_psl_list] -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] EXAMPLE_MET_WILDCARD = [DATASETS.fetch(file) for file in met_wildcard_list] -met_contour_list = ['sgpmetE15.b1.20190508.000000.cdf', - 'sgpmetE31.b1.20190508.000000.cdf', - 'sgpmetE32.b1.20190508.000000.cdf', - 'sgpmetE33.b1.20190508.000000.cdf', - 'sgpmetE34.b1.20190508.000000.cdf', - 'sgpmetE35.b1.20190508.000000.cdf', - 'sgpmetE36.b1.20190508.000000.cdf', - 'sgpmetE37.b1.20190508.000000.cdf', - 'sgpmetE38.b1.20190508.000000.cdf', - 'sgpmetE39.b1.20190508.000000.cdf', - 'sgpmetE40.b1.20190508.000000.cdf', - 'sgpmetE9.b1.20190508.000000.cdf', - 'sgpmetE13.b1.20190508.000000.cdf'] 
+met_contour_list = [ + 'sgpmetE15.b1.20190508.000000.cdf', + 'sgpmetE31.b1.20190508.000000.cdf', + 'sgpmetE32.b1.20190508.000000.cdf', + 'sgpmetE33.b1.20190508.000000.cdf', + 'sgpmetE34.b1.20190508.000000.cdf', + 'sgpmetE35.b1.20190508.000000.cdf', + 'sgpmetE36.b1.20190508.000000.cdf', + 'sgpmetE37.b1.20190508.000000.cdf', + 'sgpmetE38.b1.20190508.000000.cdf', + 'sgpmetE39.b1.20190508.000000.cdf', + 'sgpmetE40.b1.20190508.000000.cdf', + 'sgpmetE9.b1.20190508.000000.cdf', + 'sgpmetE13.b1.20190508.000000.cdf', +] EXAMPLE_MET_CONTOUR = [DATASETS.fetch(file) for file in met_contour_list] -twp_sonde_wildcard_list = ['twpsondewnpnC3.b1.20060119.050300.custom.cdf', - 'twpsondewnpnC3.b1.20060119.112000.custom.cdf', - 'twpsondewnpnC3.b1.20060119.163300.custom.cdf', - 'twpsondewnpnC3.b1.20060119.231600.custom.cdf', - 'twpsondewnpnC3.b1.20060120.043800.custom.cdf', - 'twpsondewnpnC3.b1.20060120.111900.custom.cdf', - 'twpsondewnpnC3.b1.20060120.170800.custom.cdf', - 'twpsondewnpnC3.b1.20060120.231500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', - 'twpsondewnpnC3.b1.20060122.052600.custom.cdf', - 'twpsondewnpnC3.b1.20060122.111500.custom.cdf', - 'twpsondewnpnC3.b1.20060122.171800.custom.cdf', - 'twpsondewnpnC3.b1.20060122.232600.custom.cdf', - 'twpsondewnpnC3.b1.20060123.052500.custom.cdf', - 'twpsondewnpnC3.b1.20060123.111700.custom.cdf', - 'twpsondewnpnC3.b1.20060123.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060123.231500.custom.cdf', - 'twpsondewnpnC3.b1.20060124.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060124.111800.custom.cdf', - 'twpsondewnpnC3.b1.20060124.171700.custom.cdf', - 'twpsondewnpnC3.b1.20060124.231500.custom.cdf'] +twp_sonde_wildcard_list = [ + 'twpsondewnpnC3.b1.20060119.050300.custom.cdf', + 'twpsondewnpnC3.b1.20060119.112000.custom.cdf', + 'twpsondewnpnC3.b1.20060119.163300.custom.cdf', + 
'twpsondewnpnC3.b1.20060119.231600.custom.cdf', + 'twpsondewnpnC3.b1.20060120.043800.custom.cdf', + 'twpsondewnpnC3.b1.20060120.111900.custom.cdf', + 'twpsondewnpnC3.b1.20060120.170800.custom.cdf', + 'twpsondewnpnC3.b1.20060120.231500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', + 'twpsondewnpnC3.b1.20060122.052600.custom.cdf', + 'twpsondewnpnC3.b1.20060122.111500.custom.cdf', + 'twpsondewnpnC3.b1.20060122.171800.custom.cdf', + 'twpsondewnpnC3.b1.20060122.232600.custom.cdf', + 'twpsondewnpnC3.b1.20060123.052500.custom.cdf', + 'twpsondewnpnC3.b1.20060123.111700.custom.cdf', + 'twpsondewnpnC3.b1.20060123.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060123.231500.custom.cdf', + 'twpsondewnpnC3.b1.20060124.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060124.111800.custom.cdf', + 'twpsondewnpnC3.b1.20060124.171700.custom.cdf', + 'twpsondewnpnC3.b1.20060124.231500.custom.cdf', +] EXAMPLE_TWP_SONDE_WILDCARD = [DATASETS.fetch(file) for file in twp_sonde_wildcard_list] -twp_sonde_20060121_list = ['twpsondewnpnC3.b1.20060121.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.231600.custom.cdf'] +twp_sonde_20060121_list = [ + 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', +] EXAMPLE_TWP_SONDE_20060121 = [DATASETS.fetch(file) for file in twp_sonde_20060121_list] -stamp_wildcard_list = ['sgpstampE13.b1.20200101.000000.nc', - 'sgpstampE31.b1.20200101.000000.nc', - 'sgpstampE32.b1.20200101.000000.nc', - 'sgpstampE33.b1.20200101.000000.nc', - 'sgpstampE34.b1.20200101.000000.nc', - 'sgpstampE9.b1.20200101.000000.nc'] +stamp_wildcard_list = [ + 
'sgpstampE13.b1.20200101.000000.nc', + 'sgpstampE31.b1.20200101.000000.nc', + 'sgpstampE32.b1.20200101.000000.nc', + 'sgpstampE33.b1.20200101.000000.nc', + 'sgpstampE34.b1.20200101.000000.nc', + 'sgpstampE9.b1.20200101.000000.nc', +] EXAMPLE_STAMP_WILDCARD = [DATASETS.fetch(file) for file in stamp_wildcard_list] -mmcr_list = ['sgpmmcrC1.b1.1.cdf', - 'sgpmmcrC1.b1.2.cdf'] +mmcr_list = ['sgpmmcrC1.b1.1.cdf', 'sgpmmcrC1.b1.2.cdf'] EXAMPLE_MMCR = [DATASETS.fetch(file) for file in mmcr_list] diff --git a/act/utils/__init__.py b/act/utils/__init__.py index 035bf99de3..6e72622d50 100644 --- a/act/utils/__init__.py +++ b/act/utils/__init__.py @@ -7,7 +7,16 @@ __getattr__, __dir__, __all__ = lazy.attach( __name__, - submodules=['data_utils', 'datetime_utils', 'geo_utils', 'inst_utils', 'io_utils', 'qc_utils', 'radiance_utils', 'ship_utils'], + submodules=[ + 'data_utils', + 'datetime_utils', + 'geo_utils', + 'inst_utils', + 'io_utils', + 'qc_utils', + 'radiance_utils', + 'ship_utils', + ], submod_attrs={ 'data_utils': [ 'ChangeUnits', @@ -23,6 +32,8 @@ 'convert_to_potential_temp', 'arm_site_location_search', 'DatastreamParserARM', + 'calculate_percentages', + 'convert_2d_to_1d', ], 'datetime_utils': [ 'dates_between', @@ -31,7 +42,7 @@ 'numpy_to_arm_date', 'reduce_time_ranges', 'date_parser', - 'adjust_timestamp' + 'adjust_timestamp', ], 'geo_utils': [ 'add_solar_variable', @@ -44,13 +55,15 @@ 'qc_utils': ['calculate_dqr_times'], 'radiance_utils': ['planck_converter'], 'ship_utils': ['calc_cog_sog', 'proc_scog'], - 'io_utils': ['pack_tar', - 'unpack_tar', - 'cleanup_files', - 'is_gunzip_file', - 'pack_gzip', - 'unpack_gzip', - 'generate_movie' + 'io_utils': [ + 'pack_tar', + 'unpack_tar', + 'cleanup_files', + 'is_gunzip_file', + 'pack_gzip', + 'unpack_gzip', + 'generate_movie', + 'arm_standards_validator', ], }, ) diff --git a/act/utils/data_utils.py b/act/utils/data_utils.py index 5d499338dc..d8c98d10b2 100644 --- a/act/utils/data_utils.py +++ b/act/utils/data_utils.py @@ 
-15,6 +15,7 @@ from pathlib import Path import re import requests +from os import PathLike spec = importlib.util.find_spec('pyart') if spec is not None: @@ -35,7 +36,13 @@ def __init__(self, ds): self._ds = ds def change_units( - self, variables=None, desired_unit=None, skip_variables=None, skip_standard=True + self, + variables=None, + desired_unit=None, + skip_variables=None, + skip_standard=True, + verbose=False, + raise_error=False, ): """ Parameters @@ -51,6 +58,13 @@ def change_units( Flag indicating the QC variables that will not need changing are skipped. Makes the processing faster when processing all variables in dataset. + verbose : boolean + Option to print statement when an attempted conversion fails. Set to False + as default because many units strings are not udunits complient and when + trying to convert all varialbes of a type of units (eg temperature) the code + can print a lot of unecessary information. + raise_error : boolean + Raise an error if conversion is not successful. Returns ------- @@ -102,13 +116,21 @@ def change_units( pint.errors.UndefinedUnitError, np.core._exceptions.UFuncTypeError, ): - continue + if raise_error: + raise ValueError( + f"Unable to convert '{var_name}' to units of '{desired_unit}'." + ) + elif verbose: + print( + f"\n Unable to convert '{var_name}' to units of '{desired_unit}'. " + f"Skipping unit converstion for '{var_name}'.\n" + ) return self._ds # @xr.register_dataset_accessor('utils') -class DatastreamParserARM(object): +class DatastreamParserARM: ''' Class to parse ARM datastream names or filenames into its components. Will return None for each attribute if not extracted from the filename. 
@@ -144,6 +166,7 @@ class DatastreamParserARM(object): ''' + def __init__(self, ds=''): ''' Constructor that initializes datastream data member and runs @@ -154,11 +177,10 @@ def __init__(self, ds=''): The datastream or filename to parse ''' - - if isinstance(ds, str): + if isinstance(ds, (str, PathLike)): self.__datastream = Path(ds).name else: - raise ValueError('Datastream or filename name must be a string') + raise ValueError('Datastream or filename name must be a string or pathlib.PosixPath.') try: self.__parse_datastream() @@ -215,15 +237,17 @@ def __parse_datastream(self): match = True if not match: - m = re.search(r'(^[a-z]{3})(\w+)$', tempstring[0]) + m = re.search(r'(^[a-z]{3})([^A-Z]+)$', tempstring[0]) if m is not None: self.__site = m.group(1) self.__class = m.group(2) match = True if not match and len(tempstring[0]) == 3: - self.__site = tempstring[0] - match = True + m = re.search(r'(^[a-z]{3})', tempstring[0]) + if m is not None: + self.__site = m.group(1) + match = True if not match: raise ValueError(self.__datastream) @@ -246,8 +270,7 @@ def datastream(self): ''' try: - return ''.join((self.__site, self.__class, self.__facility, '.', - self.__level)) + return ''.join((self.__site, self.__class, self.__facility, '.', self.__level)) except TypeError: return None @@ -303,8 +326,7 @@ def datastream_standard(self): ''' try: - return ''.join((self.site, self.datastream_class, self.facility, - '.', self.level)) + return ''.join((self.site, self.datastream_class, self.facility, '.', self.level)) except TypeError: return None @@ -361,7 +383,7 @@ def assign_coordinates(ds, coord_list): if coord not in ds.variables.keys(): raise KeyError(coord + ' is not a variable in the Dataset.') - if ds.dims[coord_list[coord]] != len(ds.variables[coord]): + if ds.sizes[coord_list[coord]] != len(ds.variables[coord]): raise IndexError( coord + ' must have the same ' + 'value as length of ' + coord_list[coord] ) @@ -433,8 +455,14 @@ def add_in_nan(time, data): mode = 
stats.mode(diff, keepdims=True).mode[0] except TypeError: mode = stats.mode(diff).mode[0] + index = np.where(diff > (2.0 * mode)) + # If the data is not float time and we try to insert a NaN it will + # not auto upconvert the data. Need to convert before inserting NaN. + if len(index) > 0 and np.issubdtype(data.dtype, np.integer): + data = data.astype('float32') + offset = 0 for i in index[0]: corr_i = i + offset @@ -987,7 +1015,6 @@ def convert_to_potential_temp( temp_var_units=None, press_var_units=None, ): - """ Converts temperature to potential temperature. @@ -1257,9 +1284,7 @@ def arm_site_location_search(site_code='sgp', facility_code=None): "distinct_facility_code": { "terms": { "field": "facility_code.keyword", - "order": { - "_key": "asc" - }, + "order": {"_key": "asc"}, "size": 7000, }, "aggs": { @@ -1271,7 +1296,7 @@ def arm_site_location_search(site_code='sgp', facility_code=None): "facility_code", "location", ], - "size": 1 + "size": 1, }, }, }, @@ -1286,7 +1311,9 @@ def arm_site_location_search(site_code='sgp', facility_code=None): } # Uses requests to grab metadata from arm.gov. - response = requests.get('https://adc.arm.gov/elastic/metadata/_search', headers=headers, json=json_data) + response = requests.get( + 'https://adc.arm.gov/elastic/metadata/_search', headers=headers, json=json_data + ) # Loads the text to a dictionary response_dict = json.loads(response.text) @@ -1294,19 +1321,203 @@ def arm_site_location_search(site_code='sgp', facility_code=None): coord_dict = {} # Loop through each facility. 
for i in range(len(response_dict['aggregations']['distinct_facility_code']['buckets'])): - site_info = response_dict['aggregations']['distinct_facility_code']['buckets'][i]['hits']['hits']['hits'][0]['_source'] + site_info = response_dict['aggregations']['distinct_facility_code']['buckets'][i]['hits'][ + 'hits' + ]['hits'][0]['_source'] site = site_info['site_code'] facility = site_info['facility_code'] # Some sites do not contain coordinate information, return None if that is the case. if site_info['location'] is None: - coords = {'latitude': None, - 'longitude': None} + coords = {'latitude': None, 'longitude': None} else: lat, lon = site_info['location'].split(',') lat = float(lat) lon = float(lon) - coords = {'latitude': lat, - 'longitude': lon} + coords = {'latitude': lat, 'longitude': lon} coord_dict.setdefault(site + ' ' + facility, coords) return coord_dict + + +def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None, fill_value=0.0): + """ + This function calculates percentages of different fields of a dataset. + + Parameters + ---------- + ds : ACT Dataset + The ACT dataset to calculate the percentages on. + fields : list + A list of all the fields to use in the percentage calculations. + time : datetime + A single datetime to calculate percentages on if desired. Default + is None and all data will be included. + time_slice : tuple + A tuple of two datetimes to grab all data between those two datatimes. + Default is None and all data will be included. + threshold : float + Threshold in which anything below will be considered invalid. + Default is None. + fill_value : float + Fill value for invalid data. Only used if a threshold is provided. + + Returns + ------- + percentages : dict + A dictionary containing the fields provided and their corresponding + percentage that was calculated. + + """ + # Copy Dataset so we are not overriding the data. 
+ ds_percent = ds.copy() + + # Check if any incorrect values based on a threshold and replace with a fill + # value. + if threshold is not None: + for field in fields: + ds_percent[field] = ds_percent[field].where(ds_percent[field] > threshold, fill_value) + + # Raise warning if negative values present in a field. + if threshold is None: + for field in fields: + res = np.all(ds_percent[field].values >= 0.0) + if not res: + warnings.warn( + f"{field} contains negatives values, consider using a threshold.", + UserWarning, + ) + + # Select the data based on time, multiple times within a slice, or + # a sample of times per a timestep. + if time is not None: + ds_percent = ds_percent.sel(time=time) + elif time_slice is not None: + ds_percent = ds_percent.sel(time=slice(time_slice[0], time_slice[1])) + else: + warnings.warn( + "No time parameter used, calculating a mean for each field for the whole dataset.", + UserWarning, + ) + + # Calculate concentration percentage of each field in the air. + values = [ds_percent[field].mean(skipna=True).values for field in fields] + total = sum(values) + percent_values = [(value / total) * 100 for value in values] + + # Create a dictionary of the fields and their percentages. + percentages = {} + for i, j in zip(fields, percent_values): + percentages[i] = j + ds_percent.close() + return percentages + + +def convert_2d_to_1d( + ds, + parse=None, + variables=None, + keep_name_if_one=False, + use_dim_value_in_name=False, + dim_labels=None, +): + """ + Function to convert a single 2D variable into multiple 1D + variables using the second dimension in the new variable name. + + Parameters + ---------- + ds: xarray.dataset + Object containing 2D variable to be converted + parse: str or None + Coordinate dimension name to parse along. If set to None will + guess the non-time dimension is the parse dimension. + variables: str or list of str + Variable name or names to parse. 
If not provided will attempt to + parse all two dimensional variables with the parse coordinate + dimension. + keep_name_if_one: boolean + Option to not modify the variable name if the coordinate dimension + has only one value. Essentially converting a 2D (i.e. (100,1) + variable into a 1D variable (i.e. (100)). + use_dim_value_in_name: boolean + Option to use value from the coordinate dimension in new variable + name instead of indexing number. Will use the value prepended + to the units of the dimension. + dim_labels: str or list of str + Allows for use of custom label to append to end of variable names + + Returns + ------- + A new object copied from input object with the multi-dimensional + variable split into multiple single-dimensional variables. + + Example + ------- + # This will get the name of the coordinate dimension so it does not need to + # be hard coded. + >>> parse_dim = (list(set(list(ds.dims)) - set(['time'])))[0] + + # Now use the parse_dim name to parse the variable and return new object. + >>> new_ds = convert_2d_to_1d(ds, parse=parse_dim) + + """ + # If no parse dimension name given assume it is the one not equal to 'time' + if parse is None: + parse = (list(set(list(ds.dims)) - {'time'}))[0] + + new_ds = ds.copy() + + if variables is not None and isinstance(variables, str): + variables = [variables] + + if variables is None: + variables = list(new_ds.variables) + + if dim_labels is not None and isinstance(dim_labels, (str,)): + dim_labels = [dim_labels] + + # Check if we want to keep the names the same if the second dimension + # is of size one. 
+ num_dims = 1 + if keep_name_if_one: + num_dims = 2 + + parse_values = ds[parse].values + for var in variables: + if var == parse: + continue + # Check if the parse dimension is in the dimension tuple + if parse in new_ds[var].dims: + if len(new_ds[parse]) >= num_dims: + for i in range(0, new_ds.sizes[parse]): + if dim_labels is not None: + new_var_name = '_'.join([var, dim_labels[i]]) + elif use_dim_value_in_name: + level = str(parse_values[i]) + ds[parse].attrs['units'] + new_var_name = '_'.join([var, parse, level]) + else: + new_var_name = '_'.join([var, parse, str(i)]) + new_var = new_ds[var].copy() + new_ds[new_var_name] = new_var.isel(indexers={parse: i}) + + try: + ancillary_variables = new_ds[new_var_name].attrs['ancillary_variables'] + current_qc_var_name = ds.qcfilter.check_for_ancillary_qc( + var, add_if_missing=False + ) + if current_qc_var_name is not None: + ancillary_variables = ancillary_variables.replace( + current_qc_var_name, 'qc_' + new_var_name + ) + new_ds[new_var_name].attrs['ancillary_variables'] = ancillary_variables + except KeyError: + pass + + # Remove the old 2D variable after extracting + del new_ds[var] + + else: + # Keep the same name but remove the dimension equal to size 1 + new_ds[var] = new_ds[var].squeeze(dim=parse) + + return new_ds diff --git a/act/utils/datetime_utils.py b/act/utils/datetime_utils.py index e2e890bf9e..b65cfbec79 100644 --- a/act/utils/datetime_utils.py +++ b/act/utils/datetime_utils.py @@ -55,13 +55,15 @@ def numpy_to_arm_date(_date, returnTime=False): """ from dateutil.parser._parser import ParserError + from pandas._libs.tslibs.parsing import DateParseError + try: date = pd.to_datetime(str(_date)) if returnTime is False: date = date.strftime('%Y%m%d') else: date = date.strftime('%H%M%S') - except ParserError: + except (ParserError, DateParseError): date = None return date @@ -261,9 +263,12 @@ def adjust_timestamp(ds, time_bounds='time_bounds', align='left', offset=None): elif align == 'right': time_start = 
[np.datetime64(t[1]) for t in time_bounds] elif align == 'center': - time_start = [np.datetime64(t[0]) + (np.datetime64(t[0]) - np.datetime64(t[1])) / 2. for t in time_bounds] + time_start = [ + np.datetime64(t[0]) + (np.datetime64(t[0]) - np.datetime64(t[1])) / 2.0 + for t in time_bounds + ] else: - raise ValueError('Align should be set to one of [left, right, middle]') + raise ValueError('Align should be set to one of [left, right, center]') elif offset is not None: time = ds['time'].values @@ -271,6 +276,7 @@ def adjust_timestamp(ds, time_bounds='time_bounds', align='left', offset=None): else: raise ValueError('time_bounds variable is not available') + time_start = np.array(time_start).astype('datetime64[ns]') ds = ds.assign_coords({'time': time_start}) return ds diff --git a/act/utils/geo_utils.py b/act/utils/geo_utils.py index 27c2dac85c..b7361751e9 100644 --- a/act/utils/geo_utils.py +++ b/act/utils/geo_utils.py @@ -4,13 +4,10 @@ """ -import re from datetime import datetime, timedelta, timezone from pathlib import Path -import dateutil.parser import numpy as np -import pandas as pd import pytz from skyfield import almanac from skyfield.api import load, load_file, wgs84 diff --git a/act/utils/inst_utils.py b/act/utils/inst_utils.py index e48f95af0e..c97a40de87 100644 --- a/act/utils/inst_utils.py +++ b/act/utils/inst_utils.py @@ -140,6 +140,10 @@ def decode_present_weather(ds, variable=None, decoded_name=None): # Get data and fill nans with -9999 data = ds[variable] data = data.fillna(-9999) + data.values[data.values == 'ER'] = -9999 + + # Check if string codes instead of ints + data = data.astype('int64') # Get the weather type for each code wx_type = [weather[d] for d in data.values] diff --git a/act/utils/io_utils.py b/act/utils/io_utils.py index 839b7b8ae9..236f1beccb 100644 --- a/act/utils/io_utils.py +++ b/act/utils/io_utils.py @@ -1,20 +1,22 @@ +import act +import numpy as np +import pandas as pd from pathlib import Path import tarfile +from os import 
sep from os import PathLike from shutil import rmtree -import random -import string import gzip import shutil import tempfile -import numpy as np import types try: + import moviepy.editor as moviepy_editor import moviepy.video.io.ImageSequenceClip - from moviepy.video.io.VideoFileClip import VideoFileClip + MOVIEPY_AVAILABLE = True -except ImportError: +except (ImportError, RuntimeError): MOVIEPY_AVAILABLE = False @@ -23,8 +25,6 @@ def pack_tar(filenames, write_filename=None, write_directory=None, remove=False) Creates TAR file from list of filenames provided. Currently only works with all files existing in the same directory. - ... - Parameters ---------- filenames : str or list @@ -77,13 +77,12 @@ def pack_tar(filenames, write_filename=None, write_directory=None, remove=False) return str(write_filename) -def unpack_tar(tar_files, write_directory=None, temp_dir=False, randomize=True, - return_files=True, remove=False): +def unpack_tar( + tar_files, write_directory=None, temp_dir=False, randomize=True, return_files=True, remove=False +): """ Unpacks TAR file contents into provided base directory - ... - Parameters ---------- tar_files : str or list @@ -138,7 +137,7 @@ def unpack_tar(tar_files, write_directory=None, temp_dir=False, randomize=True, files.extend(result) tar.close() except tarfile.ReadError: - print(f"\nCould not extract files from {tar_file}") + print("Could not extract files from the tar_file") if return_files is False: files = str(out_dir) @@ -156,8 +155,6 @@ def cleanup_files(dirname=None, files=None): """ Cleans up files and directory possibly created from unpacking TAR files with unpack_tar() - ... - Parameters ---------- dirname : str, pathlib.Path, None @@ -210,8 +207,6 @@ def pack_gzip(filename, write_directory=None, remove=False): """ Creates a gunzip file from a filename path - ... 
- Parameters ---------- filename : str, pathlib.Path @@ -250,8 +245,6 @@ def unpack_gzip(filename, write_directory=None, remove=False): """ Extracts file from a gunzip file. - ... - Parameters ---------- filename : str, pathlib.Path @@ -291,8 +284,6 @@ def generate_movie(images, write_filename=None, fps=10, **kwargs): """ Creates a movie from a list of images or convert movie to different type - ... - Parameters ---------- images : list, PosixPath generator, path to a directory, single string/PosixPath to movie @@ -316,9 +307,7 @@ def generate_movie(images, write_filename=None, fps=10, **kwargs): """ if not MOVIEPY_AVAILABLE: - raise ImportError( - 'MoviePy needs to be installed on your system to make movies.' - ) + raise ImportError('MoviePy needs to be installed on your system to make movies.') # Set default movie name if write_filename is None: @@ -340,13 +329,227 @@ def generate_movie(images, write_filename=None, fps=10, **kwargs): write_directory.mkdir(parents=True, exist_ok=True) if IS_MOVIE: - with VideoFileClip(images) as clip: - # Not sure why but need to set the duration of the clip with subclip() to write - # the full file out. - clip = clip.subclip(t_start=clip.start, t_end=clip.end * clip.fps) - clip.write_videofile(str(write_filename), fps=fps, **kwargs) + with moviepy_editor.VideoFileClip(images) as clip: + # There can be an issue converting mpeg to other movie format because the + # duration parameter in the movie file is not set. So moviepy guesses and + # can get the duration wrong. This will find the correct duration (correct to 0.2 seconds) + # and set before writing. 
+ if Path(images).suffix == '.mpg': + import numpy as np + import warnings + from collections import deque + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=UserWarning) + desired_len = 3 + frame_sums = deque() + duration = 0.0 # Duration of movie in seconds + while True: + result = clip.get_frame(duration) + frame_sums.append(np.sum(result)) + if len(frame_sums) > desired_len: + frame_sums.popleft() + + if len(set(frame_sums)) == 1: + break + + duration += 0.1 + + clip = clip.set_start(0) + clip = clip.set_duration(duration) + clip = clip.set_end(duration) + clip.write_videofile(str(write_filename), **kwargs) + + else: + clip.write_videofile(str(write_filename), **kwargs) + else: clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(images, fps=fps) clip.write_videofile(str(write_filename), **kwargs) return str(write_filename) + + +def arm_standards_validator(file=None, dataset=None, verbose=True): + """ + ARM Data Validator (ADV) - Checks to ensure that ARM standards are being followed + in the files or dataset passed to it. Note, this includes a minimal set of + standards that it checks against + + Parameters + ---------- + file : str + Filename to check against ARM standards. Do not pass in both a file and dataset + dataset : xarray.DataSet + Xarray dataset of an already read in file. 
+ verbose : boolean + Defaults to print out errors in addition to returning a list of them + + Returns + ------- + err : list + List of errors in the data + + """ + + # Set up the error tracking list + err = [] + if file is not None and isinstance(file, str): + # Check file naming standards + if len(file.split(sep)[-1]) > 60.0: + err.append('Filename length exceeds 60 characters') + try: + f_obj = act.utils.data_utils.DatastreamParserARM(file) + except Exception as e: + print(e) + + if ( + (f_obj.site is None) + or (f_obj.datastream_class is None) + or (f_obj.level is None) + or (f_obj.facility is None) + or (f_obj.date is None) + or (f_obj.time is None) + or (f_obj.ext is None) + ): + err.append( + 'Filename does not follow the normal ARM convention: ' + + '(sss)(inst)(qualifier)(temporal)(Fn).(dl).(yyyymmdd).(hhmmss).nc' + ) + else: + if f_obj.level[0] not in ['0', 'a', 'b', 'c', 's', 'm']: + err.append(f_obj.level + ' is not a standard ARM data level') + + results = act.utils.arm_site_location_search( + site_code=f_obj.site, facility_code=f_obj.facility + ) + if len(results) == 0: + err.append('Site and facility are not ARM standard') + + # The ability to read a file from NetCDF to xarray will catch a lot of the + # problems with formatting. 
This would leave standard ARM checks + try: + if dataset is None and file is not None: + ds = act.io.read_arm_netcdf(file) + elif dataset is not None: + ds = dataset + else: + raise ValueError('File and dataset are both None') + except Exception as e: + return ['File is not in a standard format that is readable by xarray: ' + str(e)] + + # Review time variables for errors for conformance to standards + if 'time' not in list(ds.dims)[0]: + err.append('"time" is required to be the first dimension.') + + for c in list(ds.coords): + if c not in ds.dims: + err.append(c + ': Coordinate is not included in dimensions.') + + if any(np.isnan(ds['time'].values)): + err.append('Time must not include NaNs.') + + duplicates = sum(ds['time'].to_pandas().duplicated()) + if duplicates > 0: + err.append('Duplicate times present in the file') + + diff = ds['time'].diff('time') + idx = np.where(diff <= pd.Timedelta(0)) + if len(idx[0]) > 0: + err.append('Time is not in increasing order') + + if 'base_time' not in ds or 'time_offset' not in ds: + err.append('ARM requires base_time and time_offset variables.') + + # Check to make sure other coordinate variables don't have nans + # Also check to make sure coordinate variables are not decreasing + if len(list(ds.coords)) > 1: + for d in ds.coords: + if d == 'time': + continue + if any(np.isnan(ds[d].values)): + err.append('Coordinates must not include NaNs ' + d) + + diff = ds[d].diff(d) + idx = np.where(diff <= 0.0) + if len(idx[0]) > 0: + err.append(d + ' is not in increasing order') + if 'missing_value' in ds[d].encoding: + err.append(d + ' should not include missing value') + + # Verify that each variable has a long_name and units attributes + for v in ds: + if (len(ds[v].dims) > 0) and ('time' not in list(ds[v].dims)[0]) and ('bounds' not in v): + err.append(v + ': "time" is required to be the first dimension.') + if (ds[v].size == 1) and (len(ds[v].dims) > 0): + err.append(v + ': is not defined as a scalar.') + if 'long_name' not in 
ds[v].attrs: + err.append('Required attribute long_name not in ' + v) + else: + if not ds[v].attrs['long_name'][0].isupper(): + err.append(v + ' long_name attribute does not start with uppercase') + + if ( + ('qc_' not in v) + and (v not in ['time', 'time_offset', 'base_time', 'lat', 'lon', 'alt']) + and ('bounds' not in v) + ): + if ('missing_value' not in ds[v].encoding) and ('FillValue' not in ds[v].encoding): + err.append(v + ' does not include missing_value or FillValue attributes') + + # QC variable checks + if 'qc_' in v: + if v[3:] not in ds: + err.append('QC variable does not have a corresponding variable ' + v[3:]) + if 'ancillary_variables' not in ds[v[3:]].attrs: + err.append( + v[3:] + ' does not include ancillary_variable attribute pointing to ' + v + ) + if 'description' not in ds[v].attrs: + err.append(v + ' does not include description attribute') + if 'flag_method' not in ds[v].attrs: + err.append(v + ' does not include flag_method attribute') + + if (v not in ['base_time', 'time_offset']) and ('bounds' not in v): + if 'units' not in ds[v].attrs: + err.append('Required attribute units not in ' + v) + + # Lat/Lon/Alt Checks + if 'lat' not in ds: + err.append('ARM requires the latitude variable to be named lat') + else: + if 'standard_name' in ds['lat'].attrs: + if ds['lat'].attrs['standard_name'] != 'latitude': + err.append('ARM requires the lat standard_name to be latitude') + else: + err.append('"lat" variable does not have a standard_name attribute') + if 'lon' not in ds: + err.append('ARM requires the longitude variable to be named lon') + else: + if 'standard_name' in ds['lon'].attrs: + if ds['lon'].attrs['standard_name'] != 'longitude': + err.append('ARM requires the lon standard_name to be longitude') + else: + err.append('"long" variable does not have a standard_name attribute') + if 'alt' not in ds: + err.append('ARM requires the altitude variable to be named alt') + else: + if 'standard_name' in ds['alt'].attrs: + if 
ds['alt'].attrs['standard_name'] != 'altitude': + err.append('ARM requires the alt standard_name to be altitude') + else: + err.append('"alt" variable does not have a standard_name attribute') + + # Required global attributes + req_att = ['doi', 'sampling_interval', 'averaging_interval'] + for ra in req_att: + if ra not in ds.attrs: + err.append('Global attribute is missing: ' + ra) + + if verbose: + if len(err) > 0: + [print(e) for e in err] + else: + print('File is passing standards checks') + + return err diff --git a/continuous_integration/environment_actions.yml b/continuous_integration/environment_actions.yml index 0443eb0502..1351459a6f 100644 --- a/continuous_integration/environment_actions.yml +++ b/continuous_integration/environment_actions.yml @@ -24,11 +24,11 @@ dependencies: - coveralls - pandas - shapely - - pip - lazy_loader - cmweather - - arm-test-data - moviepy + - ruff + - pip - pip: - mpl2nc - metpy @@ -36,3 +36,4 @@ dependencies: - arm_pyart - icartt - aiohttp>=3.9.0b1 + - git+https://github.com/ARM-DOE/arm-test-data.git diff --git a/docs/environment_docs.yml b/docs/environment_docs.yml index a1c5c2a7e4..dd1089e231 100644 --- a/docs/environment_docs.yml +++ b/docs/environment_docs.yml @@ -18,14 +18,13 @@ dependencies: - scikit-posthocs - pip - shapely<1.8.3 - - arm-test-data - moviepy - pip: - mpl2nc - lazy_loader - metpy>=1.2 - arm-pyart - - sphinx + - sphinx<7.2 - sphinx_gallery - sphinx-copybutton - pydata-sphinx-theme<0.9.0 @@ -40,3 +39,4 @@ dependencies: - sphinxcontrib-htmlhelp==2.0.4 - sphinxcontrib-qthelp==1.0.6 - sphinxcontrib-serializinghtml==1.1.9 + - git+https://github.com/ARM-DOE/arm-test-data.git diff --git a/docs/source/blog_posts/2022/sail_campaign_arm_and_noaa.ipynb b/docs/source/blog_posts/2022/sail_campaign_arm_and_noaa.ipynb index ac8baad20b..dea412251c 100644 --- a/docs/source/blog_posts/2022/sail_campaign_arm_and_noaa.ipynb +++ b/docs/source/blog_posts/2022/sail_campaign_arm_and_noaa.ipynb @@ -76,9 +76,11 @@ "source": [ "# 
Download the NOAA KPS site files from 22:00 and 23:00\n", "result_22_kps = act.discovery.download_noaa_psl_data(\n", - " site='kps', instrument='Radar FMCW Moment', startdate='20220801', hour='22')\n", + " site='kps', instrument='Radar FMCW Moment', startdate='20220801', hour='22'\n", + ")\n", "result_23_kps = act.discovery.download_noaa_psl_data(\n", - " site='kps', instrument='Radar FMCW Moment', startdate='20220801', hour='23')" + " site='kps', instrument='Radar FMCW Moment', startdate='20220801', hour='23'\n", + ")" ] }, { @@ -94,8 +96,10 @@ "ds1_kps = act.io.noaapsl.read_psl_radar_fmcw_moment([result_22_kps[-1], result_23_kps[-1]])\n", "\n", "# Read in the parsivel files from NOAA's webpage.\n", - "url = ['https://downloads.psl.noaa.gov/psd2/data/realtime/DisdrometerParsivel/Stats/kps/2022/213/kps2221322_stats.txt',\n", - " 'https://downloads.psl.noaa.gov/psd2/data/realtime/DisdrometerParsivel/Stats/kps/2022/213/kps2221323_stats.txt']\n", + "url = [\n", + " 'https://downloads.psl.noaa.gov/psd2/data/realtime/DisdrometerParsivel/Stats/kps/2022/213/kps2221322_stats.txt',\n", + " 'https://downloads.psl.noaa.gov/psd2/data/realtime/DisdrometerParsivel/Stats/kps/2022/213/kps2221323_stats.txt',\n", + "]\n", "ds2_kps = act.io.noaapsl.read_psl_parsivel(url)" ] }, @@ -124,13 +128,23 @@ "# Create display object with both datasets\n", "display = act.plotting.TimeSeriesDisplay(\n", " {\"NOAA Site KPS PSL Radar FMCW\": kps_ds1, \"NOAA Site KPS Parsivel\": kps_ds2},\n", - " subplot_shape=(2,), figsize=(10, 10))\n", + " subplot_shape=(2,),\n", + " figsize=(10, 10),\n", + ")\n", "\n", "# Plot the subplots\n", - "display.plot('reflectivity_uncalibrated', dsname='NOAA Site KPS PSL Radar FMCW',\n", - " cmap='act_HomeyerRainbow', subplot_index=(0,))\n", - "display.plot('number_density_drops', dsname='NOAA Site KPS Parsivel',\n", - " cmap='act_HomeyerRainbow', subplot_index=(1,))\n", + "display.plot(\n", + " 'reflectivity_uncalibrated',\n", + " dsname='NOAA Site KPS PSL Radar 
FMCW',\n", + " cmap='act_HomeyerRainbow',\n", + " subplot_index=(0,),\n", + ")\n", + "display.plot(\n", + " 'number_density_drops',\n", + " dsname='NOAA Site KPS Parsivel',\n", + " cmap='act_HomeyerRainbow',\n", + " subplot_index=(1,),\n", + ")\n", "# Set limits\n", "display.axes[1].set_ylim([0, 10])\n", "plt.show()" @@ -154,8 +168,8 @@ "# Use arm username and token to retrieve files.\n", "# This is commented out as the files have already been downloaded.\n", "\n", - "#token = 'arm_token'\n", - "#username = 'arm_username'" + "# token = 'arm_token'\n", + "# username = 'arm_username'" ] }, { @@ -165,16 +179,16 @@ "metadata": {}, "outputs": [], "source": [ - "#Specify datastream and date range for KAZR data\n", + "# Specify datastream and date range for KAZR data\n", "ds_kazr = 'guckazrcfrgeM1.a1'\n", "startdate = '2022-08-01'\n", "enddate = '2022-08-01'\n", "\n", "# Data already retrieved, but showing code below on how to download the files.\n", - "#act.discovery.download_data(username, token, ds_kazr, startdate, enddate)\n", + "# act.discovery.download_data(username, token, ds_kazr, startdate, enddate)\n", "\n", "# Index last 2 files for the 22:00 and 23:00 timeframe.\n", - "kazr_files = glob.glob(''.join(['./',ds_kazr,'/*nc']))\n", + "kazr_files = glob.glob(''.join(['./', ds_kazr, '/*nc']))\n", "kazr_files[-2:]\n", "kazr_ds = act.io.arm.read_arm_netcdf(kazr_files[-2:])" ] @@ -186,16 +200,16 @@ "metadata": {}, "outputs": [], "source": [ - "#Specify datastream and date range for KAZR data\n", + "# Specify datastream and date range for KAZR data\n", "ds_ld = 'gucldM1.b1'\n", "startdate = '2022-08-01'\n", "enddate = '2022-08-01'\n", "\n", "# Data already retrieved, but showing code below on how to download the files.\n", - "#act.discovery.download_data(username, token, ds_ld, startdate, enddate)\n", + "# act.discovery.download_data(username, token, ds_ld, startdate, enddate)\n", "\n", "# Index last 2 files for the 22:00 and 23:00 timeframe.\n", - "ld_files = 
glob.glob(''.join(['./',ds_ld,'/*cdf']))\n", + "ld_files = glob.glob(''.join(['./', ds_ld, '/*cdf']))\n", "ld_ds = act.io.arm.read_arm_netcdf(ld_files[0])" ] }, @@ -233,22 +247,39 @@ "\n", "# Create a series display with all 4 datasets\n", "display = act.plotting.TimeSeriesDisplay(\n", - " {\"NOAA KPS PSL Radar FMCW\": kps_ds1, \"NOAA KPS Parsivel\": kps_ds2,\n", - " \"guckazrcfrgeM1.a1\": kazr_ds, 'gucldM1.b1': ld_ds},\n", - " subplot_shape=(2, 2), figsize=(22, 12))\n", + " {\n", + " \"NOAA KPS PSL Radar FMCW\": kps_ds1,\n", + " \"NOAA KPS Parsivel\": kps_ds2,\n", + " \"guckazrcfrgeM1.a1\": kazr_ds,\n", + " 'gucldM1.b1': ld_ds,\n", + " },\n", + " subplot_shape=(2, 2),\n", + " figsize=(22, 12),\n", + ")\n", "\n", "# Set custom 2 line title for space\n", "title = \"NOAA KPS PSL Radar FMCW\\n reflectivity_uncalibrated on 20220801\"\n", "\n", "# Plot the four subplots\n", - "display.plot('reflectivity_uncalibrated', dsname='NOAA KPS PSL Radar FMCW',\n", - " cmap='act_HomeyerRainbow', set_title=title, subplot_index=(0, 1))\n", - "display.plot('number_density_drops', dsname='NOAA KPS Parsivel',\n", - " cmap='act_HomeyerRainbow', subplot_index=(1, 1))\n", - "display.plot('reflectivity', dsname='guckazrcfrgeM1.a1',\n", - " cmap='act_HomeyerRainbow', subplot_index=(0, 0))\n", - "display.plot('number_density_drops', dsname='gucldM1.b1',\n", - " cmap='act_HomeyerRainbow', subplot_index=(1, 0))\n", + "display.plot(\n", + " 'reflectivity_uncalibrated',\n", + " dsname='NOAA KPS PSL Radar FMCW',\n", + " cmap='act_HomeyerRainbow',\n", + " set_title=title,\n", + " subplot_index=(0, 1),\n", + ")\n", + "display.plot(\n", + " 'number_density_drops',\n", + " dsname='NOAA KPS Parsivel',\n", + " cmap='act_HomeyerRainbow',\n", + " subplot_index=(1, 1),\n", + ")\n", + "display.plot(\n", + " 'reflectivity', dsname='guckazrcfrgeM1.a1', cmap='act_HomeyerRainbow', subplot_index=(0, 0)\n", + ")\n", + "display.plot(\n", + " 'number_density_drops', dsname='gucldM1.b1', cmap='act_HomeyerRainbow', 
subplot_index=(1, 0)\n", + ")\n", "\n", "# Update limits\n", "display.axes[1, 0].set_ylim([0, 10])\n", @@ -257,11 +288,11 @@ "\n", "\n", "display.axes[0, 0].set_ylim([0, 10000])\n", - "display.axes[0, 0].set_yticklabels(['0', '2', '4','6', '8', '10'])\n", + "display.axes[0, 0].set_yticklabels(['0', '2', '4', '6', '8', '10'])\n", "display.axes[0, 0].set_ylabel('km')\n", "\n", "display.axes[0, 1].set_ylim([0, 10000])\n", - "display.axes[0, 1].set_yticklabels(['0', '2', '4','6', '8', '10'])\n", + "display.axes[0, 1].set_yticklabels(['0', '2', '4', '6', '8', '10'])\n", "display.axes[0, 1].set_ylabel('km')\n", "\n", "plt.show()" @@ -282,13 +313,13 @@ "metadata": {}, "outputs": [], "source": [ - "#Specify datastream and date range for KAZR data\n", + "# Specify datastream and date range for KAZR data\n", "ds_dl = 'gucdlppiM1.b1'\n", "startdate = '2022-08-01'\n", "enddate = '2022-08-01'\n", "\n", - "#act.discovery.download_data(username, token, ds_dl, startdate, enddate)\n", - "dl_ppi_files = glob.glob(''.join(['./',ds_dl,'/*cdf']))" + "# act.discovery.download_data(username, token, ds_dl, startdate, enddate)\n", + "dl_ppi_files = glob.glob(''.join(['./', ds_dl, '/*cdf']))" ] }, { @@ -306,7 +337,8 @@ " ds = act.io.arm.read_arm_netcdf(file)\n", " # Calculate the winds for each gucdlppi dataset.\n", " wind_ds = act.retrievals.compute_winds_from_ppi(\n", - " ds, remove_all_missing=True, snr_threshold=0.008)\n", + " ds, remove_all_missing=True, snr_threshold=0.008\n", + " )\n", " multi_ds.append(wind_ds)\n", "\n", "wind_ds = xr.merge(multi_ds)" @@ -332,15 +364,20 @@ "source": [ "# Create a display object.\n", "display = act.plotting.TimeSeriesDisplay(\n", - " {\"GUC DLPPI Computed Winds over KAZR\": wind_ds,\n", - " \"guckazrcfrgeM1.a1\": kazr_ds,}, figsize=(20, 10))\n", + " {\n", + " \"GUC DLPPI Computed Winds over KAZR\": wind_ds,\n", + " \"guckazrcfrgeM1.a1\": kazr_ds,\n", + " },\n", + " figsize=(20, 10),\n", + ")\n", "\n", "# Plot the wind barbs overlayed on the KAZR 
reflectivity\n", - "display.plot('reflectivity', dsname='guckazrcfrgeM1.a1',\n", - " cmap='act_HomeyerRainbow', vmin=-20, vmax=30)\n", - "display.plot_barbs_from_spd_dir('wind_speed', 'wind_direction',\n", - " dsname='GUC DLPPI Computed Winds over KAZR',\n", - " invert_y_axis=False)\n", + "display.plot(\n", + " 'reflectivity', dsname='guckazrcfrgeM1.a1', cmap='act_HomeyerRainbow', vmin=-20, vmax=30\n", + ")\n", + "display.plot_barbs_from_spd_dir(\n", + " 'wind_speed', 'wind_direction', dsname='GUC DLPPI Computed Winds over KAZR', invert_y_axis=False\n", + ")\n", "\n", "# Update the x-limits to make sure both wind profiles are shown\n", "# Update the y-limits to show plotted winds\n", diff --git a/docs/source/blog_posts/2024/2024_Eclipse_SGP.ipynb b/docs/source/blog_posts/2024/2024_Eclipse_SGP.ipynb new file mode 100644 index 0000000000..064d4e47c3 --- /dev/null +++ b/docs/source/blog_posts/2024/2024_Eclipse_SGP.ipynb @@ -0,0 +1,392 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ba8afc29-c4e7-45d3-b161-df6130572294", + "metadata": {}, + "source": [ + "# April 8, 2024 Eclipse - Data from ARM's Southern Great Plains (SGP) Observatory" + ] + }, + { + "cell_type": "markdown", + "id": "3df58de5-b3d2-4946-bf31-abde776706c4", + "metadata": {}, + "source": [ + "The 2024 solar eclipse was an exciting event that saw roughly 90% coverage at ARM's SGP observatory. The large array of instrumentation at the site provides a unique view into the atmospheric impacts of the eclipse. 
This notebook gathers a few of the instruments together for a close look at the data during the event.\n", + "\n", + "## Step 1: Import our libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 316, + "id": "0ef46098-7491-4e17-875e-c724351a2b5b", + "metadata": {}, + "outputs": [], + "source": [ + "import act\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.colors as colors" + ] + }, + { + "cell_type": "markdown", + "id": "df2a97fd-d79f-4e15-a0f7-fbda69c091b3", + "metadata": {}, + "source": [ + "## Step 2: Download data!\n", + "If you have not used the ARM Live webservice before, please take a moment to check it out and login (https://adc.arm.gov/armlive/). Please insert your own username and token after you log in below.\n", + "\n", + "For this case, we're going to look at a few different instruments and download the data using ACT.\n", + "\n", + "### Instruments\n", + "#### Doppler Lidar (DL)\n", + "The doppler lidar measures radial velocity and can be used to observe vertical air motion.\n", + "\n", + "#### Solar and Infrared Radiation Station (SIRS)\n", + "SIRS provides continuous measurements of broadband shortwave and longwave irradiances.\n", + "\n", + "#### Surface Meteorological Station (MET)\n", + "The MET system measures basic surface meteorology like air temperature and wind speeds.\n", + "\n", + "#### Eddy Correlation Flux Measurement System (ECOR)\n", + "The eddy correlation (ECOR) flux measurement system provides half-hour measurements of the surface turbulent fluxes of momentum, sensible heat, latent heat, and carbon dioxide.\n", + "\n", + "#### Radar Wind Profiler (RWP)\n", + "The RWP transmits electromagnetic energy into the atmosphere in as many as five directions (four tilted in opposing vertical planes and one vertical) and measures the strength and frequency of backscattered energy. 
Calculation of the Doppler shift of the returned signal allows calculation of the atmospheric wind profile.\n", + "\n", + "#### Ultrafine Condensation Particle Counter (CPCUF)\n", + "Depending on the CPC type, typical CPC instruments can count particles with sizes that range from 3 to 3000 nanometers (ultrafine CPC) or from 10 to 3000 nanometers (fine CPC). The CPC is part of the aerosol observing system (AOS).\n", + "\n", + "#### Scanning Mobility Particle Sizer\n", + "The scanning mobility particle sizer (SMPS) is a particle size spectrometer that measures the aerosol number size distribution by sizing particles based on their electrical mobility diameter using a differential mobility analyzer (DMA) and by counting particles using a condensation particle counter (CPC). \n", + "\n", + "* Some descriptions taken from the www.arm.gov instrument webpages." + ] + }, + { + "cell_type": "code", + "execution_count": 351, + "id": "3b417b83-d59d-4e69-a6e3-762cae8455ae", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[DOWNLOADING] sgpdlfptE13.b1.20240408.190019.cdf\n", + "[DOWNLOADING] sgpdlfptE13.b1.20240408.180018.cdf\n", + "[DOWNLOADING] sgpdlfptE13.b1.20240408.200019.cdf\n", + "[DOWNLOADING] sgpdlfptE13.b1.20240408.170018.cdf\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Newsom, R., Shi, Y., & Krishnamurthy, R. Doppler Lidar (DLFPT). Atmospheric\n", + "Radiation Measurement (ARM) User Facility. https://doi.org/10.5439/1025185\n", + "\n", + "[DOWNLOADING] sgpecorsfE14.b1.20240408.000000.nc\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Sullivan, R., Cook, D., Shi, Y., Keeler, E., & Pal, S. Eddy Correlation Flux\n", + "Measurement System (ECORSF). Atmospheric Radiation Measurement (ARM) User\n", + "Facility. 
https://doi.org/10.5439/1494128\n", + "\n", + "[DOWNLOADING] sgpmetE13.b1.20240408.000000.cdf\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Kyrouac, J., Shi, Y., & Tuftedal, M. Surface Meteorological Instrumentation\n", + "(MET). Atmospheric Radiation Measurement (ARM) User Facility.\n", + "https://doi.org/10.5439/1786358\n", + "\n", + "[DOWNLOADING] sgpsirsE13.b1.20240408.000000.cdf\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Shi, Y., Sengupta, M., Xie, Y., Jaker, S., Yang, J., Reda, I., Andreas, A., &\n", + "Habte, A. Solar and Infrared Radiation Station for Downwelling and Upwelling\n", + "Radiation (SIRS). Atmospheric Radiation Measurement (ARM) User Facility.\n", + "https://doi.org/10.5439/1475460\n", + "\n", + "[DOWNLOADING] sgpaoscpcufE13.b1.20240408.000000.nc\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Koontz, A., Bullard, R., Andrews, E., Kuang, C., Hayes, C., Singh, A., & Salwen,\n", + "C. Condensation Particle Counter (AOSCPCUF). Atmospheric Radiation Measurement\n", + "(ARM) User Facility. https://doi.org/10.5439/1378933\n", + "\n", + "[DOWNLOADING] sgpaossmpsE13.b1.20240408.000000.nc\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Kuang, C., Singh, A., Howie, J., Salwen, C., & Hayes, C. Scanning mobility\n", + "particle sizer (AOSSMPS). Atmospheric Radiation Measurement (ARM) User Facility.\n", + "https://doi.org/10.5439/1476898\n", + "\n", + "[DOWNLOADING] sgp915rwpwindmomloC1.a0.20240408.000011.nc\n", + "\n", + "If you use these data to prepare a publication, please cite:\n", + "\n", + "Muradyan, P., & Ermold, B. Radar Wind Profiler (915RWPWINDMOMLO). Atmospheric\n", + "Radiation Measurement (ARM) User Facility. 
https://doi.org/10.5439/1573768\n", + "\n" + ] + } + ], + "source": [ + "# Set your username and token here!\n", + "username = 'YourUserName'\n", + "token = 'YourToken'\n", + "\n", + "# Set the datastream and start/enddates for the DL data\n", + "datastream = 'sgpdlfptE13.b1'\n", + "startdate = '2024-04-08T17:00:00'\n", + "enddate = '2024-04-08T21:00:00'\n", + "\n", + "# Use ACT to easily download the data. Watch for the data citation! Show some support\n", + "# for ARM's instrument experts and cite their data if you use it in a publication\n", + "dl_files = act.discovery.download_arm_data(username, token, datastream, startdate, enddate)\n", + "ds_dl = act.io.read_arm_netcdf(dl_files)\n", + "ds_dl = ds_dl.where(ds_dl.range < 1500, drop=True)\n", + "\n", + "# Now download the rest of the data!\n", + "startdate = '2024-04-08'\n", + "enddate = '2024-04-08'\n", + "datastream = 'sgpecorsfE14.b1'\n", + "ecor_files = act.discovery.download_arm_data(username, token, datastream, startdate, enddate)\n", + "ds_ecor = act.io.read_arm_netcdf(ecor_files)\n", + "\n", + "datastream = 'sgpmetE13.b1'\n", + "met_files = act.discovery.download_arm_data(username, token, datastream, startdate, enddate)\n", + "ds_met = act.io.read_arm_netcdf(met_files)\n", + "\n", + "datastream = 'sgpsirsE13.b1'\n", + "sirs_files = act.discovery.download_arm_data(username, token, datastream, startdate, enddate)\n", + "ds_sirs = act.io.read_arm_netcdf(sirs_files)\n", + "\n", + "datastream = 'sgpaoscpcufE13.b1'\n", + "cpcuf_files = act.discovery.download_arm_data(username, token, datastream, startdate, enddate)\n", + "ds_cpcuf = act.io.read_arm_netcdf(cpcuf_files)\n", + "\n", + "datastream = 'sgpaossmpsE13.b1'\n", + "smps_files = act.discovery.download_arm_data(username, token, datastream, startdate, enddate)\n", + "ds_smps = act.io.read_arm_netcdf(smps_files)\n", + "\n", + "datastream = 'sgp915rwpwindmomloC1.a0'\n", + "rwp_files = act.discovery.download_arm_data(username, token, datastream, startdate, 
enddate)\n", + "ds_rwp = act.io.read_arm_netcdf(rwp_files)" + ] + }, + { + "cell_type": "markdown", + "id": "6f4952e5-3eff-4165-bbf3-badc74710801", + "metadata": {}, + "source": [ + "### Step 2.1: Transform the RWP dataset for easy plotting" + ] + }, + { + "cell_type": "code", + "execution_count": 352, + "id": "43f0e132-9038-4c7c-bb6e-92b3e57e0f14", + "metadata": {}, + "outputs": [], + "source": [ + "# Select the first mode and beam to plot up\n", + "ds_rwp = ds_rwp.where(ds_rwp.beam_azimuth == 104)\n", + "ds_rwp = ds_rwp.where(ds_rwp.signal_to_noise_ratio > -10)" + ] + }, + { + "cell_type": "markdown", + "id": "58a7d0b3-d812-4dd6-a848-b9745c803658", + "metadata": {}, + "source": [ + "## Step 3: Visualize the Data\n", + "Let's plot out the data using ACT's built in functionality. It's easy to change variables around if you want to look at more data. Note that we are applying a day/night background and the vertical dashed line is local solar noon." + ] + }, + { + "cell_type": "code", + "execution_count": 353, + "id": "047dbd27-fc40-441d-af85-e36c9e5cb8cf", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Set up the plot\n", + "display = act.plotting.TimeSeriesDisplay(\n", + " {\n", + " 'DL': ds_dl,\n", + " 'SIRS': ds_sirs,\n", + " 'MET': ds_met,\n", + " 'ECOR': ds_ecor,\n", + " 'CEIL': ds_ceil,\n", + " 'CPCUF': ds_cpcuf,\n", + " 'SMPS': ds_smps,\n", + " 'RWP': ds_rwp,\n", + " },\n", + " figsize=(10, 18),\n", + " subplot_shape=(7,),\n", + ")\n", + "\n", + "# Plot the direct solar radiation\n", + "display.plot(\n", + " 'short_direct_normal',\n", + " dsname='SIRS',\n", + " subplot_index=(0,),\n", + " set_title='SIRS Short Direct Normal Irradiance',\n", + ")\n", + "display.day_night_background(dsname='SIRS', subplot_index=(0,))\n", + "display.set_xrng(\n", + " [np.datetime64('2024-04-08T17:00:00.0'), np.datetime64('2024-04-08T21:00:00.0')],\n", + " subplot_index=(0,),\n", + ")\n", + "\n", + "# Plot up the surface temperature\n", + "display.plot('temp_mean', dsname='MET', subplot_index=(1,), set_title='Surface Temperature')\n", + "display.set_yrng([16, 22], subplot_index=(1,))\n", + "display.day_night_background(dsname='MET', subplot_index=(1,))\n", + "display.axes[1].sharex(display.axes[0])\n", + "\n", + "# Surface fluxes\n", + "display.plot('latent_flux', dsname='ECOR', subplot_index=(2,))\n", + "display.plot(\n", + " 'sensible_heat_flux',\n", + " dsname='ECOR',\n", + " subplot_index=(2,),\n", + " set_title='ECOR Latent, Sensible Heat, and CO2 Flux',\n", + ")\n", + "display.day_night_background(dsname='ECOR', subplot_index=(2,))\n", + "display.axes[2].sharex(display.axes[0])\n", + "ecor_ax = display.axes[2].twinx()\n", + "ecor_ax.plot(ds_ecor['time'], ds_ecor['co2_flux'], 'purple')\n", + "ecor_ax.set_ylabel('CO2 Flux (' + ds_ecor['co2_flux'].attrs['units'] + ')')\n", + "\n", + "# Plot the DL data and adjust the limits\n", + "display.plot(\n", + " 'radial_velocity',\n", + " dsname='DL',\n", + " vmin=-4,\n", + " vmax=4,\n", + " cmap='balance',\n", + " subplot_index=(3,),\n", + " 
set_title='DL Radial Velocity',\n", + ")\n", + "display.set_yrng([50, 1500], subplot_index=(3,))\n", + "display.axes[3].sharex(display.axes[0])\n", + "\n", + "# display.plot('radial_velocity_snr', dsname='RWP', cb_friendly=True, subplot_index=(4,))\n", + "display.plot(\n", + " 'signal_to_noise_ratio',\n", + " dsname='RWP',\n", + " cb_friendly=True,\n", + " subplot_index=(4,),\n", + " set_title='RWP Signal-to-Noise Ratio',\n", + ")\n", + "display.set_yrng([50, 1500], subplot_index=(4,))\n", + "display.axes[4].sharex(display.axes[0])\n", + "\n", + "\n", + "# Ultrafine CPC\n", + "display.plot('concentration', dsname='CPCUF', subplot_index=(5,), set_title='CPCUF Concentration')\n", + "display.day_night_background(dsname='CPCUF', subplot_index=(5,))\n", + "display.axes[5].sharex(display.axes[0])\n", + "\n", + "# SMPS\n", + "display.plot(\n", + " 'dN_dlogDp',\n", + " dsname='SMPS',\n", + " subplot_index=(6,),\n", + " cmap='nipy_spectral',\n", + " set_title='SMPS Size Distribution',\n", + " norm=colors.LogNorm(vmin=100.0, vmax=20000),\n", + ")\n", + "display.day_night_background(dsname='SMPS', subplot_index=(6,))\n", + "display.axes[6].sharex(display.axes[0])\n", + "display.axes[6].set_yscale('log')\n", + "display.axes[6].set_ylim([10, 500])\n", + "\n", + "plt.subplots_adjust(wspace=0, hspace=0.3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "898a3fb5-ae07-4bed-a16d-bc724fa60ba8", + "metadata": {}, + "source": [ + "# Summary\n", + "\n", + "As expected, the direct solar radiation decreases during the eclipse to near 0 (panel 1). 
Not shown but visible in the [TSI video](https://plot.adc.arm.gov/PLOTS/sgp/sgptsiskycover/20240408/sgptsiskycoverC1.b1.tsimovie.20240408.mp4) is that there was a layer of clouds which impacted the radiation measurements as well.\n", + "\n", + "With less direct radiation reaching the surface ([10-meter IRT Surface IR Temperature](https://plot.adc.arm.gov/PLOTS/sgp/sgpirt10m/20240408/sgpirt10mC1.b1.irt_10m.20240408.png)), the surface temperatures started to decrease (panel 2), reaching a minimum shortly after totality.\n", + "\n", + "There was also a noticeable drop in heat fluxes (panel 3) and a corresponding plant response with a drop in CO2 uptake (purple line) as noted by the mentor Ryan Sullivan.\n", + "\n", + "The doppler lidar (DL) measures radial velocity and there is a clear decrease in the thermals/vertical motion (panel 4). This corresponds with a lowering of the planetary boundary layer height as observed in the radar wind profiler (RWP) data (panel 5).\n", + "\n", + "Data from the ultrafine condensation particle counter (CPCUF; panel 6) and the scanning mobility particle sizer (SMPS; panel 7) were included to see if there was a burst in new particle formation (NPF) once photochemical activity started back up. While there is a second peak in the size distribution post eclipse, it cannot be concluded that it was related. The mentor, Ashish Singh, had noted \"In springtime at SGP, there's a recurring and/or a particle burst, which appears to be regional (based on published papers). Typically, it initiates around 16:00 UTC or later consistently, coinciding with the timeframe of a solar eclipse (17:30-20:00 UTC and beyond) on April 08. 
Analyzing the data from nSMPS and SMPS size distribution timeseries, it's unclear whether the fluctuations in solar insolation during the eclipse are directly linked to the onset of the burst or formation event, especially concerning particles below 50 nm.\"\n", + "\n", + "There's a lot of interesting data to explore from the eclipse. You can learn more about ARM's instruments and data by visiting [ARM's website.](https://arm.gov/)\n", + "\n", + "Thank you to the ARM staff that helped contribute ideas and feedback to this blog post.\n", + "* Max Grover\n", + "* Ryan Sullivan\n", + "* Ashish Singh\n", + "* Janek Uin\n", + "* Paytsar Muradyan\n", + "* Jenni Kyrouac" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f5dc51b-c3af-4fdb-9e34-91f52582cedc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "author": "Adam Theisen", + "date": "2024-04-15", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "tags": "eclipse,SGP,DL,RWP,MET,SIRS,ECOR,SMPS,CPCUF", + "title": "Exploration of data from the 2024 eclipse at SGP" + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/conf.py b/docs/source/conf.py index 8a2ac05536..d04bc24fb6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -91,7 +91,7 @@ # |version| and |release|, also used in various other places throughout the # built documents. # -import act +import act # noqa # The short X.Y version. 
version = act.__version__ @@ -160,16 +160,16 @@ 'blog': [ 'search-field.html', 'sidebar-nav-bs.html', - 'postcard.html', - 'recentposts.html', - 'archives.html', + 'ablog/postcard.html', + 'ablog/recentposts.html', + 'ablog/archives.html', ], 'blog_posts/*/*': [ 'search-field.html', 'sidebar-nav-bs.html', - 'postcard.html', - 'recentposts.html', - 'archives.html', + 'ablog/postcard.html', + 'ablog/recentposts.html', + 'ablog/archives.html', ], } diff --git a/docs/source/userguide/usage.rst b/docs/source/userguide/usage.rst index 202cc22a18..1be2d2bea7 100644 --- a/docs/source/userguide/usage.rst +++ b/docs/source/userguide/usage.rst @@ -10,7 +10,7 @@ Start by importing Atmospheric data Community Toolkit. The Atmospheric data Community Toolkit comes with modules for loading ARM datasets. The main dataset object that is used in ACT is based off of an extension of -the `xarray.Dataset` +the `xarray.Dataset `_ object. In particular ACT adds a DatasetAccessor that stores the additional properties required by act in the .act property of a Dataset. For example, if we want to access the name of the datastream, we simply do: @@ -49,8 +49,14 @@ In addition, the figure and axes handles of each subplot are stored in the be used to modify the properties of each plot if the user desires further customization. -Finally, ACT is able to download data from the ARM archive given that a -user's username and token are provided. +Finally, ACT is able to download data from the ARM given that a +user's username and token are provided. To obtain an ARM username and token, +you must first create an `ARM account `_ +by providing an email and following the setup instructions. Once you +have an ARM account, you can receive your token associated with your account +by logging in `here `_ and clicking on the login +to receive your account access token button. Once you have your username and +token, you can provide them in the example code block below: .. 
code-block:: python diff --git a/examples/discovery/plot_improve.py b/examples/discovery/plot_improve.py new file mode 100644 index 0000000000..e2a90cb3d6 --- /dev/null +++ b/examples/discovery/plot_improve.py @@ -0,0 +1,30 @@ +""" +IMPROVE Data +----------- + +This example shows how to get IMPROVE data for the +system located at ARM's Southern Great Plains site. + +""" + +import act +import matplotlib.pyplot as plt + +# Pull the data using the site_id from IMPROVE +# https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=244 +ds = act.discovery.get_improve_data(site_id='244', start_date='1/1/2023', end_date='12/31/2023') + +# Remove all data that's set to the FillValue +ds = ds.where(ds['aluminum_fine'] != ds['aluminum_fine'].attrs['_FillValue']) + +display = act.plotting.TimeSeriesDisplay(ds, figsize=(10, 6)) +display.plot('aluminum_fine') + +# Print out the known problems documented by IMPROVE +print(ds.attrs['site_problems']) + +# Write out the data to netCDF and csv +ds.to_netcdf('./sgpimprove.20230101.nc') +ds.to_dataframe().to_csv('sgpimprove.20230101.csv') + +plt.show() diff --git a/examples/discovery/plot_neon.py b/examples/discovery/plot_neon.py index 038fa62778..293e0d70b1 100644 --- a/examples/discovery/plot_neon.py +++ b/examples/discovery/plot_neon.py @@ -11,7 +11,6 @@ import os import glob import matplotlib.pyplot as plt -import numpy as np import act @@ -21,7 +20,9 @@ if token is not None and len(token) > 0: # Download ARM data if a username/token are set - files = act.discovery.download_arm_data(username, token, 'nsametC1.b1', '2022-10-01', '2022-10-07') + files = act.discovery.download_arm_data( + username, token, 'nsametC1.b1', '2022-10-01', '2022-10-07' + ) ds = act.io.arm.read_arm_netcdf(files) # Download NEON Data @@ -33,19 +34,23 @@ # A number of files are downloaded and further explained in the readme file that's downloaded. 
# These are the files we will need for reading 1 minute NEON data - file = glob.glob(os.path.join( - '.', - 'BARR_DP1.00002.001', - 'NEON.D18.BARR.DP1.00002.001.000.010.001.SAAT_1min.2022-10.expanded.*.csv', - )) - variable_file = glob.glob(os.path.join( - '.', 'BARR_DP1.00002.001', 'NEON.D18.BARR.DP1.00002.001.variables.*.csv' - )) - position_file = glob.glob(os.path.join( - '.', - 'BARR_DP1.00002.001', - 'NEON.D18.BARR.DP1.00002.001.sensor_positions.*.csv', - )) + file = glob.glob( + os.path.join( + '.', + 'BARR_DP1.00002.001', + 'NEON.D18.BARR.DP1.00002.001.000.010.001.SAAT_1min.2022-10.expanded.*.csv', + ) + ) + variable_file = glob.glob( + os.path.join('.', 'BARR_DP1.00002.001', 'NEON.D18.BARR.DP1.00002.001.variables.*.csv') + ) + position_file = glob.glob( + os.path.join( + '.', + 'BARR_DP1.00002.001', + 'NEON.D18.BARR.DP1.00002.001.sensor_positions.*.csv', + ) + ) # Read in the data using the ACT reader, passing with it the variable and position files # for added information in the dataset ds2 = act.io.read_neon_csv(file, variable_files=variable_file, position_files=position_file) diff --git a/examples/io/plot_convert_ameriflux.py b/examples/io/plot_convert_ameriflux.py new file mode 100644 index 0000000000..4517d6400d --- /dev/null +++ b/examples/io/plot_convert_ameriflux.py @@ -0,0 +1,123 @@ +""" +Convert Data to AmeriFlux Format +-------------------------------- + +This script shows how to convert ARM data to AmeriFlux format +using an ACT function, and write it out to csv. 
More information +on AmeriFlux and their file formats and naming conventions can be +found here: https://ameriflux.lbl.gov/ + +Author: Adam Theisen + +""" + +import act +import glob +import xarray as xr +import os +import matplotlib.pyplot as plt + +# Read in the ECOR data +files = glob.glob(act.tests.sample_files.EXAMPLE_ECORSF_E39) +ds_ecor = act.io.arm.read_arm_netcdf(files) + +# The ECOR time stamp is at the end of the averaging period so adjusting +# it to be consistent with the other systems +ds_ecor = act.utils.datetime_utils.adjust_timestamp(ds_ecor) + +# Clean up and QC the data based on embedded QC and ARM DQRs +ds_ecor.clean.cleanup() +ds_ecor = act.qc.arm.add_dqr_to_qc(ds_ecor) +ds_ecor.qcfilter.datafilter( + del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] +) + +# Then we do this same thing for the other instruments +# SEBS +files = glob.glob(act.tests.sample_files.EXAMPLE_SEBS_E39) +ds_sebs = act.io.arm.read_arm_netcdf(files) +# SEBS does not have a time_bounds variable so we have to manually adjust it +ds_sebs = act.utils.datetime_utils.adjust_timestamp(ds_sebs, offset=-30 * 60) +ds_sebs.clean.cleanup() +ds_sebs = act.qc.arm.add_dqr_to_qc(ds_sebs) +ds_sebs.qcfilter.datafilter( + del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] +) + +# STAMP +files = glob.glob(act.tests.sample_files.EXAMPLE_STAMP_E39) +ds_stamp = act.io.arm.read_arm_netcdf(files) +ds_stamp.clean.cleanup() +ds_stamp = act.qc.arm.add_dqr_to_qc(ds_stamp) +ds_stamp.qcfilter.datafilter( + del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] +) + +# STAMP Precipitation +files = glob.glob(act.tests.sample_files.EXAMPLE_STAMPPCP_E39) +ds_stamppcp = act.io.arm.read_arm_netcdf(files) +ds_stamppcp.clean.cleanup() +ds_stamppcp = act.qc.arm.add_dqr_to_qc(ds_stamppcp) +ds_stamppcp.qcfilter.datafilter( + del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] +) +# These are 
minute data so we need to resample and sum up to 30 minutes +ds_stamppcp = ds_stamppcp['precip'].resample(time='30Min').sum() + +# AMC +files = glob.glob(act.tests.sample_files.EXAMPLE_AMC_E39) +ds_amc = act.io.arm.read_arm_netcdf(files) +ds_amc.clean.cleanup() +ds_amc = act.qc.arm.add_dqr_to_qc(ds_amc) +ds_amc.qcfilter.datafilter( + del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] +) + +# Merge these datasets together +ds = xr.merge([ds_ecor, ds_sebs, ds_stamp, ds_stamppcp, ds_amc], compat='override') + +# Convert the data to AmeriFlux format and get a DataFrame in return +# Note, this does not return an xarray Dataset as it's assumed the data +# will just be written out to csv format. +df = act.io.ameriflux.convert_to_ameriflux(ds) + +# Write the data out to file +site = 'US-A14' +directory = './' + site + 'mergedflux/' +if not os.path.exists(directory): + os.makedirs(directory) + +# Following the AmeriFlux file naming convention +filename = ( + site + + '_HH_' + + str(df['TIMESTAMP_START'].iloc[0]) + + '_' + + str(df['TIMESTAMP_END'].iloc[-1]) + + '.csv' +) +df.to_csv(directory + filename, index=False) + + +# Plot up merged data for visualization +display = act.plotting.TimeSeriesDisplay(ds, subplot_shape=(4,), figsize=(12, 10)) +display.plot('latent_flux', subplot_index=(0,)) +display.plot('co2_flux', subplot_index=(0,)) +display.plot('sensible_heat_flux', subplot_index=(0,)) +display.day_night_background(subplot_index=(0,)) + +display.plot('precip', subplot_index=(1,)) +display.day_night_background(subplot_index=(1,)) + +display.plot('surface_soil_heat_flux_1', subplot_index=(2,)) +display.plot('surface_soil_heat_flux_2', subplot_index=(2,)) +display.plot('surface_soil_heat_flux_3', subplot_index=(2,)) +display.day_night_background(subplot_index=(2,)) + +display.plot('soil_specific_water_content_west', subplot_index=(3,)) +display.axes[3].set_ylim(display.axes[3].get_ylim()[::-1]) + 
+display.day_night_background(subplot_index=(3,)) + +plt.subplots_adjust(hspace=0.35) +plt.show() diff --git a/examples/io/plot_create_arm_ds.py b/examples/io/plot_create_arm_ds.py index 9840680405..fc2fb1332d 100644 --- a/examples/io/plot_create_arm_ds.py +++ b/examples/io/plot_create_arm_ds.py @@ -37,7 +37,7 @@ 'command_line': 'python plot_create_arm_ds.py', 'process_version': '1.2.3', 'history': 'Processed with Jupyter Workbench', - 'random': '1234253sdgfadf' + 'random': '1234253sdgfadf', } for a in atts: if a in ds.attrs: diff --git a/examples/io/plot_hysplit.py b/examples/io/plot_hysplit.py new file mode 100644 index 0000000000..6e53009311 --- /dev/null +++ b/examples/io/plot_hysplit.py @@ -0,0 +1,23 @@ +""" +Read and plot a HYSPLIT trajectory file from a HYSPLIT run. +----------------------------------------------------------- + +This example shows how to read and plot a backtrajectory calculated by the NOAA +HYSPLIT model over Houston. + +Author: Robert Jackson +""" + +import act +import matplotlib.pyplot as plt + +from arm_test_data import DATASETS + +# Load the data +filename = DATASETS.fetch('houstonaug300.0summer2010080100') +ds = act.io.read_hysplit(filename) + +# Use the GeographicPlotDisplay object to make the plot +disp = act.plotting.GeographicPlotDisplay(ds) +disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND']) +plt.show() diff --git a/examples/io/plot_icartt.py b/examples/io/plot_icartt.py index 4e7bc22bc9..df76f95c24 100644 --- a/examples/io/plot_icartt.py +++ b/examples/io/plot_icartt.py @@ -11,7 +11,6 @@ from arm_test_data import DATASETS import matplotlib.pyplot as plt -import numpy as np import act from act.io.icartt import read_icartt diff --git a/examples/io/plot_sodar.py b/examples/io/plot_sodar.py index 0b6e0898fd..a240d5ff18 100644 --- a/examples/io/plot_sodar.py +++ b/examples/io/plot_sodar.py @@ -23,11 +23,11 @@ # Create an ACT TimeSeriesDisplay. 
display = act.plotting.TimeSeriesDisplay( - {'Shear, Wind Direction, and Speed at ANL ATMOS': ds}, - subplot_shape=(1,), figsize=(15, 5)) + {'Shear, Wind Direction, and Speed at ANL ATMOS': ds}, subplot_shape=(1,), figsize=(15, 5) +) # Plot shear with a wind barb overlay, while using a color vision # deficiency (CVD) colormap. -display.plot('shear', subplot_index=(0,), cb_friendly=True) +display.plot('shear', subplot_index=(0,), cvd_friendly=True) display.plot_barbs_from_spd_dir('speed', 'dir') plt.show() diff --git a/examples/io/plot_surfrad.py b/examples/io/plot_surfrad.py index 473185f24d..538700cc5d 100644 --- a/examples/io/plot_surfrad.py +++ b/examples/io/plot_surfrad.py @@ -19,7 +19,7 @@ # But it's easy enough to read form the URLs as well url = [ 'https://gml.noaa.gov/aftp/data/radiation/surfrad/Boulder_CO/2023/tbl23008.dat', - 'https://gml.noaa.gov/aftp/data/radiation/surfrad/Boulder_CO/2023/tbl23009.dat' + 'https://gml.noaa.gov/aftp/data/radiation/surfrad/Boulder_CO/2023/tbl23009.dat', ] ds = act.io.read_surfrad(url) diff --git a/examples/plotting/plot_ceil.py b/examples/plotting/plot_ceil.py index b24f810203..626b075d13 100644 --- a/examples/plotting/plot_ceil.py +++ b/examples/plotting/plot_ceil.py @@ -25,14 +25,16 @@ ceil_ds = act.io.arm.read_arm_netcdf(filename_ceil, engine='netcdf4') else: # Example to show how easy it is to download ARM data if a username/token are set - results = act.discovery.download_arm_data(username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19') + results = act.discovery.download_arm_data( + username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19' + ) ceil_ds = act.io.arm.read_arm_netcdf(results) # Adjust ceilometer data for plotting ceil_ds = act.corrections.ceil.correct_ceil(ceil_ds, -9999.0) -# Plot up ceilometer backscatter using HomeyerRainbow cb friendly colormap +# Plot up ceilometer backscatter using HomeyerRainbow CVD friendly colormap # The same could be done with keyword 'cmap='HomeyerRainbow' display = 
act.plotting.TimeSeriesDisplay(ceil_ds, subplot_shape=(1,), figsize=(15, 5)) -display.plot('backscatter', subplot_index=(0,), cb_friendly=True) +display.plot('backscatter', subplot_index=(0,), cvd_friendly=True) plt.show() diff --git a/examples/plotting/plot_contour.py b/examples/plotting/plot_contour.py index fba82cecfd..7a63750a01 100644 --- a/examples/plotting/plot_contour.py +++ b/examples/plotting/plot_contour.py @@ -9,26 +9,27 @@ """ -import glob from arm_test_data import DATASETS import matplotlib.pyplot as plt import act -met_contour_list = ['sgpmetE15.b1.20190508.000000.cdf', - 'sgpmetE31.b1.20190508.000000.cdf', - 'sgpmetE32.b1.20190508.000000.cdf', - 'sgpmetE33.b1.20190508.000000.cdf', - 'sgpmetE34.b1.20190508.000000.cdf', - 'sgpmetE35.b1.20190508.000000.cdf', - 'sgpmetE36.b1.20190508.000000.cdf', - 'sgpmetE37.b1.20190508.000000.cdf', - 'sgpmetE38.b1.20190508.000000.cdf', - 'sgpmetE39.b1.20190508.000000.cdf', - 'sgpmetE40.b1.20190508.000000.cdf', - 'sgpmetE9.b1.20190508.000000.cdf', - 'sgpmetE13.b1.20190508.000000.cdf'] +met_contour_list = [ + 'sgpmetE15.b1.20190508.000000.cdf', + 'sgpmetE31.b1.20190508.000000.cdf', + 'sgpmetE32.b1.20190508.000000.cdf', + 'sgpmetE33.b1.20190508.000000.cdf', + 'sgpmetE34.b1.20190508.000000.cdf', + 'sgpmetE35.b1.20190508.000000.cdf', + 'sgpmetE36.b1.20190508.000000.cdf', + 'sgpmetE37.b1.20190508.000000.cdf', + 'sgpmetE38.b1.20190508.000000.cdf', + 'sgpmetE39.b1.20190508.000000.cdf', + 'sgpmetE40.b1.20190508.000000.cdf', + 'sgpmetE9.b1.20190508.000000.cdf', + 'sgpmetE13.b1.20190508.000000.cdf', +] met_contour_filenames = [DATASETS.fetch(file) for file in met_contour_list] diff --git a/examples/plotting/plot_data_rose.py b/examples/plotting/plot_data_rose.py index 16166d83ee..a0f6361091 100644 --- a/examples/plotting/plot_data_rose.py +++ b/examples/plotting/plot_data_rose.py @@ -14,19 +14,20 @@ """ from arm_test_data import DATASETS -import numpy as np from matplotlib import pyplot as plt import act # Read in some data with 
wind speed/direction in the file -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] met_filenames = [DATASETS.fetch(file) for file in met_wildcard_list] ds = act.io.arm.read_arm_netcdf(met_filenames) diff --git a/examples/plotting/plot_days.py b/examples/plotting/plot_days.py index 70af4b7c1f..bc7b790f33 100644 --- a/examples/plotting/plot_days.py +++ b/examples/plotting/plot_days.py @@ -10,26 +10,34 @@ from arm_test_data import DATASETS import matplotlib.pyplot as plt -import numpy as np import act # Read in the sample MET data -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] met_filenames = [DATASETS.fetch(file) for file in met_wildcard_list] ds = act.io.arm.read_arm_netcdf(met_filenames) # Create Plot Display display = act.plotting.WindRoseDisplay(ds, figsize=(15, 15), subplot_shape=(3, 3)) groupby = display.group_by('day') -groupby.plot_group('plot_data', None, dir_field='wdir_vec_mean', spd_field='wspd_vec_mean', - 
data_field='temp_mean', num_dirs=12, plot_type='line') +groupby.plot_group( + 'plot_data', + None, + dir_field='wdir_vec_mean', + spd_field='wspd_vec_mean', + data_field='temp_mean', + num_dirs=12, + plot_type='line', +) # Set theta tick markers for each axis inside display to be inside the polar axes for i in range(3): diff --git a/examples/plotting/plot_daytime_averages.py b/examples/plotting/plot_daytime_averages.py index 117bf29a5e..ef77de0179 100644 --- a/examples/plotting/plot_daytime_averages.py +++ b/examples/plotting/plot_daytime_averages.py @@ -14,13 +14,15 @@ import act # Read in the sample MET data -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] met_filenames = [DATASETS.fetch(file) for file in met_wildcard_list] ds = act.io.arm.read_arm_netcdf(met_filenames) @@ -31,7 +33,7 @@ ds = ds.where(ds['sun_variable'] == 1) # Take daily mean using xarray features -ds = ds.resample(time='1d', skipna=True, keep_attrs=True).mean() +ds = ds.resample(time='1d', skipna=True).mean() # Creat Plot Display display = act.plotting.TimeSeriesDisplay(ds, figsize=(15, 10)) diff --git a/examples/plotting/plot_enhanced_skewt.py b/examples/plotting/plot_enhanced_skewt.py index 5223c6c3e4..f3258d5b64 100644 --- a/examples/plotting/plot_enhanced_skewt.py +++ b/examples/plotting/plot_enhanced_skewt.py @@ -9,13 +9,9 @@ """ -import glob from arm_test_data import DATASETS from matplotlib import pyplot as plt -import metpy -import numpy as np -import xarray as xr 
import act diff --git a/examples/plotting/plot_examples.py b/examples/plotting/plot_examples.py index 96a8fe21e6..c9cee8e55b 100644 --- a/examples/plotting/plot_examples.py +++ b/examples/plotting/plot_examples.py @@ -9,7 +9,6 @@ from arm_test_data import DATASETS import matplotlib.pyplot as plt -import xarray as xr import act diff --git a/examples/plotting/plot_heatmap.py b/examples/plotting/plot_heatmap.py index 86238424c8..9e15a1ceab 100644 --- a/examples/plotting/plot_heatmap.py +++ b/examples/plotting/plot_heatmap.py @@ -15,13 +15,15 @@ import act # Read MET data in from the test data area -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] met_filenames = [DATASETS.fetch(file) for file in met_wildcard_list] ds = act.io.arm.read_arm_netcdf(met_filenames) @@ -31,8 +33,9 @@ # Plot a heatmap and scatter plot up of RH vs Temperature # Set the number of bins for the x-axis to 25 and y to 20 title = 'Heatmap of MET RH vs Temp' -display.plot_heatmap('temp_mean', 'rh_mean', x_bins=25, y_bins=20, - threshold=0, subplot_index=(0, 0), set_title=title) +display.plot_heatmap( + 'temp_mean', 'rh_mean', x_bins=25, y_bins=20, threshold=0, subplot_index=(0, 0), set_title=title +) # Plot the scatter plot and shade by wind_speed title = 'Scatter plot of MET RH vs Temp' diff --git a/examples/plotting/plot_hist_kwargs.py b/examples/plotting/plot_hist_kwargs.py index 063e19d280..9c153233d5 100644 --- a/examples/plotting/plot_hist_kwargs.py +++ 
b/examples/plotting/plot_hist_kwargs.py @@ -20,6 +20,5 @@ # Plot data hist_kwargs = {'range': (-10, 10)} histdisplay = act.plotting.DistributionDisplay(met_ds) -histdisplay.plot_stacked_bar('temp_mean', bins=np.arange(-40, 40, 5), - hist_kwargs=hist_kwargs) +histdisplay.plot_stacked_bar('temp_mean', bins=np.arange(-40, 40, 5), hist_kwargs=hist_kwargs) plt.show() diff --git a/examples/plotting/plot_multiple_column.py b/examples/plotting/plot_multiple_column.py index 1836277b3e..066f52d6e8 100644 --- a/examples/plotting/plot_multiple_column.py +++ b/examples/plotting/plot_multiple_column.py @@ -14,13 +14,15 @@ import act # Read in MET files. -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] met_filenames = [DATASETS.fetch(file) for file in met_wildcard_list] met_ds = act.io.arm.read_arm_netcdf(met_filenames) diff --git a/examples/plotting/plot_pie_chart.py b/examples/plotting/plot_pie_chart.py new file mode 100644 index 0000000000..c2f0a3a6c4 --- /dev/null +++ b/examples/plotting/plot_pie_chart.py @@ -0,0 +1,52 @@ +""" +Calculate and View Aerosol Percentages +-------------------------------------- + +Calculate the percentages of different aerosols in a Aerosol +Chemical Speciation (AOS) monitor dataset and view the percentages +in a pie chart. 
+ +Written: Zach Sherman + +""" + +from arm_test_data import DATASETS +import matplotlib.pyplot as plt + +import act +from act.io.arm import read_arm_netcdf + +# Read an ARM AOS dataset +filename = DATASETS.fetch('sgpaosacsmE13.b2.20230420.000109.nc') +ds = read_arm_netcdf(filename) + +# Let us print out the fields in the dataset and see what it contains. +print(ds.data_vars.keys()) + +# Knowing what fields the dataset contains, let's create a list of fields +# to use in the plot. + +fields = ['sulfate', 'ammonium', 'nitrate', 'chloride'] + +# We also want to provide some keyword arguments to avoid invalid data such +# as negative values. +threshold = 0.0 +fill_value = 0.0 + +# Create a DistributionDisplay object to compare fields +display = act.plotting.DistributionDisplay(ds) + +# We can set one of the slices to explode and give it a nice shadow. +explode = (0, 0.1, 0, 0) +shadow = True + +# Create a pie chart using the fields list. The percentages of the +# fields will be calculated using act.utils.calculate_percentages. 
+display.plot_pie_chart( + fields, + threshold=threshold, + fill_value=fill_value, + explode=explode, + shadow=True, +) +plt.show() diff --git a/examples/plotting/plot_presentweathercode.py b/examples/plotting/plot_presentweathercode.py index dbec1e23d2..c30d72ed1f 100644 --- a/examples/plotting/plot_presentweathercode.py +++ b/examples/plotting/plot_presentweathercode.py @@ -9,7 +9,6 @@ """ from arm_test_data import DATASETS -import numpy as np from matplotlib.dates import DateFormatter from matplotlib.dates import num2date import matplotlib.pyplot as plt @@ -22,12 +21,12 @@ # Decode the Present Weather Codes # Pass it to the function to decode it along with the variable name -ds = act.utils.inst_utils.decode_present_weather(ds, - variable='pwd_pw_code_inst') +ds = act.utils.inst_utils.decode_present_weather(ds, variable='pwd_pw_code_inst') # Calculate Precipitation Accumulation -pre_accum = act.utils.accumulate_precip(ds.where(ds.qc_tbrg_precip_total == 0), - "tbrg_precip_total").tbrg_precip_total_accumulated.compute() +pre_accum = act.utils.accumulate_precip( + ds.where(ds.qc_tbrg_precip_total == 0), "tbrg_precip_total" +).tbrg_precip_total_accumulated.compute() # Add the Precipitation Accum to the MET DataSet ds['tbrg_accum'] = pre_accum @@ -46,8 +45,7 @@ # Assign the ACT display object to the matplotlib figure subplot display.assign_to_figure_axis(fig, ax) # Datastream Names are needed for plotting! 
-display.plot('tbrg_accum', - label='TBRG Accumualated Precip') +display.plot('tbrg_accum', label='TBRG Accumualated Precip') # Add a day/night background display.day_night_background() @@ -68,7 +66,10 @@ ndates = [num2date(x) for x in xticks] # Grab the PWD codes associated with those ticks -ncode = [ds['pwd_pw_code_inst_decoded'].sel(time=x.replace(tzinfo=None), method='nearest').data.tolist() for x in ndates] +ncode = [ + ds['pwd_pw_code_inst_decoded'].sel(time=x.replace(tzinfo=None), method='nearest').data.tolist() + for x in ndates +] pwd_code = ['\n'.join(x.split(' ')) if len(x) > 20 else x for x in ncode] # Display these select PWD codes as vertical texts along the x-axis @@ -77,11 +78,7 @@ # Plot the PWD code for i, key in enumerate(xticks): - ax.text(key, - ymin, - pwd_code[i], - rotation=90, - va='center') + ax.text(key, ymin, pwd_code[i], rotation=90, va='center') plt.subplots_adjust(bottom=0.20) diff --git a/examples/plotting/plot_rh_timeseries.py b/examples/plotting/plot_rh_timeseries.py index b4c7d65a2d..8dacfb4806 100644 --- a/examples/plotting/plot_rh_timeseries.py +++ b/examples/plotting/plot_rh_timeseries.py @@ -13,30 +13,32 @@ import act # Read in sonde files -twp_sonde_wildcard_list = ['twpsondewnpnC3.b1.20060119.050300.custom.cdf', - 'twpsondewnpnC3.b1.20060119.112000.custom.cdf', - 'twpsondewnpnC3.b1.20060119.163300.custom.cdf', - 'twpsondewnpnC3.b1.20060119.231600.custom.cdf', - 'twpsondewnpnC3.b1.20060120.043800.custom.cdf', - 'twpsondewnpnC3.b1.20060120.111900.custom.cdf', - 'twpsondewnpnC3.b1.20060120.170800.custom.cdf', - 'twpsondewnpnC3.b1.20060120.231500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', - 'twpsondewnpnC3.b1.20060122.052600.custom.cdf', - 'twpsondewnpnC3.b1.20060122.111500.custom.cdf', - 'twpsondewnpnC3.b1.20060122.171800.custom.cdf', - 
'twpsondewnpnC3.b1.20060122.232600.custom.cdf', - 'twpsondewnpnC3.b1.20060123.052500.custom.cdf', - 'twpsondewnpnC3.b1.20060123.111700.custom.cdf', - 'twpsondewnpnC3.b1.20060123.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060123.231500.custom.cdf', - 'twpsondewnpnC3.b1.20060124.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060124.111800.custom.cdf', - 'twpsondewnpnC3.b1.20060124.171700.custom.cdf', - 'twpsondewnpnC3.b1.20060124.231500.custom.cdf'] +twp_sonde_wildcard_list = [ + 'twpsondewnpnC3.b1.20060119.050300.custom.cdf', + 'twpsondewnpnC3.b1.20060119.112000.custom.cdf', + 'twpsondewnpnC3.b1.20060119.163300.custom.cdf', + 'twpsondewnpnC3.b1.20060119.231600.custom.cdf', + 'twpsondewnpnC3.b1.20060120.043800.custom.cdf', + 'twpsondewnpnC3.b1.20060120.111900.custom.cdf', + 'twpsondewnpnC3.b1.20060120.170800.custom.cdf', + 'twpsondewnpnC3.b1.20060120.231500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', + 'twpsondewnpnC3.b1.20060122.052600.custom.cdf', + 'twpsondewnpnC3.b1.20060122.111500.custom.cdf', + 'twpsondewnpnC3.b1.20060122.171800.custom.cdf', + 'twpsondewnpnC3.b1.20060122.232600.custom.cdf', + 'twpsondewnpnC3.b1.20060123.052500.custom.cdf', + 'twpsondewnpnC3.b1.20060123.111700.custom.cdf', + 'twpsondewnpnC3.b1.20060123.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060123.231500.custom.cdf', + 'twpsondewnpnC3.b1.20060124.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060124.111800.custom.cdf', + 'twpsondewnpnC3.b1.20060124.171700.custom.cdf', + 'twpsondewnpnC3.b1.20060124.231500.custom.cdf', +] sonde_filenames = [DATASETS.fetch(file) for file in twp_sonde_wildcard_list] sonde_ds = act.io.arm.read_arm_netcdf(sonde_filenames) diff --git a/examples/plotting/plot_scatter.py b/examples/plotting/plot_scatter.py index dd9202f05d..e3a7fc14f4 100644 --- a/examples/plotting/plot_scatter.py +++ 
b/examples/plotting/plot_scatter.py @@ -27,57 +27,36 @@ display = act.plotting.DistributionDisplay(ds) # Compare aircraft ground speed with indicated airspeed -display.plot_scatter('true_airspeed', - 'ground_speed', - m_field='ambient_temp', - marker='x', - cbar_label='Ambient Temperature ($^\circ$C)' - ) +display.plot_scatter( + 'true_airspeed', + 'ground_speed', + m_field='ambient_temp', + marker='x', + cbar_label=r'Ambient Temperature ($^\circ$C)', # noqa W605 +) # Set the range of the field on the x-axis display.set_xrng((40, 140)) display.set_yrng((40, 140)) # Determine the best fit line -z = np.ma.polyfit(ds['true_airspeed'], - ds['ground_speed'], - 1 - ) +z = np.ma.polyfit(ds['true_airspeed'], ds['ground_speed'], 1) p = np.poly1d(z) # Plot the best fit line -display.axes[0].plot(ds['true_airspeed'], - p(ds['true_airspeed']), - 'r', - linewidth=2 - ) +display.axes[0].plot(ds['true_airspeed'], p(ds['true_airspeed']), 'r', linewidth=2) # Display the line equation -display.axes[0].text(45, - 135, - "y = %.3fx + (%.3f)" % (z[0], z[1]), - color='r', - fontsize=12 - ) +display.axes[0].text(45, 135, f"y = {z[0]:.3f}x + ({z[1]:.3f})", color='r', fontsize=12) # Calculate Pearson Correlation Coefficient -cc_conc = pearsonr(ds['true_airspeed'], - ds['ground_speed'] - ) +cc_conc = pearsonr(ds['true_airspeed'], ds['ground_speed']) # Display the Pearson CC -display.axes[0].text(45, - 130, - "Pearson CC: %.2f" % (cc_conc[0]), - fontsize=12 - ) +display.axes[0].text(45, 130, "Pearson CC: %.2f" % (cc_conc[0]), fontsize=12) # Display the total number of samples -display.axes[0].text(45, - 125, - "N = %.0f" % (ds['true_airspeed'].data.shape[0]), - fontsize=12 - ) +display.axes[0].text(45, 125, "N = %.0f" % (ds['true_airspeed'].data.shape[0]), fontsize=12) # Display the 1:1 ratio line display.set_ratio_line() diff --git a/examples/plotting/plot_secondary_y.py b/examples/plotting/plot_secondary_y.py index d6062857e9..a7193711eb 100644 --- a/examples/plotting/plot_secondary_y.py 
+++ b/examples/plotting/plot_secondary_y.py @@ -9,7 +9,6 @@ from arm_test_data import DATASETS import matplotlib.pyplot as plt -import xarray as xr import act diff --git a/examples/plotting/plot_skewt.py b/examples/plotting/plot_skewt.py index 69c3674da8..924a951623 100644 --- a/examples/plotting/plot_skewt.py +++ b/examples/plotting/plot_skewt.py @@ -8,7 +8,6 @@ """ from arm_test_data import DATASETS -import metpy import xarray as xr from matplotlib import pyplot as plt @@ -37,7 +36,15 @@ plt.show() # One could also add options like adiabats and mixing lines skewt = act.plotting.SkewTDisplay(sonde_ds, figsize=(15, 10)) -skewt.plot_from_u_and_v('u_wind', 'v_wind', 'pres', 'tdry', 'dp', plot_dry_adiabats=True, - plot_moist_adiabats=True, plot_mixing_lines=True) +skewt.plot_from_u_and_v( + 'u_wind', + 'v_wind', + 'pres', + 'tdry', + 'dp', + plot_dry_adiabats=True, + plot_moist_adiabats=True, + plot_mixing_lines=True, +) plt.show() sonde_ds.close() diff --git a/examples/plotting/plot_skewt_with_text.py b/examples/plotting/plot_skewt_with_text.py index d37bd8d483..5672ccfd91 100644 --- a/examples/plotting/plot_skewt_with_text.py +++ b/examples/plotting/plot_skewt_with_text.py @@ -11,7 +11,6 @@ from arm_test_data import DATASETS from matplotlib import pyplot as plt -import metpy import numpy as np import xarray as xr diff --git a/examples/plotting/plot_time_height_scatter.py b/examples/plotting/plot_time_height_scatter.py index 950b36d998..b36bd7b4cf 100644 --- a/examples/plotting/plot_time_height_scatter.py +++ b/examples/plotting/plot_time_height_scatter.py @@ -6,8 +6,6 @@ """ -import os -from arm_test_data import DATASETS import matplotlib.pyplot as plt import act from act.tests import sample_files @@ -18,7 +16,7 @@ # Create scatter plots of the sonde data display = act.plotting.TimeSeriesDisplay(ds, figsize=(7, 6), subplot_shape=(2,)) display.time_height_scatter('tdry', plot_alt_field=True, subplot_index=(0,)) -display.time_height_scatter('rh', subplot_index=(1,), 
cb_friendly=True, day_night_background=True) +display.time_height_scatter('rh', subplot_index=(1,), cvd_friendly=True, day_night_background=True) plt.tight_layout() ds.close() diff --git a/examples/plotting/plot_violin.py b/examples/plotting/plot_violin.py index ce343c8078..3176514dc3 100644 --- a/examples/plotting/plot_violin.py +++ b/examples/plotting/plot_violin.py @@ -25,14 +25,16 @@ display = act.plotting.DistributionDisplay(ds) # Compare aircraft ground speed with ambient temperature -display.plot_violin('ambient_temp', - positions=[1.0], - ) - -display.plot_violin('total_temp', - positions=[2.0], - set_title='Aircraft Temperatures 2018-11-04', - ) +display.plot_violin( + 'ambient_temp', + positions=[1.0], +) + +display.plot_violin( + 'total_temp', + positions=[2.0], + set_title='Aircraft Temperatures 2018-11-04', +) # Update the tick information display.axes[0].set_xticks([0.5, 1, 2, 2.5]) diff --git a/examples/plotting/plot_wind_rose.py b/examples/plotting/plot_wind_rose.py index 3e4eda965a..140e950024 100644 --- a/examples/plotting/plot_wind_rose.py +++ b/examples/plotting/plot_wind_rose.py @@ -14,30 +14,32 @@ import act # Read in sonde files -twp_sonde_wildcard_list = ['twpsondewnpnC3.b1.20060119.050300.custom.cdf', - 'twpsondewnpnC3.b1.20060119.112000.custom.cdf', - 'twpsondewnpnC3.b1.20060119.163300.custom.cdf', - 'twpsondewnpnC3.b1.20060119.231600.custom.cdf', - 'twpsondewnpnC3.b1.20060120.043800.custom.cdf', - 'twpsondewnpnC3.b1.20060120.111900.custom.cdf', - 'twpsondewnpnC3.b1.20060120.170800.custom.cdf', - 'twpsondewnpnC3.b1.20060120.231500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', - 'twpsondewnpnC3.b1.20060122.052600.custom.cdf', - 'twpsondewnpnC3.b1.20060122.111500.custom.cdf', - 'twpsondewnpnC3.b1.20060122.171800.custom.cdf', - 'twpsondewnpnC3.b1.20060122.232600.custom.cdf', - 
'twpsondewnpnC3.b1.20060123.052500.custom.cdf', - 'twpsondewnpnC3.b1.20060123.111700.custom.cdf', - 'twpsondewnpnC3.b1.20060123.171600.custom.cdf', - 'twpsondewnpnC3.b1.20060123.231500.custom.cdf', - 'twpsondewnpnC3.b1.20060124.051500.custom.cdf', - 'twpsondewnpnC3.b1.20060124.111800.custom.cdf', - 'twpsondewnpnC3.b1.20060124.171700.custom.cdf', - 'twpsondewnpnC3.b1.20060124.231500.custom.cdf'] +twp_sonde_wildcard_list = [ + 'twpsondewnpnC3.b1.20060119.050300.custom.cdf', + 'twpsondewnpnC3.b1.20060119.112000.custom.cdf', + 'twpsondewnpnC3.b1.20060119.163300.custom.cdf', + 'twpsondewnpnC3.b1.20060119.231600.custom.cdf', + 'twpsondewnpnC3.b1.20060120.043800.custom.cdf', + 'twpsondewnpnC3.b1.20060120.111900.custom.cdf', + 'twpsondewnpnC3.b1.20060120.170800.custom.cdf', + 'twpsondewnpnC3.b1.20060120.231500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060121.111600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060121.231600.custom.cdf', + 'twpsondewnpnC3.b1.20060122.052600.custom.cdf', + 'twpsondewnpnC3.b1.20060122.111500.custom.cdf', + 'twpsondewnpnC3.b1.20060122.171800.custom.cdf', + 'twpsondewnpnC3.b1.20060122.232600.custom.cdf', + 'twpsondewnpnC3.b1.20060123.052500.custom.cdf', + 'twpsondewnpnC3.b1.20060123.111700.custom.cdf', + 'twpsondewnpnC3.b1.20060123.171600.custom.cdf', + 'twpsondewnpnC3.b1.20060123.231500.custom.cdf', + 'twpsondewnpnC3.b1.20060124.051500.custom.cdf', + 'twpsondewnpnC3.b1.20060124.111800.custom.cdf', + 'twpsondewnpnC3.b1.20060124.171700.custom.cdf', + 'twpsondewnpnC3.b1.20060124.231500.custom.cdf', +] sonde_filenames = [DATASETS.fetch(file) for file in twp_sonde_wildcard_list] sonde_ds = act.io.arm.read_arm_netcdf(sonde_filenames) diff --git a/examples/plotting/plot_xsection.py b/examples/plotting/plot_xsection.py index 126e3b9f84..6aec1f42a6 100644 --- a/examples/plotting/plot_xsection.py +++ b/examples/plotting/plot_xsection.py @@ -7,11 +7,9 @@ multi-dimensional 
dataset """ -from datetime import datetime from arm_test_data import DATASETS import matplotlib.pyplot as plt -import xarray as xr import act diff --git a/examples/qc/plot_arm_qc.py b/examples/qc/plot_arm_qc.py index d8abc6dd26..91d43645a0 100644 --- a/examples/qc/plot_arm_qc.py +++ b/examples/qc/plot_arm_qc.py @@ -145,7 +145,7 @@ # to give to other users. # There is a file in the same directory called sgpmfrsr7nchE11.b1.yaml with times of # incorrect or suspect values that can be read and applied to the Dataset. -from act.qc.add_supplemental_qc import apply_supplemental_qc +from act.qc.add_supplemental_qc import apply_supplemental_qc # noqa apply_supplemental_qc(ds, 'sgpmfrsr7nchE11.b1.yaml') diff --git a/examples/qc/plot_qc_bsrn.py b/examples/qc/plot_qc_bsrn.py index 5cbae6e687..37e3d907e7 100644 --- a/examples/qc/plot_qc_bsrn.py +++ b/examples/qc/plot_qc_bsrn.py @@ -25,10 +25,10 @@ display = act.plotting.TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(2,)) # Plot radiation data in top plot -display.plot(variable, subplot_index=(0,), day_night_background=True, cb_friendly=True) +display.plot(variable, subplot_index=(0,), day_night_background=True, cvd_friendly=True) # Plot ancillary QC data in bottom plot -display.qc_flag_block_plot(variable, subplot_index=(1,), cb_friendly=True) +display.qc_flag_block_plot(variable, subplot_index=(1,), cvd_friendly=True) plt.show() # Add initial BSRN QC tests to ancillary QC varialbles. Use defualts for @@ -69,9 +69,14 @@ display = act.plotting.TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(2,)) # Plot radiation data in top plot. Add QC information to top plot. 
-display.plot(variable, subplot_index=(0,), day_night_background=True, assessment_overplot=True, - cb_friendly=True) +display.plot( + variable, + subplot_index=(0,), + day_night_background=True, + assessment_overplot=True, + cvd_friendly=True, +) # Plot ancillary QC data in bottom plot -display.qc_flag_block_plot(variable, subplot_index=(1,), cb_friendly=True) +display.qc_flag_block_plot(variable, subplot_index=(1,), cvd_friendly=True) plt.show() diff --git a/examples/retrievals/plot_cbh_sobel.py b/examples/retrievals/plot_cbh_sobel.py index 100c2e9f25..e5764a1067 100644 --- a/examples/retrievals/plot_cbh_sobel.py +++ b/examples/retrievals/plot_cbh_sobel.py @@ -19,8 +19,9 @@ filename_ceil = DATASETS.fetch('sgpceilC1.b1.20190101.000000.nc') ds = act.io.arm.read_arm_netcdf(filename_ceil) -ds = act.retrievals.cbh.generic_sobel_cbh(ds, variable='backscatter', height_dim='range', - var_thresh=1000.0, fill_na=0.) +ds = act.retrievals.cbh.generic_sobel_cbh( + ds, variable='backscatter', height_dim='range', var_thresh=1000.0, fill_na=0.0 +) # Plot the cloud base height data display = act.plotting.TimeSeriesDisplay(ds, subplot_shape=(1, 2), figsize=(16, 6)) diff --git a/examples/templates/example_template.py b/examples/templates/example_template.py index 6119c56583..2de33a0c90 100644 --- a/examples/templates/example_template.py +++ b/examples/templates/example_template.py @@ -11,7 +11,8 @@ # Download and read file or files with the IO and discovery functions # within ACT, example: results = act.discovery.download_arm_data( - username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19') + username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19' +) ceil_ds = act.io.arm.read_arm_netcdf(results) # Plot file using the ACT display submodule, example: diff --git a/examples/templates/notebook_and_blog_template.ipynb b/examples/templates/notebook_and_blog_template.ipynb index cefe371a3b..0366a89eeb 100644 --- a/examples/templates/notebook_and_blog_template.ipynb +++ 
b/examples/templates/notebook_and_blog_template.ipynb @@ -192,11 +192,7 @@ "start_date = \"2022-01-01T12:00:00\"\n", "end_date = \"2022-01-07T12:00:00\"\n", "\n", - "files = act.discovery.download_data(arm_username,\n", - " arm_password,\n", - " datastream,\n", - " start_date,\n", - " end_date)" + "files = act.discovery.download_data(arm_username, arm_password, datastream, start_date, end_date)" ] }, { @@ -254,12 +250,12 @@ "source": [ "# Create an ACT TimeSeriesDisplay.\n", "display = act.plotting.TimeSeriesDisplay(\n", - " {'Shear, Wind Direction, and Speed at ANL ATMOS': ds},\n", - " subplot_shape=(1,), figsize=(15, 5))\n", + " {'Shear, Wind Direction, and Speed at ANL ATMOS': ds}, subplot_shape=(1,), figsize=(15, 5)\n", + ")\n", "\n", "# Plot shear with a wind barb overlay, while using a color vision\n", "# deficiency (CVD) colormap.\n", - "display.plot('shear', subplot_index=(0,), cb_friendly=True)\n", + "display.plot('shear', subplot_index=(0,), cvd_friendly=True)\n", "display.plot_barbs_from_spd_dir('speed', 'dir')\n", "plt.show()" ] diff --git a/examples/utils/plot_calculated_percentages.py b/examples/utils/plot_calculated_percentages.py new file mode 100644 index 0000000000..8bae97b357 --- /dev/null +++ b/examples/utils/plot_calculated_percentages.py @@ -0,0 +1,33 @@ +""" +Calculate and plot aerosol percentages. +---------------------------------------- + +Example on how to plot a Pie Chart of the composition of aerosols in a volume of air by +obtaining percentages of each aerosol from the dataset. + +Author: Zach Sherman +""" + +from arm_test_data import DATASETS +import matplotlib.pyplot as plt + +import act + +# Read in the data. +ds = act.io.read_arm_netcdf(DATASETS.fetch("sgpaosacsmE13.b2.20230420.000109.nc")) + +# Calculate percentages using selected fields. 
+fields = ['sulfate', 'ammonium', 'nitrate', 'chloride'] +time_slice = ('2023-04-20T17:38:20.000000000', '2023-04-20T20:29:47.000000000') +threshold = 0.0 +percentages = act.utils.calculate_percentages(ds, fields, time_slice=time_slice, threshold=0.0) + +# Get values for the pie chart. +labels = percentages.keys() +sizes = [percentages[i] for i in percentages.keys()] + +# Plot the figure +fig, ax = plt.subplots() +ax.pie(sizes, labels=labels, autopct='%1.1f%%') +plt.show() +ds.close() diff --git a/examples/utils/plot_tar.py b/examples/utils/plot_tar.py index d09c301871..b4dedaff0d 100644 --- a/examples/utils/plot_tar.py +++ b/examples/utils/plot_tar.py @@ -11,7 +11,6 @@ """ -import os from pathlib import Path # Import standard libraries @@ -27,13 +26,15 @@ # TAR file into read_arm_netcdf() to be unpacked and read. # Here we get a list of MET data files to pack into a TAR bundle -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] met_files = [Path(DATASETS.fetch(file)) for file in met_wildcard_list] # We can pass the list of netCDF data files to the pack_tar() function. 
diff --git a/examples/workflows/plot_aerioe_with_cbh.py b/examples/workflows/plot_aerioe_with_cbh.py index 717d47933e..aed0041f01 100644 --- a/examples/workflows/plot_aerioe_with_cbh.py +++ b/examples/workflows/plot_aerioe_with_cbh.py @@ -22,9 +22,13 @@ if username is None or token is None or len(username) == 0 or len(token) == 0: pass else: - results = act.discovery.download_arm_data(username, token, 'sgpaerioe1turnC1.c1', '2022-02-11', '2022-02-11') + results = act.discovery.download_arm_data( + username, token, 'sgpaerioe1turnC1.c1', '2022-02-11', '2022-02-11' + ) aerioe_ds = act.io.arm.read_arm_netcdf(results) - results = act.discovery.download_arm_data(username, token, 'sgpceilC1.b1', '2022-02-11', '2022-02-11') + results = act.discovery.download_arm_data( + username, token, 'sgpceilC1.b1', '2022-02-11', '2022-02-11' + ) ceil_ds = act.io.arm.read_arm_netcdf(results) # There isn't information content from the AERI above 3 km @@ -39,20 +43,47 @@ # Create a TimeSeriesDisplay object display = act.plotting.TimeSeriesDisplay( - {'AERIoe': aerioe_ds, 'Ceilometer': ceil_ds}, - subplot_shape=(2,), figsize=(20, 10) + {'AERIoe': aerioe_ds, 'Ceilometer': ceil_ds}, subplot_shape=(2,), figsize=(20, 10) ) # Plot data - display.plot('first_cbh', dsname='Ceilometer', marker='+', color='black', markeredgewidth=3, - linewidth=0, subplot_index=(0,), label='cbh') - display.plot('temperature', dsname='AERIoe', cmap='viridis', set_shading='nearest', - add_nan=True, subplot_index=(0,)) - - display.plot('first_cbh', dsname='Ceilometer', marker='+', color='black', markeredgewidth=3, - linewidth=0, subplot_index=(1,), label='cbh') - display.plot('waterVapor', dsname='AERIoe', cmap='HomeyerRainbow', set_shading='nearest', - add_nan=True, subplot_index=(1,)) + display.plot( + 'first_cbh', + dsname='Ceilometer', + marker='+', + color='black', + markeredgewidth=3, + linewidth=0, + subplot_index=(0,), + label='cbh', + ) + display.plot( + 'temperature', + dsname='AERIoe', + cmap='viridis', + 
set_shading='nearest', + add_nan=True, + subplot_index=(0,), + ) + + display.plot( + 'first_cbh', + dsname='Ceilometer', + marker='+', + color='black', + markeredgewidth=3, + linewidth=0, + subplot_index=(1,), + label='cbh', + ) + display.plot( + 'waterVapor', + dsname='AERIoe', + cmap='HomeyerRainbow', + set_shading='nearest', + add_nan=True, + subplot_index=(1,), + ) # If you want to save it you can # plt.savefig('sgpaerioe1turnC1.c1.20220211.png') diff --git a/examples/workflows/plot_merged_product.py b/examples/workflows/plot_merged_product.py index bc9764bb83..0713ef40a2 100644 --- a/examples/workflows/plot_merged_product.py +++ b/examples/workflows/plot_merged_product.py @@ -36,7 +36,7 @@ # The ECOR and EBBR have different definitions of latent heat # flux and what is positive vs negative. Check out the ARM # Handbooks for more information -ds_ecor['lv_e'].values = ds_ecor['lv_e'].values * -1. +ds_ecor['lv_e'].values = ds_ecor['lv_e'].values * -1.0 # For example purposes, let's rename the ecor latent heat flux ds_ecor = ds_ecor.rename({'lv_e': 'latent_heat_flux_ecor'}) @@ -58,7 +58,9 @@ ds = xr.merge([ds_ecor, ds_ebbr, ds_sebs], compat='override') # Apply the QC information to set all flagged data to missing/NaN -ds.qcfilter.datafilter(del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect']) +ds.qcfilter.datafilter( + del_qc_var=False, rm_assessments=['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] +) # Plot up data from the merged dataset for each of the instruments display = act.plotting.TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(3,)) diff --git a/examples/workflows/plot_multiple_dataset.py b/examples/workflows/plot_multiple_dataset.py index fba3c58f05..40264b00fd 100644 --- a/examples/workflows/plot_multiple_dataset.py +++ b/examples/workflows/plot_multiple_dataset.py @@ -27,9 +27,13 @@ met_ds = act.io.arm.read_arm_netcdf(filename_met) else: # Download and read data - results = act.discovery.download_arm_data(username, 
token, 'sgpceilC1.b1', '2022-01-01', '2022-01-07') + results = act.discovery.download_arm_data( + username, token, 'sgpceilC1.b1', '2022-01-01', '2022-01-07' + ) ceil_ds = act.io.arm.read_arm_netcdf(results) - results = act.discovery.download_arm_data(username, token, 'sgpmetE13.b1', '2022-01-01', '2022-01-07') + results = act.discovery.download_arm_data( + username, token, 'sgpmetE13.b1', '2022-01-01', '2022-01-07' + ) met_ds = act.io.arm.read_arm_netcdf(results) # Read in CEIL data and correct it diff --git a/examples/workflows/plot_qc_transforms.py b/examples/workflows/plot_qc_transforms.py index b3ba93b01c..501318e0c0 100644 --- a/examples/workflows/plot_qc_transforms.py +++ b/examples/workflows/plot_qc_transforms.py @@ -10,7 +10,6 @@ from arm_test_data import DATASETS import matplotlib.pyplot as plt -import xarray as xr import act @@ -42,10 +41,17 @@ print('After: (2 5 - minute averages)', ds_5minb[variable].values[0:2]) ## Plot up the variable and qc block plot -display = act.plotting.TimeSeriesDisplay({'Original': ds, 'Average': ds_5min, 'Average_QCd': ds_5minb}, - figsize=(15, 10), subplot_shape=(2,)) +display = act.plotting.TimeSeriesDisplay( + {'Original': ds, 'Average': ds_5min, 'Average_QCd': ds_5minb}, + figsize=(15, 10), + subplot_shape=(2,), +) display.plot(variable, dsname='Original', subplot_index=(0,), day_night_background=True) -display.plot(variable, dsname='Average', subplot_index=(1,), day_night_background=True, label='No QC') -display.plot(variable, dsname='Average_QCd', subplot_index=(1,), day_night_background=True, label='QC') +display.plot( + variable, dsname='Average', subplot_index=(1,), day_night_background=True, label='No QC' +) +display.plot( + variable, dsname='Average_QCd', subplot_index=(1,), day_night_background=True, label='QC' +) plt.legend() plt.show() diff --git a/examples/workflows/plot_weighted_average.py b/examples/workflows/plot_weighted_average.py index b0645456fc..0199def978 100644 --- 
a/examples/workflows/plot_weighted_average.py +++ b/examples/workflows/plot_weighted_average.py @@ -35,13 +35,15 @@ # } # Get a list of filenames to use -met_wildcard_list = ['sgpmetE13.b1.20190101.000000.cdf', - 'sgpmetE13.b1.20190102.000000.cdf', - 'sgpmetE13.b1.20190103.000000.cdf', - 'sgpmetE13.b1.20190104.000000.cdf', - 'sgpmetE13.b1.20190105.000000.cdf', - 'sgpmetE13.b1.20190106.000000.cdf', - 'sgpmetE13.b1.20190107.000000.cdf'] +met_wildcard_list = [ + 'sgpmetE13.b1.20190101.000000.cdf', + 'sgpmetE13.b1.20190102.000000.cdf', + 'sgpmetE13.b1.20190103.000000.cdf', + 'sgpmetE13.b1.20190104.000000.cdf', + 'sgpmetE13.b1.20190105.000000.cdf', + 'sgpmetE13.b1.20190106.000000.cdf', + 'sgpmetE13.b1.20190107.000000.cdf', +] ds = {} new = {} diff --git a/guides/GUIDE_V2.rst b/guides/GUIDE_V2.rst index f276012691..fb26e247fb 100644 --- a/guides/GUIDE_V2.rst +++ b/guides/GUIDE_V2.rst @@ -51,7 +51,7 @@ Similar to the discovery module, functionality has not changed but the naming co Plotting ======== -A major change to how secondary y-axes are handled was implemented in the TimeSeriesDisplay and DistributionDisplay modules. Currently, those plotting routines return a 1-D array of display axes. This has always made the secondary y-axis more difficult to configure and use. In the new version, it will return a 2-D array of display axes [[left axes, right axes]] to make it simpler to utilize. +A major change to how secondary y-axes are handled was implemented in the TimeSeriesDisplay and DistributionDisplay modules. Currently, those plotting routines return a 1-D array of display axes. This has always made the secondary y-axis more difficult to configure and use. In the new version, it will return a 2-D array of display axes [[left axes, right axes]] to make it simpler to utilize. HistogramDisplay is being renamed to DistributionDisplay to be more inclusive of the variety of visualization types that are housed there. 
Additionally there are changes to two of the plot names to be more consistent with the others. diff --git a/guides/act_cheatsheet.tex b/guides/act_cheatsheet.tex index 819bc7a6bd..891a8e6a78 100644 --- a/guides/act_cheatsheet.tex +++ b/guides/act_cheatsheet.tex @@ -396,8 +396,8 @@ \begin{poster} { -headerborder=closed, colspacing=0.8em, bgColorOne=white, bgColorTwo=white, borderColor=lightblue, headerColorOne=black, headerColorTwo=lightblue, -headerFontColor=white, boxColorOne=white, textborder=roundedleft, eyecatcher=true, headerheight=0.06\textheight, headershape=roundedright, headerfont=\Large\bf\textsc, linewidth=2pt +headerborder=closed, colspacing=0.8em, bgColorOne=white, bgColorTwo=white, borderColor=lightblue, headerColorOne=black, headerColorTwo=lightblue, +headerFontColor=white, boxColorOne=white, textborder=roundedleft, eyecatcher=true, headerheight=0.06\textheight, headershape=roundedright, headerfont=\Large\bf\textsc, linewidth=2pt } %---------------------------------------------------------------- % Title @@ -436,7 +436,7 @@ $>$$>$$>$ display.put\_display\_in\_subplot(\\ \-\hspace{1.2cm} display, subplot\_index))\\ \-\hspace{0.2cm} $\bullet$ This will place a Display object into a specific\\ -\-\hspace{0.5cm} subplot. +\-\hspace{0.5cm} subplot. 
\end{tabular} \begin{tabular}{@{}ll@{}} diff --git a/pyproject.toml b/pyproject.toml index 06353c6682..6f4440c460 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,3 +6,9 @@ ignore = ["act/io/arm.py"] [tool.check-manifest] ignore = ["docs/*", "ci/*"] + +[tool.ruff] +target-version = "py39" +ignore = [ + "E501", +] diff --git a/requirements.txt b/requirements.txt index 9098993baf..6b8e108fbd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,4 @@ fsspec metpy lxml cmweather -aiohttp>=3.9.0b1 \ No newline at end of file +aiohttp>=3.9.0b1 diff --git a/scripts/ads.py b/scripts/ads.py index 5a61ff51fb..d4cd336d50 100644 --- a/scripts/ads.py +++ b/scripts/ads.py @@ -10,7 +10,6 @@ """ import argparse -import re import json import glob import ast @@ -21,6 +20,7 @@ try: import cartopy.crs as ccrs + CARTOPY_AVAILABLE = True except ImportError: CARTOPY_AVAILABLE = False @@ -51,8 +51,10 @@ def option_error_check(args, error_fields, check_all=False): if not value.startswith(prepend): error_fields[ii] = prepend + value - print(f"\n{pathlib.Path(__file__).name}: error: {how_many} of the arguments " - f"{' '.join(error_fields)} is requried\n") + print( + f"\n{pathlib.Path(__file__).name}: error: {how_many} of the arguments " + f"{' '.join(error_fields)} is requried\n" + ) exit() @@ -118,7 +120,8 @@ def find_drop_vars(args): keep_vars_additional = [] for var_name in keep_vars: qc_var_name = ds.qcfilter.check_for_ancillary_qc( - var_name, add_if_missing=False, cleanup=False) + var_name, add_if_missing=False, cleanup=False + ) if qc_var_name is not None: keep_vars_additional.append(qc_var_name) @@ -142,16 +145,25 @@ def geodisplay(args): except KeyError: pass - display = act.plotting.GeographicPlotDisplay({dsname: ds}, - figsize=args.figsize) - - display.geoplot(data_field=args.field, lat_field=args.latitude, - lon_field=args.longitude, dsname=dsname, - cbar_label=args.cb_label, title=args.set_title, - plot_buffer=args.plot_buffer, stamen=args.stamen, - 
tile=args.tile, cartopy_feature=args.cfeatures, - cmap=args.cmap, text=args.text, gridlines=args.gridlines, - projection=args.projection, **args.kwargs) + display = act.plotting.GeographicPlotDisplay({dsname: ds}, figsize=args.figsize) + + display.geoplot( + data_field=args.field, + lat_field=args.latitude, + lon_field=args.longitude, + dsname=dsname, + cbar_label=args.cb_label, + title=args.set_title, + plot_buffer=args.plot_buffer, + stamen=args.stamen, + tile=args.tile, + cartopy_feature=args.cfeatures, + cmap=args.cmap, + text=args.text, + gridlines=args.gridlines, + projection=args.projection, + **args.kwargs, + ) plt.savefig(args.out_path) plt.show() @@ -175,26 +187,33 @@ def skewt(args): display = act.plotting.SkewTDisplay({dsname: ds}, figsize=args.figsize) if args.from_u_and_v: - display.plot_from_u_and_v(u_field=args.u_wind, v_field=args.v_wind, - p_field=args.p_field, t_field=args.t_field, - td_field=args.td_field, - subplot_index=subplot_index, - dsname=dsname, show_parcel=args.show_parcel, - p_levels_to_plot=args.plevels_plot, - shade_cape=args.shade_cape, - shade_cin=args.shade_cin, - set_title=args.set_title, - plot_barbs_kwargs=args.plot_barbs_kwargs, - plot_kwargs=args.plot_kwargs) + display.plot_from_u_and_v( + u_field=args.u_wind, + v_field=args.v_wind, + p_field=args.p_field, + t_field=args.t_field, + td_field=args.td_field, + subplot_index=subplot_index, + dsname=dsname, + show_parcel=args.show_parcel, + p_levels_to_plot=args.plevels_plot, + shade_cape=args.shade_cape, + shade_cin=args.shade_cin, + set_title=args.set_title, + plot_barbs_kwargs=args.plot_barbs_kwargs, + plot_kwargs=args.plot_kwargs, + ) if args.from_spd_and_dir: - display.plot_from_spd_and_dir(spd_field=args.spd_field, - dir_field=args.dir_field, - p_field=args.p_field, - t_field=args.t_field, - td_field=args.td_field, - dsname=dsname, - **args.kwargs) + display.plot_from_spd_and_dir( + spd_field=args.spd_field, + dir_field=args.dir_field, + p_field=args.p_field, + 
t_field=args.t_field, + td_field=args.td_field, + dsname=dsname, + **args.kwargs, + ) plt.savefig(args.out_path) plt.show() @@ -218,18 +237,26 @@ def xsection(args): display = act.plotting.XSectionDisplay({dsname: ds}, figsize=args.figsize) if args.plot_xsection: - display.plot_xsection(dsname=dsname, varname=args.field, - x=args.x_field, y=args.y_field, - subplot_index=subplot_index, - sel_kwargs=args.sel_kwargs, - isel_kwargs=args.isel_kwargs, **args.kwargs) + display.plot_xsection( + dsname=dsname, + varname=args.field, + x=args.x_field, + y=args.y_field, + subplot_index=subplot_index, + sel_kwargs=args.sel_kwargs, + isel_kwargs=args.isel_kwargs, + **args.kwargs, + ) if args.xsection_map: - display.plot_xsection_map(dsname=dsname, varname=args.field, - subplot_index=subplot_index, - coastlines=args.coastlines, - background=args.background, - **args.kwargs) + display.plot_xsection_map( + dsname=dsname, + varname=args.field, + subplot_index=subplot_index, + coastlines=args.coastlines, + background=args.background, + **args.kwargs, + ) plt.savefig(args.out_path) plt.show() @@ -239,7 +266,6 @@ def xsection(args): def wind_rose(args): - drop_vars = find_drop_vars(args) ds = act.io.arm.read_arm_netcdf(args.file_path, drop_variables=drop_vars) @@ -253,15 +279,20 @@ def wind_rose(args): except KeyError: pass - display = act.plotting.WindRoseDisplay({dsname: ds}, - figsize=args.figsize) - - display.plot(dir_field=args.dir_field, spd_field=args.spd_field, - subplot_index=subplot_index, - dsname=dsname, cmap=args.cmap, - set_title=args.set_title, - num_dirs=args.num_dir, spd_bins=args.spd_bins, - tick_interval=args.tick_interval, **args.kwargs) + display = act.plotting.WindRoseDisplay({dsname: ds}, figsize=args.figsize) + + display.plot( + dir_field=args.dir_field, + spd_field=args.spd_field, + subplot_index=subplot_index, + dsname=dsname, + cmap=args.cmap, + set_title=args.set_title, + num_dirs=args.num_dir, + spd_bins=args.spd_bins, + tick_interval=args.tick_interval, + 
**args.kwargs, + ) plt.savefig(args.out_path) plt.show() plt.close(display.fig) @@ -270,7 +301,6 @@ def wind_rose(args): def timeseries(args): - drop_vars = find_drop_vars(args) ds = act.io.arm.read_arm_netcdf(args.file_path, drop_variables=drop_vars) @@ -289,11 +319,19 @@ def timeseries(args): pass display = act.plotting.TimeSeriesDisplay( - {dsname: ds}, figsize=args.figsize, - subplot_shape=subplot_shape) - - options = ['plot', 'barbs_spd_dir', 'barbs_u_v', 'xsection_from_1d', - 'time_height_scatter', 'qc', 'fill_between', 'multi_panel'] + {dsname: ds}, figsize=args.figsize, subplot_shape=subplot_shape + ) + + options = [ + 'plot', + 'barbs_spd_dir', + 'barbs_u_v', + 'xsection_from_1d', + 'time_height_scatter', + 'qc', + 'fill_between', + 'multi_panel', + ] option_error_check(args, options) if args.plot: @@ -303,20 +341,27 @@ def timeseries(args): else: yrange = args.set_yrange display.plot( - field=args.field, dsname=dsname, cmap=args.cmap, - set_title=args.set_title, add_nan=args.add_nan, + field=args.field, + dsname=dsname, + cmap=args.cmap, + set_title=args.set_title, + add_nan=args.add_nan, subplot_index=subplot_index, use_var_for_y=args.var_y, day_night_background=args.day_night, invert_y_axis=args.invert_y_axis, - abs_limits=args.abs_limits, time_rng=args.time_rng, + abs_limits=args.abs_limits, + time_rng=args.time_rng, assessment_overplot=args.assessment_overplot, assessment_overplot_category=args.overplot_category, assessment_overplot_category_color=args.category_color, - force_line_plot=args.force_line_plot, labels=args.labels, - cbar_label=args.cb_label, secondary_y=args.secondary_y, + force_line_plot=args.force_line_plot, + labels=args.labels, + cbar_label=args.cb_label, + secondary_y=args.secondary_y, y_rng=yrange, - **args.kwargs) + **args.kwargs, + ) if args.barbs_spd_dir: display.plot_barbs_from_spd_dir( @@ -324,12 +369,15 @@ def timeseries(args): spd_field=args.spd_field, pres_field=args.p_field, dsname=dsname, - **args.kwargs) + **args.kwargs, 
+ ) if args.barbs_u_v: display.plot_barbs_from_u_v( - u_field=args.u_wind, v_field=args.v_wind, - pres_field=args.p_field, dsname=dsname, + u_field=args.u_wind, + v_field=args.v_wind, + pres_field=args.p_field, + dsname=dsname, set_title=args.set_title, invert_y_axis=args.invert_y_axis, day_night_background=args.day_night, @@ -337,49 +385,61 @@ def timeseries(args): num_barbs_y=args.num_barb_y, use_var_for_y=args.var_y, subplot_index=subplot_index, - **args.kwargs) + **args.kwargs, + ) if args.xsection_from_1d: option_error_check(args, 'field') display.plot_time_height_xsection_from_1d_data( - data_field=args.field, pres_field=args.p_field, - dsname=dsname, set_title=args.set_title, + data_field=args.field, + pres_field=args.p_field, + dsname=dsname, + set_title=args.set_title, day_night_background=args.day_night, num_time_periods=args.num_time_periods, num_y_levels=args.num_y_levels, invert_y_axis=args.invert_y_axis, subplot_index=subplot_index, cbar_label=args.cb_label, - **args.kwargs) + **args.kwargs, + ) if args.time_height_scatter: option_error_check(args, 'field') display.time_height_scatter( - data_field=args.field, dsname=dsname, - cmap=args.cmap, alt_label=args.alt_label, - alt_field=args.alt_field, cb_label=args.cb_label, - **args.kwargs) + data_field=args.field, + dsname=dsname, + cmap=args.cmap, + alt_label=args.alt_label, + alt_field=args.alt_field, + cb_label=args.cb_label, + **args.kwargs, + ) if args.qc: option_error_check(args, 'field') display.qc_flag_block_plot( - data_field=args.field, dsname=dsname, + data_field=args.field, + dsname=dsname, subplot_index=subplot_index, time_rng=args.time_rng, assessment_color=args.assessment_color, - **args.kwargs) + **args.kwargs, + ) if args.fill_between: option_error_check(args, 'field') display.fill_between( - field=args.field, dsname=dsname, + field=args.field, + dsname=dsname, subplot_index=subplot_index, set_title=args.set_title, secondary_y=args.secondary_y, - **args.kwargs) + **args.kwargs, + ) if 
args.multi_panel: option_error_check(args, ['fields', 'plot_type'], check_all=True) @@ -387,27 +447,36 @@ def timeseries(args): for i, j, k in zip(args.fields, subplot_index, args.plot_type): if k == 'plot': display.plot( - field=i, dsname=dsname, cmap=args.cmap, - set_title=args.set_title, add_nan=args.add_nan, + field=i, + dsname=dsname, + cmap=args.cmap, + set_title=args.set_title, + add_nan=args.add_nan, subplot_index=j, use_var_for_y=args.var_y, day_night_background=args.day_night, invert_y_axis=args.invert_y_axis, - abs_limits=args.abs_limits, time_rng=args.time_rng, + abs_limits=args.abs_limits, + time_rng=args.time_rng, assessment_overplot=args.assessment_overplot, assessment_overplot_category=args.overplot_category, assessment_overplot_category_color=args.category_color, - force_line_plot=args.force_line_plot, labels=args.labels, - cbar_label=args.cb_label, secondary_y=args.secondary_y, - **args.kwargs) + force_line_plot=args.force_line_plot, + labels=args.labels, + cbar_label=args.cb_label, + secondary_y=args.secondary_y, + **args.kwargs, + ) if k == 'qc': display.qc_flag_block_plot( - data_field=i, dsname=dsname, + data_field=i, + dsname=dsname, subplot_index=j, time_rng=args.time_rng, assessment_color=args.assessment_color, - **args.kwargs) + **args.kwargs, + ) plt.savefig(args.out_path) plt.show() @@ -417,7 +486,6 @@ def timeseries(args): def histogram(args): - drop_vars = find_drop_vars(args) ds = act.io.arm.read_arm_netcdf(args.file_path, drop_variables=drop_vars) @@ -433,44 +501,58 @@ def histogram(args): pass display = act.plotting.DistributionDisplay( - {dsname: ds}, figsize=args.figsize, - subplot_shape=subplot_shape) + {dsname: ds}, figsize=args.figsize, subplot_shape=subplot_shape + ) if args.stacked_bar_graph: display.plot_stacked_bar_graph( - field=args.field, dsname=dsname, - bins=args.bins, density=args.density, + field=args.field, + dsname=dsname, + bins=args.bins, + density=args.density, sortby_field=args.sortby_field, 
sortby_bins=args.sortby_bins, set_title=args.set_title, subplot_index=subplot_index, - **args.kwargs) + **args.kwargs, + ) if args.size_dist: display.plot_size_distribution( - field=args.field, bins=args.bin_field, - time=args.time, dsname=dsname, + field=args.field, + bins=args.bin_field, + time=args.time, + dsname=dsname, set_title=args.set_title, subplot_index=subplot_index, - **args.kwargs) + **args.kwargs, + ) if args.stairstep: display.plot_stairstep_graph( - field=args.field, dsname=dsname, - bins=args.bins, density=args.density, + field=args.field, + dsname=dsname, + bins=args.bins, + density=args.density, sortby_field=args.sortby_field, sortby_bins=args.sortby_bins, set_title=args.set_title, subplot_index=subplot_index, - **args.kwargs) + **args.kwargs, + ) if args.heatmap: display.plot_heatmap( - x_field=args.x_field, y_field=args.y_field, - dsname=dsname, x_bins=args.x_bins, - y_bins=args.y_bins, set_title=args.set_title, + x_field=args.x_field, + y_field=args.y_field, + dsname=dsname, + x_bins=args.x_bins, + y_bins=args.y_bins, + set_title=args.set_title, density=args.density, - subplot_index=subplot_index, **args.kwargs) + subplot_index=subplot_index, + **args.kwargs, + ) plt.savefig(args.out_path) plt.show() @@ -498,40 +580,51 @@ def contour(args): display = act.plotting.ContourDisplay(data, figsize=args.figsize) if args.create_contour: - display.create_contour(fields=fields, time=time, function=args.function, - grid_delta=args.grid_delta, - grid_buffer=args.grid_buffer, - subplot_index=args.subplot_index, - **args.kwargs) + display.create_contour( + fields=fields, + time=time, + function=args.function, + grid_delta=args.grid_delta, + grid_buffer=args.grid_buffer, + subplot_index=args.subplot_index, + **args.kwargs, + ) if args.contourf: - display.contourf(x=args.x, y=args.y, z=args.z, - subplot_index=args.subplot_index, - **args.kwargs) + display.contourf( + x=args.x, y=args.y, z=args.z, subplot_index=args.subplot_index, **args.kwargs + ) if 
args.plot_contour: - display.contour(x=args.x, y=args.y, z=args.z, - subplot_index=args.subplot_index, - **args.kwargs) + display.contour( + x=args.x, y=args.y, z=args.z, subplot_index=args.subplot_index, **args.kwargs + ) if args.vectors_spd_dir: - display.plot_vectors_from_spd_dir(fields=wind_fields, time=time, - mesh=args.mesh, function=args.function, - grid_delta=args.grid_delta, - grid_buffer=args.grid_buffer, - subplot_index=args.subplot_index, - **args.kwargs) + display.plot_vectors_from_spd_dir( + fields=wind_fields, + time=time, + mesh=args.mesh, + function=args.function, + grid_delta=args.grid_delta, + grid_buffer=args.grid_buffer, + subplot_index=args.subplot_index, + **args.kwargs, + ) if args.barbs: - display.barbs(x=args.x, y=args.y, u=args.u, v=args.v, - subplot_index=args.subplot_index, - **args.kwargs) + display.barbs( + x=args.x, y=args.y, u=args.u, v=args.v, subplot_index=args.subplot_index, **args.kwargs + ) if args.plot_station: - display.plot_station(fields=station_fields, time=time, - text_color=args.text_color, - subplot_index=args.subplot_index, - **args.kwargs) + display.plot_station( + fields=station_fields, + time=time, + text_color=args.text_color, + subplot_index=args.subplot_index, + **args.kwargs, + ) plt.savefig(args.out_path) plt.show() @@ -555,8 +648,11 @@ def convert_arg_line_to_args(line): def main(): prefix_char = '@' parser = argparse.ArgumentParser( - description=(f'Create plot from a data file. Can use command line opitons ' - f'or point to a configuration file using {prefix_char} character.')) + description=( + f'Create plot from a data file. Can use command line opitons ' + f'or point to a configuration file using {prefix_char} character.' 
+ ) + ) # Allow user to reference a file by using the @ symbol for a specific # argument value @@ -565,336 +661,817 @@ def main(): # Update the file parsing logic to skip commented lines parser.convert_arg_line_to_args = convert_arg_line_to_args - parser.add_argument('-f', '--file_path', type=str, required=True, - help=('Required: Full path to file for creating Plot. For multiple ' - 'files use terminal syntax for matching muliple files. ' - 'For example "sgpmetE13.b1.202007*.*.nc" will match all files ' - 'for the month of July in 2020. Need to use double quotes ' - 'to stop terminal from expanding the search, and let the ' - 'python program perform search.')) + parser.add_argument( + '-f', + '--file_path', + type=str, + required=True, + help=( + 'Required: Full path to file for creating Plot. For multiple ' + 'files use terminal syntax for matching muliple files. ' + 'For example "sgpmetE13.b1.202007*.*.nc" will match all files ' + 'for the month of July in 2020. Need to use double quotes ' + 'to stop terminal from expanding the search, and let the ' + 'python program perform search.' + ), + ) out_path_default = 'image.png' - parser.add_argument('-o', '--out_path', type=str, default=out_path_default, - help=("Full path filename to use for saving image. " - "Default is '{out_path_default}'. 
If only a path is given " - "will use that path with image name '{out_path_default}', " - "else will use filename given.")) - parser.add_argument('-fd', '--field', type=str, default=None, - help='Name of the field to plot') - parser.add_argument('-fds', '--fields', nargs='+', - type=str, default=None, - help='Name of the fields to use to plot') - parser.add_argument('-wfs', '--wind_fields', nargs='+', - type=str, default=None, - help='Wind field names used to plot') - parser.add_argument('-sfs', '--station_fields', nargs='+', - type=str, default=None, - help='Station field names to plot sites') + parser.add_argument( + '-o', + '--out_path', + type=str, + default=out_path_default, + help=( + "Full path filename to use for saving image. " + "Default is '{out_path_default}'. If only a path is given " + "will use that path with image name '{out_path_default}', " + "else will use filename given." + ), + ) + parser.add_argument('-fd', '--field', type=str, default=None, help='Name of the field to plot') + parser.add_argument( + '-fds', + '--fields', + nargs='+', + type=str, + default=None, + help='Name of the fields to use to plot', + ) + parser.add_argument( + '-wfs', + '--wind_fields', + nargs='+', + type=str, + default=None, + help='Wind field names used to plot', + ) + parser.add_argument( + '-sfs', + '--station_fields', + nargs='+', + type=str, + default=None, + help='Station field names to plot sites', + ) default = 'lat' - parser.add_argument('-lat', '--latitude', type=str, default=default, - help=f"Name of latitude variable in file. Default is '{default}'") + parser.add_argument( + '-lat', + '--latitude', + type=str, + default=default, + help=f"Name of latitude variable in file. Default is '{default}'", + ) default = 'lon' - parser.add_argument('-lon', '--longitude', type=str, default=default, - help=f"Name of longitude variable in file. 
Default is '{default}'") - parser.add_argument('-xf', '--x_field', type=str, default=None, - help='Name of variable to plot on x axis') - parser.add_argument('-yf', '--y_field', type=str, default=None, - help='Name of variable to plot on y axis') - parser.add_argument('-x', type=np.array, - help='x coordinates or grid for z') - parser.add_argument('-y', type=np.array, - help='y coordinates or grid for z') - parser.add_argument('-z', type=np.array, - help='Values over which to contour') + parser.add_argument( + '-lon', + '--longitude', + type=str, + default=default, + help=f"Name of longitude variable in file. Default is '{default}'", + ) + parser.add_argument( + '-xf', '--x_field', type=str, default=None, help='Name of variable to plot on x axis' + ) + parser.add_argument( + '-yf', '--y_field', type=str, default=None, help='Name of variable to plot on y axis' + ) + parser.add_argument('-x', type=np.array, help='x coordinates or grid for z') + parser.add_argument('-y', type=np.array, help='y coordinates or grid for z') + parser.add_argument('-z', type=np.array, help='Values over which to contour') default = 'u_wind' - parser.add_argument('-u', '--u_wind', type=str, default=default, - help=f"File variable name for u_wind wind component. Default is '{default}'") + parser.add_argument( + '-u', + '--u_wind', + type=str, + default=default, + help=f"File variable name for u_wind wind component. Default is '{default}'", + ) default = 'v_wind' - parser.add_argument('-v', '--v_wind', type=str, default=default, - help=f"File variable name for v_wind wind compenent. Default is '{default}'") + parser.add_argument( + '-v', + '--v_wind', + type=str, + default=default, + help=f"File variable name for v_wind wind compenent. Default is '{default}'", + ) default = 'pres' - parser.add_argument('-pf', '--p_field', type=str, default=default, - help=f"File variable name for pressure. 
Default is '{default}'") + parser.add_argument( + '-pf', + '--p_field', + type=str, + default=default, + help=f"File variable name for pressure. Default is '{default}'", + ) default = 'tdry' - parser.add_argument('-tf', '--t_field', type=str, default=default, - help=f"File variable name for temperature. Default is '{default}'") + parser.add_argument( + '-tf', + '--t_field', + type=str, + default=default, + help=f"File variable name for temperature. Default is '{default}'", + ) default = 'dp' - parser.add_argument('-tdf', '--td_field', type=str, default=default, - help=f"File variable name for dewpoint temperature. Default is '{default}'") + parser.add_argument( + '-tdf', + '--td_field', + type=str, + default=default, + help=f"File variable name for dewpoint temperature. Default is '{default}'", + ) default = 'wspd' - parser.add_argument('-sf', '--spd_field', type=str, default=default, - help=f"File variable name for wind speed. Default is '{default}'") + parser.add_argument( + '-sf', + '--spd_field', + type=str, + default=default, + help=f"File variable name for wind speed. Default is '{default}'", + ) default = 'deg' - parser.add_argument('-df', '--dir_field', type=str, default=default, - help=f"File variable name for wind direction. Default is '{default}'") - parser.add_argument('-al', '--alt_label', type=str, default=None, - help='Altitude axis label') + parser.add_argument( + '-df', + '--dir_field', + type=str, + default=default, + help=f"File variable name for wind direction. Default is '{default}'", + ) + parser.add_argument('-al', '--alt_label', type=str, default=None, help='Altitude axis label') default = 'alt' - parser.add_argument('-af', '--alt_field', type=str, default=default, - help=f"File variable name for altitude. Default is '{default}'") + parser.add_argument( + '-af', + '--alt_field', + type=str, + default=default, + help=f"File variable name for altitude. 
Default is '{default}'", + ) global _default_dsname _default_dsname = 'act_datastream' - parser.add_argument('-ds', '--dsname', type=str, default=_default_dsname, - help=f"Name of datastream to plot. Default is '{_default_dsname}'") + parser.add_argument( + '-ds', + '--dsname', + type=str, + default=_default_dsname, + help=f"Name of datastream to plot. Default is '{_default_dsname}'", + ) default = '(0, )' - parser.add_argument('-si', '--subplot_index', type=ast.literal_eval, - default=default, - help=f'Index of the subplot via tuple syntax. ' - f'Example for two plots is "(0,), (1,)". ' - f"Default is '{default}'") - default = (1, ) - parser.add_argument('-ss', '--subplot_shape', nargs='+', type=int, - default=default, - help=(f'The number of (rows, columns) ' - f'for the subplots in the display. ' - f'Default is {default}')) + parser.add_argument( + '-si', + '--subplot_index', + type=ast.literal_eval, + default=default, + help=f'Index of the subplot via tuple syntax. ' + f'Example for two plots is "(0,), (1,)". ' + f"Default is '{default}'", + ) + default = (1,) + parser.add_argument( + '-ss', + '--subplot_shape', + nargs='+', + type=int, + default=default, + help=( + f'The number of (rows, columns) ' + f'for the subplots in the display. ' + f'Default is {default}' + ), + ) plot_type_options = ['plot', 'qc'] - parser.add_argument('-pt', '--plot_type', nargs='+', type=str, - help=f'Type of plot to make. 
Current options include: ' - f'{plot_type_options}') - parser.add_argument('-vy', '--var_y', type=str, default=None, - help=('Set this to the name of a data variable in ' - 'the Dataset to use as the y-axis variable ' - 'instead of the default dimension.')) - parser.add_argument('-plp', '--plevels_plot', - type=np.array, default=None, - help='Pressure levels to plot the wind barbs on.') - parser.add_argument('-cbl', '--cb_label', type=str, default=None, - help='Colorbar label to use') - parser.add_argument('-st', '--set_title', type=str, default=None, - help='Title for the plot') + parser.add_argument( + '-pt', + '--plot_type', + nargs='+', + type=str, + help=f'Type of plot to make. Current options include: ' f'{plot_type_options}', + ) + parser.add_argument( + '-vy', + '--var_y', + type=str, + default=None, + help=( + 'Set this to the name of a data variable in ' + 'the Dataset to use as the y-axis variable ' + 'instead of the default dimension.' + ), + ) + parser.add_argument( + '-plp', + '--plevels_plot', + type=np.array, + default=None, + help='Pressure levels to plot the wind barbs on.', + ) + parser.add_argument('-cbl', '--cb_label', type=str, default=None, help='Colorbar label to use') + parser.add_argument('-st', '--set_title', type=str, default=None, help='Title for the plot') default = 0.08 - parser.add_argument('-pb', '--plot_buffer', type=float, default=default, - help=(f'Buffer to add around data on plot in lat ' - f'and lon dimension. Default is {default}')) + parser.add_argument( + '-pb', + '--plot_buffer', + type=float, + default=default, + help=( + f'Buffer to add around data on plot in lat ' f'and lon dimension. Default is {default}' + ), + ) default = 'terrain-background' - parser.add_argument('-sm', '--stamen', type=str, default=default, - help=f"Dataset to use for background image. Default is '{default}'") + parser.add_argument( + '-sm', + '--stamen', + type=str, + default=default, + help=f"Dataset to use for background image. 
Default is '{default}'", + ) default = 8 - parser.add_argument('-tl', '--tile', type=int, default=default, - help=f'Tile zoom to use with background image. Default is {default}') - parser.add_argument('-cfs', '--cfeatures', nargs='+', type=str, default=None, - help='Cartopy feature to add to plot') - parser.add_argument('-txt', '--text', type=json.loads, default=None, - help=('Dictionary of {text:[lon,lat]} to add to plot. ' - 'Can have more than one set of text to add.')) + parser.add_argument( + '-tl', + '--tile', + type=int, + default=default, + help=f'Tile zoom to use with background image. Default is {default}', + ) + parser.add_argument( + '-cfs', + '--cfeatures', + nargs='+', + type=str, + default=None, + help='Cartopy feature to add to plot', + ) + parser.add_argument( + '-txt', + '--text', + type=json.loads, + default=None, + help=( + 'Dictionary of {text:[lon,lat]} to add to plot. ' + 'Can have more than one set of text to add.' + ), + ) default = 'rainbow' - parser.add_argument('-cm', '--cmap', default=default, - help=f"colormap to use. Defaut is '{default}'") - parser.add_argument('-abl', '--abs_limits', nargs='+', type=float, - default=(None, None), - help=('Sets the bounds on plot limits even if data ' - 'values exceed those limits. Y axis limits. Default is no limits.')) - parser.add_argument('-tr', '--time_rng', nargs='+', type=float, default=None, - help=('List or tuple with (min,max) values to set the ' - 'x-axis range limits')) + parser.add_argument( + '-cm', '--cmap', default=default, help=f"colormap to use. Defaut is '{default}'" + ) + parser.add_argument( + '-abl', + '--abs_limits', + nargs='+', + type=float, + default=(None, None), + help=( + 'Sets the bounds on plot limits even if data ' + 'values exceed those limits. Y axis limits. Default is no limits.' 
+ ), + ) + parser.add_argument( + '-tr', + '--time_rng', + nargs='+', + type=float, + default=None, + help=('List or tuple with (min,max) values to set the ' 'x-axis range limits'), + ) default = 20 - parser.add_argument('-nd', '--num_dir', type=int, default=default, - help=(f'Number of directions to splot the wind rose into. ' - f'Default is {default}')) - parser.add_argument('-sb', '--spd_bins', nargs='+', type=float, default=None, - help='Bin boundaries to sort the wind speeds into') + parser.add_argument( + '-nd', + '--num_dir', + type=int, + default=default, + help=(f'Number of directions to splot the wind rose into. ' f'Default is {default}'), + ) + parser.add_argument( + '-sb', + '--spd_bins', + nargs='+', + type=float, + default=None, + help='Bin boundaries to sort the wind speeds into', + ) default = 3 - parser.add_argument('-ti', '--tick_interval', type=int, default=default, - help=(f'Interval (in percentage) for the ticks ' - f'on the radial axis. Default is {default}')) - parser.add_argument('-ac', '--assessment_color', type=json.loads, - default=None, - help=('dictionary lookup to override default ' - 'assessment to color')) + parser.add_argument( + '-ti', + '--tick_interval', + type=int, + default=default, + help=( + f'Interval (in percentage) for the ticks ' f'on the radial axis. Default is {default}' + ), + ) + parser.add_argument( + '-ac', + '--assessment_color', + type=json.loads, + default=None, + help=('dictionary lookup to override default ' 'assessment to color'), + ) default = False - parser.add_argument('-ao', '--assessment_overplot', - default=default, action='store_true', - help=(f'Option to overplot quality control colored ' - f'symbols over plotted data using ' - f'flag_assessment categories. 
Default is {default}')) - default = {'Incorrect': ['Bad', 'Incorrect'], - 'Suspect': ['Indeterminate', 'Suspect']} - parser.add_argument('-oc', '--overplot_category', type=json.loads, default=default, - help=(f'Look up to categorize assessments into groups. ' - f'This allows using multiple terms for the same ' - f'quality control level of failure. ' - f'Also allows adding more to the defaults. Default is {default}')) + parser.add_argument( + '-ao', + '--assessment_overplot', + default=default, + action='store_true', + help=( + f'Option to overplot quality control colored ' + f'symbols over plotted data using ' + f'flag_assessment categories. Default is {default}' + ), + ) + default = {'Incorrect': ['Bad', 'Incorrect'], 'Suspect': ['Indeterminate', 'Suspect']} + parser.add_argument( + '-oc', + '--overplot_category', + type=json.loads, + default=default, + help=( + f'Look up to categorize assessments into groups. ' + f'This allows using multiple terms for the same ' + f'quality control level of failure. ' + f'Also allows adding more to the defaults. Default is {default}' + ), + ) default = {'Incorrect': 'red', 'Suspect': 'orange'} - parser.add_argument('-co', '--category_color', type=json.loads, - default=default, - help=(f'Lookup to match overplot category color to ' - f'assessment grouping. Default is {default}')) - parser.add_argument('-flp', '--force_line_plot', default=False, - action='store_true', - help='Option to plot 2D data as 1D line plots') - parser.add_argument('-l', '--labels', nargs='+', default=False, - type=str, - help=('Option to overwrite the legend labels. ' - 'Must have same dimensions as number of ' - 'lines plottes.')) - parser.add_argument('-sy', '--secondary_y', default=False, action='store_true', - help='Option to plot on secondary y axis') + parser.add_argument( + '-co', + '--category_color', + type=json.loads, + default=default, + help=( + f'Lookup to match overplot category color to ' + f'assessment grouping. 
Default is {default}' + ), + ) + parser.add_argument( + '-flp', + '--force_line_plot', + default=False, + action='store_true', + help='Option to plot 2D data as 1D line plots', + ) + parser.add_argument( + '-l', + '--labels', + nargs='+', + default=False, + type=str, + help=( + 'Option to overwrite the legend labels. ' + 'Must have same dimensions as number of ' + 'lines plottes.' + ), + ) + parser.add_argument( + '-sy', + '--secondary_y', + default=False, + action='store_true', + help='Option to plot on secondary y axis', + ) if CARTOPY_AVAILABLE: default = ccrs.PlateCarree() - parser.add_argument('-prj', '--projection', type=str, - default=default, - help=f"Projection to use on plot. Default is {default}") + parser.add_argument( + '-prj', + '--projection', + type=str, + default=default, + help=f"Projection to use on plot. Default is {default}", + ) default = 20 - parser.add_argument('-bx', '--num_barb_x', type=int, default=default, - help=f'Number of wind barbs to plot in the x axis. Default is {default}') + parser.add_argument( + '-bx', + '--num_barb_x', + type=int, + default=default, + help=f'Number of wind barbs to plot in the x axis. Default is {default}', + ) default = 20 - parser.add_argument('-by', '--num_barb_y', type=int, default=default, - help=f"Number of wind barbs to plot in the y axis. Default is {default}") + parser.add_argument( + '-by', + '--num_barb_y', + type=int, + default=default, + help=f"Number of wind barbs to plot in the y axis. Default is {default}", + ) default = 20 - parser.add_argument('-tp', '--num_time_periods', type=int, default=default, - help=f'Set how many time periods. 
Default is {default}') - parser.add_argument('-bn', '--bins', nargs='+', type=int, default=None, - help='histogram bin boundaries to use') - parser.add_argument('-bf', '--bin_field', type=str, default=None, - help=('name of the field that stores the ' - 'bins for the spectra')) - parser.add_argument('-xb', '--x_bins', nargs='+', type=int, default=None, - help='Histogram bin boundaries to use for x axis variable') - parser.add_argument('-yb', '--y_bins', nargs='+', type=int, default=None, - help='Histogram bin boundaries to use for y axis variable') - parser.add_argument('-t', '--time', type=str, default=None, - help='Time period to be plotted') - parser.add_argument('-sbf', '--sortby_field', type=str, default=None, - help='Sort histograms by a given field parameter') - parser.add_argument('-sbb', '--sortby_bins', nargs='+', type=int, - default=None, - help='Bins to sort the histograms by') + parser.add_argument( + '-tp', + '--num_time_periods', + type=int, + default=default, + help=f'Set how many time periods. 
Default is {default}', + ) + parser.add_argument( + '-bn', '--bins', nargs='+', type=int, default=None, help='histogram bin boundaries to use' + ) + parser.add_argument( + '-bf', + '--bin_field', + type=str, + default=None, + help=('name of the field that stores the ' 'bins for the spectra'), + ) + parser.add_argument( + '-xb', + '--x_bins', + nargs='+', + type=int, + default=None, + help='Histogram bin boundaries to use for x axis variable', + ) + parser.add_argument( + '-yb', + '--y_bins', + nargs='+', + type=int, + default=None, + help='Histogram bin boundaries to use for y axis variable', + ) + parser.add_argument('-t', '--time', type=str, default=None, help='Time period to be plotted') + parser.add_argument( + '-sbf', + '--sortby_field', + type=str, + default=None, + help='Sort histograms by a given field parameter', + ) + parser.add_argument( + '-sbb', + '--sortby_bins', + nargs='+', + type=int, + default=None, + help='Bins to sort the histograms by', + ) default = 20 - parser.add_argument('-nyl', '--num_y_levels', type=int, default=default, - help=f'Number of levels in the y axis to use. Default is {default}') - parser.add_argument('-sk', '--sel_kwargs', type=json.loads, default=None, - help=('The keyword arguments to pass into ' - ':py:func:`xarray.DataArray.sel`')) - parser.add_argument('-ik', '--isel_kwargs', type=json.loads, default=None, - help=('The keyword arguments to pass into ' - ':py:func:`xarray.DataArray.sel`')) + parser.add_argument( + '-nyl', + '--num_y_levels', + type=int, + default=default, + help=f'Number of levels in the y axis to use. 
Default is {default}', + ) + parser.add_argument( + '-sk', + '--sel_kwargs', + type=json.loads, + default=None, + help=('The keyword arguments to pass into ' ':py:func:`xarray.DataArray.sel`'), + ) + parser.add_argument( + '-ik', + '--isel_kwargs', + type=json.loads, + default=None, + help=('The keyword arguments to pass into ' ':py:func:`xarray.DataArray.sel`'), + ) default = 'cubic' - parser.add_argument('-fn', '--function', type=str, default=default, - help=(f'Defaults to cubic function for interpolation. ' - f'See scipy.interpolate.Rbf for additional options. ' - f'Default is {default}')) + parser.add_argument( + '-fn', + '--function', + type=str, + default=default, + help=( + f'Defaults to cubic function for interpolation. ' + f'See scipy.interpolate.Rbf for additional options. ' + f'Default is {default}' + ), + ) default = 0.1 - parser.add_argument('-gb', '--grid_buffer', type=float, default=default, - help=f'Buffer to apply to grid. Default is {default}') + parser.add_argument( + '-gb', + '--grid_buffer', + type=float, + default=default, + help=f'Buffer to apply to grid. Default is {default}', + ) default = (0.01, 0.01) - parser.add_argument('-gd', '--grid_delta', nargs='+', - type=float, default=default, - help=f'X and Y deltas for creating grid. Default is {default}') - parser.add_argument('-fg', '--figsize', nargs='+', type=float, - default=None, - help='Width and height in inches of figure') + parser.add_argument( + '-gd', + '--grid_delta', + nargs='+', + type=float, + default=default, + help=f'X and Y deltas for creating grid. Default is {default}', + ) + parser.add_argument( + '-fg', + '--figsize', + nargs='+', + type=float, + default=None, + help='Width and height in inches of figure', + ) default = 'white' - parser.add_argument('-tc', '--text_color', type=str, default=default, - help=f"Color of text. 
Default is '{default}'") - parser.add_argument('-kwargs', type=json.loads, default=dict(), - help='keyword arguments to use in plotting function') - parser.add_argument('-pk', '--plot_kwargs', type=json.loads, default=dict(), - help=("Additional keyword arguments to pass " - "into MetPy's SkewT.plot")) - parser.add_argument('-pbk', '--plot_barbs_kwargs', type=json.loads, - default=dict(), - help=("Additional keyword arguments to pass " - "into MetPy's SkewT.plot_barbs")) + parser.add_argument( + '-tc', + '--text_color', + type=str, + default=default, + help=f"Color of text. Default is '{default}'", + ) + parser.add_argument( + '-kwargs', + type=json.loads, + default=dict(), + help='keyword arguments to use in plotting function', + ) + parser.add_argument( + '-pk', + '--plot_kwargs', + type=json.loads, + default=dict(), + help=("Additional keyword arguments to pass " "into MetPy's SkewT.plot"), + ) + parser.add_argument( + '-pbk', + '--plot_barbs_kwargs', + type=json.loads, + default=dict(), + help=("Additional keyword arguments to pass " "into MetPy's SkewT.plot_barbs"), + ) default = True - parser.add_argument('-cu', '--cleanup', default=default, action='store_false', - help=f'Turn off standard methods for obj cleanup. 
Default is {default}') - parser.add_argument('-gl', '--gridlines', default=False, action='store_true', - help='Use latitude and lingitude gridlines.') - parser.add_argument('-cl', '--coastlines', default=False, action='store_true', - help='Plot coastlines on geographical map') - parser.add_argument('-bg', '--background', default=False, action='store_true', - help='Plot a stock image background') - parser.add_argument('-nan', '--add_nan', default=False, action='store_true', - help='Fill in data gaps with NaNs') - parser.add_argument('-dn', '--day_night', default=False, action='store_true', - help=("Fill in color coded background according " - "to time of day.")) - parser.add_argument('-yr', '--set_yrange', default=None, nargs=2, - help=("Set the yrange for the specific plot")) - parser.add_argument('-iya', '--invert_y_axis', default=False, - action='store_true', - help='Invert y axis') - parser.add_argument('-sp', '--show_parcel', default=False, action='store_true', - help='set to true to plot the parcel path.') - parser.add_argument('-cape', '--shade_cape', default=False, - action='store_true', - help='set to true to shade regions of cape.') - parser.add_argument('-cin', '--shade_cin', default=False, action='store_true', - help='set to true to shade regions of cin.') - parser.add_argument('-d', '--density', default=False, action='store_true', - help='Plot a p.d.f. 
instead of a frequency histogram') - parser.add_argument('-m', '--mesh', default=False, action='store_true', - help=('Set to True to interpolate u and v to ' - 'grid and create wind barbs')) - parser.add_argument('-uv', '--from_u_and_v', default=False, action='store_true', - help='Create SkewTPLot with u and v wind') - parser.add_argument('-sd', '--from_spd_and_dir', default=False, action='store_true', - help='Create SkewTPlot with wind speed and direction') - parser.add_argument('-px', '--plot_xsection', default=False, action='store_true', - help='plots a cross section whose x and y coordinates') - parser.add_argument('-pxm', '--xsection_map', default=False, action='store_true', - help='plots a cross section of 2D data on a geographical map') - parser.add_argument('-p', '--plot', default=False, action='store_true', - help='Makes a time series plot') - parser.add_argument('-mp', '--multi_panel', default=False, - action='store_true', - help='Makes a 2 panel timeseries plot') - parser.add_argument('-qc', '--qc', default=False, action='store_true', - help='Create time series plot of embedded quality control values') - parser.add_argument('-fb', '--fill_between', default=False, action='store_true', - help='makes a fill betweem plot based on matplotlib') - parser.add_argument('-bsd', '--barbs_spd_dir', default=False, action='store_true', - help=('Makes time series plot of wind barbs ' - 'using wind speed and dir.')) - parser.add_argument('-buv', '--barbs_u_v', default=False, action='store_true', - help=('Makes time series plot of wind barbs ' - 'using u and v wind components.')) - parser.add_argument('-pxs', '--xsection_from_1d', default=False, - action='store_true', - help='Will plot a time-height cross section from 1D dataset') - parser.add_argument('-ths', '--time_height_scatter', - default=False, action='store_true', - help='Create a scatter time series plot') - parser.add_argument('-sbg', '--stacked_bar_graph', - default=False, action='store_true', - help='Create 
stacked bar graph histogram') - parser.add_argument('-psd', '--size_dist', default=False, action='store_true', - help='Plots a stairstep plot of size distribution') - parser.add_argument('-sg', '--stairstep', default=False, action='store_true', - help='Plots stairstep plot of a histogram') - parser.add_argument('-hm', '--heatmap', default=False, action='store_true', - help='Plot a heatmap histogram from 2 variables') - parser.add_argument('-cc', '--create_contour', default=False, action='store_true', - help='Extracts, grids, and creates a contour plot') - parser.add_argument('-cf', '--contourf', default=False, action='store_true', - help=('Base function for filled contours if user ' - 'already has data gridded')) - parser.add_argument('-ct', '--plot_contour', default=False, action='store_true', - help=('Base function for contours if user ' - 'already has data gridded')) - parser.add_argument('-vsd', '--vectors_spd_dir', default=False, action='store_true', - help='Extracts, grids, and creates a contour plot.') - parser.add_argument('-b', '--barbs', default=False, action='store_true', - help='Base function for wind barbs.') - parser.add_argument('-ps', '--plot_station', default=False, action='store_true', - help='Extracts, grids, and creates a contour plot') + parser.add_argument( + '-cu', + '--cleanup', + default=default, + action='store_false', + help=f'Turn off standard methods for obj cleanup. 
Default is {default}', + ) + parser.add_argument( + '-gl', + '--gridlines', + default=False, + action='store_true', + help='Use latitude and lingitude gridlines.', + ) + parser.add_argument( + '-cl', + '--coastlines', + default=False, + action='store_true', + help='Plot coastlines on geographical map', + ) + parser.add_argument( + '-bg', + '--background', + default=False, + action='store_true', + help='Plot a stock image background', + ) + parser.add_argument( + '-nan', '--add_nan', default=False, action='store_true', help='Fill in data gaps with NaNs' + ) + parser.add_argument( + '-dn', + '--day_night', + default=False, + action='store_true', + help=("Fill in color coded background according " "to time of day."), + ) + parser.add_argument( + '-yr', '--set_yrange', default=None, nargs=2, help=("Set the yrange for the specific plot") + ) + parser.add_argument( + '-iya', '--invert_y_axis', default=False, action='store_true', help='Invert y axis' + ) + parser.add_argument( + '-sp', + '--show_parcel', + default=False, + action='store_true', + help='set to true to plot the parcel path.', + ) + parser.add_argument( + '-cape', + '--shade_cape', + default=False, + action='store_true', + help='set to true to shade regions of cape.', + ) + parser.add_argument( + '-cin', + '--shade_cin', + default=False, + action='store_true', + help='set to true to shade regions of cin.', + ) + parser.add_argument( + '-d', + '--density', + default=False, + action='store_true', + help='Plot a p.d.f. 
instead of a frequency histogram', + ) + parser.add_argument( + '-m', + '--mesh', + default=False, + action='store_true', + help=('Set to True to interpolate u and v to ' 'grid and create wind barbs'), + ) + parser.add_argument( + '-uv', + '--from_u_and_v', + default=False, + action='store_true', + help='Create SkewTPLot with u and v wind', + ) + parser.add_argument( + '-sd', + '--from_spd_and_dir', + default=False, + action='store_true', + help='Create SkewTPlot with wind speed and direction', + ) + parser.add_argument( + '-px', + '--plot_xsection', + default=False, + action='store_true', + help='plots a cross section whose x and y coordinates', + ) + parser.add_argument( + '-pxm', + '--xsection_map', + default=False, + action='store_true', + help='plots a cross section of 2D data on a geographical map', + ) + parser.add_argument( + '-p', '--plot', default=False, action='store_true', help='Makes a time series plot' + ) + parser.add_argument( + '-mp', + '--multi_panel', + default=False, + action='store_true', + help='Makes a 2 panel timeseries plot', + ) + parser.add_argument( + '-qc', + '--qc', + default=False, + action='store_true', + help='Create time series plot of embedded quality control values', + ) + parser.add_argument( + '-fb', + '--fill_between', + default=False, + action='store_true', + help='makes a fill betweem plot based on matplotlib', + ) + parser.add_argument( + '-bsd', + '--barbs_spd_dir', + default=False, + action='store_true', + help=('Makes time series plot of wind barbs ' 'using wind speed and dir.'), + ) + parser.add_argument( + '-buv', + '--barbs_u_v', + default=False, + action='store_true', + help=('Makes time series plot of wind barbs ' 'using u and v wind components.'), + ) + parser.add_argument( + '-pxs', + '--xsection_from_1d', + default=False, + action='store_true', + help='Will plot a time-height cross section from 1D dataset', + ) + parser.add_argument( + '-ths', + '--time_height_scatter', + default=False, + action='store_true', + 
help='Create a scatter time series plot', + ) + parser.add_argument( + '-sbg', + '--stacked_bar_graph', + default=False, + action='store_true', + help='Create stacked bar graph histogram', + ) + parser.add_argument( + '-psd', + '--size_dist', + default=False, + action='store_true', + help='Plots a stairstep plot of size distribution', + ) + parser.add_argument( + '-sg', + '--stairstep', + default=False, + action='store_true', + help='Plots stairstep plot of a histogram', + ) + parser.add_argument( + '-hm', + '--heatmap', + default=False, + action='store_true', + help='Plot a heatmap histogram from 2 variables', + ) + parser.add_argument( + '-cc', + '--create_contour', + default=False, + action='store_true', + help='Extracts, grids, and creates a contour plot', + ) + parser.add_argument( + '-cf', + '--contourf', + default=False, + action='store_true', + help=('Base function for filled contours if user ' 'already has data gridded'), + ) + parser.add_argument( + '-ct', + '--plot_contour', + default=False, + action='store_true', + help=('Base function for contours if user ' 'already has data gridded'), + ) + parser.add_argument( + '-vsd', + '--vectors_spd_dir', + default=False, + action='store_true', + help='Extracts, grids, and creates a contour plot.', + ) + parser.add_argument( + '-b', '--barbs', default=False, action='store_true', help='Base function for wind barbs.' 
+ ) + parser.add_argument( + '-ps', + '--plot_station', + default=False, + action='store_true', + help='Extracts, grids, and creates a contour plot', + ) # The mutually exclusive but one requried group group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('-gp', '--geodisplay', dest='action', action='store_const', - const=geodisplay, help='Set to genereate a geographic plot') - group.add_argument('-skt', '--skewt', dest='action', action='store_const', - const=skewt, help='Set to genereate a skew-t plot') - group.add_argument('-xs', '--xsection', dest='action', action='store_const', - const=xsection, help='Set to genereate a XSection plot') - group.add_argument('-wr', '--wind_rose', dest='action', action='store_const', - const=wind_rose, help='Set to genereate a wind rose plot') - group.add_argument('-ts', '--timeseries', dest='action', action='store_const', - const=timeseries, help='Set to genereate a timeseries plot') - group.add_argument('-c', '--contour', dest='action', action='store_const', - const=contour, help='Set to genereate a contour plot') - group.add_argument('-hs', '--histogram', dest='action', action='store_const', - const=histogram, help='Set to genereate a histogram plot') + group.add_argument( + '-gp', + '--geodisplay', + dest='action', + action='store_const', + const=geodisplay, + help='Set to genereate a geographic plot', + ) + group.add_argument( + '-skt', + '--skewt', + dest='action', + action='store_const', + const=skewt, + help='Set to genereate a skew-t plot', + ) + group.add_argument( + '-xs', + '--xsection', + dest='action', + action='store_const', + const=xsection, + help='Set to genereate a XSection plot', + ) + group.add_argument( + '-wr', + '--wind_rose', + dest='action', + action='store_const', + const=wind_rose, + help='Set to genereate a wind rose plot', + ) + group.add_argument( + '-ts', + '--timeseries', + dest='action', + action='store_const', + const=timeseries, + help='Set to genereate a timeseries 
plot', + ) + group.add_argument( + '-c', + '--contour', + dest='action', + action='store_const', + const=contour, + help='Set to genereate a contour plot', + ) + group.add_argument( + '-hs', + '--histogram', + dest='action', + action='store_const', + const=histogram, + help='Set to genereate a histogram plot', + ) args = parser.parse_args() diff --git a/tests/discovery/test_cropscape.py b/tests/discovery/test_cropscape.py index 3d594cc35d..500a1ac822 100644 --- a/tests/discovery/test_cropscape.py +++ b/tests/discovery/test_cropscape.py @@ -17,4 +17,4 @@ def test_croptype(): assert crop == 'Dbl Crop WinWht/Sorghum' if crop2 is not None: # assert crop2 == 'Sorghum' - assert crop2 == 'Soybeans' + assert crop2 in ['Soybeans', 'Winter Wheat'] diff --git a/tests/discovery/test_improve.py b/tests/discovery/test_improve.py new file mode 100644 index 0000000000..c8a75f1fd6 --- /dev/null +++ b/tests/discovery/test_improve.py @@ -0,0 +1,20 @@ +import numpy as np +import act + + +def test_get_improve(): + ds = act.discovery.get_improve_data(site_id='244', start_date='1/1/2023', end_date='12/31/2023') + + assert len(list(ds)) == 216 + assert 'lat' in ds + assert 'lon' in ds + assert len(ds.time.values) == 121 + assert 'aluminum_fine' in ds + assert ds['ammonium_nitrate_fine'].values[0] == 1.41363 + + with np.testing.assert_raises(ValueError): + ds = act.discovery.get_improve_data() + with np.testing.assert_raises(ValueError): + ds = act.discovery.get_improve_data(site_id='244') + with np.testing.assert_raises(ValueError): + ds = act.discovery.get_improve_data(site_id='244', start_date='1/1/2023') diff --git a/tests/io/test_ameriflux.py b/tests/io/test_ameriflux.py new file mode 100644 index 0000000000..395983de39 --- /dev/null +++ b/tests/io/test_ameriflux.py @@ -0,0 +1,33 @@ +import act +import glob +import xarray as xr + + +def test_convert_to_ameriflux(): + files = glob.glob(act.tests.sample_files.EXAMPLE_ECORSF_E39) + ds_ecor = act.io.arm.read_arm_netcdf(files) + + df = 
act.io.ameriflux.convert_to_ameriflux(ds_ecor) + + assert 'FC' in df + assert 'WS_MAX' in df + + files = glob.glob(act.tests.sample_files.EXAMPLE_SEBS_E39) + ds_sebs = act.io.arm.read_arm_netcdf(files) + + ds = xr.merge([ds_ecor, ds_sebs]) + df = act.io.ameriflux.convert_to_ameriflux(ds) + + assert 'SWC_2_1_1' in df + assert 'TS_3_1_1' in df + assert 'G_2_1_1' in df + + files = glob.glob(act.tests.sample_files.EXAMPLE_STAMP_E39) + ds_stamp = act.io.arm.read_arm_netcdf(files) + + ds = xr.merge([ds_ecor, ds_sebs, ds_stamp], compat='override') + df = act.io.ameriflux.convert_to_ameriflux(ds) + + assert 'SWC_6_10_1' in df + assert 'G_2_1_1' in df + assert 'TS_5_2_1' in df diff --git a/tests/io/test_hysplit.py b/tests/io/test_hysplit.py new file mode 100644 index 0000000000..672956401d --- /dev/null +++ b/tests/io/test_hysplit.py @@ -0,0 +1,16 @@ +import act + +from act.tests import sample_files + + +def test_read_hysplit(): + filename = sample_files.EXAMPLE_HYSPLIT + ds = act.io.read_hysplit(filename) + assert 'lat' in ds.variables.keys() + assert 'lon' in ds.variables.keys() + assert 'alt' in ds.variables.keys() + assert 'PRESSURE' in ds.variables.keys() + assert ds.sizes["num_grids"] == 8 + assert ds.sizes["num_trajectories"] == 1 + assert ds.sizes['time'] == 120 + assert ds['age'].min() == -120 diff --git a/tests/io/test_noaapsl.py b/tests/io/test_noaapsl.py index 35326ea592..2964b5c879 100644 --- a/tests/io/test_noaapsl.py +++ b/tests/io/test_noaapsl.py @@ -11,12 +11,12 @@ def test_read_psl_wind_profiler(): act.tests.EXAMPLE_NOAA_PSL, transpose=False ) # test dimensions - assert 'time' and 'HT' in test_ds_low.dims.keys() - assert 'time' and 'HT' in test_ds_hi.dims.keys() - assert test_ds_low.dims['time'] == 4 - assert test_ds_hi.dims['time'] == 4 - assert test_ds_low.dims['HT'] == 49 - assert test_ds_hi.dims['HT'] == 50 + assert 'time' and 'HT' in test_ds_low.sizes.keys() + assert 'time' and 'HT' in test_ds_hi.sizes.keys() + assert test_ds_low.sizes['time'] == 4 + 
assert test_ds_hi.sizes['time'] == 4 + assert test_ds_low.sizes['HT'] == 49 + assert test_ds_hi.sizes['HT'] == 50 # test coordinates assert (test_ds_low.coords['HT'][0:5] == np.array([0.151, 0.254, 0.356, 0.458, 0.561])).all() diff --git a/tests/plotting/baseline/test_2D_timeseries_plot.png b/tests/plotting/baseline/test_2D_timeseries_plot.png index d8c1e9e6d0..25bfcd9d69 100644 Binary files a/tests/plotting/baseline/test_2D_timeseries_plot.png and b/tests/plotting/baseline/test_2D_timeseries_plot.png differ diff --git a/tests/plotting/baseline/test_2d_as_1d.png b/tests/plotting/baseline/test_2d_as_1d.png index ee49a3fa4a..1d8617ddba 100644 Binary files a/tests/plotting/baseline/test_2d_as_1d.png and b/tests/plotting/baseline/test_2d_as_1d.png differ diff --git a/tests/plotting/baseline/test_add_nan_line.png b/tests/plotting/baseline/test_add_nan_line.png index 4ef242543a..9605beaee0 100644 Binary files a/tests/plotting/baseline/test_add_nan_line.png and b/tests/plotting/baseline/test_add_nan_line.png differ diff --git a/tests/plotting/baseline/test_add_nan_line_integer.png b/tests/plotting/baseline/test_add_nan_line_integer.png new file mode 100644 index 0000000000..be7bda4d31 Binary files /dev/null and b/tests/plotting/baseline/test_add_nan_line_integer.png differ diff --git a/tests/plotting/baseline/test_assessment_overplot.png b/tests/plotting/baseline/test_assessment_overplot.png index c1108a15b0..55c6f15d29 100644 Binary files a/tests/plotting/baseline/test_assessment_overplot.png and b/tests/plotting/baseline/test_assessment_overplot.png differ diff --git a/tests/plotting/baseline/test_assessment_overplot_multi.png b/tests/plotting/baseline/test_assessment_overplot_multi.png index 520c977077..bc8ea63087 100644 Binary files a/tests/plotting/baseline/test_assessment_overplot_multi.png and b/tests/plotting/baseline/test_assessment_overplot_multi.png differ diff --git a/tests/plotting/baseline/test_barb_sounding_plot.png 
b/tests/plotting/baseline/test_barb_sounding_plot.png index 98b8e5b756..9380ca3b6e 100644 Binary files a/tests/plotting/baseline/test_barb_sounding_plot.png and b/tests/plotting/baseline/test_barb_sounding_plot.png differ diff --git a/tests/plotting/baseline/test_colorbar_labels.png b/tests/plotting/baseline/test_colorbar_labels.png index ac4ead2ede..fdb5347016 100644 Binary files a/tests/plotting/baseline/test_colorbar_labels.png and b/tests/plotting/baseline/test_colorbar_labels.png differ diff --git a/tests/plotting/baseline/test_contour.png b/tests/plotting/baseline/test_contour.png index c2651dee65..d97bc4eab7 100644 Binary files a/tests/plotting/baseline/test_contour.png and b/tests/plotting/baseline/test_contour.png differ diff --git a/tests/plotting/baseline/test_contour2.png b/tests/plotting/baseline/test_contour2.png index c6a9567e89..f53bc9d534 100644 Binary files a/tests/plotting/baseline/test_contour2.png and b/tests/plotting/baseline/test_contour2.png differ diff --git a/tests/plotting/baseline/test_contour_stamp.png b/tests/plotting/baseline/test_contour_stamp.png index 25a17ba8f2..fb8aac80e7 100644 Binary files a/tests/plotting/baseline/test_contour_stamp.png and b/tests/plotting/baseline/test_contour_stamp.png differ diff --git a/tests/plotting/baseline/test_contourf.png b/tests/plotting/baseline/test_contourf.png index 726e7a824b..f4e040797c 100644 Binary files a/tests/plotting/baseline/test_contourf.png and b/tests/plotting/baseline/test_contourf.png differ diff --git a/tests/plotting/baseline/test_contourf2.png b/tests/plotting/baseline/test_contourf2.png index 3421f15ad7..9b0fce57d7 100644 Binary files a/tests/plotting/baseline/test_contourf2.png and b/tests/plotting/baseline/test_contourf2.png differ diff --git a/tests/plotting/baseline/test_enhanced_skewt_plot.png b/tests/plotting/baseline/test_enhanced_skewt_plot.png index 958a6c1272..27de1a5985 100644 Binary files a/tests/plotting/baseline/test_enhanced_skewt_plot.png and 
b/tests/plotting/baseline/test_enhanced_skewt_plot.png differ diff --git a/tests/plotting/baseline/test_enhanced_skewt_plot_2.png b/tests/plotting/baseline/test_enhanced_skewt_plot_2.png index f5796d5537..79dd95a641 100644 Binary files a/tests/plotting/baseline/test_enhanced_skewt_plot_2.png and b/tests/plotting/baseline/test_enhanced_skewt_plot_2.png differ diff --git a/tests/plotting/baseline/test_fill_between.png b/tests/plotting/baseline/test_fill_between.png index c595d3ff43..e4417bd3b6 100644 Binary files a/tests/plotting/baseline/test_fill_between.png and b/tests/plotting/baseline/test_fill_between.png differ diff --git a/tests/plotting/baseline/test_geoplot.png b/tests/plotting/baseline/test_geoplot.png index ef1febd4c6..974034f45e 100644 Binary files a/tests/plotting/baseline/test_geoplot.png and b/tests/plotting/baseline/test_geoplot.png differ diff --git a/tests/plotting/baseline/test_geoplot_tile.png b/tests/plotting/baseline/test_geoplot_tile.png index 22cd40e5ed..8390b0901c 100644 Binary files a/tests/plotting/baseline/test_geoplot_tile.png and b/tests/plotting/baseline/test_geoplot_tile.png differ diff --git a/tests/plotting/baseline/test_groupby_plot.png b/tests/plotting/baseline/test_groupby_plot.png index fc35f8fe73..3f84b4502d 100644 Binary files a/tests/plotting/baseline/test_groupby_plot.png and b/tests/plotting/baseline/test_groupby_plot.png differ diff --git a/tests/plotting/baseline/test_heatmap.png b/tests/plotting/baseline/test_heatmap.png index f63777fb05..500809d117 100644 Binary files a/tests/plotting/baseline/test_heatmap.png and b/tests/plotting/baseline/test_heatmap.png differ diff --git a/tests/plotting/baseline/test_heatmap2.png b/tests/plotting/baseline/test_heatmap2.png index 50eb8b296f..75787da33b 100644 Binary files a/tests/plotting/baseline/test_heatmap2.png and b/tests/plotting/baseline/test_heatmap2.png differ diff --git a/tests/plotting/baseline/test_heatmap3.png b/tests/plotting/baseline/test_heatmap3.png index 
37a5b32610..0218fd1538 100644 Binary files a/tests/plotting/baseline/test_heatmap3.png and b/tests/plotting/baseline/test_heatmap3.png differ diff --git a/tests/plotting/baseline/test_match_ylimits_plot.png b/tests/plotting/baseline/test_match_ylimits_plot.png index 748b7c8385..d529a6f7f8 100644 Binary files a/tests/plotting/baseline/test_match_ylimits_plot.png and b/tests/plotting/baseline/test_match_ylimits_plot.png differ diff --git a/tests/plotting/baseline/test_multi_skewt_plot.png b/tests/plotting/baseline/test_multi_skewt_plot.png index 857ebed1f7..f08a1e3522 100644 Binary files a/tests/plotting/baseline/test_multi_skewt_plot.png and b/tests/plotting/baseline/test_multi_skewt_plot.png differ diff --git a/tests/plotting/baseline/test_multidataset_plot_dict.png b/tests/plotting/baseline/test_multidataset_plot_dict.png index a9f6a660f9..5f4b72bf78 100644 Binary files a/tests/plotting/baseline/test_multidataset_plot_dict.png and b/tests/plotting/baseline/test_multidataset_plot_dict.png differ diff --git a/tests/plotting/baseline/test_multidataset_plot_tuple.png b/tests/plotting/baseline/test_multidataset_plot_tuple.png index 8e74f35dca..83991fb2e2 100644 Binary files a/tests/plotting/baseline/test_multidataset_plot_tuple.png and b/tests/plotting/baseline/test_multidataset_plot_tuple.png differ diff --git a/tests/plotting/baseline/test_plot.png b/tests/plotting/baseline/test_plot.png index 921753d729..5e690435b9 100644 Binary files a/tests/plotting/baseline/test_plot.png and b/tests/plotting/baseline/test_plot.png differ diff --git a/tests/plotting/baseline/test_plot_barbs_from_u_v.png b/tests/plotting/baseline/test_plot_barbs_from_u_v.png index a1c58e3410..dec24618da 100644 Binary files a/tests/plotting/baseline/test_plot_barbs_from_u_v.png and b/tests/plotting/baseline/test_plot_barbs_from_u_v.png differ diff --git a/tests/plotting/baseline/test_plot_barbs_from_u_v2.png b/tests/plotting/baseline/test_plot_barbs_from_u_v2.png index 0b360d4e6e..9112d54ed0 100644 
Binary files a/tests/plotting/baseline/test_plot_barbs_from_u_v2.png and b/tests/plotting/baseline/test_plot_barbs_from_u_v2.png differ diff --git a/tests/plotting/baseline/test_plot_barbs_from_u_v3.png b/tests/plotting/baseline/test_plot_barbs_from_u_v3.png new file mode 100644 index 0000000000..7504c32894 Binary files /dev/null and b/tests/plotting/baseline/test_plot_barbs_from_u_v3.png differ diff --git a/tests/plotting/baseline/test_plot_barbs_from_u_v4.png b/tests/plotting/baseline/test_plot_barbs_from_u_v4.png new file mode 100644 index 0000000000..c5f07621bb Binary files /dev/null and b/tests/plotting/baseline/test_plot_barbs_from_u_v4.png differ diff --git a/tests/plotting/baseline/test_plot_barbs_from_u_v5.png b/tests/plotting/baseline/test_plot_barbs_from_u_v5.png new file mode 100644 index 0000000000..55fa440b50 Binary files /dev/null and b/tests/plotting/baseline/test_plot_barbs_from_u_v5.png differ diff --git a/tests/plotting/baseline/test_plot_datarose.png b/tests/plotting/baseline/test_plot_datarose.png index a903d2a340..1b85fb841e 100644 Binary files a/tests/plotting/baseline/test_plot_datarose.png and b/tests/plotting/baseline/test_plot_datarose.png differ diff --git a/tests/plotting/baseline/test_plot_pie_chart.png b/tests/plotting/baseline/test_plot_pie_chart.png new file mode 100644 index 0000000000..79eeee2b48 Binary files /dev/null and b/tests/plotting/baseline/test_plot_pie_chart.png differ diff --git a/tests/plotting/baseline/test_plot_pie_chart_kwargs.png b/tests/plotting/baseline/test_plot_pie_chart_kwargs.png new file mode 100644 index 0000000000..2d3d8ae441 Binary files /dev/null and b/tests/plotting/baseline/test_plot_pie_chart_kwargs.png differ diff --git a/tests/plotting/baseline/test_qc_bar_plot.png b/tests/plotting/baseline/test_qc_bar_plot.png index d8167eb7d2..d298b39a08 100644 Binary files a/tests/plotting/baseline/test_qc_bar_plot.png and b/tests/plotting/baseline/test_qc_bar_plot.png differ diff --git 
a/tests/plotting/baseline/test_qc_flag_block_plot.png b/tests/plotting/baseline/test_qc_flag_block_plot.png index 4ce6106337..dccaed9b30 100644 Binary files a/tests/plotting/baseline/test_qc_flag_block_plot.png and b/tests/plotting/baseline/test_qc_flag_block_plot.png differ diff --git a/tests/plotting/baseline/test_scatter.png b/tests/plotting/baseline/test_scatter.png index bdd98143f2..7fba0883d0 100644 Binary files a/tests/plotting/baseline/test_scatter.png and b/tests/plotting/baseline/test_scatter.png differ diff --git a/tests/plotting/baseline/test_scatter2.png b/tests/plotting/baseline/test_scatter2.png index 1f512fe019..2feb1f4c53 100644 Binary files a/tests/plotting/baseline/test_scatter2.png and b/tests/plotting/baseline/test_scatter2.png differ diff --git a/tests/plotting/baseline/test_size_distribution.png b/tests/plotting/baseline/test_size_distribution.png index 2fa19f74d9..783fa2fdf7 100644 Binary files a/tests/plotting/baseline/test_size_distribution.png and b/tests/plotting/baseline/test_size_distribution.png differ diff --git a/tests/plotting/baseline/test_size_distribution2.png b/tests/plotting/baseline/test_size_distribution2.png index e5d63d4ac3..40ddfaf84b 100644 Binary files a/tests/plotting/baseline/test_size_distribution2.png and b/tests/plotting/baseline/test_size_distribution2.png differ diff --git a/tests/plotting/baseline/test_skewt_options.png b/tests/plotting/baseline/test_skewt_options.png index f39af2fde8..8b477d0ab5 100644 Binary files a/tests/plotting/baseline/test_skewt_options.png and b/tests/plotting/baseline/test_skewt_options.png differ diff --git a/tests/plotting/baseline/test_skewt_plot.png b/tests/plotting/baseline/test_skewt_plot.png index b9335339b6..22c75423ba 100644 Binary files a/tests/plotting/baseline/test_skewt_plot.png and b/tests/plotting/baseline/test_skewt_plot.png differ diff --git a/tests/plotting/baseline/test_skewt_plot_spd_dir.png b/tests/plotting/baseline/test_skewt_plot_spd_dir.png index 
b9335339b6..22c75423ba 100644 Binary files a/tests/plotting/baseline/test_skewt_plot_spd_dir.png and b/tests/plotting/baseline/test_skewt_plot_spd_dir.png differ diff --git a/tests/plotting/baseline/test_stacked_bar_graph.png b/tests/plotting/baseline/test_stacked_bar_graph.png index bd52279209..a4c73bfe9e 100644 Binary files a/tests/plotting/baseline/test_stacked_bar_graph.png and b/tests/plotting/baseline/test_stacked_bar_graph.png differ diff --git a/tests/plotting/baseline/test_stacked_bar_graph2.png b/tests/plotting/baseline/test_stacked_bar_graph2.png index 13e1fc78ef..fb55f921a9 100644 Binary files a/tests/plotting/baseline/test_stacked_bar_graph2.png and b/tests/plotting/baseline/test_stacked_bar_graph2.png differ diff --git a/tests/plotting/baseline/test_stacked_bar_graph3.png b/tests/plotting/baseline/test_stacked_bar_graph3.png index bd483fb485..64ece6b91f 100644 Binary files a/tests/plotting/baseline/test_stacked_bar_graph3.png and b/tests/plotting/baseline/test_stacked_bar_graph3.png differ diff --git a/tests/plotting/baseline/test_stacked_bar_graph_sorted.png b/tests/plotting/baseline/test_stacked_bar_graph_sorted.png index 4607ad3d27..0af0344762 100644 Binary files a/tests/plotting/baseline/test_stacked_bar_graph_sorted.png and b/tests/plotting/baseline/test_stacked_bar_graph_sorted.png differ diff --git a/tests/plotting/baseline/test_stair_graph.png b/tests/plotting/baseline/test_stair_graph.png index 8c0e0fb4c6..d5392f3142 100644 Binary files a/tests/plotting/baseline/test_stair_graph.png and b/tests/plotting/baseline/test_stair_graph.png differ diff --git a/tests/plotting/baseline/test_stair_graph2.png b/tests/plotting/baseline/test_stair_graph2.png index 298b8fe834..1ad8a2e48c 100644 Binary files a/tests/plotting/baseline/test_stair_graph2.png and b/tests/plotting/baseline/test_stair_graph2.png differ diff --git a/tests/plotting/baseline/test_stair_graph_sorted.png b/tests/plotting/baseline/test_stair_graph_sorted.png index 5d2f888b0c..02641f0e8c 
100644 Binary files a/tests/plotting/baseline/test_stair_graph_sorted.png and b/tests/plotting/baseline/test_stair_graph_sorted.png differ diff --git a/tests/plotting/baseline/test_time_height_scatter.png b/tests/plotting/baseline/test_time_height_scatter.png index 08ab88119c..4e5dc819e1 100644 Binary files a/tests/plotting/baseline/test_time_height_scatter.png and b/tests/plotting/baseline/test_time_height_scatter.png differ diff --git a/tests/plotting/baseline/test_time_height_scatter2.png b/tests/plotting/baseline/test_time_height_scatter2.png index d737ec75d8..0ead21c30f 100644 Binary files a/tests/plotting/baseline/test_time_height_scatter2.png and b/tests/plotting/baseline/test_time_height_scatter2.png differ diff --git a/tests/plotting/baseline/test_time_plot.png b/tests/plotting/baseline/test_time_plot.png index 46ab1e433e..4c772ba994 100644 Binary files a/tests/plotting/baseline/test_time_plot.png and b/tests/plotting/baseline/test_time_plot.png differ diff --git a/tests/plotting/baseline/test_time_plot2.png b/tests/plotting/baseline/test_time_plot2.png index 95b493c342..19273939ec 100644 Binary files a/tests/plotting/baseline/test_time_plot2.png and b/tests/plotting/baseline/test_time_plot2.png differ diff --git a/tests/plotting/baseline/test_time_plot_match_color_ylabel.png b/tests/plotting/baseline/test_time_plot_match_color_ylabel.png index e356448e1c..ba3ba31448 100644 Binary files a/tests/plotting/baseline/test_time_plot_match_color_ylabel.png and b/tests/plotting/baseline/test_time_plot_match_color_ylabel.png differ diff --git a/tests/plotting/baseline/test_timeseries_invert.png b/tests/plotting/baseline/test_timeseries_invert.png index e51dbfd11a..8686032e91 100644 Binary files a/tests/plotting/baseline/test_timeseries_invert.png and b/tests/plotting/baseline/test_timeseries_invert.png differ diff --git a/tests/plotting/baseline/test_violin.png b/tests/plotting/baseline/test_violin.png index 743911ffde..9af6ec9c89 100644 Binary files 
a/tests/plotting/baseline/test_violin.png and b/tests/plotting/baseline/test_violin.png differ diff --git a/tests/plotting/baseline/test_violin2.png b/tests/plotting/baseline/test_violin2.png index e642fe633f..ce0b7258f9 100644 Binary files a/tests/plotting/baseline/test_violin2.png and b/tests/plotting/baseline/test_violin2.png differ diff --git a/tests/plotting/baseline/test_wind_rose.png b/tests/plotting/baseline/test_wind_rose.png index 95e6b4c1ee..cc86e9e53b 100644 Binary files a/tests/plotting/baseline/test_wind_rose.png and b/tests/plotting/baseline/test_wind_rose.png differ diff --git a/tests/plotting/baseline/test_xlim_correction_plot.png b/tests/plotting/baseline/test_xlim_correction_plot.png index 0d64734bc8..f66fd2b44f 100644 Binary files a/tests/plotting/baseline/test_xlim_correction_plot.png and b/tests/plotting/baseline/test_xlim_correction_plot.png differ diff --git a/tests/plotting/baseline/test_xsection_plot.png b/tests/plotting/baseline/test_xsection_plot.png index cc0de83249..4f88b5c404 100644 Binary files a/tests/plotting/baseline/test_xsection_plot.png and b/tests/plotting/baseline/test_xsection_plot.png differ diff --git a/tests/plotting/baseline/test_xsection_plot_map.png b/tests/plotting/baseline/test_xsection_plot_map.png index fa4ed0b2a3..9165ad70c7 100644 Binary files a/tests/plotting/baseline/test_xsection_plot_map.png and b/tests/plotting/baseline/test_xsection_plot_map.png differ diff --git a/tests/plotting/baseline/test_y_axis_flag_meanings.png b/tests/plotting/baseline/test_y_axis_flag_meanings.png index 76de97076d..9d1d2f2eee 100644 Binary files a/tests/plotting/baseline/test_y_axis_flag_meanings.png and b/tests/plotting/baseline/test_y_axis_flag_meanings.png differ diff --git a/tests/plotting/test_contourdisplay.py b/tests/plotting/test_contourdisplay.py index 2b6f4096b7..f0ef8a144c 100644 --- a/tests/plotting/test_contourdisplay.py +++ b/tests/plotting/test_contourdisplay.py @@ -8,7 +8,7 @@ matplotlib.use('Agg') 
-@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_contour(): files = sample_files.EXAMPLE_MET_CONTOUR time = '2019-05-08T04:00:00.000000000' @@ -36,7 +36,7 @@ def test_contour(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_contour_stamp(): files = sample_files.EXAMPLE_STAMP_WILDCARD test = {} @@ -58,7 +58,7 @@ def test_contour_stamp(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_contour2(): files = sample_files.EXAMPLE_MET_CONTOUR time = '2019-05-08T04:00:00.000000000' @@ -86,7 +86,7 @@ def test_contour2(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_contourf(): files = sample_files.EXAMPLE_MET_CONTOUR time = '2019-05-08T04:00:00.000000000' @@ -126,7 +126,7 @@ def test_contourf(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_contourf2(): files = sample_files.EXAMPLE_MET_CONTOUR time = '2019-05-08T04:00:00.000000000' diff --git a/tests/plotting/test_distributiondisplay.py b/tests/plotting/test_distributiondisplay.py index ba9bfa0116..ad906ab443 100644 --- a/tests/plotting/test_distributiondisplay.py +++ b/tests/plotting/test_distributiondisplay.py @@ -80,7 +80,7 @@ def test_distribution_errors(): matplotlib.pyplot.close(fig=histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stair_graph(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) @@ -94,7 +94,7 @@ def test_stair_graph(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stair_graph2(): sonde_ds = 
act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) del sonde_ds['tdry'].attrs['units'] @@ -109,7 +109,7 @@ def test_stair_graph2(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stair_graph_sorted(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) @@ -128,7 +128,7 @@ def test_stair_graph_sorted(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stacked_bar_graph(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) @@ -142,7 +142,7 @@ def test_stacked_bar_graph(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stacked_bar_graph2(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) @@ -158,7 +158,7 @@ def test_stacked_bar_graph2(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stacked_bar_graph3(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) del sonde_ds['tdry'].attrs['units'] @@ -173,7 +173,7 @@ def test_stacked_bar_graph3(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_stacked_bar_graph_sorted(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) @@ -192,7 +192,7 @@ def test_stacked_bar_graph_sorted(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_heatmap(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) @@ -212,7 +212,7 @@ def test_heatmap(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def 
test_heatmap2(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) del sonde_ds['tdry'].attrs['units'] @@ -233,7 +233,7 @@ def test_heatmap2(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_heatmap3(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) del sonde_ds['tdry'].attrs['units'] @@ -253,7 +253,7 @@ def test_heatmap3(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_size_distribution(): sigma = 10 mu = 50 @@ -270,7 +270,7 @@ def test_size_distribution(): matplotlib.pyplot.close(histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_size_distribution2(): sigma = 10 mu = 50 @@ -334,7 +334,7 @@ def test_histogram_kwargs(): matplotlib.pyplot.close(fig=histdisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_violin(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) @@ -352,7 +352,7 @@ def test_violin(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_violin2(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) del ds['temp_mean'].attrs['units'] @@ -371,7 +371,7 @@ def test_violin2(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_scatter(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) # Create a DistributionDisplay object to compare fields @@ -394,7 +394,7 @@ def test_scatter(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_scatter2(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) del 
ds['wspd_arith_mean'].attrs['units'] @@ -412,3 +412,38 @@ def test_scatter2(): return display.fig finally: matplotlib.pyplot.close(display.fig) + + +@pytest.mark.mpl_image_compare(tolerance=10) +def test_plot_pie_chart(): + ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_AOSACSM) + fields = ['sulfate', 'ammonium', 'nitrate', 'chloride'] + display = DistributionDisplay(ds) + display.plot_pie_chart(fields) + ds.close() + + try: + return display.fig + finally: + matplotlib.pyplot.close(display.fig) + + +@pytest.mark.mpl_image_compare(tolerance=10) +def test_plot_pie_chart_kwargs(): + ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_AOSACSM) + fields = ['sulfate', 'ammonium', 'nitrate', 'chloride'] + threshold = 0.0 + fill_value = 0.0 + display = DistributionDisplay(ds) + display.plot_pie_chart( + fields, + threshold=threshold, + fill_value=fill_value, + colors=['olivedrab', 'rosybrown', 'gray', 'saddlebrown'], + ) + ds.close() + + try: + return display.fig + finally: + matplotlib.pyplot.close(display.fig) diff --git a/tests/plotting/test_geodisplay.py b/tests/plotting/test_geodisplay.py index e576e965a8..d767c2f576 100644 --- a/tests/plotting/test_geodisplay.py +++ b/tests/plotting/test_geodisplay.py @@ -7,7 +7,7 @@ from act.tests import sample_files try: - import cartopy + import cartopy # noqa CARTOPY_AVAILABLE = True except ImportError: @@ -17,7 +17,7 @@ @pytest.mark.skipif(not CARTOPY_AVAILABLE, reason='Cartopy is not installed.') -@pytest.mark.mpl_image_compare(style='default', tolerance=30) +@pytest.mark.mpl_image_compare(style='default', tolerance=10) def test_geoplot(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) geodisplay = GeographicPlotDisplay({'sgpsondewnpnC1.b1': sonde_ds}, figsize=(15, 8)) @@ -46,7 +46,7 @@ def test_geoplot(): @pytest.mark.skipif(not CARTOPY_AVAILABLE, reason='Cartopy is not installed.') -@pytest.mark.mpl_image_compare(style='default', tolerance=30) +@pytest.mark.mpl_image_compare(style='default', 
tolerance=10) def test_geoplot_tile(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) geodisplay = GeographicPlotDisplay({'sgpsondewnpnC1.b1': sonde_ds}, figsize=(15, 8)) diff --git a/tests/plotting/test_skewtdisplay.py b/tests/plotting/test_skewtdisplay.py index aad586fda1..21a45281a9 100644 --- a/tests/plotting/test_skewtdisplay.py +++ b/tests/plotting/test_skewtdisplay.py @@ -1,4 +1,5 @@ import matplotlib +import matplotlib.pyplot as plt import numpy as np import pytest @@ -9,7 +10,7 @@ matplotlib.use('Agg') -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_skewt_plot(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) skewt = SkewTDisplay(sonde_ds) @@ -18,7 +19,7 @@ def test_skewt_plot(): return skewt.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_skewt_plot_spd_dir(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) skewt = SkewTDisplay(sonde_ds, ds_name='act_datastream') @@ -27,7 +28,7 @@ def test_skewt_plot_spd_dir(): return skewt.fig -@pytest.mark.mpl_image_compare(tolerance=81) +@pytest.mark.mpl_image_compare(tolerance=10) def test_multi_skewt_plot(): files = sample_files.EXAMPLE_TWP_SONDE_20060121 test = {} @@ -37,7 +38,7 @@ def test_multi_skewt_plot(): sonde_ds = sonde_ds.resample(time='30s').nearest() test.update({time: sonde_ds}) - skewt = SkewTDisplay(test, subplot_shape=(2, 2)) + skewt = SkewTDisplay(test, subplot_shape=(2, 2), figsize=(12, 14)) i = 0 j = 0 for f in files: @@ -52,15 +53,17 @@ def test_multi_skewt_plot(): dsname=time, p_levels_to_plot=np.arange(10.0, 1000.0, 25), ) + skewt.axes[j, i].set_ylim([1000, 10]) if j == 1: i += 1 j = 0 elif j == 0: j += 1 + plt.tight_layout() return skewt.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_enhanced_skewt_plot(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) 
display = act.plotting.SkewTDisplay(ds) @@ -69,7 +72,7 @@ def test_enhanced_skewt_plot(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_enhanced_skewt_plot_2(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) display = act.plotting.SkewTDisplay(ds) @@ -87,7 +90,7 @@ def test_enhanced_skewt_plot_2(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_skewt_options(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) skewt = SkewTDisplay(sonde_ds) diff --git a/tests/plotting/test_timeseriesdisplay.py b/tests/plotting/test_timeseriesdisplay.py index 3ae873473c..d1461b3779 100644 --- a/tests/plotting/test_timeseriesdisplay.py +++ b/tests/plotting/test_timeseriesdisplay.py @@ -14,7 +14,7 @@ matplotlib.use('Agg') -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot(): # Process MET data to get simple LCL files = sample_files.EXAMPLE_MET_WILDCARD @@ -130,7 +130,7 @@ def test_errors(): matplotlib.pyplot.close(fig=display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_multidataset_plot_tuple(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) ds2 = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SIRS) @@ -158,7 +158,7 @@ def test_multidataset_plot_tuple(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_multidataset_plot_dict(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) ds2 = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SIRS) @@ -179,7 +179,7 @@ def test_multidataset_plot_dict(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_barb_sounding_plot(): sonde_ds = 
act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_TWP_SONDE_WILDCARD) BarbDisplay = TimeSeriesDisplay({'sonde_darwin': sonde_ds}) @@ -196,11 +196,11 @@ def test_barb_sounding_plot(): # Due to issues with pytest-mpl, for now we just test to see if it runs -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_time_height_scatter(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) - display = TimeSeriesDisplay({'sgpsondewnpnC1.b1': sonde_ds}, figsize=(7, 3)) + display = TimeSeriesDisplay({'sgpsondewnpnC1.b1': sonde_ds}, figsize=(10, 6)) display.time_height_scatter('tdry', plot_alt_field=True) sonde_ds.close() @@ -212,16 +212,22 @@ def test_time_height_scatter(): # Due to issues with pytest-mpl, for now we just test to see if it runs -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_time_height_scatter2(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) - display = TimeSeriesDisplay({'sgpsondewnpnC1.b1': sonde_ds}, figsize=(7, 6), subplot_shape=(2,)) + display = TimeSeriesDisplay( + {'sgpsondewnpnC1.b1': sonde_ds}, figsize=(8, 10), subplot_shape=(2,) + ) display.time_height_scatter( - 'tdry', day_night_background=True, subplot_index=(0,), cb_friendly=True, plot_alt_field=True + 'tdry', + day_night_background=True, + subplot_index=(0,), + cvd_friendly=True, + plot_alt_field=True, ) display.time_height_scatter( - 'rh', day_night_background=True, subplot_index=(1,), cb_friendly=True + 'rh', day_night_background=True, subplot_index=(1,), cvd_friendly=True ) sonde_ds.close() @@ -232,7 +238,7 @@ def test_time_height_scatter2(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_qc_bar_plot(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) ds.clean.cleanup() @@ -265,7 +271,7 @@ def test_qc_bar_plot(): matplotlib.pyplot.close(display.fig) 
-@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_2d_as_1d(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) @@ -281,7 +287,7 @@ def test_2d_as_1d(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_fill_between(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET_WILDCARD) @@ -299,16 +305,16 @@ def test_fill_between(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_qc_flag_block_plot(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SURFSPECALB1MLAWER) - display = TimeSeriesDisplay(ds, subplot_shape=(2,), figsize=(8, 2 * 4)) + display = TimeSeriesDisplay(ds, subplot_shape=(2,), figsize=(10, 8)) display.plot('surface_albedo_mfr_narrowband_10m', force_line_plot=True, labels=True) display.qc_flag_block_plot( - 'surface_albedo_mfr_narrowband_10m', subplot_index=(1,), cb_friendly=True + 'surface_albedo_mfr_narrowband_10m', subplot_index=(1,), cvd_friendly=True ) ds.close() @@ -320,7 +326,7 @@ def test_qc_flag_block_plot(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_assessment_overplot(): var_name = 'temp_mean' files = sample_files.EXAMPLE_MET1 @@ -344,7 +350,7 @@ def test_assessment_overplot(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_assessment_overplot_multi(): var_name1, var_name2 = 'wspd_arith_mean', 'wspd_vec_mean' files = sample_files.EXAMPLE_MET1 @@ -377,7 +383,7 @@ def test_assessment_overplot_multi(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot_barbs_from_u_v(): sonde_ds = 
act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_TWP_SONDE_WILDCARD) BarbDisplay = TimeSeriesDisplay({'sonde_darwin': sonde_ds}) @@ -389,7 +395,7 @@ def test_plot_barbs_from_u_v(): matplotlib.pyplot.close(BarbDisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot_barbs_from_u_v2(): bins = list(np.linspace(0, 1, 10)) xbins = list(pd.date_range(pd.to_datetime('2020-01-01'), pd.to_datetime('2020-01-02'), 12)) @@ -417,6 +423,7 @@ def test_plot_barbs_from_u_v2(): matplotlib.pyplot.close(BarbDisplay.fig) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot_barbs_from_u_v3(): bins = list(np.linspace(0, 1, 10)) xbins = list(pd.date_range(pd.to_datetime('2020-01-01'), pd.to_datetime('2020-01-02'), 12)) @@ -440,6 +447,7 @@ def test_plot_barbs_from_u_v3(): matplotlib.pyplot.close(BarbDisplay.fig) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot_barbs_from_u_v4(): bins = list(np.linspace(0, 1, 10)) xbins = [pd.to_datetime('2020-01-01')] @@ -465,6 +473,7 @@ def test_plot_barbs_from_u_v4(): matplotlib.pyplot.close(BarbDisplay.fig) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot_barbs_from_u_v5(): bins = list(np.linspace(0, 1, 10)) xbins = [pd.to_datetime('2020-01-01')] @@ -494,7 +503,7 @@ def test_plot_barbs_from_u_v5(): matplotlib.pyplot.close(BarbDisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_2D_timeseries_plot(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) display = TimeSeriesDisplay(ds) @@ -505,7 +514,7 @@ def test_2D_timeseries_plot(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_time_plot(): files = sample_files.EXAMPLE_MET1 ds = act.io.arm.read_arm_netcdf(files) @@ -514,7 +523,7 @@ def test_time_plot(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) 
+@pytest.mark.mpl_image_compare(tolerance=10) def test_time_plot_match_color_ylabel(): files = sample_files.EXAMPLE_MET1 ds = act.io.arm.read_arm_netcdf(files) @@ -523,7 +532,7 @@ def test_time_plot_match_color_ylabel(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=40) +@pytest.mark.mpl_image_compare(tolerance=10) def test_time_plot2(): files = sample_files.EXAMPLE_MET1 ds = act.io.arm.read_arm_netcdf(files, decode_times=False, use_cftime=False) @@ -532,7 +541,7 @@ def test_time_plot2(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_y_axis_flag_meanings(): variable = 'detection_status' ds = act.io.arm.read_arm_netcdf( @@ -547,7 +556,7 @@ def test_y_axis_flag_meanings(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=35) +@pytest.mark.mpl_image_compare(tolerance=10) def test_colorbar_labels(): variable = 'cloud_phase_hsrl' ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CLOUDPHASE) @@ -568,7 +577,7 @@ def test_colorbar_labels(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_add_nan_line(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) @@ -597,7 +606,35 @@ def test_add_nan_line(): matplotlib.pyplot.close(display.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) +def test_add_nan_line_integer(): + data = np.arange(100, dtype=np.int32) + time = np.array('2019-11-01T00:00:00', dtype='datetime64[m]') + np.arange(data.size) + time = time.astype('datetime64[ns]') # Only done to stop a warning appearing + + # Remove data to produce a gap + data = np.delete(data, np.arange(50, 60), axis=0) + time = np.delete(time, np.arange(50, 60), axis=0) + data = np.delete(data, np.arange(70, 75), axis=0) + time = np.delete(time, np.arange(70, 75), axis=0) + + ds = xr.Dataset( + data_vars={'data': ('time', data, {'long_name': 'Data values', 'units': 
'degC'})}, + coords={'time': ('time', time, {'long_name': 'Time in UTC'})}, + ) + + display = TimeSeriesDisplay({'test_datastream': ds}, figsize=(15, 10), subplot_shape=(1,)) + display.plot('data', subplot_index=(0,), add_nan=True, marker='.', markersize=20, linewidth=5) + + assert np.issubdtype(ds['data'].dtype, np.integer) + + try: + return display.fig + finally: + matplotlib.pyplot.close(display.fig) + + +@pytest.mark.mpl_image_compare(tolerance=10) def test_timeseries_invert(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_IRT25m20s) display = TimeSeriesDisplay(ds, figsize=(10, 8)) @@ -622,11 +659,11 @@ def test_plot_time_rng(): display.plot('temp_mean', time_rng=xrng) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_match_ylimits_plot(): files = sample_files.EXAMPLE_MET_WILDCARD ds = act.io.arm.read_arm_netcdf(files) - display = act.plotting.TimeSeriesDisplay(ds, figsize=(10, 8), subplot_shape=(2, 2)) + display = act.plotting.TimeSeriesDisplay(ds, figsize=(14, 8), subplot_shape=(2, 2)) groupby = display.group_by('day') groupby.plot_group('plot', None, field='temp_mean', marker=' ') groupby.display.set_yrng([-20, 20], match_axes_ylimits=True) @@ -634,7 +671,7 @@ def test_match_ylimits_plot(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_xlim_correction_plot(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1) diff --git a/tests/plotting/test_windrosedisplay.py b/tests/plotting/test_windrosedisplay.py index b332e806ee..7566e9d8f0 100644 --- a/tests/plotting/test_windrosedisplay.py +++ b/tests/plotting/test_windrosedisplay.py @@ -9,7 +9,7 @@ matplotlib.use('Agg') -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_wind_rose(): sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_TWP_SONDE_WILDCARD) @@ -33,7 +33,7 @@ def test_wind_rose(): 
matplotlib.pyplot.close(WindDisplay.fig) -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_plot_datarose(): files = sample_files.EXAMPLE_MET_WILDCARD ds = act.io.arm.read_arm_netcdf(files) @@ -131,7 +131,7 @@ def test_plot_datarose(): return display.fig -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_groupby_plot(): ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD) diff --git a/tests/plotting/test_xsectiondisplay.py b/tests/plotting/test_xsectiondisplay.py index 29d460a729..732165cd2e 100644 --- a/tests/plotting/test_xsectiondisplay.py +++ b/tests/plotting/test_xsectiondisplay.py @@ -7,7 +7,7 @@ from act.tests import sample_files try: - import cartopy + import cartopy # noqa CARTOPY_AVAILABLE = True except ImportError: @@ -34,7 +34,7 @@ def test_xsection_errors(): matplotlib.pyplot.close(fig=display.fig) -@pytest.mark.mpl_image_compare(tolerance=31) +@pytest.mark.mpl_image_compare(tolerance=20) def test_xsection_plot(): visst_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) @@ -51,7 +51,7 @@ def test_xsection_plot(): @pytest.mark.skipif(not CARTOPY_AVAILABLE, reason='Cartopy is not installed.') -@pytest.mark.mpl_image_compare(tolerance=30) +@pytest.mark.mpl_image_compare(tolerance=10) def test_xsection_plot_map(): radar_ds = act.io.arm.read_arm_netcdf( sample_files.EXAMPLE_VISST, combine='nested', concat_dim='time' diff --git a/tests/qc/conftest.py b/tests/qc/conftest.py new file mode 100644 index 0000000000..ab31c52f9f --- /dev/null +++ b/tests/qc/conftest.py @@ -0,0 +1,19 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--runbig", action="store_true", default=False, help="Run big tests") + + +def pytest_configure(config): + config.addinivalue_line("markers", "big: mark test as slow to run") + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runbig"): + # --runbig given in cli: do not 
skip big tests + return + skip_big = pytest.mark.skip(reason="need --runbig option to run") + for item in items: + if "big" in item.keywords: + item.add_marker(skip_big) diff --git a/tests/qc/test_arm_qc.py b/tests/qc/test_arm_qc.py index e118648706..ec623c51b2 100644 --- a/tests/qc/test_arm_qc.py +++ b/tests/qc/test_arm_qc.py @@ -12,7 +12,7 @@ def test_scalar_dqr(): # DQR webservice does go down, so ensure it # properly runs first before testing try: - ds = add_dqr_to_qc(ds) + ds = add_dqr_to_qc(ds, assessment='Reprocessed,Suspect,Incorrect') ran = True except ValueError: ran = False diff --git a/tests/qc/test_clean.py b/tests/qc/test_clean.py index add485b3d9..ec1e7ba4e2 100644 --- a/tests/qc/test_clean.py +++ b/tests/qc/test_clean.py @@ -1,7 +1,14 @@ import numpy as np from act.io.arm import read_arm_netcdf -from act.tests import EXAMPLE_CEIL1, EXAMPLE_CO2FLX4M, EXAMPLE_MET1 +from act.tests import ( + EXAMPLE_CEIL1, + EXAMPLE_CO2FLX4M, + EXAMPLE_MET1, + EXAMPLE_MET_SAIL, + EXAMPLE_SIRS_SIRI_QC, + EXAMPLE_SWATS, +) def test_global_qc_cleanup(): @@ -154,3 +161,183 @@ def test_qc_flag_description(): unique_flag_assessments = list({'Acceptable', 'Indeterminate', 'Bad'}) for f in list(set(ds[qc_var_name].attrs['flag_assessments'])): assert f in unique_flag_assessments + + +def test_clean_sirs_siri_qc(): + ds = read_arm_netcdf(EXAMPLE_SIRS_SIRI_QC) + + data = ds["qc_short_direct_normal"].values + data[0:5] = 1 + data[5:10] = 2 + data[10:15] = 3 + data[15:20] = 6 + data[20:25] = 7 + data[25:30] = 8 + data[30:35] = 9 + data[35:40] = 94 + data[40:45] = 95 + data[45:50] = 96 + data[50:55] = 97 + data[55:60] = 14 + data[60:65] = 18 + data[65:70] = 22 + data[70:75] = 26 + ds["qc_short_direct_normal"].values = data + + data = ds["qc_up_long_hemisp"].values + data[0:5] = 1 + data[5:10] = 2 + data[10:15] = 7 + data[15:20] = 8 + data[20:25] = 31 + ds["qc_up_long_hemisp"].values = data + + data = ds["qc_up_short_hemisp"].values + data[0:5] = 1 + data[5:10] = 2 + data[10:15] = 7 
+ data[15:20] = 8 + data[20:25] = 31 + ds["qc_up_short_hemisp"].values = data + + ds.clean.cleanup() + + assert ds["qc_short_direct_normal"].attrs['flag_masks'] == [ + 1, + 2, + 4, + 8, + 16, + 32, + 64, + 128, + 256, + 512, + 1024, + 2048, + 4096, + 8192, + 16384, + 32768, + ] + assert ds["qc_short_direct_normal"].attrs['flag_meanings'] == [ + 'Value is set to missing_value.', + 'Passed 1-component test; data fall within max-min limits of Kt,Kn, or Kd', + 'Passed 2-component test; data fall within 0.03 of the Gompertz boundaries', + 'Passed 3-component test; data come within +/- 0.03 of satifying Kt=Kn+Kd', + 'Value estimated; passes all pertinent SERI QC tests', + 'Failed 1-component test; lower than allowed minimum', + 'Falied 1-component test; higher than allowed maximum', + 'Passed 3-component test but failed 2-component test by >0.05', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.05 to 0.10.', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.10 to 0.15.', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.15 to 0.20.', + 'Data fall into a physically impossible region where Kn>Kt by K-space distances of >= 0.20.', + 'Parameter too low by 3-component test (Kt=Kn+Kd)', + 'Parameter too high by 3-component test (Kt=Kn+Kd)', + 'Parameter too low by 2-component test (Gompertz boundary)', + 'Parameter too high by 2-component test (Gompertz boundary)', + ] + + assert ds["qc_up_long_hemisp"].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32] + assert ds["qc_up_long_hemisp"].attrs['flag_meanings'] == [ + 'Value is set to missing_value.', + 'Passed 1-component test; data fall within max-min limits of up_long_hemisp and down_long_hemisp_shaded, but short_direct_normal and down_short_hemisp or down_short_diffuse fail the SERI QC tests.', + 'Passed 2-component test; data fall within max-min limits of up_long_hemisp and down_long_hemisp_shaded, and 
short_direct_normal, or down_short_hemisp and down_short_diffuse pass the SERI QC tests while the difference between down_short_hemisp and down_short_diffuse is greater than 20 W/m2.', + 'Failed 1-component test; lower than allowed minimum', + 'Failed 1-component test; higher than allowed maximum', + 'Failed 2-component test', + ] + + assert ds["qc_up_short_hemisp"].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32] + assert ds["qc_up_short_hemisp"].attrs['flag_meanings'] == [ + 'Value is set to missing_value.', + 'Passed 1-component test', + 'Passed 2-component test', + 'Failed 1-component test; lower than allowed minimum', + 'Failed 1-component test; higher than allowed maximum', + 'Failed 2-component test; solar zenith angle is less than 80 degrees and down_short_hemisp is 0 or missing', + ] + + assert np.all(ds["qc_short_direct_normal"].values[0:5] == 2) + assert np.all(ds["qc_short_direct_normal"].values[5:10] == 4) + assert np.all(ds["qc_short_direct_normal"].values[10:15] == 8) + assert np.all(ds["qc_short_direct_normal"].values[15:20] == 16) + assert np.all(ds["qc_short_direct_normal"].values[20:25] == 32) + assert np.all(ds["qc_short_direct_normal"].values[25:30] == 64) + assert np.all(ds["qc_short_direct_normal"].values[30:35] == 128) + assert np.all(ds["qc_short_direct_normal"].values[35:40] == 256) + assert np.all(ds["qc_short_direct_normal"].values[40:45] == 512) + assert np.all(ds["qc_short_direct_normal"].values[45:50] == 1024) + assert np.all(ds["qc_short_direct_normal"].values[50:55] == 2048) + assert np.all(ds["qc_short_direct_normal"].values[55:60] == 4096) + assert np.all(ds["qc_short_direct_normal"].values[60:65] == 8192) + assert np.all(ds["qc_short_direct_normal"].values[65:70] == 16384) + assert np.all(ds["qc_short_direct_normal"].values[70:75] == 32768) + + assert np.all(ds["qc_up_long_hemisp"].values[0:5] == 2) + assert np.all(ds["qc_up_long_hemisp"].values[5:10] == 4) + assert np.all(ds["qc_up_long_hemisp"].values[10:15] == 8) + assert 
np.all(ds["qc_up_long_hemisp"].values[15:20] == 16) + assert np.all(ds["qc_up_long_hemisp"].values[20:25] == 32) + + assert np.all(ds["qc_up_short_hemisp"].values[0:5] == 2) + assert np.all(ds["qc_up_short_hemisp"].values[5:10] == 4) + assert np.all(ds["qc_up_short_hemisp"].values[10:15] == 8) + assert np.all(ds["qc_up_short_hemisp"].values[15:20] == 16) + assert np.all(ds["qc_up_short_hemisp"].values[20:25] == 32) + + +def test_swats_qc(): + ds = read_arm_netcdf(EXAMPLE_SWATS) + ds.clean.cleanup() + + data_var_names = [] + for var_name in ds.data_vars: + try: + ds[f'qc_{var_name}'] + data_var_names.append(var_name) + except KeyError: + pass + + for var_name in data_var_names: + qc_var_name = f'qc_{var_name}' + + assert ds[qc_var_name].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds[qc_var_name].attrs['flag_meanings'] == [ + 'Value is set to missing_value.', + 'Data value less than fail_min.', + 'Data value greater than fail_max.', + 'Difference between current and previous values exceeds fail_delta.', + ] + assert ds[qc_var_name].attrs['flag_assessments'] == ['Bad', 'Bad', 'Bad', 'Indeterminate'] + assert 'fail_min' in ds[qc_var_name].attrs + assert 'fail_max' in ds[qc_var_name].attrs + assert 'fail_delta' in ds[qc_var_name].attrs + + assert 'valid_min' not in ds[var_name].attrs + assert 'valid_max' not in ds[var_name].attrs + assert 'valid_delta' not in ds[var_name].attrs + assert ds[var_name].attrs['units'] != 'C' + + +def test_fix_incorrect_variable_bit_description_attributes(): + ds = read_arm_netcdf(EXAMPLE_MET_SAIL) + qc_var_name = 'qc_temp_mean' + ds[qc_var_name].attrs['qc_bit_2_description'] = ds[qc_var_name].attrs['bit_2_description'] + ds[qc_var_name].attrs['qc_bit_2_assessment'] = ds[qc_var_name].attrs['bit_2_assessment'] + del ds[qc_var_name].attrs['bit_2_description'] + del ds[qc_var_name].attrs['bit_2_assessment'] + + ds.clean.cleanup() + + assert ds[qc_var_name].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds[qc_var_name].attrs['flag_meanings'] == [ 
+ 'Value is equal to missing_value.', + 'Value is less than fail_min.', + 'Value is greater than fail_max.', + 'Difference between current and previous values exceeds fail_delta.', + ] + assert ds[qc_var_name].attrs['flag_assessments'] == ['Bad', 'Bad', 'Bad', 'Indeterminate'] + assert 'qc_bit_2_description' not in ds[qc_var_name].attrs + assert 'qc_bit_2_assessment' not in ds[qc_var_name].attrs diff --git a/tests/qc/test_qc_summary.py b/tests/qc/test_qc_summary.py new file mode 100644 index 0000000000..40cd8c5fc0 --- /dev/null +++ b/tests/qc/test_qc_summary.py @@ -0,0 +1,260 @@ +import numpy as np +from os import environ +from pathlib import Path +import random +import pytest +import datetime + +from act.io.arm import read_arm_netcdf +from act.tests import EXAMPLE_MET1 +from act.qc.qcfilter import set_bit +from act.utils.data_utils import DatastreamParserARM + + +def test_qc_summary(): + for cleanup in [False, True]: + ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=not cleanup) + for var_name in ['temp_mean', 'rh_mean']: + qc_var_name = f'qc_{var_name}' + qc_data = ds[qc_var_name].values + + assert np.sum(qc_data) == 0 + + index_4 = np.arange(100, 200) + qc_data[index_4] = set_bit(qc_data[index_4], 4) + index_1 = np.arange(170, 230) + qc_data[index_1] = set_bit(qc_data[index_1], 1) + index_2 = np.arange(250, 400) + qc_data[index_2] = set_bit(qc_data[index_2], 2) + index_3 = np.arange(450, 510) + qc_data[index_3] = set_bit(qc_data[index_3], 3) + ds[qc_var_name].values = qc_data + + result = ds.qcfilter.create_qc_summary(cleanup_qc=cleanup) + + for var_name in ['temp_mean', 'rh_mean']: + assert 'flag_masks' not in result[qc_var_name].attrs.keys() + assert isinstance(result[qc_var_name].attrs['flag_values'], list) + + assert np.sum(result[qc_var_name].values) == 610 + + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Indeterminate') + assert np.all(np.where(qc_ma.mask)[0] == np.arange(100, 170)) + + qc_ma = result.qcfilter.get_masked_data(var_name, 
rm_assessments='Bad') + index = np.concatenate([index_1, index_2, index_3]) + assert np.all(np.where(qc_ma.mask)[0] == index) + + assert "Quality control summary implemented by ACT" in result.attrs['history'] + + del ds + + +def test_qc_summary_multiple_assessment_names(): + ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True) + var_name = 'temp_mean' + qc_var_name = f'qc_{var_name}' + qc_data = ds[qc_var_name].values + + assert np.sum(qc_data) == 0 + + index_4 = np.arange(200, 300) + qc_data[index_4] = set_bit(qc_data[index_4], 4) + index_1 = np.arange(270, 330) + qc_data[index_1] = set_bit(qc_data[index_1], 1) + index_2 = np.arange(350, 500) + qc_data[index_2] = set_bit(qc_data[index_2], 2) + index_3 = np.arange(550, 610) + qc_data[index_3] = set_bit(qc_data[index_3], 3) + ds[qc_var_name].values = qc_data + + index_5 = np.arange(50, 150) + ds.qcfilter.add_test( + var_name, index=index_5, test_meaning='Testing Suspect', test_assessment='Suspect' + ) + + index_6 = np.arange(130, 210) + ds.qcfilter.add_test( + var_name, index=index_6, test_meaning='Testing Incorrect', test_assessment='Incorrect' + ) + + result = ds.qcfilter.create_qc_summary() + + assert result[qc_var_name].attrs['flag_assessments'] == [ + 'Not failing', + 'Suspect', + 'Indeterminate', + 'Incorrect', + 'Bad', + ] + + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Indeterminate') + assert np.sum(np.where(qc_ma.mask)[0]) == 14370 + + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Suspect') + assert np.sum(np.where(qc_ma.mask)[0]) == 7160 + + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Bad') + assert np.sum(np.where(qc_ma.mask)[0]) == 116415 + + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Incorrect') + assert np.sum(np.where(qc_ma.mask)[0]) == 13560 + + assert np.sum(np.where(result[qc_var_name].values == 0)) == 884575 + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Not failing') + assert 
np.sum(np.where(qc_ma.mask)[0]) == 884575 + + +@pytest.mark.big +@pytest.mark.skipif('ARCHIVE_DATA' not in environ, reason="Running outside ADC system.") +def test_qc_summary_big_data(): + """ + We want to test on as much ARM data as possible. But we do not want to force + a large amount of test data in GitHub. Plan is to see if the pytest code is being + run on ARM system and if so then run on historical data. If running on GitHub + then don't run tests. Also, have a switch to not force this big test to always + run as that would be mean to the developer. So need to periodicaly run with the + manual switch enabled. + + All exceptions are caught and a file name is sent to the output file when + an exception is found. Since this is testing 10,000+ files it will take hours + to run. I suggest you run in background and capture the standard out to a different + file. If no files are written to the output file then all tests passed. + + Output file name follows the convention of: + ~/test_qc_summary_big_data.{datetime}.txt + + To Run this test set keyword on pytest command line: + > pytest -s --runbig test_qc_summary.py::test_qc_summary_big_data &> ~/out.txt & + + + """ + + base_path = Path(environ['ARCHIVE_DATA']) + if not base_path.is_dir(): + return + + # Set number of files from each directory to test. 
+ skip_sites = [ + 'shb', + 'wbu', + 'dna', + 'rld', + 'smt', + 'nic', + 'isp', + 'dmf', + 'nac', + 'rev', + 'yeu', + 'zrh', + 'osc', + ] + skip_datastream_codes = [ + 'mmcrmom', + # 'microbasepi', + # 'lblch1a', + # '30co2flx4mmet', + # 'microbasepi2', + # '30co2flx60m', + # 'bbhrpavg1mlawer', + # 'co', + # 'lblch1b', + # '30co2flx25m', + # '30co2flx4m', + # 'armbeatm', + # 'armtrajcld', + # '1swfanalsiros1long', + ] + # skip_datastreams = ['nimmfrsraod5chcorM1.c1', 'anxaoso3M1.b0'] + num_files = 3 + expected_assessments = ['Not failing', 'Suspect', 'Indeterminate', 'Incorrect', 'Bad'] + + testing_files = [] + + single_test = False + if len(testing_files) == 0: + single_test = True + filename = ( + f'test_qc_summary_big_data.{datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")}.txt' + ) + output_file = Path(environ['HOME'], filename) + output_file.unlink(missing_ok=True) + output_file.touch() + + site_dirs = list(base_path.glob('???')) + for site_dir in site_dirs: + if site_dir.name in skip_sites: + continue + + datastream_dirs = list(site_dir.glob('*.[bc]?')) + for datastream_dir in datastream_dirs: + if '-' in datastream_dir.name: + continue + + # if datastream_dir.name in skip_datastreams: + # continue + + fn_obj = DatastreamParserARM(datastream_dir.name) + facility = fn_obj.facility + if facility is not None and facility[0] in ['A', 'X', 'U', 'F', 'N']: + continue + + datastream_class = fn_obj.datastream_class + if datastream_class is not None and datastream_class in skip_datastream_codes: + continue + + files = list(datastream_dir.glob('*.nc')) + files.extend(datastream_dir.glob('*.cdf')) + if len(files) == 0: + continue + + num_tests = num_files + if len(files) < num_files: + num_tests = len(files) + + for ii in range(0, num_tests): + testing_files.append(random.choice(files)) + + if single_test: + print(f"Testing {len(testing_files)} files\n") + print(f"Output file name = {output_file}\n") + + for file in testing_files: + try: + print(f"Testing: {file}") + 
ds = read_arm_netcdf(str(file), cleanup_qc=True, decode_times=False) + ds = ds.qcfilter.create_qc_summary() + + created_qc_summary = False + for var_name in ds.data_vars: + qc_var_name = ds.qcfilter.check_for_ancillary_qc( + var_name, add_if_missing=False, cleanup=False + ) + + if qc_var_name is None: + continue + + created_qc_summary = True + assert isinstance(ds[qc_var_name].attrs['flag_values'], list) + assert isinstance(ds[qc_var_name].attrs['flag_assessments'], list) + assert isinstance(ds[qc_var_name].attrs['flag_meanings'], list) + assert len(ds[qc_var_name].attrs['flag_values']) >= 1 + assert len(ds[qc_var_name].attrs['flag_assessments']) >= 1 + assert len(ds[qc_var_name].attrs['flag_meanings']) >= 1 + assert ds[qc_var_name].attrs['flag_assessments'][0] == 'Not failing' + assert ( + ds[qc_var_name].attrs['flag_meanings'][0] == 'Not failing quality control tests' + ) + + for assessment in ds[qc_var_name].attrs['flag_assessments']: + assert assessment in expected_assessments + + if created_qc_summary: + assert "Quality control summary implemented by ACT" in ds.attrs['history'] + + del ds + + except Exception: + with open(output_file, "a") as myfile: + myfile.write(f"{file}\n") diff --git a/tests/qc/test_qcfilter.py b/tests/qc/test_qcfilter.py index 01a0adf643..163206f56f 100644 --- a/tests/qc/test_qcfilter.py +++ b/tests/qc/test_qcfilter.py @@ -1,5 +1,5 @@ import copy -from datetime import datetime +from datetime import datetime, timezone import dask.array as da import numpy as np @@ -13,7 +13,7 @@ from act.tests import EXAMPLE_MET1, EXAMPLE_METE40, EXAMPLE_IRT25m20s try: - import scikit_posthocs + import scikit_posthocs # noqa SCIKIT_POSTHOCS_AVAILABLE = True except ImportError: @@ -390,12 +390,12 @@ def test_qc_speed(): coords={'time': time}, ) - start = datetime.utcnow() + start = datetime.now(timezone.utc) for name, var in noisy_data_mapping.items(): failed_qc = var > 0.75 # Consider data above 0.75 as bad. Negligible time here. 
ds.qcfilter.add_test(name, index=failed_qc, test_meaning='Value above threshold') - time_diff = datetime.utcnow() - start + time_diff = datetime.now(timezone.utc) - start assert time_diff.seconds <= 4 diff --git a/tests/retrievals/test_sp2_retrievals.py b/tests/retrievals/test_sp2_retrievals.py index e22db09722..b5bf49f927 100644 --- a/tests/retrievals/test_sp2_retrievals.py +++ b/tests/retrievals/test_sp2_retrievals.py @@ -4,7 +4,7 @@ import act try: - import pysp2 + import pysp2 # noqa PYSP2_AVAILABLE = True except ImportError: diff --git a/tests/utils/test_data_utils.py b/tests/utils/test_data_utils.py index bf10076164..0a626e135f 100644 --- a/tests/utils/test_data_utils.py +++ b/tests/utils/test_data_utils.py @@ -4,9 +4,13 @@ import pytest import xarray as xr from numpy.testing import assert_almost_equal +from contextlib import redirect_stdout +from io import StringIO +from pathlib import Path import act from act.utils.data_utils import DatastreamParserARM as DatastreamParser +from act.utils.data_utils import convert_2d_to_1d spec = importlib.util.find_spec('pyart') if spec is not None: @@ -18,7 +22,7 @@ def test_add_in_nan(): # Make a 1D array of 10 minute data time = np.arange('2019-01-01T01:00', '2019-01-01T01:10', dtype='datetime64[m]') - time = time.astype('datetime64[us]') + time = time.astype('datetime64[ns]') time = np.delete(time, range(3, 8)) data = np.linspace(0.0, 8.0, time.size) @@ -93,10 +97,8 @@ def test_convert_units(): data = act.utils.data_utils.convert_units(r_data, 'K', 'C') assert np.ceil(data[0]) == 12 - try: + with np.testing.assert_raises(ValueError): ds.utils.change_units() - except ValueError as error: - assert str(error) == "Need to provide 'desired_unit' keyword for .change_units() method" desired_unit = 'degF' skip_vars = [ii for ii in ds.data_vars if ii.startswith('qc_')] @@ -138,6 +140,27 @@ def test_convert_units(): ds.close() del ds + # Test if exception or print statement is issued when an error occurs with units string + ds = 
act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR1) + with np.testing.assert_raises(ValueError): + ds.utils.change_units('home_signal_15', 'not_a_real_unit_string', raise_error=True) + + with np.testing.assert_raises(ValueError): + ds.utils.change_units('not_a_real_variable_name', 'degC', raise_error=True) + + f = StringIO() + var_name = 'home_signal_15' + unit = 'not_a_real_unit_string' + with redirect_stdout(f): + ds.utils.change_units('home_signal_15', 'not_a_real_unit_string', verbose=True) + s = f.getvalue() + assert ( + s.strip() + == f"Unable to convert '{var_name}' to units of '{unit}'. Skipping unit converstion for '{var_name}'." + ) + ds.close() + del ds + def test_ts_weighted_average(): ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET_WILDCARD) @@ -326,10 +349,12 @@ def test_height_adjusted_pressure(): def test_datastreamparser(): - pytest.raises(ValueError, DatastreamParser, 123) + test_values = [1234, 4321.0, True, ['sgpmetE13.b1'], ('sgpmetE13.b1',)] + for test_value in test_values: + pytest.raises(ValueError, DatastreamParser, test_value) - fn_obj = DatastreamParser() - pytest.raises(ValueError, fn_obj.set_datastream, None) + fn_obj = DatastreamParser() + pytest.raises(ValueError, fn_obj.set_datastream, test_values) fn_obj = DatastreamParser() assert fn_obj.site is None @@ -352,6 +377,16 @@ def test_datastreamparser(): assert fn_obj.time == '024254' assert fn_obj.ext == 'nc' + fn_obj = DatastreamParser(Path('/data/sgp/sgpmetE13.b1/sgpmetE13.b1.20190501.024254.nc')) + assert fn_obj.site == 'sgp' + assert fn_obj.datastream_class == 'met' + assert fn_obj.facility == 'E13' + assert fn_obj.level == 'b1' + assert fn_obj.datastream == 'sgpmetE13.b1' + assert fn_obj.date == '20190501' + assert fn_obj.time == '024254' + assert fn_obj.ext == 'nc' + fn_obj.set_datastream('nsatwrC1.a0.19991230.233451.cdf') assert fn_obj.site == 'nsa' assert fn_obj.datastream_class == 'twr' @@ -382,29 +417,29 @@ def test_datastreamparser(): assert 
fn_obj.time is None assert fn_obj.ext is None - fn_obj = DatastreamParser('sgpmetE13') + fn_obj = DatastreamParser(Path('sgpmetE13.b1')) assert fn_obj.site == 'sgp' assert fn_obj.datastream_class == 'met' assert fn_obj.facility == 'E13' - assert fn_obj.level is None - assert fn_obj.datastream is None + assert fn_obj.level == 'b1' + assert fn_obj.datastream == 'sgpmetE13.b1' assert fn_obj.date is None assert fn_obj.time is None assert fn_obj.ext is None - fn_obj = DatastreamParser('sgpmet') + fn_obj = DatastreamParser('sgpmetE13') assert fn_obj.site == 'sgp' assert fn_obj.datastream_class == 'met' - assert fn_obj.facility is None + assert fn_obj.facility == 'E13' assert fn_obj.level is None assert fn_obj.datastream is None assert fn_obj.date is None assert fn_obj.time is None assert fn_obj.ext is None - fn_obj = DatastreamParser('sgp') + fn_obj = DatastreamParser('sgpmet') assert fn_obj.site == 'sgp' - assert fn_obj.datastream_class is None + assert fn_obj.datastream_class == 'met' assert fn_obj.facility is None assert fn_obj.level is None assert fn_obj.datastream is None @@ -412,8 +447,8 @@ def test_datastreamparser(): assert fn_obj.time is None assert fn_obj.ext is None - fn_obj = DatastreamParser('sg') - assert fn_obj.site is None + fn_obj = DatastreamParser('sgp') + assert fn_obj.site == 'sgp' assert fn_obj.datastream_class is None assert fn_obj.facility is None assert fn_obj.level is None @@ -421,7 +456,38 @@ def test_datastreamparser(): assert fn_obj.date is None assert fn_obj.time is None assert fn_obj.ext is None - del fn_obj + + fn_obj = DatastreamParser(Path('zzzasoinfaoianasdfkansfaiZ99.s9.123456789.987654321.superlong')) + assert fn_obj.site == 'zzz' + assert fn_obj.datastream_class == 'asoinfaoianasdfkansfai' + assert fn_obj.facility == 'Z99' + assert fn_obj.level == 's9' + assert fn_obj.datastream == 'zzzasoinfaoianasdfkansfaiZ99.s9' + assert fn_obj.date == '123456789' + assert fn_obj.time == '987654321' + assert fn_obj.ext == 'superlong' + + values = 
[ + '', + ' ', + 'sg', + 'SGP', + 'SGPMETE13.B1', + Path('zzzasoinfaoianasdfkansfaiZ999.z1.123456789.987654321.superlong'), + Path('/data/not/a/real/path/AsgpmetE13.b1.20190501.024254.nc'), + '/data/not/a/real/path/AsgpmetE13.b1.20190501.024254.nc', + 'zzzasoinfaoianasdfkansfaiZ999.z1.123456789.987654321.superlong', + ] + for value in values: + fn_obj = DatastreamParser(value) + assert fn_obj.site is None + assert fn_obj.datastream_class is None + assert fn_obj.facility is None + assert fn_obj.level is None + assert fn_obj.datastream is None + assert fn_obj.date is None + assert fn_obj.time is None + assert fn_obj.ext is None def test_arm_site_location_search(): @@ -455,3 +521,87 @@ def test_arm_site_location_search(): assert list(test_dict_nsa)[0] == 'nsa C1' assert test_dict_nsa[list(test_dict_nsa)[0]]['latitude'] == 71.323 assert test_dict_nsa[list(test_dict_nsa)[0]]['longitude'] == -156.615 + + +def test_calculate_percentages(): + ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AOSACSM) + fields = ['sulfate', 'ammonium', 'nitrate', 'chloride'] + time = '2023-04-20T03:49:45.000000000' + time_slice = ('2023-04-20T17:38:20.000000000', '2023-04-20T20:29:47.000000000') + threshold = 0.0 + + # Without threshold, chloride has invalid negative values so + # percentages will be incorrect. 
Check if warning is created + with pytest.warns(UserWarning) as record: + act.utils.calculate_percentages( + ds, fields, time='2023-04-20T03:49:45.000000000', threshold=None + ) + if not record: + pytest.fail("Expected a warning for invalid data.") + + # Test with threshold and singular time + percentages = act.utils.calculate_percentages(ds, fields, time=time, threshold=threshold) + assert 'sulfate' in percentages.keys() + assert 'chloride' in percentages.keys() + assert np.round(percentages["sulfate"], 3) == 66.125 + assert np.round(percentages["chloride"], 3) == 0.539 + + # Test with sliced time + percentages = act.utils.calculate_percentages(ds, fields, time_slice=time_slice, threshold=0.0) + assert np.round(percentages["sulfate"], 3) == 68.342 + assert np.round(percentages["chloride"], 3) == 1.042 + + # Run on all times and check if warning exists. + with pytest.warns(UserWarning) as record: + percentages = act.utils.calculate_percentages(ds, fields, threshold=0.0) + assert np.round(percentages["sulfate"], 3) == 66.373 + assert np.round(percentages["chloride"], 3) == 0.915 + if not record: + pytest.fail("Expected a warning for using all times.") + + +def test_convert_2d_to_1d(): + # Create a sample dataset + data = np.array([[1, 2], [3, 4], [5, 6]]) + ds = xr.Dataset( + {'var': (('time', 'level'), data)}, coords={'time': [0, 1, 2], 'level': [10, 20]} + ) + ds['level'].attrs['units'] = 'm' + + # Run the function + result = convert_2d_to_1d(ds, parse='level') + + # Check the results + assert 'var_level_0' in result + assert 'var_level_1' in result + np.testing.assert_array_equal(result['var_level_0'].values, [1, 3, 5]) + np.testing.assert_array_equal(result['var_level_1'].values, [2, 4, 6]) + + # Run the function with use_dim_value_in_name=True + result = convert_2d_to_1d(ds, parse='level', use_dim_value_in_name=True) + + # Check the results + assert 'var_level_10m' in result + assert 'var_level_20m' in result + 
np.testing.assert_array_equal(result['var_level_10m'].values, [1, 3, 5]) + np.testing.assert_array_equal(result['var_level_20m'].values, [2, 4, 6]) + + # Run the function with custom labels + result = convert_2d_to_1d(ds, parse='level', dim_labels=['low', 'high']) + + # Check the results + assert 'var_low' in result + assert 'var_high' in result + np.testing.assert_array_equal(result['var_low'].values, [1, 3, 5]) + np.testing.assert_array_equal(result['var_high'].values, [2, 4, 6]) + + # Create a sample dataset + data = np.array([[1], [3], [5]]) + ds = xr.Dataset({'var': (('time', 'level'), data)}, coords={'time': [0, 1, 2], 'level': [10]}) + + # Run the function with keep_name_if_one=True + result = convert_2d_to_1d(ds, parse='level', keep_name_if_one=True) + + # Check the results + assert 'var' in result + np.testing.assert_array_equal(result['var'].values, [1, 3, 5]) diff --git a/tests/utils/test_datetime_utils.py b/tests/utils/test_datetime_utils.py index ce03f21a83..79d4b65055 100644 --- a/tests/utils/test_datetime_utils.py +++ b/tests/utils/test_datetime_utils.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import numpy as np import pandas as pd @@ -15,7 +15,7 @@ def test_dates_between(): answer = np.arange(start_string, end_string, dtype='datetime64[D]') answer = np.append(answer, answer[-1] + 1) answer = answer.astype('datetime64[s]').astype(int) - answer = [datetime.utcfromtimestamp(ii) for ii in answer] + answer = [datetime.fromtimestamp(ii, tz=timezone.utc).replace(tzinfo=None) for ii in answer] assert date_list == answer diff --git a/tests/utils/test_io_utils.py b/tests/utils/test_io_utils.py index 89cfee6ef6..6bde1635c0 100644 --- a/tests/utils/test_io_utils.py +++ b/tests/utils/test_io_utils.py @@ -1,24 +1,21 @@ -import glob -import os import random import shutil import tempfile -from os import PathLike, chdir, getcwd +from os import PathLike, chdir from pathlib import Path from string import ascii_letters 
import numpy as np import pytest -from arm_test_data import locate as test_data_locate import act from act.tests import sample_files try: - import moviepy.video.io.ImageSequenceClip + import moviepy.video.io.ImageSequenceClip # noqa MOVIEPY_AVAILABLE = True -except ImportError: +except (ImportError, RuntimeError): MOVIEPY_AVAILABLE = False @@ -278,5 +275,66 @@ def test_generate_movie(): assert Path(result).name == write_filename assert np.isclose(Path(result).stat().st_size, 173189, 1000) + # Test converting MPEG to mp4 + write_filename = 'movie3.mp4' + mpeg_file = sample_files.EXAMPLE_MPEG + result = act.utils.generate_movie(mpeg_file, write_filename=write_filename) + files = list(Path().glob(write_filename)) + assert len(files) == 1 + assert np.isclose(files[0].stat().st_size, 1625298, rtol=100, atol=100) + finally: chdir(cwd) + + +def test_arm_standards_validator(): + met_files = sample_files.EXAMPLE_MET_SAIL + errors = act.utils.arm_standards_validator(met_files) + assert len(errors) == 0 + + ds = act.io.read_arm_netcdf(met_files) + ds2 = ds.drop_vars(['lat', 'lon', 'alt']) + errors = act.utils.arm_standards_validator(dataset=ds2) + + assert len(errors) == 3 + + ds2 = ds + var = ['lat', 'lon', 'alt'] + for v in var: + del ds2[v].attrs['standard_name'] + + errors = act.utils.arm_standards_validator(dataset=ds2) + assert len(errors) == 3 + + ds2 = ds + for v in var: + ds2[v].attrs['standard_name'] = 'test' + errors = act.utils.arm_standards_validator(dataset=ds2) + assert len(errors) == 3 + + ds2 = ds + for v in ds2: + del ds2[v].attrs['long_name'] + errors = act.utils.arm_standards_validator(dataset=ds2) + assert len(errors) == 54 + + ds2 = act.io.read_arm_netcdf(met_files) + ds2['time'].values[1] = ds2['time'].values[10] + errors = act.utils.arm_standards_validator(dataset=ds2) + + assert 'Duplicate' in errors[0] + assert 'increasing' in errors[1] + + file = 'shpinstrumentX50.z1.202005.000000.nc' + errors = act.utils.arm_standards_validator(file) + + assert 
'File is not in a standard format' in errors[0] + + file = 'sgpmetE13.z1.20200501.000000.nc' + errors = act.utils.arm_standards_validator(file) + + assert 'no files to open' in errors[0] + + ds = act.io.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) + errors = act.utils.arm_standards_validator(dataset=ds) + assert len(errors) == 4