Include GitHub Actions (NCAR#98)
* Create linting.yaml

* Create .pre-commit-config.yaml

* remove trailing whitespace

* one last trailing whitespace

* end of file fix

* end of files

* strings

* reorder imports

* adjust import order

* some more formatting

* trailing commas

* ran nbblack

* ran black

* fix imports

* fix imports

* pyupgrade fixes

* fix black-jupyter and typo

* adjust line length

* flake8

* flake8

* flake8 take 3

* remove autopep8 due to conflicts with black-jupyter

* fix remaining pre-commits
TeaganKing authored May 17, 2024
1 parent 7134f3a commit b402eba
Showing 24 changed files with 905 additions and 680 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/linting.yaml
@@ -0,0 +1,16 @@
name: code-style

on:
  push:
    branches: 'main'
  pull_request:
    branches: '*'

jobs:
  linting:
    name: 'pre-commit hooks'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
      - uses: pre-commit/[email protected]
38 changes: 38 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,38 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: debug-statements
      - id: check-docstring-first
      - id: check-json

  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black-jupyter

  - repo: https://github.com/asottile/reorder-python-imports
    rev: v3.12.0
    hooks:
      - id: reorder-python-imports
        args: [--py38-plus, --add-import, 'from __future__ import annotations']

  - repo: https://github.com/asottile/add-trailing-comma
    rev: v3.1.0
    hooks:
      - id: add-trailing-comma

  - repo: https://github.com/asottile/pyupgrade
    rev: v3.15.2
    hooks:
      - id: pyupgrade
        args: [--py38-plus]

  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
        args: [--max-line-length=120]
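Taken together, these hooks rewrite the codebase toward one style, which is what the long run of small commits above reflects. As a rough sketch of their combined effect (a hypothetical snippet, not code from this repository), `pre-commit run --all-files` would transform the commented "before" version into the "after" version below:

```python
# Hypothetical "before" (not from the CUPiD codebase):
#
#   import sys, os
#
#   def greet(name):
#       sys.stdout.write('hello, {}\n'.format(name))
#
#   greet(
#       'world'
#   )
#
# Approximately what the hooks leave behind:
from __future__ import annotations  # added via reorder-python-imports --add-import

import sys  # multi-import split and sorted; the unused os import is dropped by hand (flake8 flags it)


def greet(name):
    sys.stdout.write(f"hello, {name}\n")  # pyupgrade: .format() -> f-string; black: double quotes


greet(
    "world",  # add-trailing-comma: comma appended inside the multi-line call
)
```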
4 changes: 2 additions & 2 deletions README.md
@@ -64,10 +64,10 @@ or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-r
Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-run` and `cupid-build` commands, you can run the following command:

``` bash
$ cupid-clear
$ cupid-clear
```

This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file.
This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file.

### CUPiD Options

17 changes: 9 additions & 8 deletions cupid/build.py
@@ -5,19 +5,20 @@
The main function `build()` reads the configuration file (default config.yml),
extracts the necessary information such as the name of the book and the
directory containing computed notebooks, and then proceeds to clean and build the
Jupyter book using the `jupyter-book` command-line tool.
directory containing computed notebooks, and then proceeds to clean and build
the Jupyter book using the `jupyter-book` command-line tool.
Args:
CONFIG_PATH: str, path to configuration file (default config.yml)
Returns:
None
"""
from __future__ import annotations

import click
import subprocess
import sys

import click
import yaml


@@ -34,22 +35,22 @@ def build(config_path):
None
"""

with open(config_path, "r") as fid:
with open(config_path) as fid:
control = yaml.safe_load(fid)

sname = control["data_sources"]["sname"]
run_dir = control["data_sources"]["run_dir"]

subprocess.run(["jupyter-book", "clean", f"{run_dir}/computed_notebooks/{sname}"])
subprocess.run(
["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"]
["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"],
)

# Originally used this code to copy jupyter book HTML to a location to host it online

# if 'publish_location' in control:
# if "publish_location" in control:

# user = os.environ.get('USER')
# user = os.environ.get("USER")
# remote_mach = control["publish_location"]["remote_mach"]
# remote_dir = control["publish_location"]["remote_dir"]
# this seems more complicated than expected...people have mentioned paramiko library?
21 changes: 12 additions & 9 deletions cupid/clear.py
@@ -1,24 +1,27 @@
#!/usr/bin/env python
"""
This script provides functionality to clear the contents of the 'computed_notebooks' folder
at the location specified by the 'run_dir' variable in the CONFIG_PATH.
This script provides functionality to clear the contents of the "computed_notebooks" folder
at the location specified by the "run_dir" variable in the CONFIG_PATH.
The main function `clear()` takes the path to the configuration file as input, reads the config file
to obtain the 'run_dir' variable, and then deletes the contents of the 'computed_notebooks' folder
to obtain the "run_dir" variable, and then deletes the contents of the "computed_notebooks" folder
at that location.
"""
from __future__ import annotations

import os
import shutil

import click

import cupid.util


def read_config_file(config_path):
"""
Given the file path to the configuration file, this function reads the config file content and
returns the val of the run_dir string with '/computed_notebooks' appended to it
returns the val of the run_dir string with `/computed_notebooks` appended to it
Args:
CONFIG_PATH: str, path to configuration file (default config.yml)
@@ -31,26 +34,26 @@ def read_config_file(config_path):
run_dir = control["data_sources"].get("run_dir", None)

if run_dir:
# Append '/computed_notebooks' to the run_dir value if it is not empty
# Append `/computed_notebooks` to the run_dir value if it is not empty
full_path = os.path.join(run_dir, "computed_notebooks")
return full_path

# else run_dir is empty/wasn't found in config file so return error
# else run_dir is empty/was not found in config file so return error
raise ValueError("'run_dir' was empty/not found in the config file.")


@click.command()
@click.argument("config_path", default="config.yml")
# Entry point to this script
def clear(config_path):
"""Clears the contents of the 'computed_notebooks' folder at the location
specified by the 'run_dir' variable in the CONFIG_PATH.
"""Clears the contents of the "computed_notebooks" folder at the location
specified by the "run_dir" variable in the CONFIG_PATH.
Args: CONFIG_PATH - The path to the configuration file.
"""

run_dir = read_config_file(config_path)
# Delete the 'computed_notebooks' folder and all the contents inside of it
# Delete the "computed_notebooks" folder and all the contents inside of it
shutil.rmtree(run_dir)
print(f"All contents in {run_dir} have been cleared.")
7 changes: 4 additions & 3 deletions cupid/quickstart.py
@@ -1,3 +1,4 @@
### To be created: a script, maybe called through a command line entry point,
### that sets up a directory with a config.yml file and
### basics necessary to set up a notebook collection
# To be created: a script, maybe called through a command line entry point,
# that sets up a directory with a config.yml file and
# basics necessary to set up a notebook collection
from __future__ import annotations
14 changes: 8 additions & 6 deletions cupid/read.py
@@ -6,6 +6,7 @@
- get_collection(path_to_catalog, **kwargs): Get a collection of datasets from an
intake catalog based on specified criteria.
"""
from __future__ import annotations

import intake
import yaml
@@ -21,21 +22,22 @@ def read_yaml(path_to_yaml):
def get_collection(path_to_catalog, **kwargs):
"""Get collection of datasets from intake catalog"""
cat = intake.open_esm_datastore(path_to_catalog)
### note that the json file points to the csv, so the path that the
### yaml file contains doesn't actually get used. this can cause issues
# note that the json file points to the csv, so the path that the
# yaml file contains does not actually get used. this can cause issues

cat_subset = cat.search(**kwargs)

if "variable" in kwargs.keys():
# pylint: disable=invalid-name
def preprocess(ds):
## the double brackets return a Dataset rather than a DataArray
## this is fragile and could cause issues, not sure what subsetting on time_bound does
# the double brackets return a Dataset rather than a DataArray
# this is fragile and could cause issues, not sure what subsetting on time_bound does
return ds[[kwargs["variable"], "time_bound"]]

## not sure what the chunking kwarg is doing here either
# not sure what the chunking kwarg is doing here either
dsets = cat_subset.to_dataset_dict(
xarray_open_kwargs={"chunks": {"time": -1}}, preprocess=preprocess
xarray_open_kwargs={"chunks": {"time": -1}},
preprocess=preprocess,
)

else:
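For orientation, a hedged usage sketch of `get_collection`; the catalog path and search keywords below are illustrative assumptions, not values from this commit:

```python
# Hypothetical call into cupid.read; adjust the catalog path and search
# keys to match an actual intake-esm catalog.
import cupid.read

dsets = cupid.read.get_collection(
    "temp_data/catalog_subset.json",  # intake-esm JSON catalog (hypothetical)
    variable="TS",                    # a "variable" kwarg takes the preprocess branch above
    frequency="month_1",
)
for key, ds in dsets.items():
    print(key, list(ds.data_vars))
```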
28 changes: 16 additions & 12 deletions cupid/run.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python

"""
Main script for running all notebooks and scripts specified in the configuration file.
@@ -21,19 +20,24 @@
-config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml)
-h, --help Show this message and exit.
"""
from __future__ import annotations

import os
import warnings

import click
import intake
import ploomber
import cupid.util

import cupid.timeseries
import cupid.util

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])

# fmt: off
# pylint: disable=line-too-long


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
@click.option("--time-series", "-ts", is_flag=True, help="Run time series generation scripts prior to diagnostics")
@@ -135,7 +139,7 @@ def run(
output_dir = run_dir + "/computed_notebooks/" + control["data_sources"]["sname"]
temp_data_path = run_dir + "/temp_data"
nb_path_root = os.path.realpath(
os.path.expanduser(control["data_sources"]["nb_path_root"])
os.path.expanduser(control["data_sources"]["nb_path_root"]),
)

#####################################################################
@@ -147,7 +151,7 @@

if "path_to_cat_json" in control["data_sources"]:
full_cat_path = os.path.realpath(
os.path.expanduser(control["data_sources"]["path_to_cat_json"])
os.path.expanduser(control["data_sources"]["path_to_cat_json"]),
)
full_cat = intake.open_esm_datastore(full_cat_path)

@@ -159,7 +163,7 @@
# This pulls out the name of the catalog from the path
cat_subset_name = full_cat_path.split("/")[-1].split(".")[0] + "_subset"
cat_subset.serialize(
directory=temp_data_path, name=cat_subset_name, catalog_type="file"
directory=temp_data_path, name=cat_subset_name, catalog_type="file",
)
cat_path = temp_data_path + "/" + cat_subset_name + ".json"
else:
@@ -191,7 +195,7 @@
all_nbs[nb]["output_dir"] = output_dir + "/" + comp_name
elif comp_bool and not all:
warnings.warn(
f"No notebooks for {comp_name} component specified in config file."
f"No notebooks for {comp_name} component specified in config file.",
)

# Checking for existence of environments
@@ -200,9 +204,9 @@
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(
f"Environment {bad_env} specified for {nb}.ipynb could not be found;"+
f" {nb}.ipynb will not be run."+
f"See README.md for environment installation instructions."
f"Environment {bad_env} specified for {nb}.ipynb could not be found;" +
f" {nb}.ipynb will not be run." +
"See README.md for environment installation instructions.",
)
all_nbs.pop(nb)

@@ -234,7 +238,7 @@
all_scripts[script]["nb_path_root"] = nb_path_root + "/" + comp_name
elif comp_bool and not all:
warnings.warn(
f"No scripts for {comp_name} component specified in config file."
f"No scripts for {comp_name} component specified in config file.",
)

# Checking for existence of environments
@@ -243,8 +247,8 @@
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(
f"Environment {bad_env} specified for {script}.py could not be found;"+
f"{script}.py will not be run."
f"Environment {bad_env} specified for {script}.py could not be found;" +
f"{script}.py will not be run.",
)
all_scripts.pop(script)
