Merge pull request NCAR#88 from rmshkv/component-api
API updates to specify components
mnlevy1981 authored May 3, 2024
2 parents bf65cdd + f114b83 commit d14ee72
Showing 14 changed files with 195 additions and 261 deletions.
13 changes: 12 additions & 1 deletion README.md
@@ -82,9 +82,16 @@ Usage: cupid-run [OPTIONS] CONFIG_PATH
Options:
-s, --serial Do not use LocalCluster objects
-ts, --time-series Run time series generation scripts prior to diagnostics
-atm, --atmosphere Run atmosphere component diagnostics
-ocn, --ocean Run ocean component diagnostics
-lnd, --land Run land component diagnostics
-ice, --seaice Run sea ice component diagnostics
-glc, --landice Run land ice component diagnostics
-h, --help Show this message and exit.
```

#### Running in serial

By default, several of the example notebooks provided use a dask `LocalCluster` object to run in parallel.
However, the `--serial` option will pass a logical flag to each notebook that can be used to skip starting the cluster.

@@ -98,6 +105,10 @@ if not serial:
client
```
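
For reference, a typical guard looks like the following sketch; the exact cluster setup varies by notebook, and the default `LocalCluster()` arguments here are an assumption rather than CUPiD's actual settings:

```
# Sketch only: start a Dask cluster unless the notebook received serial=True
# (which cupid-run passes through when called with -s/--serial).
from dask.distributed import Client, LocalCluster

serial = False  # parameter injected into each notebook by cupid-run

if not serial:
    cluster = LocalCluster()  # assumed defaults; real notebooks may size workers explicitly
    client = Client(cluster)
    client
```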

#### Specifying components

If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice config.yml`.
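
Under the hood, `cupid/run.py` collects these flags into a per-component dictionary and falls back to enabling everything when no flag is set (see the run.py diff below). A minimal, self-contained sketch of that selection logic:

```
# Minimal sketch of the component-selection logic in cupid/run.py.
# The booleans stand in for the click flags; this example mimics
# `cupid-run -ocn -ice config.yml`.
atmosphere, ocean, land, seaice, landice = False, True, False, True, False

component_options = {
    "atm": atmosphere,
    "ocn": ocean,
    "lnd": land,
    "ice": seaice,
    "glc": landice,
}

# No component flags at all? Then run every component.
if not any(component_options.values()):
    component_options = {key: True for key in component_options}

print([comp for comp, run_it in component_options.items() if run_it])  # ['ocn', 'ice']
```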


### Timeseries File Generation
CUPiD can also generate single-variable timeseries files from history files for all components. To generate timeseries, edit the timeseries section of `config.yml` to fit your preferences, and then run `cupid-run config.yml -ts`.
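
Timeseries generation honors the same component flags: with `-ts`, files are only produced for the components selected on the command line (or for all of them when no flag is given). Below is a rough sketch of that gating; the real call to `cupid.timeseries.create_time_series` in `cupid/run.py` passes variable lists, history and output paths, year ranges, and more from `config.yml`.

```
# Rough sketch of the per-component gating used for -ts in cupid/run.py.
def generate_time_series(timeseries_params, component_options, serial=False):
    num_procs = timeseries_params["num_procs"]
    for component, enabled in component_options.items():
        if not enabled:
            continue
        # The real code calls cupid.timeseries.create_time_series(component, ...)
        # with the per-component settings from config.yml.
        print(f"would generate {component} timeseries using {num_procs} processes")
```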

180 changes: 100 additions & 80 deletions cupid/run.py
@@ -11,21 +11,24 @@
import dask
import time
import ploomber
import yaml
import warnings

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
@click.option(
"--time-series",
"-ts",
is_flag=True,
help="Run time series generation scripts prior to diagnostics",
)
@click.option("--time-series", "-ts", is_flag=True,
help="Run time series generation scripts prior to diagnostics")
# Options to turn components on or off
@click.option("--atmosphere", "-atm", is_flag=True, help="Run atmosphere component diagnostics")
@click.option("--ocean", "-ocn", is_flag=True, help="Run ocean component diagnostics")
@click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics")
@click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics")
@click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics")
@click.argument("config_path")
def run(config_path, serial=False, time_series=False):

def run(config_path, serial=False, time_series=False,
all=False, atmosphere=False, ocean=False, land=False, seaice=False, landice=False):
"""
Main engine to set up running all the notebooks.
"""
@@ -34,41 +37,56 @@ def run(config_path, serial=False, time_series=False):
control = cupid.util.get_control_dict(config_path)
cupid.util.setup_book(config_path)

#####################################################################
component_options = {"atm": atmosphere,
"ocn": ocean,
"lnd": land,
"ice": seaice,
"glc": landice}

# Automatically run all if no components specified

if True not in [atmosphere, ocean, land, seaice, landice]:
all = True
for key in component_options.keys():
component_options[key] = True

#####################################################################
# Managing global parameters

global_params = dict()

if "global_params" in control:
global_params = control["global_params"]

global_params['serial'] = serial

####################################################################

if time_series:
timeseries_params = control["timeseries"]

# general timeseries arguments for all components
num_procs = timeseries_params["num_procs"]



for component in ['atm', 'ocn', 'lnd', 'ice', 'glc']:
cupid.timeseries.create_time_series(
component,
timeseries_params[component]["vars"],
timeseries_params[component]["derive_vars"],
[timeseries_params["case_name"]], # could also grab from compute_notebooks section of config file
timeseries_params[component]["hist_str"],
[global_params["CESM_output_dir"] + "/" + timeseries_params["case_name"] + f"/{component}/hist/"], # could also grab from compute_notebooks section of config file
[global_params["CESM_output_dir"]+'/'+timeseries_params['case_name']+f'/{component}/proc/tseries/'],
# Note that timeseries output will eventually go in /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/
timeseries_params["ts_done"],
timeseries_params["overwrite_ts"],
timeseries_params[component]["start_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.start_date
timeseries_params[component]["end_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.end_date
timeseries_params[component]["level"],
num_procs,
serial,
)
for component, comp_bool in component_options.items():
if comp_bool:
cupid.timeseries.create_time_series(
component,
timeseries_params[component]["vars"],
timeseries_params[component]["derive_vars"],
[timeseries_params["case_name"]], # could also grab from compute_notebooks section of config file
timeseries_params[component]["hist_str"],
[global_params["CESM_output_dir"] + "/" + timeseries_params["case_name"] + f"/{component}/hist/"], # could also grab from compute_notebooks section of config file
[global_params["CESM_output_dir"]+'/'+timeseries_params['case_name']+f'/{component}/proc/tseries/'],
# Note that timeseries output will eventually go in /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/
timeseries_params["ts_done"],
timeseries_params["overwrite_ts"],
timeseries_params[component]["start_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.start_date
timeseries_params[component]["end_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.end_date
timeseries_params[component]["level"],
num_procs,
serial,
)

# Grab paths

@@ -113,35 +131,39 @@ def run(config_path, serial=False, time_series=False):
dag = ploomber.DAG(executor=ploomber.executors.Serial())

#####################################################################
# Organizing notebooks - holdover from manually managing dependencies before

all_nbs = dict()

for nb, info in control["compute_notebooks"].items():

all_nbs[nb] = info

# Setting up notebook tasks

for nb, info in all_nbs.items():

global_params["serial"] = serial
if "dependency" in info:
cupid.util.create_ploomber_nb_task(
nb,
info,
cat_path,
nb_path_root,
output_dir,
global_params,
dag,
dependency=info["dependency"],
)

else:
cupid.util.create_ploomber_nb_task(
nb, info, cat_path, nb_path_root, output_dir, global_params, dag
)
# Organizing notebooks to run

if 'compute_notebooks' in control:

all_nbs = dict()

for nb, info in control['compute_notebooks']['infrastructure'].items():
all_nbs[nb] = info
all_nbs[nb]['nb_path_root'] = nb_path_root + '/infrastructure'
all_nbs[nb]['output_dir'] = output_dir + '/infrastructure'

for comp_name, comp_bool in component_options.items():
if comp_name in control['compute_notebooks'] and comp_bool:
for nb, info in control['compute_notebooks'][comp_name].items():
all_nbs[nb] = info
all_nbs[nb]['nb_path_root'] = nb_path_root + '/' + comp_name
all_nbs[nb]['output_dir'] = output_dir + '/' + comp_name
elif comp_bool and not all:
warnings.warn(f"No notebooks for {comp_name} component specified in config file.")

# Checking for existence of environments

for nb, info in all_nbs.copy().items():
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(f"Environment {bad_env} specified for {nb}.ipynb could not be found; {nb}.ipynb will not be run. See README.md for environment installation instructions.")
all_nbs.pop(nb)

# Setting up notebook tasks

for nb, info in all_nbs.items():
cupid.util.create_ploomber_nb_task(nb, info, cat_path, info["nb_path_root"],
info["output_dir"], global_params, dag, dependency=info.get("dependency"))

#####################################################################
# Organizing scripts
@@ -150,29 +172,27 @@

all_scripts = dict()

for script, info in control["compute_scripts"].items():

all_scripts[script] = info

for comp_name, comp_bool in component_options.items():
if comp_name in control['compute_scripts'] and comp_bool:
for script, info in control['compute_scripts'][comp_name].items():
all_scripts[script] = info
all_scripts[script]['nb_path_root'] = nb_path_root + '/' + comp_name
elif comp_bool and not all:
warnings.warn(f"No scripts for {comp_name} component specified in config file.")

# Checking for existence of environments

for script, info in all_scripts.copy().items():
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(f"Environment {bad_env} specified for {script}.py could not be found; {script}.py will not be run.")
all_scripts.pop(script)

# Setting up script tasks

for script, info in all_scripts.items():

if "dependency" in info:
cupid.util.create_ploomber_script_task(
script,
info,
cat_path,
nb_path_root,
global_params,
dag,
dependency=info["dependency"],
)

else:
cupid.util.create_ploomber_script_task(
script, info, cat_path, nb_path_root, global_params, dag
)
cupid.util.create_ploomber_script_task(script, info, cat_path, info['nb_path_root'],
global_params, dag, dependency=info.get("dependency"))

# Run the full DAG

(Diff output for the remaining 12 changed files is not shown.)
