Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate cmor setups and logging #261

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
fdbe225
consolidate cmor setups and e2c logging
TonyB9000 Jun 27, 2024
580a1a4
revert handler cmor setup to logfile
TonyB9000 Jun 27, 2024
0a380d4
add commentary on do_shuffle and delete_tempfiles as future flags
TonyB9000 Jun 27, 2024
20feb79
minor cleanup
TonyB9000 Jun 27, 2024
1aea912
minor cleanup
TonyB9000 Jun 27, 2024
811d208
convert mpas handlers setup_cmor calls to employ named parameters
TonyB9000 Jul 10, 2024
14c50ef
converted residual var handlers to call setup_cmor with named parameters
TonyB9000 Jul 10, 2024
c2bf739
changed varname to var_name in util setup_cmor to match named params …
TonyB9000 Jul 10, 2024
fe151b3
Update e3sm_to_cmip/_logger.py
TonyB9000 Jul 16, 2024
cd381e8
applied suggested changes to PR
TonyB9000 Jul 16, 2024
f8aeadd
cleaned up import listings in handlers
TonyB9000 Jul 17, 2024
1094638
Employed docstring
TonyB9000 Jul 17, 2024
76490c2
addressed Tom Vo's comments and suggestions - phase one.
TonyB9000 Jul 17, 2024
c087201
used merge --no-ff to find and resolve conflicts...
TonyB9000 Jul 17, 2024
93238fd
fixed typo comment missing initial #
TonyB9000 Jul 18, 2024
d670c1f
corrected case error
TonyB9000 Jul 18, 2024
3019e37
added global logger explanation
TonyB9000 Jul 19, 2024
d268ca5
Update e3sm_to_cmip/__main__.py
TonyB9000 Aug 2, 2024
db96ce2
Update e3sm_to_cmip/__main__.py
TonyB9000 Aug 2, 2024
7e83c4f
Update e3sm_to_cmip/__main__.py
TonyB9000 Aug 2, 2024
7a1c302
Update e3sm_to_cmip/_logger.py
TonyB9000 Aug 2, 2024
2b12405
Update e3sm_to_cmip/cmor_handlers/handler.py
TonyB9000 Aug 2, 2024
94b4e70
Update e3sm_to_cmip/__main__.py
TonyB9000 Aug 2, 2024
6fd1670
convert _logger calls to employ logging log_levels directly, updated …
TonyB9000 Aug 2, 2024
6e4bec0
Merge remote-tracking branch 'refs/remotes/origin/consolidate_cmor_se…
TonyB9000 Aug 2, 2024
28f82c5
forgot one instance of import logging for defined constant
TonyB9000 Aug 26, 2024
5e60221
Code review changes
tomvothecoder Aug 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 79 additions & 38 deletions e3sm_to_cmip/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
import logging
import os
import signal
import sys
Expand All @@ -21,25 +22,28 @@
from tqdm import tqdm

from e3sm_to_cmip import ROOT_HANDLERS_DIR, __version__, resources
from e3sm_to_cmip._logger import _setup_logger, _setup_root_logger
from e3sm_to_cmip._logger import _logger
from e3sm_to_cmip.cmor_handlers.handler import _instantiate_handler_logger
from e3sm_to_cmip.cmor_handlers.utils import (
MPAS_REALMS,
REALMS,
Frequency,
MPASRealm,
Realm,
_get_mpas_handlers,
_instantiate_h_utils_logger,
derive_handlers,
load_all_handlers,
)
from e3sm_to_cmip.util import (
FREQUENCIES,
_get_table_info,
get_handler_info_msg,
_instantiate_util_logger,
add_metadata,
copy_user_metadata,
find_atm_files,
find_mpas_files,
get_handler_info_msg,
precheck,
print_debug,
print_message,
Expand All @@ -50,11 +54,6 @@
warnings.filterwarnings("ignore")


# Setup the root logger and this module's logger.
log_filename = _setup_root_logger()
logger = _setup_logger(__name__, propagate=True)


@dataclass
class CLIArguments:
"""A data class storing the command line arguments for e3sm_to_cmip.
Expand Down Expand Up @@ -97,9 +96,19 @@ class CLIArguments:

class E3SMtoCMIP:
def __init__(self, args: Optional[List[str]] = None):
# logger assignment is moved into __init__ AFTER the call to _parse_args
# to prevent the default logfile directory being created whenever a call
# to "--help" or "--version" is invoked. Doing so, however, makes the
# logger unavailable to the functions in this class unless made global.
global logger
tomvothecoder marked this conversation as resolved.
Show resolved Hide resolved

# A dictionary of command line arguments.
parsed_args = self._parse_args(args)

# Setup this module's logger AFTER args are parsed in __init__, so that
# default log file is NOT created for "--help" or "--version" calls.
logger = _logger(name=__name__, log_level=logging.INFO, to_logfile=True)

# NOTE: The order of these attributes align with class CLIArguments.
# ======================================================================
# Run Mode settings.
Expand All @@ -115,8 +124,6 @@ def __init__(self, args: Optional[List[str]] = None):
self.debug: bool = parsed_args.debug
self.timeout: int = parsed_args.timeout

# ======================================================================
# CMOR settings.
# ======================================================================
self.var_list: List[str] = self._get_var_list(parsed_args.var_list)
self.realm: Union[Realm, MPASRealm] = parsed_args.realm
Expand All @@ -140,6 +147,9 @@ def __init__(self, args: Optional[List[str]] = None):
if self.precheck_path is not None:
self._run_precheck()

self.handlers = self._get_handlers()

def print_config(self):
logger.info("--------------------------------------")
logger.info("| E3SM to CMIP Configuration")
logger.info("--------------------------------------")
Expand All @@ -149,9 +159,6 @@ def __init__(self, args: Optional[List[str]] = None):
logger.info(f" * precheck_path='{self.precheck_path}'")
logger.info(f" * freq='{self.freq}'")
logger.info(f" * realm='{self.realm}'")
logger.info(f" * Writing log output file to: {log_filename}")

self.handlers = self._get_handlers()

def run(self):
# Setup logger information and print out e3sm_to_cmip CLI arguments.
Expand All @@ -161,17 +168,16 @@ def run(self):
self.output_path, "user_metadata.json"
)

# Setup directories using the CLI argument paths (e.g., output dir).
# ======================================================================
if not self.info_mode:
self._setup_dirs_with_paths()

# Run e3sm_to_cmip with info mode.
# ======================================================================
if self.info_mode:
self._run_info_mode()
sys.exit(0)

# Setup directories using the CLI argument paths (e.g., output dir).
# ======================================================================
self._setup_dirs_with_paths()

# Run e3sm_to_cmip to CMORize serially or in parallel.
# ======================================================================
timer = None
Expand Down Expand Up @@ -306,7 +312,9 @@ def _parse_args(self, args: Optional[List[str]]) -> CLIArguments:
sys.exit(1)

# Parse the arguments and perform validation.
# NOTE: exits here if args == "-h" or "--help" or "--version", else validate
parsed_args = argparser.parse_args(args_to_parse)

self._validate_parsed_args(parsed_args)

# Convert to this data class for type checking to work.
Expand Down Expand Up @@ -410,7 +418,9 @@ def _setup_argparser(self) -> argparse.ArgumentParser:
nargs="+",
required=True,
metavar="",
help=("Space separated list of CMIP variables to convert from E3SM to CMIP."),
help=(
"Space separated list of CMIP variables to convert from E3SM to CMIP."
),
)
optional_run.add_argument(
"--realm",
Expand Down Expand Up @@ -531,7 +541,6 @@ def _setup_argparser(self) -> argparse.ArgumentParser:
"-h",
"--help",
action="help",
default=argparse.SUPPRESS,
help="show this help message and exit",
)

Expand Down Expand Up @@ -658,23 +667,24 @@ def _run_info_mode(self): # noqa: C901

# if the user asked if the variable is included in the table
# but didnt ask about the files in the inpath
elif self.freq and self.tables_path and not self.input_path: # info mode 2
elif self.freq and self.tables_path and not self.input_path: # info mode 2
for handler in self.handlers:
table_info = _get_table_info(self.tables_path, handler["table"])
if handler["name"] not in table_info["variable_entry"]:
msg = f"Variable {handler['name']} is not included in the table {handler['table']}" # type: ignore
print_message(msg, status="error")
continue
else:
if self.freq == "mon" and handler['table'] == "CMIP6_day.json":
if self.freq == "mon" and handler["table"] == "CMIP6_day.json":
continue
if ( self.freq == "day" or self.freq == "3hr" ) and handler['table'] == "CMIP6_Amon.json":
if (self.freq == "day" or self.freq == "3hr") and handler[
"table"
] == "CMIP6_Amon.json":
continue
hand_msg = get_handler_info_msg(handler)
messages.append(hand_msg)

elif self.freq and self.tables_path and self.input_path: # info mode 3

elif self.freq and self.tables_path and self.input_path: # info mode 3
file_path = next(Path(self.input_path).glob("*.nc"))

with xr.open_dataset(file_path) as ds:
Expand Down Expand Up @@ -714,9 +724,11 @@ def _run_info_mode(self): # noqa: C901
# but we only want the latter in the "hand_msg" output.
# The vars "hass" and "rlut" have multiple freqs.

if self.freq == "mon" and handler['table'] == "CMIP6_day.json":
if self.freq == "mon" and handler["table"] == "CMIP6_day.json":
continue
if ( self.freq == "day" or self.freq == "3hr" ) and handler['table'] == "CMIP6_Amon.json":
if (self.freq == "day" or self.freq == "3hr") and handler[
"table"
] == "CMIP6_Amon.json":
continue

hand_msg = None
Expand Down Expand Up @@ -778,12 +790,17 @@ def _run_serial(self) -> int: # noqa: C901
int
1 if an error occurs, else 0
"""

# TODO: Make this a command-line flag.
use_pbar = False

try:
num_handlers = len(self.handlers)
num_success = 0
num_failure = 0
name = None

if self.realm != "atm":
if self.realm != "atm" and use_pbar:
pbar = tqdm(total=len(self.handlers))

for _, handler in enumerate(self.handlers):
Expand Down Expand Up @@ -838,26 +855,31 @@ def _run_serial(self) -> int: # noqa: C901
except Exception as e:
print_debug(e)

if name is not None:
if name is not None and name != "":
num_success += 1
msg = f"Finished {name}, {num_success}/{num_handlers} jobs complete"
msg = f"Finished {name}, {num_success}/{num_handlers} jobs complete (via run_serial)"
logger.info(msg)
else:
num_failure += 1
msg = f"Error running handler {handler['name']}"
logger.info(msg)

if self.realm != "atm":
if self.realm != "atm" and use_pbar:
pbar.update(1)
if self.realm != "atm":
pbar.close()

if self.realm != "atm" and use_pbar:
pbar.close()
except Exception as error:
print_debug(error)

return 1
else:
msg = f"{num_success} of {num_handlers} handlers complete"
logger.info(msg)

if num_failure > 0:
return 1

return 0

def _run_parallel(self) -> int: # noqa: C901
Expand All @@ -872,6 +894,9 @@ def _run_parallel(self) -> int: # noqa: C901
pool_res = list()
will_run = []

# TODO: Make this a command-line flag.
do_pbar = False

for idx, handler in enumerate(self.handlers):
handler_method = handler["method"]
handler_variables = handler["raw_variables"]
Expand Down Expand Up @@ -926,8 +951,10 @@ def _run_parallel(self) -> int: # noqa: C901
pool_res.append(res)

# wait for each result to complete
pbar = tqdm(total=len(pool_res))
if do_pbar:
pbar = tqdm(total=len(pool_res))
num_success = 0
num_failure = 0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you have more update than what is described in the PR. Can you list these changes too?

For example, here you update the logic to capture num_failure to exit e3sm_to_cmip.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently, "failed" was coming up False, and the function returning 0, when there were indeed failures. So I augmented the condition at the bottom to ensure a non-zero return when any failure was encountered.

This is questionable. When running multiple variables in parallel, should a single variable failure be treated as a failure (non-zero return) or not?

Copy link
Collaborator

@tomvothecoder tomvothecoder Jul 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is questionable. When running multiple variables in parallel, should a single variable failure be treated as a failure (non-zero return) or not?

I think the behavior should be the same as with a serial run. Maybe what it should do is continue running even with failures, capture the failures, then output the list of failures at the end. That way e3sm_to_cmip doesn't end prematurely and other variables that might be successful can be CMORized still. This would make debugging more precise because of e2c ends prematurely, other variables might also fail but we won't know until later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. I'll see if I can engineer this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TonyB9000 Can you open a separate GitHub issue for my comment above? If we decide to address it, then it should be performed in a separate PR.

num_handlers = len(self.handlers)
finished_success = []
for idx, res in enumerate(pool_res):
Expand All @@ -936,26 +963,32 @@ def _run_parallel(self) -> int: # noqa: C901
finished_success.append(out)
if out:
num_success += 1
msg = f"Finished {out}, {idx + 1}/{num_handlers} jobs complete"
msg = f"Finished {out}, {idx + 1}/{num_handlers} jobs complete (via run_parallel)"
else:
num_failure += 1
msg = f'Error running handler {self.handlers[idx]["name"]}'
logger.error(msg)

logger.info(msg)
except Exception as e:
print_debug(e)
pbar.update(1)

pbar.close()
if do_pbar:
pbar.update(1)
TonyB9000 marked this conversation as resolved.
Show resolved Hide resolved

if do_pbar:
pbar.close()
TonyB9000 marked this conversation as resolved.
Show resolved Hide resolved

pool.shutdown()

msg = f"{num_success} of {num_handlers} handlers complete"
logger.info(msg)

failed = set(will_run) - set(finished_success)
if failed:
if failed or num_failure > 0:
logger.error(f"{', '.join(list(failed))} failed to complete")
logger.error(msg)
return 1

return 0

Expand All @@ -966,7 +999,15 @@ def _timeout_exit(self):

def main(args: Optional[List[str]] = None):
app = E3SMtoCMIP(args)
app.run()

# These calls allow module loggers that create default logfiles to avoid being
# instantiated by arguments "--help" or "--version" upon import.
_instantiate_util_logger()
_instantiate_h_utils_logger()
_instantiate_handler_logger()

app.print_config()
return app.run()


if __name__ == "__main__":
Expand Down
Loading
Loading