From 4596873c8e21b838c82e47e36cd30fdbffac8de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20S=C3=A9n=C3=A9si?= Date: Wed, 1 Sep 2021 16:22:08 +0200 Subject: [PATCH 1/3] Allow to declare and launch ESMValTool diagnostic scripts --- climaf/ESMValTool_diags.py | 327 ++++++++++++++++++ climaf/__init__.py | 2 +- climaf/cache.py | 2 +- climaf/classes.py | 43 ++- climaf/driver.py | 129 ++++--- climaf/operators.py | 2 +- doc/contents.rst | 1 + doc/esmvaltool.rst | 134 +++++++ doc/index.rst | 1 + doc/news.rst | 4 + examples/ESMValTool_cvdp.py | 52 +++ ...ValTool_python_diags_wrapper_for_ciclad.sh | 31 ++ 12 files changed, 659 insertions(+), 69 deletions(-) create mode 100644 climaf/ESMValTool_diags.py create mode 100644 doc/esmvaltool.rst create mode 100644 examples/ESMValTool_cvdp.py create mode 100755 scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh diff --git a/climaf/ESMValTool_diags.py b/climaf/ESMValTool_diags.py new file mode 100644 index 00000000..ad624841 --- /dev/null +++ b/climaf/ESMValTool_diags.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Driver and utilities for launching an ESMValTool diagnostic. 
+ +Originally developped with ESMValTool version 2.2.0 +""" +from __future__ import print_function, division, unicode_literals, absolute_import + +# We assume that the dataset period has complete years (because (some) EVT diags do assume it too) +# Created : S.Senesi - 2021 + +import sys +import os +import yaml +import subprocess +import time +import datetime +import logging +# +from env.environment import * +from env.clogging import clogger, dedent as cdedent +from env.site_settings import atIPSL +# +import climaf +from climaf import classes +from climaf.utils import Climaf_Error +from climaf.anynetcdf import ncf +from climaf.classes import timePeriod + +#: Path for the wrapper script for setting ESMValTool's diag scripts environment and launching them +wrapper = None + +def evt_script(climaf_name, script): + """Create a function named CLIMAF_NAME for launching an ESMValTool's + diagnostic script SCRIPT (which follows EMSValTool naming + convention) + + The created function will actually invoke :py:func:`~climaf.driver.ceval_evt` with + argument SCRIPT and own arguments. + + """ + + doc = "CliMAF wrapper for EVT script : %s" % script + defs = 'def %s(*args,**dic) :\n """%s"""\n '% (climaf_name, doc) + \ + 'return climaf.driver.ceval_evt("%s","%s",*args,**dic)\n' \ + %(climaf_name, script) + exec(defs, globals()) # + exec(defs, locals()) # + exec("from climaf.ESMValTool_diags import %s" % climaf_name, sys.modules['__main__'].__dict__) + clogger.debug("ESMValTool script %s has been declared as function %s" % (script, climaf_name)) + + +def call_evt_script(climaf_name, script, ensembles, *operands, **parameters) : + """ + Driver for calling an ESMValTool diagnostic script (DS). + + This function is NOT supposed to be called directly except by CliMAF driver, see doc. 
+ + Arguments : + + - climaf_name : name of the python function associated to the DS + + - script : name of the DS, according to ESMValTool convention + + - ensembles : list of datasets ensemble objects to provide to the + DS (one member per variable) + + - operands : values of the ensemble objects (i.e. filenames) + + - parameters : additional key/value pairs to provide to the DS + + This drivers creates a directory dedicated to running that DS, and all + necessary interface files. It checks that execution was successfull. + + Returns a pair : DS working directory, dictionnary of provenance information + + """ + + # Initalize most settings + settings = { + 'recipe' : 'CliMAF', + 'script' : script, + 'version' : climaf.version, + + # User may wish to change next attributes for each call + 'auxiliary_data_dir' : None, + 'log_level' : _translate_loglevel(clogger.getEffectiveLevel()), + 'output_file_type' : 'png', + 'profile_diagnostic' : False, + 'write_netcdf' : True, + 'write_plots' : True, + 'quick_plot' : {}, + } + + # Account for dynamical, un-controlled, script call parameters to update settings + settings.update(parameters) + + # Create a working directory according to ESMValTool habits + # (e.g. 
cvdp_20210523_044731) + output_dir=settings.get('output_dir','./evtscript_output/') + tmpdir = output_dir+"/%s_%s"%(climaf_name, datetime.datetime.now().strftime("%Y%m%d_%H%M%S")) + tmpdir = os.path.abspath(tmpdir)+"/" + os.makedirs(tmpdir) + + # Create sub-directories according to ESMValTol habits + for adir in ['work_dir','run_dir','preproc_dir']: + settings[adir] = tmpdir + adir.replace('_dir','') + if not os.path.exists(settings[adir]): + os.mkdir(settings[adir]) + # Plot dir doesn't stick to the rule ('plot' -> 'plots') + settings['plot_dir'] = tmpdir + 'plots' + if not os.path.exists(settings['plot_dir']): + os.mkdir(settings['plot_dir']) + + # Create medata files (one per variable/ensemble, gouped in dict + # metadatas with key = variable, value = metadata file): + metadatas = dict() + for ensemble, value in zip(operands, ensembles) : + _create_metadata_file(script, ensemble, value, settings['preproc_dir'], metadatas) + + # on pourrait faire un controle sur le fait qu'il y a bien un + # ensemble par variable déclarée, mais pas sûr que déclarer les + # variables soit pratiqué dans EVT + + settings['input_files'] = [ metadatas[variable] for variable in metadatas ] + + # Write settings.yaml using dict settings + settings_filename = settings['run_dir'] + "/settings.yml" + with open(settings_filename, 'w') as file: + yaml.safe_dump(settings, file) + + if wrapper is None : + _init_wrapper() + + # Launch the diagnsotic script using a wrapper + command = [ wrapper, script, settings_filename ] + clogger.info("Launching command : " + repr(command)) + tim1 = time.time() + process = subprocess.Popen( + command, + bufsize=2**20, # Use a large buffer to prevent NCL crash + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + cwd=settings['run_dir'], + env=dict(os.environ), + ) + logname = settings['run_dir'] + '/log.txt' + returncode = None + with open(logname, 'ab') as log: + while returncode is None: + returncode = process.poll() + log.write(process.stdout.read()) + # 
wait, but not long because the stdout buffer may fill up: + # https://docs.python.org/3.6/library/subprocess.html#subprocess.Popen.stdout + time.sleep(0.001) + + if returncode == 0: + clogger.debug("Script %s (%s) completed successfully", climaf_name, script) + else: + raise Climaf_Error( + "Diagnostic script {} ({}) failed with return code {}. See the log " + "in {}".format(climaf_name, script, returncode, logname)) + # + duration = time.time() - tim1 + clogger.info("Done in %.1f s with %s computation " + "(command was :%s )" % (duration, script, command)) + + # Returned value is a pair : working directory, dict of provenance information + try : + with open(settings['run_dir']+"/diagnostic_provenance.yml","r") as prov : + prov_dict=yaml.safe_load(prov) + except : + raise Climaf_Error("Script %s (%s) didn't produce provenance information", + climaf_name, script) + prov_dict = {} + + return tmpdir,prov_dict + + + +def _create_metadata_file(script, ensemble, value, preproc_dir, metadatas) : + """Create an ESMVamTool diagnostic script interface file of type 'medata file' + + This yaml file describes each input file of an objects' ENSEMBLE + provided to a SCRIPT. Input files are those listed in VALUE, as a + white-space separated list of filenames, and are CliMAF cache + files (or basic data files). 
They are provided ot the script as + symbolic links in directory PREPROC_DIR, and with names built with + ensemble key, variable name and file period + + A number of descriptors are soughtread from datafiles + + Arg METADATAS is a dict that allows to return the metadata + filename, the key being the ensemble variable + + """ + if not isinstance(ensemble, classes.cens): + raise Climaf_Error("EVT scripts like %s only accepts ensembles , which is not the case for %s:"\ + % (script,ensemble)) + files=value.split(" ") + if value != '' and not all(map(os.path.exists, files)): + raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:" % infile) + + variable = classes.varOf(ensemble[ensemble.order[0]]) + data_dir = preproc_dir + "/" + variable + os.makedirs(data_dir) + # + i=0 + metadata = dict() + for member in ensemble.order : + if variable != classes.varOf(ensemble[member]) : + raise Climaf_Driver_Error("A member has wrong variable (%s rather than %s)"\ + % (classes.varOf(ensemble[member]),variable)) + d=dict() + d['alias'] = member + d['dataset'] = member + # recipe_dataset_index : ? numero d'ordre dans la liste des datasets de la recipe ? 
+ d['recipe_dataset_index'] = i + 1 + d['project'] = classes.projectOf(ensemble[member]) + d['exp'] = classes.experimentOf(ensemble[member]) + # We assume that the dataset period has complete years + d['start_year'] = int(timePeriod(ensemble[member]).pr().split("-")[0]) + d['end_year'] = int(timePeriod(ensemble[member]).pr().split("-")[1]) + d['short_name'] = classes.varOf(ensemble[member]) + d['variable_group'] = classes.varOf(ensemble[member]) + d['ensemble'] = classes.realizationOf(ensemble[member]) + d['diagnostic'] = script + d['preprocessor'] = 'default' + + # Create a symbolic link in preproc dir for the input file + afile = files[i] + i += 1 + link_name = data_dir + "/" + member + "_" + variable + \ + "_%d"%d['start_year'] + "-" + "%d"%d['end_year'] + ".nc" + if os.path.exists(link_name): + os.remove(link_name) + os.symlink(afile,link_name) + d['filename'] = link_name + + # Add informations read in file + freq, inst, lname, table, realm, stdname, units = _read_attr_from_file(afile,variable) + d['frequency'] = freq + d['institute'] = inst.split() + d['long_name'] = lname + d['mip'] = table + d['modeling_realm'] = realm.split() + d['standard_name'] = stdname + d['units'] = units + metadata[d['filename']] = d + + # Write metadata file + metadata_filename = data_dir + "/metadata.yml" + with open(metadata_filename, 'w') as file: + yaml.safe_dump(metadata, file) + + metadatas[variable] = metadata_filename + + +def _read_attr_from_file(afile,variable) : + with ncf(afile,'r') as fileobject: + try : + freq = fileobject.frequency + except : + freq = "N/A" + try : + inst = fileobject.institution_id + except : + inst = "N/A" + try : + table = fileobject.table_id + except : + table = "N/A" + try : + realm = fileobject.realm + except : + realm = "N/A" + + var = fileobject.variables[variable] + try : + lname = var.long_name + except : + lname = "N/A" + try : + stdname = var.standard_name + except : + stdname = "N/A" + try : + units = var.units + except : + units = "N/A" + 
return (freq, inst, lname, table, realm, stdname, units) + + +def _translate_loglevel(level) : + """ + Returns a string corresponding to the logging LEVEL, understandable by ESMVamTool + """ + + if level == logging.INFO : + return "info" + elif level == logging.DEBUG : + return "debug" + elif level == logging.WARNING : + return "warning" + elif level == logging.ERROR : + return "error" + elif level == logging.CRITICAL : + return "critical" + else : + return level + +def _init_wrapper(): + """ + Find a wrapper script for ESMValTool diags for the current platform. Its task + is to set the environment for executing such diags, and to launch it. See an + example of such wrapper in + :download:`$CLIMAF/scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh<../scripts/EVT_python_diags_wrapper_for_ciclad.sh>` + """ + if atIPSL : + scripts_dir = __file__ + "/../../scripts" + wrapper= scripts_dir / "EVT_python_diags_wrapper_for_ciclad.sh" + else: + raise Climaf_Error( + "Cannot find a relevant wrapper for ESMValTool diagnostic scripts " + "for current platform (in directory {})".format(scripts_dir)) + diff --git a/climaf/__init__.py b/climaf/__init__.py index 0b23fb51..c810f3aa 100644 --- a/climaf/__init__.py +++ b/climaf/__init__.py @@ -13,7 +13,7 @@ __all__ = ["cache", "classes", "dataloc", "driver", "netcdfbasics", "operators", "period", "standard_operators", "cmacro", "html", "functions", "plot", - "projects", "derived_variables"] + "projects", "derived_variables","ESMValTool_diags"] version = "2.0.0" diff --git a/climaf/cache.py b/climaf/cache.py index 98a23775..5594c6f5 100644 --- a/climaf/cache.py +++ b/climaf/cache.py @@ -210,7 +210,7 @@ def do_move(crs, filename, outfilename): waited += 1 # time.sleep(0.5) if not os.path.exists(filename): - raise Climaf_Cache_Error("File %s wasn't created upstream (or not quick enough)" % filename) + raise Climaf_Cache_Error("File %s wasn't created upstream (or not quick enough). 
It represents %s" % (filename, crs)) else: if stamping is False: clogger.debug('No stamping') diff --git a/climaf/classes.py b/climaf/classes.py index 4e5a0268..12a2ecfe 100644 --- a/climaf/classes.py +++ b/climaf/classes.py @@ -1316,10 +1316,15 @@ def __init__(self, climaf_operator, script, *operands, **parameters): """ Builds the tree of a composed object, including a dict for outputs. """ + if len(operands) == 0 : + raise Climaf_Classes_Error("Cannot apply an operator to no operand") self.operator = climaf_operator self.script = script import copy - self.flags = copy.copy(script.flags) + if script is None : + self.flags = False + else: + self.flags = copy.copy(script.flags) self.operands = operands if "period" in parameters: p = parameters["period"] @@ -2099,6 +2104,12 @@ def modelOf(cobject): return attributeOf(cobject, "model") def simulationOf(cobject): return attributeOf(cobject, "simulation") +def experimentOf(cobject): return attributeOf(cobject, "experiment") + + +def realizationOf(cobject): return attributeOf(cobject, "realization") + + def projectOf(cobject): return attributeOf(cobject, "project") @@ -2145,6 +2156,36 @@ def attributeOf(cobject, attrib): raise Climaf_Classes_Error("Unknown class for argument " + repr(cobject)) +def timePeriod(cobject): + """ Returns a time period for a CliMAF object : if object is a dataset, returns + its time period, otherwise analyze complex case and reurns something sensible + """ + if isinstance(cobject, cdataset): + return cobject.period + elif isinstance(cobject, ctree): + clogger.debug("timePeriod : processing %s,operands=%s" % (cobject.script, repr(cobject.operands))) + if cobject.script.flags.doCatTime and len(cobject.operands) > 1: + clogger.debug("Building composite period for results of %s" % cobject.operator) + periods = [timePeriod(op) for op in cobject.operands] + merged_period = merge_periods(periods) + if len(merged_period) > 1: + raise Climaf_Driver_Error("Issue when time assembling with %s, periods are 
not consecutive : %s" % + (cobject.operator, merged_period)) + return merged_period[0] + else: + clogger.debug("timePeriod logic for script is 'choose 1st operand' %s" % cobject.script) + return timePeriod(cobject.operands[0]) + elif isinstance(cobject, scriptChild): + clogger.debug("for now, timePeriod logic for scriptChilds is basic - TBD") + return timePeriod(cobject.father) + elif isinstance(cobject, cens): + clogger.debug("for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD") + return timePeriod(list(cobject.values())[0]) + else: + return None # clogger.error("unkown class for argument "+`cobject`) + + + def resolve_first_or_last_years(kwargs, duration, option="last"): # Returns a dataset after translation of period like 'last_50y' kwargs['period'] = '*' diff --git a/climaf/driver.py b/climaf/driver.py index fde93bb0..b12d8f31 100644 --- a/climaf/driver.py +++ b/climaf/driver.py @@ -40,9 +40,10 @@ from climaf.cmacro import instantiate from env.clogging import clogger, indent as cindent, dedent as cdedent from climaf.netcdfbasics import varOfFile -from climaf.period import init_period, cperiod, merge_periods +from climaf.period import init_period, cperiod from climaf import xdg_bin -from climaf.classes import allow_errors_on_ds_call +from climaf.classes import allow_errors_on_ds_call, timePeriod +from climaf.ESMValTool_diags import call_evt_script from env.environment import * @@ -64,6 +65,7 @@ def capply(climaf_operator, *operands, **parameters): if operands is None or operands[0] is None and not classes.allow_errors_on_ds_call: raise Climaf_Driver_Error("Operands is None for operator %s" % climaf_operator) opds = list(map(str, operands)) + if climaf_operator in cscripts: # clogger.debug("applying script %s to"%climaf_operator + `opds` + `parameters`) res = capply_script(climaf_operator, *operands, **parameters) @@ -71,16 +73,20 @@ def capply(climaf_operator, *operands, **parameters): op = cscripts[climaf_operator] if op.outputFormat in 
none_formats: ceval(res, userflags=copy.copy(op.flags)) + elif climaf_operator in cmacros: if len(parameters) > 0: raise Climaf_Driver_Error("Macros cannot be called with keyword args") clogger.debug("applying macro %s to" % climaf_operator + repr(opds)) res = instantiate(cmacros[climaf_operator], *operands) + elif climaf_operator in operators: clogger.debug("applying operator %s to" % climaf_operator + repr(opds) + repr(parameters)) res = capply_operator(climaf_operator, *operands, **parameters) + else: clogger.error("%s is not a known operator nor script" % climaf_operator) + return res @@ -136,7 +142,7 @@ def capply_script(script_name, *operands, **parameters): else: return maketree(script_name, script, *operands, **parameters) - + def maketree(script_name, script, *operands, **parameters): # maketree takes care of # - creating a ctree object representing the application of the scripts to its operands @@ -264,7 +270,8 @@ def ceval_for_cdataset(cobject, userflags=None, format="MaskedArray", deep=None, rep = ceval(extract, userflags=userflags, format=format) else: raise Climaf_Driver_Error("Untractable output format %s" % format) - userflags.unset_selectors() + if userflags : + userflags.unset_selectors() cdedent() return rep else: @@ -377,13 +384,13 @@ def ceval_for_ctree(cobject, userflags=None, format="MaskedArray", deep=None, de # TBD : analyze if the dataset is remote and the remote place 'offers' the operator if cobject.operator in cscripts: clogger.debug("Script %s found" % cobject.operator) - file = ceval_script(cobject, down_deep, + filen = ceval_script(cobject, down_deep, recurse_list=recurse_list) # Does return a filename, or list of filenames cdedent() if format == 'file': - return file + return filen else: - return cread(file, classes.varOf(cobject)) + return cread(filen, classes.varOf(cobject)) elif cobject.operator in operators: clogger.debug("Operator %s found" % cobject.operator) # TODO: Implement ceval_operator @@ -636,6 +643,35 @@ def 
ceval_for_string(cobject, userflags=None, format="MaskedArray", deep=None, d raise NotImplementedError("Evaluation from CRS is not yet implemented ( %s )" % cobject) + +def evaluate_inputs(call, deep = False ,recurse_list = []): + # Evaluate input data for a script call , either a CliMAF-tye one or an ESMValTool one + invalues = [] + sizes = [] + for op in call.operands: + if op: + if call.operator != 'remote_select' and \ + isinstance(op, classes.cdataset) and \ + not (op.isLocal() or op.isCached()): + inValue = ceval(op, format='file', deep=deep) + else: + inValue = ceval(op, format='file', deep=deep, + userflags=call.flags, recurse_list=recurse_list) + clogger.debug("evaluating %s operand %s as %s" % (call.operator, op, inValue)) + if inValue is None or inValue == "": + raise Climaf_Driver_Error("When evaluating %s : value for %s is None" % (call.script, repr(op))) + if isinstance(inValue, list): + size = len(inValue) + else: + size = 1 + else: + inValue = '' + size = 0 + sizes.append(size) + invalues.append(inValue) + return invalues,sizes + + def ceval(cobject, userflags=None, format="MaskedArray", deep=None, derived_list=[], recurse_list=[]): """ @@ -699,30 +735,7 @@ def ceval_script(scriptCall, deep, recurse_list=[]): """ script = cscripts[scriptCall.operator] template = Template(script.command) - # Evaluate input data - invalues = [] - sizes = [] - for op in scriptCall.operands: - if op: - if scriptCall.operator != 'remote_select' and \ - isinstance(op, classes.cdataset) and \ - not (op.isLocal() or op.isCached()): - inValue = ceval(op, format='file', deep=deep) - else: - inValue = ceval(op, userflags=scriptCall.flags, format='file', deep=deep, - recurse_list=recurse_list) - clogger.debug("evaluating %s operand %s as %s" % (scriptCall.operator, op, inValue)) - if inValue is None or inValue == "": - raise Climaf_Driver_Error("When evaluating %s : value for %s is None" % (scriptCall.script, repr(op))) - if isinstance(inValue, list): - size = len(inValue) - 
else: - size = 1 - else: - inValue = '' - size = 0 - sizes.append(size) - invalues.append(inValue) + invalues,sizes=evaluate_inputs(scriptCall,deep,recurse_list) # print("len(invalues)=%d"%len(invalues)) # # Replace input data placeholders with filenames @@ -768,6 +781,7 @@ def ceval_script(scriptCall, deep, recurse_list=[]): else: subdict["var"] = classes.varOf(scriptCall) subdict["Var"] = classes.varOf(scriptCall) + i = 0 for op in scriptCall.operands: if op: @@ -935,6 +949,7 @@ def ceval_script(scriptCall, deep, recurse_list=[]): for ll, lt in script.fixedfields: if not files_exist[ll]: os.system("rm -f " + ll) + # Handle outputs if script.outputFormat in ["txt", ]: with open(logdir + "/last.out", 'r') as f: @@ -959,34 +974,17 @@ def ceval_script(scriptCall, deep, recurse_list=[]): raise Climaf_Driver_Error("Some output missing when executing " ": %s. \n See %s/last.out" % (template, logdir)) + +def ceval_evt(climaf_name, script, *operands, **parameters) : + """ + Evaluates OPERANDS and forward them to function + :py:func:`~climaf.ESMValTool_diags.call_evt_script` together with all arguments. -def timePeriod(cobject): - """ Returns a time period for a CliMAF object : if object is a dataset, returns - its time period, otherwise returns time period of first operand + This function is NOT supposed to be called directly except by CliMAF driver, see doc. 
""" - if isinstance(cobject, classes.cdataset): - return cobject.period - elif isinstance(cobject, classes.ctree): - clogger.debug("timePeriod : processing %s,operands=%s" % (cobject.script, repr(cobject.operands))) - if cobject.script.flags.doCatTime and len(cobject.operands) > 1: - clogger.debug("Building composite period for results of %s" % cobject.operator) - periods = [timePeriod(op) for op in cobject.operands] - merged_period = merge_periods(periods) - if len(merged_period) > 1: - raise Climaf_Driver_Error("Issue when time assembling with %s, periods are not consecutive : %s" % - (cobject.operator, merged_period)) - return merged_period[0] - else: - clogger.debug("timePeriod logic for script is 'choose 1st operand' %s" % cobject.script) - return timePeriod(cobject.operands[0]) - elif isinstance(cobject, classes.scriptChild): - clogger.debug("for now, timePeriod logic for scriptChilds is basic - TBD") - return timePeriod(cobject.father) - elif isinstance(cobject, classes.cens): - clogger.debug("for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD") - return timePeriod(list(cobject.values())[0]) - else: - return None # clogger.error("unkown class for argument "+`cobject`) + invalues, _ = evaluate_inputs(classes.ctree(script, None, *operands, **parameters)) + return call_evt_script(climaf_name, script, invalues, *operands, **parameters) + def ceval_select(includer, included, userflags, format, deep, derived_list, recurse_list): @@ -1142,14 +1140,15 @@ def noselect(userflags, ds, format): can_select = False - if ((userflags.canSelectVar or ds.oneVarPerFile()) and - (userflags.canSelectTime or ds.periodIsFine()) and - (userflags.canSelectDomain or ds.domainIsFine()) and - (userflags.canAggregateTime or ds.periodHasOneFile()) and - (userflags.canAlias or ds.hasExactVariable()) and - (userflags.canMissing or ds.missingIsOK()) and - (ds.hasOneMember()) and - (format == 'file')): + if (userflags and + (userflags.canSelectVar or ds.oneVarPerFile()) 
and + (userflags.canSelectTime or ds.periodIsFine()) and + (userflags.canSelectDomain or ds.domainIsFine()) and + (userflags.canAggregateTime or ds.periodHasOneFile()) and + (userflags.canAlias or ds.hasExactVariable()) and + (userflags.canMissing or ds.missingIsOK()) and + (ds.hasOneMember()) and + (format == 'file')): can_select = True return can_select diff --git a/climaf/operators.py b/climaf/operators.py index 55504914..c0eef159 100644 --- a/climaf/operators.py +++ b/climaf/operators.py @@ -240,7 +240,7 @@ def __init__(self, name, command, format="nc", select=True, canOpendap=False, clogger.debug("Found %s" % executable) # # Analyze inputs field keywords and populate dict - # attribute 'inputs' with some properties + # attribute 'inputs' with some properties (key is rank) self.inputs = dict() commuteWithEnsemble = True it = re.finditer( diff --git a/doc/contents.rst b/doc/contents.rst index 97745e8e..2873b174 100644 --- a/doc/contents.rst +++ b/doc/contents.rst @@ -14,6 +14,7 @@ Contents operators std_operators API + esmvaltool howto experts_corner contributing diff --git a/doc/esmvaltool.rst b/doc/esmvaltool.rst new file mode 100644 index 00000000..afccf334 --- /dev/null +++ b/doc/esmvaltool.rst @@ -0,0 +1,134 @@ +----------------------------------------------------------------- +Calling ESMValTools diagnostic scripts +----------------------------------------------------------------- + + +`ESMValTool `_ is a software package for evaluating Earth Sytem Models ; it includes a large number of ``recipes`` which are configuration files for its runs (see a `recipe example +`_); such ``recipes`` include a data preparation and preprocessing step, followed by the call of one or more diagnostic scripts. + +CliMAF can feed those diagnostic scripts and launch it. 
The following applies : + +Script declaration +------------------ +The ESMValTool diagnostic script must be declared using function :py:func:`~climaf.ESMValTool_diags.evt_script` (and using the same name as in ESMValTool recipes); it creates a python function for calling the script, hereafter called the ``nickname`` function. + +Script inputs +------------- +- when using this feature, it is the user's responsibility to understand which data pre-processing is needed upstream of the diagnostic script and to reproduce that pre-processing using CliMAF, in order to feed the diagnostic consistently. The recipe describes that pre-processing; + +- according to ESMValTool's principles, each script input of type fields must be an ensemble of fields, composed of fields representing the same geophysical variable + +- these ensembles must be provided as arguments to the ``nickname`` function in the same order as in the ESMValTool recipe's list of variables; + +- any script parameter that can be set in a ``recipe`` can also be set through CliMAF as a keyword argument to the ``nickname`` function call + +- other parameters that ESMValTool users set in the config-user.yml file and that ESMValTool passes to the script can also be set through CliMAF at the stage of the ``nickname`` function call. 
This applies for instance to parameters : + - ``output_dir`` (defaults to ./evtscript_output/), + - ``output_file_type`` (png) , + - ``write_netcdf`` (True) + - ``write_plots`` (True) + - ``profile_diagnostic`` (False), + - ``auxiliary_data_dir`` (empty) + +- the log level passed to the script is the current CliMAF log level, but can be superseded using parameter ``log_level`` + + +Scripts outputs +--------------- +- diagnostic script outputs are organized similarly to ESMValTool's : a subdirectory of ``output_dir`` is created for each call, whose name includes a date/time stamp and is part of the value returned by the ``nickname`` function call (see below); it includes subdirectories ``preproc``, ``work``, ``run`` and ``plots``; these sub-directories have a simpler organization than in ESMValTool (fewer hierarchy levels) because they represent outputs for a simpler run : only the diagnostic script (rather than a full recipe with its pre-processing) + +- subdir ``preproc`` includes, like in ESMValTool, one sub-directory per input variable (i.e. per input ensemble); each one includes symbolic links to the input data files, which are located in CliMAF cache; the symbolic link name is built using the key (in the ensemble dictionary), the variable name and the start and end year for the data period; these symbolic names are those used as provenance information by ESMValTool and cannot host the kind of provenance information that CliMAF could provide; + +- there is not yet any way for CliMAF to handle ESMValTool diagnostic script's output data as CliMAF objects, because ESMValTool doesn't include any rule for **symbolic naming** of these outputs. So, calling an ESMValTool script is more or less a dead-end in the data processing flow. + +- ESMValTool generates ``provenance`` information using diagnostic script's outputs, and builds an html index of graphic type outputs, named ``index.html``, in the output dir. 
The provenance information dictionary is included in the ``nickname`` function call returned values. + + +Values returned upon diagnostic script call +------------------------------------------- +Upon invocation, the ``nickname`` function returns a pair of values : + +- the name for the script's top-level outputs directory + +- the dictionary of provenance information (see ESMValTool documentation) + + +ESMValTool scripts wrapper +-------------------------- + +A helper script is needed in order to set the relevant environment for calling ESMValTool scripts before calling them. Such a wrapper script must be tuned for each ESMValTool install one wants to use, and to the specifics of the platform for handling software environments. Variable :py:data:`~climaf.ESMValTool_diags.wrapper` should be set to the path for such a wrapper script. If it is not set, CliMAF will try to use hard-coded values depending on the current platform. At the time of writing, hard-coded values are limited to the case of Ciclad, and the corresponding wrapper is available in CliMAF distribution :download:`as scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh <../scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh>` + +Example +-------- + +The example below is also available for :download:`download here <../examples/ESMValTool_cvdp.py>` + +.. code-block:: python + + # An example of declaring and calling an ESMValTool script from CliMAF + + from climaf.api import * + from climaf.ESMValTool_diags import evt_script + + # If your platform is not Ciclad, you must tell which is the wrapper for ESMValTool scripts + climaf.ESMValTool_diags.wrapper = \ + "/home/ssenesi/climaf_installs/climaf_running/scripts/"+\ + "ESMValTool_python_diags_wrapper_for_ciclad.sh" + + # Create a CliMAF function for calling the ESMValTool diagnostic script + # (use the same syntax as the ESMVaTool recipe for designating the script) + evt_script("call_cvdp", "cvdp/cvdp_wrapper") + + # Prepare input datasets for the diag. 
+ base = dict(project="CMIP6", experiment="historical", + realization='r1i1p1f2', table="Amon", period="1850-1855", ) + models = [ "CNRM-CM6-1", "CNRM-ESM2-1"] + + variables = [ "ts", "tas", "pr", "psl" ] + + ensembles = [] + for variable in variables: + ensemble = cens( + { + model : ds(model=model, variable=variable, **base) + for model in models + }) + ensembles.append(ensemble) + + # Note : here, for other diagnostic scripts, you may have to reproduce + # the preprocessing steps that ESMValTool recipes implement upstream + # of the diagnostic script. For CVDP, there is actually no such + # preprocessing + + # Call the diag. You may provide parameters that are known to ESMValTool + # or to the diagnostic script + wdir, prov = call_cvdp(*ensembles, output_dir="./out", write_netcdf=False) + + # First returned value is the diag's working directory + print(wdir) + + # Second one is a dictionnary of provenance information which + # describes all outputs (either graphics or NetCDF files) by various + # attributes, one of which being a 'caption' + one_output, its_attributes=prov.popitem() + print(one_output, its_attributes['caption']) + + # But there is no further established framework in ESMValTool for a + # diagnostic to 'publish' a list of identifiers for its outputs + + + +Public functions and variables involved +--------------------------------------- + +.. autofunction:: climaf.ESMValTool_diags.evt_script + +.. autodata:: climaf.ESMValTool_diags.wrapper + +Private functions and variables involved +---------------------------------------- + +.. autofunction:: climaf.driver.ceval_evt + +.. 
autofunction:: climaf.ESMValTool_diags.call_evt_script + diff --git a/doc/index.rst b/doc/index.rst index 34f6ef8b..b27828c0 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -166,6 +166,7 @@ See the full, extended table at :ref:`contents` std_operators known_data API + esmvaltool howto experts_corner contributing diff --git a/doc/news.rst b/doc/news.rst index a3aea39c..e53419ac 100644 --- a/doc/news.rst +++ b/doc/news.rst @@ -6,6 +6,10 @@ What's new Changes, newest first: +- V2.0.2: + + - CliMAF can **call ESMValTool diagnostic scripts**, feeding them with data accessed and pre-processed by CliMAF calls. See :doc:`esmvaltool` + - V2.0.1: - **Scripts can now process multiple ensembles, and ensembles which are not the first argument**: diff --git a/examples/ESMValTool_cvdp.py b/examples/ESMValTool_cvdp.py new file mode 100644 index 00000000..25fbbc8c --- /dev/null +++ b/examples/ESMValTool_cvdp.py @@ -0,0 +1,52 @@ +# An example of declaring and calling an ESMValTool script from CliMAF + +from climaf.api import * +from climaf.ESMValTool_diags import evt_script + +# If your platform is not Ciclad, you must tell which is the wrapper for ESMValTool scripts +climaf.ESMValTool_diags.wrapper = \ + "/home/ssenesi/climaf_installs/climaf_running/scripts/"+\ + "ESMValTool_python_diags_wrapper_for_ciclad.sh" + +# Create a CliMAF function for calling the ESMValTool diagnostic script +# (use the same syntax as the ESMValTool recipe for designating the script) +evt_script("call_cvdp", "cvdp/cvdp_wrapper") + +# Prepare input datasets for the diag. 
+base = dict(project="CMIP6", experiment="historical", + realization='r1i1p1f2', table="Amon", period="1850-1855", ) +models = [ "CNRM-CM6-1", "CNRM-ESM2-1"] + +variables = [ "ts", "tas", "pr", "psl" ] + +ensembles = [] +for variable in variables: + ensemble = cens( + { + model : ds(model=model, variable=variable, **base) + for model in models + }) + ensembles.append(ensemble) + +# Note : here, for other diagnostic scripts, you may have to reproduce +# the preprocessing steps that ESMValTool recipes implement upstream +# of the diagnostic script. For CVDP, there is actually no such +# preprocessing + +# Call the diag. You may provide parameters that are known to ESMValTool +# or to the diagnostic script +wdir, prov = call_cvdp(*ensembles, output_dir="./out", write_netcdf=False) + +# First returned value is the diag's working directory +print(wdir) + +# Second one is a dictionary of provenance information which +# describes all outputs (either graphics or NetCDF files) by various +# attributes, one of which being a 'caption' +one_output, its_attributes=prov.popitem() +print(one_output, its_attributes['caption']) + +# But there is no further established framework in ESMValTool for a +# diagnostic to 'publish' a list of identifiers for its outputs + + diff --git a/scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh b/scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh new file mode 100755 index 00000000..281e8c01 --- /dev/null +++ b/scripts/ESMValTool_python_diags_wrapper_for_ciclad.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Run an ESMValTool python diagnostic script, after setting the relevant +# environment + +# This script must be tuned both to the platform and to the ESMValTool install + +# Here, platform is Ciclad and ESMValTool install is an intermediate one +# And we assume that command 'conda' is available + +diag=${1/.py/}.py +settings=$2 + +module load modtools-python3 + +# Init conda +base=$(conda info | grep -i 'base environment' | awk '{print $4}') 
+source $base/etc/profile.d/conda.sh + +# Init ESMValTool with conda just for the sake of initing some environment +# variables such as NCARG_ROOT for Ncl +CENV=/modfs/modtools/miniconda3/envs/esmvaltool_dev +conda activate $CENV + +# Using python from ESMValTool env is necessary for accessing ESMValTool modules, +# and is not automatic after 'conda activate', when launched by a python's +# subprocess with shell=False +python=$CENV/bin/python3 + +$python $CENV/lib/python3.9/site-packages/Esmvaltool/diag_scripts/$diag $settings + From fcb7ab425595e401835776ae71016ff8f924c0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20S=C3=A9n=C3=A9si?= Date: Thu, 23 Sep 2021 15:00:03 +0200 Subject: [PATCH 2/3] Add a 'breaking news' v banner about calling ESV diags. Fix spelling --- doc/conf.py | 10 +++++----- doc/esmvaltool.rst | 2 +- doc/index.rst | 11 +++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 4e4142e2..0a51d133 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -49,12 +49,12 @@ ] from env.site_settings import atCNRM, onCiclad -if atCNRM or onCiclad: - # This one works at CNRM , Ciclad and RTD - extensions.append('sphinxcontrib.napoleon') +if atCNRM: + # This one works at CNRM + extensions.append('sphinxcontrib.napoleon') else: - # this one works at RTD - extensions.append('sphinx.ext.napoleon') + # this one works at RTD and Ciclad + extensions.append('sphinx.ext.napoleon') # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/doc/esmvaltool.rst b/doc/esmvaltool.rst index afccf334..ef865988 100644 --- a/doc/esmvaltool.rst +++ b/doc/esmvaltool.rst @@ -16,7 +16,7 @@ Script inputs ------------- - when using this feature, it is the user's responsibility to understand which data pre-processing is needed upstream of the diagnostic script and to reproduce that pre-processing using CliMAF, in order to feed the diagnostic consistently. 
The recipe describes that pre-processing; -- according to ESMValTool's principles, each script input of type fields must be an ensemble of fields, composed of fields representing the same geophyiscal variable +- according to ESMValTool's principles, each script input of type ``field`` must be an ensemble of fields, composed of fields representing the same geophysical variable - these ensembles must be provided as arguments to the ``nickname`` function in the same order than used in ESMValTool recipe's list of variables; diff --git a/doc/index.rst b/doc/index.rst index b27828c0..c86408a0 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -22,6 +22,17 @@ Discover CliMAF: essentials of the documentation to get started - :download:`A notebook for CMIP6 with CliMAF at CLIMERI <../examples/CMIP6_analysis_with_CliMAF.html>` - `Subscribe to CliMAF users mailing list `_ to stay in the loop! +Breaking news: launch ESMValTool diags with CliMAF +================================================== + +The development of a standard interface for diagnostic scripts within +the IS-ENES3 European project has led to the development of an +interface between CliMAF and ESMValTool diagnostic scripts that works +even with the current ESMValTool interface. + +Hence, it is now possible to use ESMValTool diagnostic scripts and +to reproduce its recipes from CliMAF; see the :doc:`documentation page +here <esmvaltool>` CliMAF - a Climate Model Assessment Framework From 680a8064cb2a723e547d2306120737a29f267a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABlle=20Rigoudy?= Date: Tue, 16 Nov 2021 09:13:58 +0100 Subject: [PATCH 3/3] Update requirement and a small bugfix. 
--- climaf/driver.py | 2 +- requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/climaf/driver.py b/climaf/driver.py index 714445dd..10f60094 100644 --- a/climaf/driver.py +++ b/climaf/driver.py @@ -400,7 +400,7 @@ def ceval_for_ctree(cobject, userflags=None, format="MaskedArray", deep=None, de recurse_list=recurse_list) # Does return a filename, or list of filenames cdedent() if format in ['file', ]: - return file + return filen else: return cread(filen, classes.varOf(cobject)) elif cobject.operator in operators: diff --git a/requirements.txt b/requirements.txt index 73829a79..019af97d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ cftime==1.0.3.4 # Cython>0.15.1 # cartopy==0.17 # python-dateutil -natsort \ No newline at end of file +natsort +pyyaml \ No newline at end of file