From 5a4bfe4546c9527ca017accc3bf6a256117aa9f2 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 19 Feb 2021 16:34:49 +0000 Subject: [PATCH 1/3] attempt to make reportengine lockfile interface simpler --- src/reportengine/configparser.py | 127 +++++++++++++++++++------------ 1 file changed, 79 insertions(+), 48 deletions(-) diff --git a/src/reportengine/configparser.py b/src/reportengine/configparser.py index 9c9a9ce..f8824c2 100644 --- a/src/reportengine/configparser.py +++ b/src/reportengine/configparser.py @@ -135,62 +135,93 @@ def f_(self, val, *args, **kwargs): return f_ def record_from_defaults(f): - """Decorator for recording default values. Given a key, for example - `filter_defaults` there might exist several specifications or specs of - defaults, each spec will have its own label e.g foo and bar. First a - parse function must be defined for the key and decorated with - `record_from_defaults` + """Decorator for recording default values returned by a production rule. + This should decorate a production rule which uses the following template: + + from reportengine.configparser import Config + ... + class ConfigClass(Config): + ... + def produce_(self, _recorded_spec_=None): + if _recorded_spec_ is not None: + return _recorded_spec_ + else: + # load some defaults + ... + return loaded_defaults + + The loaded_defaults will most likely be a mapping with different variations + of defaults for whatever the is. This might seem rather extract so + consider the following example: + + We want to save the default ways in which we can group datasets. Those + defaults could be stored in a ``yaml`` file as + + standard_report: experiment + theory_covariance_report: nnpdf31_process + + Here we have two different variants of our default, and each one has an + associate value, which in this case defines a grouping. Since there are + only two variants of defaults which correspond to a single string each, + we could easily store this information in a dictionary instead of its own + file. For now, let's assume the defaults file is saved in a file called + ``default_groups.yaml`` a package directory called ``validphys.defaults``. + Lets also decide that the for this set of defaults will be + called ``default_grouping`` such that our production rule becomes: + + from importlib.resources import read_text + from reportengine.compat import yaml + from reportengine.configparser import Config + ... + class ConfigClass(Config): + ... @record_from_defaults - def parse_filter_defaults(self, spec): - return spec - - Next a rule must exist for obtaining the defaults, the most simple rule - would check for a provided spec in a dictionary. These rules must be named - `load_default_` e.g - - def load_default_filter_defaults(self, spec): - _defaults = dict(eggs="spam", breakfast="continental") - return _defaults[spec] - - these functions should be expanded with some error handling and can also - allow for lazy initialization for defaults which are more expensive to - compute. - - Now the spec from the runcard is used to select some defaults for the given - key. The lockfile keeps track of what defaults are used by updating the input - with a nested mapping like - - _recorded_spec_: - : - defaults_mapping - - The concept of a lockfile is that it can be used in lieu of a runcard and - produce the exact same configuration as when it was created, even if - the defaults have since been altered. In order to gain access to this - functionality a production rule needs to be added to the class which takes - as an argument and _recorded_spec_=None as an optional argument - - def produce_filter_defaults_rules( - self, filter_defaults, filter_defaults_recorded_spec_=None - ): - if filter_defaults_recorded_spec_: - # do something with recorded defaults + def produce_default_grouping(self, default_grouping_recorded_spec_=None): + if default_grouping_recorded_spec_ is not None: + return default_grouping_recorded_spec_ else: - # do something with current loaded defaults (and update runcard) + loaded_defaults = yaml.safe_load( + read_text(validphys.defaults, "default_groups.yaml") + ) + return loaded_defaults + + It's now clear what gets returned when ``default_grouping_recorded_spec_`` is + ``None``: a mapping containing all of the defaults which are currently + contained in the default file. It's now important to discuss what happens + to the returned defaults. When you decorate with ``record_from_defaults`` + the return of the wrapped function is added to ``self.lockfile`` as + self.lockfile[_recorded_spec_] = return value of produce_. + The lockfile already contains the input config file and updates that with + any defaults which are loaded. The lockfile then gets dumped after all + actions have successfully ran and keeps a permanent record of the + input configuration and any defaults that were used. The lockfile is saved + to the ``input_directory`` of your output along with the runcard. + + The lockfile itself is a valid runcard and if you were to use it as + input to reportengine, then _recorded_spec_ would no longer be None + and the return value of produce_ would take the recorded value. This + means that defaults can even be changed without preventing us from + reproducing old results. """ + sig = inspect.signature(f) + f_name = f.__name__ + key = trim_token(f_name) + lockkey = key + "_recorded_spec_" + if lockkey not in sig.parameters or sig.parameters[lockkey].default is not None: + raise KeyError( + f"{f_name} must have `{lockkey}=None` in it's signature to be a " + "valid default loading production rule which saves defaults to " + "lockfile." + ) @functools.wraps(f) def f_(self, *args, **kwargs): - res = f(self, *args, **kwargs) - key = trim_token(f.__name__) - lockkey = key + "_recorded_spec_" - load_func = getattr(self, f"{_defaults_token}{key}") - # only load defaults if we need to + # either returns pre-existing _recorded_spec_ or loaded defaults + res = f(self, *args, **kwargs) + # save to lockfile if not present. if lockkey not in self.lockfile: - self.lockfile[lockkey] = {res: load_func(res)} - elif res not in self.lockfile[lockkey]: - self.lockfile[lockkey][res] = load_func(res) + self.lockfile[lockkey] = res return res return f_ From a8ba37d81c8ab5f69dc624adea9cc658e98cfa0d Mon Sep 17 00:00:00 2001 From: wilsonm Date: Mon, 22 Feb 2021 10:48:26 +0000 Subject: [PATCH 2/3] Removing need to explictly ask for _recorded_spec_ --- src/reportengine/configparser.py | 66 +++++++++++++++----------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/reportengine/configparser.py b/src/reportengine/configparser.py index f8824c2..d43b26d 100644 --- a/src/reportengine/configparser.py +++ b/src/reportengine/configparser.py @@ -143,16 +143,14 @@ def record_from_defaults(f): ... class ConfigClass(Config): ... - def produce_(self, _recorded_spec_=None): - if _recorded_spec_ is not None: - return _recorded_spec_ - else: - # load some defaults - ... - return loaded_defaults + def produce_(self): + # load some defaults + ... + return loaded_defaults The loaded_defaults will most likely be a mapping with different variations - of defaults for whatever the is. This might seem rather extract so + of defaults for whatever the is, however in theory the loaded_defaults + could be a string or a list. This might seem rather abstract so consider the following example: We want to save the default ways in which we can group datasets. Those @@ -177,17 +175,14 @@ def produce_(self, _recorded_spec_=None): class ConfigClass(Config): ... @record_from_defaults - def produce_default_grouping(self, default_grouping_recorded_spec_=None): - if default_grouping_recorded_spec_ is not None: - return default_grouping_recorded_spec_ - else: - loaded_defaults = yaml.safe_load( - read_text(validphys.defaults, "default_groups.yaml") - ) - return loaded_defaults - - It's now clear what gets returned when ``default_grouping_recorded_spec_`` is - ``None``: a mapping containing all of the defaults which are currently + def produce_default_grouping(self): + loaded_defaults = yaml.safe_load( + read_text(validphys.defaults, "default_groups.yaml") + ) + return loaded_defaults + + It's now clear what gets returned by the production rule: a mapping + containing all of the defaults which are currently contained in the default file. It's now important to discuss what happens to the returned defaults. When you decorate with ``record_from_defaults`` the return of the wrapped function is added to ``self.lockfile`` as @@ -196,29 +191,30 @@ def produce_default_grouping(self, default_grouping_recorded_spec_=None): any defaults which are loaded. The lockfile then gets dumped after all actions have successfully ran and keeps a permanent record of the input configuration and any defaults that were used. The lockfile is saved - to the ``input_directory`` of your output along with the runcard. + to the ``input`` directory of your output along with the runcard. The lockfile itself is a valid runcard and if you were to use it as - input to reportengine, then _recorded_spec_ would no longer be None - and the return value of produce_ would take the recorded value. This - means that defaults can even be changed without preventing us from - reproducing old results. + input to reportengine, then _recorded_spec_ would be present in the + input config. Then the wrapper would force the return value of produce_ + to be the recorded defaults. This means that defaults can even be changed + without preventing us from reproducing old results (by running reportengine + on the lockfile produced alongside the old results). """ - sig = inspect.signature(f) - f_name = f.__name__ - key = trim_token(f_name) - lockkey = key + "_recorded_spec_" - if lockkey not in sig.parameters or sig.parameters[lockkey].default is not None: - raise KeyError( - f"{f_name} must have `{lockkey}=None` in it's signature to be a " - "valid default loading production rule which saves defaults to " - "lockfile." - ) + lockkey = trim_token(f.__name__) + "_recorded_spec_" @functools.wraps(f) def f_(self, *args, **kwargs): # either returns pre-existing _recorded_spec_ or loaded defaults - res = f(self, *args, **kwargs) + try: + _, res = self.parse_from_(None, lockkey, write=False) + except InputNotFoundError as e: + log.debug( + "Couldn't parse previously saved defaults for %s, the " + "following error was raised when attempting to find them:\n%s.", + trim_token(f.__name__), + e + ) + res = f(self, *args, **kwargs) # save to lockfile if not present. if lockkey not in self.lockfile: self.lockfile[lockkey] = res From 6d7273b7f45bffb0530a26f7ce8ca630b4a16a74 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Tue, 23 Feb 2021 12:07:12 +0000 Subject: [PATCH 3/3] change parsing of default to take key directly from the input rather than using try/except --- src/reportengine/configparser.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/reportengine/configparser.py b/src/reportengine/configparser.py index d43b26d..0818221 100644 --- a/src/reportengine/configparser.py +++ b/src/reportengine/configparser.py @@ -205,15 +205,10 @@ def produce_default_grouping(self): @functools.wraps(f) def f_(self, *args, **kwargs): # either returns pre-existing _recorded_spec_ or loaded defaults - try: - _, res = self.parse_from_(None, lockkey, write=False) - except InputNotFoundError as e: - log.debug( - "Couldn't parse previously saved defaults for %s, the " - "following error was raised when attempting to find them:\n%s.", - trim_token(f.__name__), - e - ) + curr_input = self._curr_input + if lockkey in curr_input: + res = curr_input[lockkey] + else: res = f(self, *args, **kwargs) # save to lockfile if not present. if lockkey not in self.lockfile: