diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index f88f3e7a0a..e364904f7e 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -44,7 +44,7 @@ requirements: - joblib - sphinx_rtd_theme >0.5 - sphinxcontrib-bibtex - - ruamel.yaml <0.18 + - ruamel.yaml >=0.15 test: requires: diff --git a/extra_tests/regression_checks.py b/extra_tests/regression_checks.py index 855c398fd5..e5976d54dc 100644 --- a/extra_tests/regression_checks.py +++ b/extra_tests/regression_checks.py @@ -9,7 +9,7 @@ import pytest from n3fit.tests.test_fit import EXE, check_fit_results -from reportengine.compat import yaml +from validphys.utils import yaml_safe REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regression_fits") @@ -37,7 +37,7 @@ def test_regression_fit(tmp_path, runcard, replica, regenerate): runcard_file = REGRESSION_FOLDER / runcard_name shutil.copy(runcard_file, tmp_path) - runcard_info = yaml.load(runcard_file.read_text()) + runcard_info = yaml_safe.load(runcard_file.read_text()) if (wname := runcard_info.get("load")) is not None: shutil.copy(REGRESSION_FOLDER / wname, tmp_path) diff --git a/n3fit/src/evolven3fit/evolve.py b/n3fit/src/evolven3fit/evolve.py index eb6e27960d..fb10a4db84 100644 --- a/n3fit/src/evolven3fit/evolve.py +++ b/n3fit/src/evolven3fit/evolve.py @@ -11,7 +11,7 @@ import eko from eko import basis_rotation, runner -from reportengine.compat import yaml +from validphys.utils import yaml_safe from . import eko_utils, utils @@ -164,7 +164,7 @@ def load_fit(usr_path): nnfitpath = usr_path / "nnfit" pdf_dict = {} for yaml_file in nnfitpath.glob(f"replica_*/{usr_path.name}.exportgrid"): - data = yaml.safe_load(yaml_file.read_text(encoding="UTF-8")) + data = yaml_safe.load(yaml_file.read_text(encoding="UTF-8")) pdf_dict[yaml_file.parent.stem] = data return pdf_dict diff --git a/n3fit/src/evolven3fit/utils.py b/n3fit/src/evolven3fit/utils.py index 9f0b6ad9fd..cdd649d972 100644 --- a/n3fit/src/evolven3fit/utils.py +++ b/n3fit/src/evolven3fit/utils.py @@ -4,8 +4,8 @@ import numpy as np from scipy.interpolate import interp1d -from reportengine.compat import yaml from validphys.pdfbases import PIDS_DICT +from validphys.utils import yaml_safe from .q2grids import Q2GRID_DEFAULT, Q2GRID_NNPDF40 @@ -57,7 +57,7 @@ def hasFlavor(self, pid): def read_runcard(usr_path): """Read the runcard and return the relevant information for evolven3fit""" - return yaml.safe_load((usr_path / "filter.yml").read_text(encoding="UTF-8")) + return yaml_safe.load((usr_path / "filter.yml").read_text(encoding="UTF-8")) def get_theoryID_from_runcard(usr_path): @@ -99,9 +99,7 @@ def generate_q2grid(Q0, Qfin, Q_points, match_dict, nf0=None, legacy40=False): frac_of_point = np.log(match_scale / Q_ini) / np.log(Qfin / Q0) num_points = int(Q_points * frac_of_point) num_points_list.append(num_points) - grids.append( - np.geomspace(Q_ini**2, match_scale**2, num=num_points, endpoint=False) - ) + grids.append(np.geomspace(Q_ini**2, match_scale**2, num=num_points, endpoint=False)) Q_ini = match_scale num_points = Q_points - sum(num_points_list) grids.append(np.geomspace(Q_ini**2, Qfin**2, num=num_points)) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 93842d23b4..053fc6d229 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -12,8 +12,8 @@ import n3fit from n3fit import vpinterface -from reportengine.compat import yaml import validphys +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -622,4 +622,4 @@ def 
storefit(pdf_object, replica, out_path, theory): } with open(out_path, "w") as fs: - yaml.dump(data, fs) + yaml_safe.dump(data, fs) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 3f2d69559f..58942b32f4 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -9,14 +9,15 @@ import re import shutil import sys -import warnings + +from ruamel.yaml import error from reportengine import colors -from reportengine.compat import yaml from reportengine.namespaces import NSList from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ from validphys.core import FitSpec +from validphys.utils import yaml_safe N3FIT_FIXED_CONFIG = dict(use_cuts='internal', use_t0=True, actions_=[]) @@ -108,15 +109,8 @@ class N3FitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore", yaml.error.MantissaNoDotYAML1_1Warning) - # We need to specify the older version 1.1 to support the - # older configuration files, which liked to use on/off for - # booleans. - # The floating point parsing yields warnings everywhere, which - # we suppress. - file_content = yaml.safe_load(o, version="1.1") - except yaml.error.YAMLError as e: + file_content = yaml_safe.load(o) + except error.YAMLError as e: raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): raise ConfigError( diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 34de8dfea7..0739963cc1 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -31,12 +31,13 @@ import re import shutil import sys -import warnings + +from ruamel.yaml import error from reportengine import colors -from reportengine.compat import yaml from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ +from validphys.utils import yaml_safe SETUPFIT_FIXED_CONFIG = dict( actions_=[ @@ -131,15 +132,8 @@ class SetupFitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): try: - with warnings.catch_warnings(): - warnings.simplefilter('ignore', yaml.error.MantissaNoDotYAML1_1Warning) - # We need to specify the older version 1.1 to support the - # older configuration files, which liked to use on/off for - # booleans. - # The floating point parsing yields warnings everywhere, which - # we suppress. 
- file_content = yaml.safe_load(o, version='1.1') - except yaml.error.YAMLError as e: + file_content = yaml_safe.load(o) + except error.YAMLError as e: raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): raise ConfigError( diff --git a/n3fit/src/n3fit/tests/test_evolven3fit.py b/n3fit/src/n3fit/tests/test_evolven3fit.py index 52799829f6..34039cb8ba 100644 --- a/n3fit/src/n3fit/tests/test_evolven3fit.py +++ b/n3fit/src/n3fit/tests/test_evolven3fit.py @@ -8,9 +8,9 @@ import pytest from eko import EKO, runner -from reportengine.compat import yaml from validphys.api import API from validphys.pdfbases import PIDS_DICT +from validphys.utils import yaml_safe REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regressions") log = logging.getLogger(__name__) @@ -24,7 +24,7 @@ def assert_sorted(arr, title): def check_lhapdf_info(info_path): """Check the LHAPDF info file is correct""" - info = yaml.load(info_path.open("r", encoding="utf-8")) + info = yaml_safe.load(info_path.open("r", encoding="utf-8")) alphas_qs = info["AlphaS_Qs"] alphas = info["AlphaS_Vals"] diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index c6458e2d27..0610250843 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -22,8 +22,8 @@ import pytest import n3fit -from reportengine.compat import yaml from validphys.n3fit_data import replica_mcseed, replica_nnseed, replica_trvlseed +from validphys.utils import yaml_safe log = logging.getLogger(__name__) REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regressions") @@ -45,7 +45,7 @@ def _load_json(info_file): def _load_exportgrid(exportgrid_file): """Loads the exportgrid file""" - return yaml.safe_load(exportgrid_file.read_text()) + return yaml_safe.load(exportgrid_file.read_text()) def test_initialize_seeds(): diff --git a/nnpdf_data/nnpdf_data/__init__.py b/nnpdf_data/nnpdf_data/__init__.py index a6eeec892d..f12d3e0abc 100644 --- a/nnpdf_data/nnpdf_data/__init__.py +++ b/nnpdf_data/nnpdf_data/__init__.py @@ -1,9 +1,7 @@ from functools import lru_cache import pathlib -import ruamel.yaml as yaml - -from ._version import __version__ +import yaml path_vpdata = pathlib.Path(__file__).parent path_commondata = path_vpdata / "commondata" @@ -12,7 +10,8 @@ _path_legacy_mapping = path_commondata / "dataset_names.yml" theory_cards = path_vpdata / "theory_cards" -_legacy_to_new_mapping_raw = yaml.YAML().load(_path_legacy_mapping) +with open(_path_legacy_mapping) as file: + _legacy_to_new_mapping_raw = yaml.load(file, yaml.Loader) # Convert strings into a dictionary legacy_to_new_mapping = { k: ({"dataset": v} if isinstance(v, str) else v) for k, v in _legacy_to_new_mapping_raw.items() diff --git a/nnpdf_data/nnpdf_data/theory.py b/nnpdf_data/nnpdf_data/theory.py index 558ca57306..2ce555f37e 100644 --- a/nnpdf_data/nnpdf_data/theory.py +++ b/nnpdf_data/nnpdf_data/theory.py @@ -4,12 +4,9 @@ """ import dataclasses -from functools import lru_cache import logging from typing import Literal, Optional -from .utils import parse_yaml_inp - DEPRECATED_KEYS = ["MaxNfAs", "SxRes", "SxOrd" "EScaleVar", "Qedref", "global_nx"] log = logging.getLogger(__name__) diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py index 33987134bd..b07fe4f399 100644 --- a/nnpdf_data/nnpdf_data/utils.py +++ b/nnpdf_data/nnpdf_data/utils.py @@ -1,8 +1,10 @@ import pathlib -import ruamel.yaml as yaml +from ruamel.yaml import YAML from validobj import ValidationError, parse_input +import 
yaml
+yaml_rt = YAML(typ="rt")
 try:
     Loader = yaml.CLoader
 except AttributeError:
@@ -22,14 +24,14 @@ def parse_yaml_inp(input_yaml, spec):
         return parse_input(inp, spec)
     except ValidationError as e:
         current_exc = e
-        # In order to provide a more complete error information, use round_trip_load
+        # In order to provide more complete error information, use round trip
         # to read the .yaml file again (insetad of using the CLoader)
-        current_inp = yaml.round_trip_load(input_yaml.open("r", encoding="utf-8"))
+        current_inp = yaml_rt.load(input_yaml.open("r", encoding="utf-8"))
         error_text_lines = []
         while current_exc:
             if hasattr(current_exc, 'wrong_field'):
                 wrong_field = current_exc.wrong_field
-                # Mappings compping from ``round_trip_load`` have an
+                # Mappings coming from yaml_rt have an
                 # ``lc`` attribute that gives a tuple of
                 # ``(line_number, column)`` for a given item in
                 # the mapping.
diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml
index 68e272ccb7..11609e12ed 100644
--- a/nnpdf_data/pyproject.toml
+++ b/nnpdf_data/pyproject.toml
@@ -33,7 +33,7 @@ include = [
 
 [tool.poetry.dependencies]
 python = "^3.9"
-"ruamel.yaml" = "<0.18"
+"ruamel.yaml" = "*"
 # Required to run filters: `filter_files_dependencies`
 pandas = {version = "*", optional = true}
 numpy = {version = "*", optional = true}
diff --git a/pyproject.toml b/pyproject.toml
index d4f135d44c..e9f4ce5d9a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,6 +65,7 @@ matplotlib = "^3.9"
 pineappl = "^0.8.2"
 pandas = "*"
 numpy = "*"
+"ruamel.yaml" = "*"
 validobj = "*"
 prompt_toolkit = "*"
 # Reportengine needs to be installed from git
diff --git a/validphys2/serverscripts/index-reports.py b/validphys2/serverscripts/index-reports.py
index 0c910e6b29..a48aebae62 100644
--- a/validphys2/serverscripts/index-reports.py
+++ b/validphys2/serverscripts/index-reports.py
@@ -6,20 +6,24 @@ called meta.yaml in the report folder and finally the html attributes,
 in that order.
 """
-import pathlib
+
+from collections import ChainMap, defaultdict
 import datetime
 import json
+import pathlib
 import re
 import sys
 import traceback
-from collections import ChainMap, defaultdict
 
-import ruamel_yaml as yaml
 from bs4 import BeautifulSoup
-#TODO: Move the thumbnail logic somewhere
-import skimage.transform
-import skimage.io
 import numpy as np
+from ruamel.yaml import error
+import skimage.io
+
+# TODO: Move the thumbnail logic somewhere
+import skimage.transform
+
+from validphys.utils import yaml_safe
 
 ROOT = '/home/nnpdf/validphys-reports'
 ROOT_URL = 'https://vp.nnpdf.science/'
@@ -34,6 +38,7 @@
 
 REQUIRED_FILE_METADATA = {'title', 'author', 'keywords'}
 
+
 def meta_from_html(f):
     soup = BeautifulSoup(f, 'lxml')
     try:
@@ -41,68 +46,73 @@ def meta_from_html(f):
     except Exception:
         title = None
     try:
-        author = soup.find('meta', {'name':'author'})['content']
+        author = soup.find('meta', {'name': 'author'})['content']
     except Exception:
         author = EMPTY
     try:
-        tagtext = soup.find('meta', {'name':'keywords'})['content']
+        tagtext = soup.find('meta', {'name': 'keywords'})['content']
     except Exception:
         tags = []
     else:
         tags = re.split(r"\s*,\s*", tagtext)
     #'soup.title.string' doesn't
-    #return a strig but rather an object with the reference to
-    #the whole parse tree, causing a huge memory leak.
+    # return a string but rather an object with the reference to
+    # the whole parse tree, causing a huge memory leak.
return dict(title=str(title), author=author, keywords=tags)
 
-class TagProps():
+
+class TagProps:
     def __init__(self, count=0, last_timestamp=0):
         self.count = count
         self.last_timestamp = last_timestamp
 
     __slots__ = ('count', 'last_timestamp')
 
+
 def meta_from_path(p):
     meta = ChainMap(DEFAULTS)
-    yaml_meta = p/'meta.yaml'
+    yaml_meta = p / 'meta.yaml'
     yaml_res = {}
     if yaml_meta.exists():
         with yaml_meta.open() as f:
             try:
-                yaml_res = yaml.safe_load(f)
-            except yaml.YAMLError as e:
+                yaml_res = yaml_safe.load(f)
+            except error.YAMLError as e:
                 print(f"Error processing {yaml_meta}: {e}", file=sys.stderr)
-    index = p/'index.html'
-    #Only do the expensive HTML parsing if we actually need a key
+    index = p / 'index.html'
+    # Only do the expensive HTML parsing if we actually need a key
     if REQUIRED_FILE_METADATA - yaml_res.keys() and index.exists():
         with index.open() as f:
             meta = meta.new_child(meta_from_html(f))
     meta = meta.new_child(yaml_res)
     return meta
 
+
 def make_single_thumbnail(f, shape=(100, 150)):
     img = skimage.io.imread(f)
-    res = skimage.transform.resize(
-        img, shape, anti_aliasing=True, mode='constant')
+    res = skimage.transform.resize(img, shape, anti_aliasing=True, mode='constant')
    return res
 
+
 def make_4_img_thumbnail(paths, shape=(100, 150)):
     w, h = shape
     whalf, hhalf = w // 2, h // 2
     positions = (
-        (slice(0,whalf), slice(0,hhalf)),
-        (slice(whalf,w), slice(0,hhalf)),
-        (slice(0,whalf), slice(hhalf,h)),
-        (slice(whalf,w), slice(hhalf,h))
+        (slice(0, whalf), slice(0, hhalf)),
+        (slice(whalf, w), slice(0, hhalf)),
+        (slice(0, whalf), slice(hhalf, h)),
+        (slice(whalf, w), slice(hhalf, h)),
     )
     res = np.zeros((*shape, 4))
     imgs = skimage.io.imread_collection(paths)
     for img, pos in zip(imgs, positions):
         res[pos] = skimage.transform.resize(
-            img, (whalf, hhalf), anti_aliasing=True, mode='constant')
+            img, (whalf, hhalf), anti_aliasing=True, mode='constant'
+        )
     return res
 
+
 def make_thumbnail(folder):
     folder = pathlib.Path(folder)
     pngs = sorted(folder.glob('*.png'))
@@ -112,19 +122,20 @@ def make_thumbnail(folder):
         return make_single_thumbnail(pngs[0])
     else:
         l = len(pngs)
-        imgs = pngs[:l-(l%4):l//4]
+        imgs = pngs[: l - (l % 4) : l // 4]
         return make_4_img_thumbnail(imgs)
 
 
 def thumbnail_tag(name):
     return f'<img src="{ROOT_URL}thumbnails/{name}">'
 
+
 def handle_thumbnail(p):
     dest = (pathlib.Path(THUMBNAILS) / p.name).with_suffix('.png')
     name = dest.name
     if dest.exists():
         return thumbnail_tag(name)
-    figures = (p / 'figures')
+    figures = p / 'figures'
     if figures.is_dir():
         try:
             res = make_thumbnail(figures)
@@ -137,12 +148,13 @@ def handle_thumbnail(p):
             return None
     return None
 
+
 def register(p, emails):
     path_meta = meta_from_path(p)
     title, author, tags = path_meta['title'], path_meta['author'], path_meta['keywords']
     url = ROOT_URL + p.name
 
-    #Use the timestamp for sorting and the string for displaying
+    # Use the timestamp for sorting and the string for displaying
     timestamp = p.stat().st_mtime
     date = datetime.date.fromtimestamp(timestamp).isoformat()
     if not title or not isinstance(title, str):
@@ -155,9 +167,7 @@ def register(p, emails):
     if not isinstance(author, str):
         author = ""
 
-    emaillinks = ' '.join(
-        f'<a href="{url}" title="{title}">📧</a>' for (url, title) in emails
-    )
+    emaillinks = ' '.join(f'<a href="{url}" title="{title}">📧</a>' for (url, title) in emails)
 
     titlelink = f'<a href="{url}">{title}</a> {emaillinks}'
 
@@ -188,18 +198,17 @@ def make_index():
                 timestamp = res[2][1]
                 for k in newkeywords:
                     props = keywords[k]
-                    props.count+=1
+                    props.count += 1
                     props.last_timestamp = max(props.last_timestamp, timestamp)
         except:
-            print("Error processing folder", p,file=sys.stderr)
+            print("Error 
processing folder", p, file=sys.stderr) raise keylist = sorted(keywords.items(), key=lambda x: -x[1].last_timestamp) - keywordmap = [(k, v.count) for k,v in keylist] - + keywordmap = [(k, v.count) for k, v in keylist] with open(OUT, 'w') as f: - json.dump({'data':data, 'keywords':keywordmap}, f) + json.dump({'data': data, 'keywords': keywordmap}, f) if __name__ == '__main__': diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 7e4875d997..b63dce6d97 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -41,7 +41,7 @@ import logging from operator import attrgetter from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Optional import numpy as np import pandas as pd @@ -50,27 +50,10 @@ from nnpdf_data import new_to_legacy_map, path_commondata from nnpdf_data.utils import parse_yaml_inp - -# We cannot use ruamel directly due to the ambiguity ruamel.yaml / ruamel_yaml -# of some versions which are pinned in some of the conda packages we use... -from reportengine.compat import yaml from validphys.coredata import KIN_NAMES, CommonData from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions from validphys.process_options import ValidProcess - -try: - # If libyaml is available, use the C loader to speed up some of the read - # https://pyyaml.org/wiki/LibYAML - # libyaml is available for most linux distributions - Loader = yaml.CLoader -except AttributeError: - # fallback to the slow loader - Loader = yaml.Loader - - -def _quick_yaml_load(filepath): - return yaml.load(filepath.read_text(encoding="utf-8"), Loader=Loader) - +from validphys.utils import yaml_fast # JCM: # Some notes for developers @@ -230,7 +213,7 @@ class TheoryMeta: ------- >>> from validphys.commondataparser import TheoryMeta ... from validobj import parse_input - ... from reportengine.compat import yaml + ... from ruamel.yaml import YAML ... theory_raw = ''' ... FK_tables: ... - - fk1 @@ -238,7 +221,7 @@ class TheoryMeta: ... - fk3 ... operation: ratio ... ''' - ... theory = yaml.safe_load(theory_raw) + ... theory = YAML(typ='safe').load(theory_raw) ... 
parse_input(theory, TheoryMeta)
     TheoryMeta(FK_tables=[['fk1'], ['fk2', 'fk3']], operation='RATIO', shifts = None, conversion_factor=1.0, comment=None, normalization=None))
     """
 
@@ -263,7 +246,7 @@ def parser(cls, yaml_file):
         """The yaml databases in the server use "operands" as key instead of "FK_tables" """
         if not yaml_file.exists():
             raise FileNotFoundError(yaml_file)
-        meta = yaml.safe_load(yaml_file.read_text())
+        meta = yaml_fast.load(yaml_file.read_text())
         # Make sure the operations are upper-cased for compound-compatibility
         meta["operation"] = "NULL" if meta["operation"] is None else meta["operation"].upper()
         if "operands" in meta:
@@ -309,7 +292,7 @@ class Variant:
     experiment: Optional[str] = None
 
 
-ValidVariants = Dict[str, Variant]
+ValidVariants = dict[str, Variant]
 
 
 ### Kinematic data
@@ -351,7 +334,7 @@ class ValidKinematics:
     """
 
     file: ValidPath
-    variables: Dict[str, ValidVariable]
+    variables: dict[str, ValidVariable]
 
     def get_label(self, var):
         """For the given variable, return the label as label (unit)
@@ -523,7 +506,7 @@ def load_data_central(self):
         if self.is_nnpdf_special:
             data = np.zeros(self.ndata)
         else:
-            datayaml = _quick_yaml_load(self.path_data_central)
+            datayaml = yaml_fast.load(self.path_data_central)
             data = datayaml["data_central"]
 
         if len(data) != self.ndata:
@@ -552,8 +535,7 @@ def load_uncertainties(self):
 
         all_df = []
         for ufile in self.paths_uncertainties:
-            uncyaml = _quick_yaml_load(ufile)
-
+            uncyaml = yaml_fast.load(ufile)
             mindex = pd.MultiIndex.from_tuples(
                 [(k, v["treatment"], v["type"]) for k, v in uncyaml["definitions"].items()],
                 names=["name", "treatment", "type"],
@@ -589,7 +571,7 @@ def load_kinematics(self, fill_to_three=True, drop_minmax=True):
             a dataframe containing the kinematics
         """
         kinematics_file = self.path_kinematics
-        kinyaml = _quick_yaml_load(kinematics_file)
+        kinyaml = yaml_fast.load(kinematics_file)
 
         kin_dict = {}
         for bin_index, dbin in enumerate(kinyaml["bins"], start=1):
diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index 1c814af690..10e4f009f5 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -12,7 +12,6 @@
 
 from nnpdf_data import legacy_to_new_map
 from reportengine import configparser, report
-from reportengine.compat import yaml
 from reportengine.configparser import ConfigError, _parse_func, element_of, record_from_defaults
 from reportengine.environment import Environment, EnvironmentError_
 from reportengine.helputils import get_parser_type
@@ -48,6 +47,7 @@
 )
 from validphys.plotoptions.core import get_info
 import validphys.scalevariations
+from validphys.utils import yaml_safe
 
 log = logging.getLogger(__name__)
 
@@ -1289,7 +1289,7 @@ def load_default_default_filter_rules(self, spec):
 
         lock_token = "_filters.lock.yaml"
         try:
-            return yaml.safe_load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}"))
+            return yaml_safe.load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}"))
         except FileNotFoundError as e:
             alternatives = [
                 el.strip(lock_token)
@@ -1386,7 +1386,7 @@ def load_default_default_filter_settings(self, spec):
 
         lock_token = "_defaults.lock.yaml"
         try:
-            return yaml.safe_load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}"))
+            return yaml_safe.load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}"))
         except FileNotFoundError as e:
             alternatives = alternatives = [
                 el.strip(lock_token)
@@ -1663,7 +1663,7 @@ def produce_theoryids(self, t0id, point_prescription):
         This hard codes the theories needed for each prescription to avoid user 
error.""" th = t0id.id - lsv = yaml.safe_load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) + lsv = yaml_safe.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) scalevarsfor_list = lsv["scale_variations_for"] # Allowed central theoryids @@ -1677,7 +1677,7 @@ def produce_theoryids(self, t0id, point_prescription): ) # Find scales that correspond to this point prescription - pp_scales_dict = yaml.safe_load( + pp_scales_dict = yaml_safe.load( read_text(validphys.scalevariations, "pointprescriptions.yaml") ) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index dc1d880513..ceb0343feb 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -12,11 +12,11 @@ import re import numpy as np +from ruamel.yaml import error from nnpdf_data.theorydbutils import fetch_theory from reportengine import namespaces from reportengine.baseexceptions import AsInputError -from reportengine.compat import yaml # TODO: There is a bit of a circular dependency between filters.py and this. # Maybe move the cuts logic to its own module? @@ -26,7 +26,7 @@ from validphys.hyperoptplot import HyperoptTrial from validphys.lhapdfset import LHAPDFSet from validphys.tableloader import parse_exp_mat -from validphys.utils import experiments_to_dataset_inputs +from validphys.utils import experiments_to_dataset_inputs, yaml_safe log = logging.getLogger(__name__) @@ -713,8 +713,8 @@ def as_input(self): log.debug('Reading input from fit configuration %s', p) try: with p.open() as f: - d = yaml.safe_load(f) - except (yaml.YAMLError, FileNotFoundError) as e: + d = yaml_safe.load(f) + except (error.YAMLError, FileNotFoundError) as e: raise AsInputError(str(e)) from e d['pdf'] = {'id': self.name, 'label': self.label} diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 46a1cab7c6..371f9837b1 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -10,8 +10,7 @@ import numpy as np import pandas as pd -from reportengine.compat import yaml -from validphys.utils import generate_path_filtered_data +from validphys.utils import generate_path_filtered_data, yaml_safe KIN_NAMES = ["kin1", "kin2", "kin3"] log = logging.getLogger(__name__) @@ -431,7 +430,7 @@ def systematic_errors(self, central_values=None): def export_data(self, buffer): """Exports the central data defined by this commondata instance to the given buffer""" ret = {"data_central": self.central_values.tolist()} - yaml.safe_dump(ret, buffer) + yaml_safe.dump(ret, buffer) def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" @@ -460,7 +459,7 @@ def export_uncertainties(self, buffer): "type": "UNCORR", } ret = {"definitions": sorted_definitions, "bins": bins} - yaml.safe_dump(ret, buffer) + yaml_safe.dump(ret, buffer) def export(self, folder_path): """Wrapper around export_data and export_uncertainties @@ -473,6 +472,8 @@ def export(self, folder_path): data_path = folder_path / data_path.name unc_path = folder_path / unc_path.name # Export data and uncertainties - self.export_data(data_path.open("w", encoding="utf-8")) - self.export_uncertainties(unc_path.open("w", encoding="utf-8")) + with open(data_path, "w") as file: + self.export_data(file) + with open(unc_path, "w") as file: + self.export_uncertainties(file) return data_path, unc_path diff --git a/validphys2/src/validphys/eff_exponents.py 
b/validphys2/src/validphys/eff_exponents.py index 0837b06b3d..00ac155479 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -1,12 +1,12 @@ -# -*- coding: utf-8 -*- """ Tools for computing and plotting effective exponents. """ -from __future__ import generator_stop import logging import numbers +from pathlib import Path import random +import tempfile import warnings import matplotlib as mpl @@ -15,7 +15,6 @@ from reportengine import collect from reportengine.checks import check_positive -from reportengine.compat import yaml from reportengine.figure import figuregen from reportengine.floatformatting import format_number, significant_digits from reportengine.table import table @@ -24,6 +23,7 @@ from validphys.pdfbases import Basis, check_basis import validphys.pdfgrids as pdfgrids from validphys.pdfplots import BandPDFPlotter, PDFPlotter +from validphys.utils import yaml_rt log = logging.getLogger(__name__) @@ -139,7 +139,7 @@ def get_title(self, parton_name): def get_ylabel(self, parton_name): if self.normalize_to is not None: - return "Ratio to {}".format(self.normalize_pdf.label) + return f"Ratio to {self.normalize_pdf.label}" else: return fr"$\{self.exponent}_e$ for ${parton_name}$" @@ -502,8 +502,8 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= """ (df_effexps,) = next_fit_eff_exps_table # Use round trip loader rather than safe_load in fit.as_input() - with open(fit.path / "filter.yml", "r") as f: - filtermap = yaml.load(f, yaml.RoundTripLoader) + with open(fit.path / "filter.yml") as f: + filtermap = yaml_rt.load(f) previous_exponents = filtermap["fitting"]["basis"] basis = filtermap["fitting"]["fitbasis"] checked = check_basis(basis, None) @@ -524,7 +524,11 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= betas = np.clip(betas, **largex_args) previous_exponents[i]["smallx"] = [fmt(alpha) for alpha in alphas] previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] - return yaml.dump(filtermap, Dumper=yaml.RoundTripDumper) + with tempfile.NamedTemporaryFile() as fp: + path = Path(fp.name) + yaml_rt.dump(filtermap, path) + yaml_string = fp.read().decode("utf-8") + return yaml_string def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_description=None): @@ -539,13 +543,18 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip ``` """ - filtermap = yaml.load(iterate_preprocessing_yaml, yaml.RoundTripLoader) + filtermap = yaml_rt.load(iterate_preprocessing_yaml) # update description if necessary if _updated_description is not None: filtermap["description"] = _updated_description - return yaml.dump(filtermap, Dumper=yaml.RoundTripDumper) + with tempfile.NamedTemporaryFile() as fp: + path = Path(fp.name) + yaml_rt.dump(filtermap, path) + yaml_string = fp.read().decode("utf-8") + + return yaml_string def iterated_runcard_yaml(fit, update_runcard_description_yaml): @@ -578,7 +587,7 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): ... 
f.write(yaml_output) """ - filtermap = yaml.load(update_runcard_description_yaml, yaml.RoundTripLoader) + filtermap = yaml_rt.load(update_runcard_description_yaml) # iterate t0 filtermap["datacuts"]["t0pdfset"] = fit.name @@ -605,4 +614,9 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): if "fiatlux" in filtermap: filtermap['fiatlux']['luxset'] = fit.name - return yaml.dump(filtermap, Dumper=yaml.RoundTripDumper) + with tempfile.NamedTemporaryFile() as fp: + path = Path(fp.name) + yaml_rt.dump(filtermap, path) + yaml_string = fp.read().decode("utf-8") + + return yaml_string diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 944d11c5cb..df698ca34d 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -13,10 +13,9 @@ import numpy as np from reportengine.checks import check, make_check -from reportengine.compat import yaml import validphys.cuts from validphys.process_options import PROCESSES -from validphys.utils import generate_path_filtered_data +from validphys.utils import generate_path_filtered_data, yaml_safe log = logging.getLogger(__name__) @@ -156,7 +155,7 @@ def default_filter_settings_input(): """Return a FilterDefaults dataclass with the default hardcoded filter settings. These are defined in ``defaults.yaml`` in the ``validphys.cuts`` module. """ - return FilterDefaults(**yaml.safe_load(read_text(validphys.cuts, "defaults.yaml"))) + return FilterDefaults(**yaml_safe.load(read_text(validphys.cuts, "defaults.yaml"))) def default_filter_rules_input(): @@ -164,7 +163,7 @@ def default_filter_rules_input(): Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. """ - list_rules = yaml.safe_load(read_text(validphys.cuts, "filters.yaml")) + list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml")) return tuple(FilterRule(**rule) for rule in list_rules) diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py index d87138a2e2..8bd4c5eae0 100644 --- a/validphys2/src/validphys/fitdata.py +++ b/validphys2/src/validphys/fitdata.py @@ -1,6 +1,7 @@ """ Utilities for loading data from fit folders """ + from collections import OrderedDict, defaultdict, namedtuple from io import StringIO import json @@ -12,12 +13,12 @@ from reportengine import collect from reportengine.checks import CheckError, make_argcheck -from reportengine.compat import yaml from reportengine.floatformatting import ValueErrorTuple from reportengine.table import table from validphys import checks from validphys.core import PDF from validphys.plotoptions.core import get_info +from validphys.utils import yaml_safe # TODO: Add more stuff here as needed for postfit LITERAL_FILES = ['chi2exps.log'] @@ -431,7 +432,7 @@ def _get_fitted_index(pdf, i): """Return the nnfit index for the replica i""" p = pdf.infopath.with_name(f'{pdf.name}_{i:04d}.dat') with open(p) as f: - it = yaml.safe_load_all(f) + it = yaml_safe.load_all(f) metadata = next(it) return metadata['FromMCReplica'] diff --git a/validphys2/src/validphys/lhaindex.py b/validphys2/src/validphys/lhaindex.py index 9a10d66ff1..6b4ee00db0 100644 --- a/validphys2/src/validphys/lhaindex.py +++ b/validphys2/src/validphys/lhaindex.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- #!/usr/bin/env python """ Created on Fri Jan 23 12:11:23 2015 @@ -13,8 +12,8 @@ from pathlib import Path import re -from reportengine.compat import yaml from validphys.lhapdf_compatibility import lhapdf +from 
validphys.utils import yaml_safe _indexes_to_names = None _names_to_indexes = None @@ -123,10 +122,10 @@ def infofilename(name): raise FileNotFoundError(name + ".info") -@lru_cache() +@lru_cache def parse_info(name): with open(infofilename(name)) as infofile: - result = yaml.YAML(typ='safe', pure=True).load(infofile) + result = yaml_safe.load(infofile) return result diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index 92c767cccc..d0d8f9d861 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -11,9 +11,9 @@ import numpy as np import pandas as pd -from reportengine.compat import yaml from validphys import lhaindex from validphys.core import PDF +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -28,8 +28,8 @@ def split_sep(f): def read_xqf_from_file(f): lines = split_sep(f) try: - (xtext, qtext, ftext) = [next(lines) for _ in range(3)] - except StopIteration: + (xtext, qtext, ftext) = (next(lines) for _ in range(3)) + except RuntimeError: return None xvals = np.fromstring(xtext, sep=" ") qvals = np.fromstring(qtext, sep=" ") @@ -69,7 +69,7 @@ def load_replica(pdf, rep, kin_grids=None): path = osp.join(lhaindex.finddir(pdf_name), pdf_name + "_" + suffix + ".dat") - log.debug("Loading replica {rep} at {path}".format(rep=rep, path=path)) + log.debug(f"Loading replica {rep} at {path}") with open(path, 'rb') as inn: header = b"".join(split_sep(inn)) @@ -315,7 +315,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): # copy replica 0 shutil.copy(base / f'{pdf}_0000.dat', set_root / f"{set_name }_0000.dat") - with open(base / f'{pdf}.info', 'r') as inn, open(set_root / f'{set_name }.info', 'w') as out: + with open(base / f'{pdf}.info') as inn, open(set_root / f'{set_name }.info', 'w') as out: for l in inn.readlines(): if l.find("SetDesc:") >= 0: out.write(f"SetDesc: \"Hessian {pdf}_hessian\"\n") @@ -326,7 +326,8 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): else: out.write(l) if extra_fields is not None: - yaml.dump(extra_fields, out, default_flow_style=False) + yaml_safe.default_flow_style = False + yaml_safe.dump(extra_fields, out) _headers, grids = load_all_replicas(pdf) result = (big_matrix(grids).dot(V)).add(grids[0], axis=0) diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index 230c26b076..e8312b6c10 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -20,7 +20,6 @@ from nnpdf_data import legacy_to_new_mapping, path_vpdata from reportengine import filefinder -from reportengine.compat import yaml from validphys import lhaindex from validphys.commondataparser import load_commondata_old, parse_new_metadata, parse_set_metadata from validphys.core import ( @@ -37,9 +36,8 @@ InternalCutsWrapper, PositivitySetSpec, TheoryIDSpec, - peek_commondata_metadata, ) -from validphys.utils import generate_path_filtered_data, tempfile_cleaner +from validphys.utils import generate_path_filtered_data, tempfile_cleaner, yaml_safe log = logging.getLogger(__name__) NNPDF_DIR = "NNPDF" @@ -132,14 +130,13 @@ def _get_nnpdf_profile(profile_path=None): the python prefix (``Path(sys.prefix)/"share"/"NNPDF"``) will be used """ - yaml_reader = yaml.YAML(typ='safe', pure=True) home_config = pathlib.Path().home() / ".config" config_folder = pathlib.Path(os.environ.get("XDG_CONFIG_HOME", home_config)) / NNPDF_DIR # Set all default values profile_content = pkgutil.get_data("validphys", 
"nnprofile_default.yaml") - profile_dict = yaml_reader.load(profile_content) + profile_dict = yaml_safe.load(profile_content) # including the data_path to the validphys package profile_dict.setdefault("data_path", path_vpdata) @@ -156,7 +153,7 @@ def _get_nnpdf_profile(profile_path=None): if profile_path is not None: with open(profile_path, encoding="utf-8") as f: - profile_entries = yaml_reader.load(f) + profile_entries = yaml_safe.load(f) if profile_entries is not None: profile_dict.update(profile_entries) @@ -423,7 +420,12 @@ def check_commondata( ) break # try new commondata format - old_path = fit.path / "filter" / legacy_name / f"filtered_uncertainties_{legacy_name}.yaml" + old_path = ( + fit.path + / "filter" + / legacy_name + / f"filtered_uncertainties_{legacy_name}.yaml" + ) if old_path.exists(): data_path = old_path.with_name(f"filtered_data_{legacy_name}.yaml") unc_path = old_path.with_name(f"filtered_uncertainties_{legacy_name}.yaml") @@ -533,7 +535,7 @@ def check_compound(self, theoryID, setname, cfac): raise CompoundNotFound(msg) # This is a little bit funny, but is the least amount of thinking... yaml_format = 'FK:\n' + re.sub('FK:', ' - ', txt) - data = yaml.safe_load(yaml_format) + data = yaml_safe.load(yaml_format) # we have to split out 'FK_' the extension to get a name consistent # with everything else try: diff --git a/validphys2/src/validphys/photon/compute.py b/validphys2/src/validphys/photon/compute.py index 32ef57f388..6466c4ddb7 100644 --- a/validphys2/src/validphys/photon/compute.py +++ b/validphys2/src/validphys/photon/compute.py @@ -122,8 +122,7 @@ def __init__(self, theoryid, lux_params, replicas): alpha = Alpha(theory, fiatlux_runcard["q2_max"]) with tempfile.NamedTemporaryFile(mode="w") as tmp: - with tmp.file as tmp_file: - tmp_file.write(yaml.dump(fiatlux_runcard)) + yaml.dump(fiatlux_runcard, tmp) self.lux[replica] = fiatlux.FiatLux(tmp.name) # we have a dict but fiatlux wants a yaml file # TODO : once that fiatlux will allow dictionaries diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index e636d81531..5eaca0c9cb 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -3,15 +3,15 @@ Tools for filtering replica sets based on criteria on the replicas. 
""" + import logging import re import shutil from reportengine.checks import check, make_argcheck -from reportengine.compat import yaml from validphys.core import PDF from validphys.renametools import rename_pdf -from validphys.utils import tempfile_cleaner +from validphys.utils import tempfile_cleaner, yaml_safe log = logging.getLogger(__name__) @@ -96,9 +96,8 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = #  Fixup the info file info_file = (temp_pdf / temp_pdf.name).with_suffix('.info') - with open(info_file, 'r') as stream: - yaml_obj = yaml.YAML() - info_yaml = yaml_obj.load(stream) + with open(info_file) as stream: + info_yaml = yaml_safe.load(stream) info_yaml['NumMembers'] = new_nrep info_yaml['ErrorType'] += '+as' extra_desc = '; '.join( @@ -106,7 +105,7 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = ) info_yaml['SetDesc'] += f"; {extra_desc}" with open(info_file, 'w') as stream: - yaml_obj.dump(info_yaml, stream) + yaml_safe.dump(info_yaml, stream) # Rename the base pdf to the final name rename_pdf(temp_pdf, pdf.name, target_name) diff --git a/validphys2/src/validphys/scripts/vp_comparefits.py b/validphys2/src/validphys/scripts/vp_comparefits.py index 7c79a7c05e..eb2251038a 100644 --- a/validphys2/src/validphys/scripts/vp_comparefits.py +++ b/validphys2/src/validphys/scripts/vp_comparefits.py @@ -1,18 +1,17 @@ -import sys -import os import logging +import os +import sys # TODO: Look into making these lazy imports import prompt_toolkit from prompt_toolkit.completion import WordCompleter -from reportengine.compat import yaml from reportengine.colors import t - +from validphys import compareclosuretemplates, comparefittemplates from validphys.app import App from validphys.loader import RemoteLoader -from validphys import comparefittemplates, compareclosuretemplates -from validphys.promptutils import confirm, KeywordsWithCache +from validphys.promptutils import KeywordsWithCache, confirm +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -241,7 +240,7 @@ def get_config(self): with open(self.args['config_yml']) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. 
- c = yaml.safe_load(f) + c = yaml_safe.load(f) c.update(self.complete_mapping()) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_deltachi2.py b/validphys2/src/validphys/scripts/vp_deltachi2.py index 8762ede5c3..41ca8543eb 100644 --- a/validphys2/src/validphys/scripts/vp_deltachi2.py +++ b/validphys2/src/validphys/scripts/vp_deltachi2.py @@ -2,27 +2,19 @@ import os import pwd -from reportengine.compat import yaml - from validphys import deltachi2templates from validphys.app import App - +from validphys.utils import yaml_safe log = logging.getLogger(__name__) class HyperoptPlotApp(App): def add_positional_arguments(self, parser): - """ Wrapper around argumentparser """ - parser.add_argument( - "fit", help="Name of the fit", - ) - parser.add_argument( - "hessian_pdfs", help="Name of the set of Hessian pdfs", - ) - parser.add_argument( - "--Q", help="Energy Scale in GeV", type=float, default=1.7, - ) + """Wrapper around argumentparser""" + parser.add_argument("fit", help="Name of the fit") + parser.add_argument("hessian_pdfs", help="Name of the set of Hessian pdfs") + parser.add_argument("--Q", help="Energy Scale in GeV", type=float, default=1.7) # Report meta data parser.add_argument( "--author", @@ -30,9 +22,7 @@ def add_positional_arguments(self, parser): type=str, default=pwd.getpwuid(os.getuid())[4].replace(",", ""), ) - parser.add_argument( - "--title", help="Add custom title to the report's meta data", type=str, - ) + parser.add_argument("--title", help="Add custom title to the report's meta data", type=str) parser.add_argument( "--keywords", help="Add keywords to the report's meta data. The keywords must be provided as a list", @@ -67,14 +57,8 @@ def complete_mapping(self): "normalize_to": fit, } - autosettings["decomposition"] = { - "normalize_to": hessian_pdfs, - "pdf": hessian_pdfs, - } - autosettings["MC_Hessian_compare"] = { - "pdfs": [hessian_pdfs, fit], - "normalize_to": fit, - } + autosettings["decomposition"] = {"normalize_to": hessian_pdfs, "pdf": hessian_pdfs} + autosettings["MC_Hessian_compare"] = {"pdfs": [hessian_pdfs, fit], "normalize_to": fit} return autosettings @@ -85,7 +69,7 @@ def get_config(self): with open(runcard) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. 
- c = yaml.safe_load(f) + c = yaml_safe.load(f) c.update(complete_mapping) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_hyperoptplot.py b/validphys2/src/validphys/scripts/vp_hyperoptplot.py index 1faa875070..0d767f2aa8 100644 --- a/validphys2/src/validphys/scripts/vp_hyperoptplot.py +++ b/validphys2/src/validphys/scripts/vp_hyperoptplot.py @@ -1,22 +1,21 @@ -from validphys.app import App -from validphys.loader import Loader, HyperscanNotFound -from validphys import hyperplottemplates -from reportengine.compat import yaml -import pwd +import logging import os +import pwd -import logging +from validphys import hyperplottemplates +from validphys.app import App +from validphys.loader import HyperscanNotFound, Loader +from validphys.utils import yaml_safe log = logging.getLogger(__name__) class HyperoptPlotApp(App): def add_positional_arguments(self, parser): - """ Wrapper around argumentparser """ + """Wrapper around argumentparser""" # Hyperopt settings parser.add_argument( - "hyperopt_name", - help="Folder of the hyperopt fit to generate the report for", + "hyperopt_name", help="Folder of the hyperopt fit to generate the report for" ) parser.add_argument( "-l", @@ -73,16 +72,12 @@ def add_positional_arguments(self, parser): type=str, default=pwd.getpwuid(os.getuid())[4].replace(",", ""), ) - parser.add_argument( - "--title", - help="Add custom title to the report's meta data", - type=str, - ) + parser.add_argument("--title", help="Add custom title to the report's meta data", type=str) parser.add_argument( "--keywords", help="Add keywords to the report's meta data. The keywords must be provided as a list", type=list, - default=[] + default=[], ) args = parser.parse_args() @@ -104,7 +99,7 @@ def complete_mapping(self): hyperop_folder = hyperop_folder[:-1] with open(hyperopt_filter) as f: - filtercard = yaml.safe_load(f) + filtercard = yaml_safe.load(f) folder_path = hyperop_folder index_slash = folder_path.rfind("/") + 1 @@ -127,7 +122,7 @@ def complete_mapping(self): "combine": args["combine"], "autofilter": args["autofilter"], "debug": args["debug"], - "loss_target": args["loss_target"] + "loss_target": args["loss_target"], } try: @@ -148,7 +143,7 @@ def get_config(self): with open(self.args['config_yml']) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.safe_load(f) + c = yaml_safe.load(f) c.update(self.complete_mapping()) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_nextfitruncard.py b/validphys2/src/validphys/scripts/vp_nextfitruncard.py index ecf379e014..03eeeddb6a 100644 --- a/validphys2/src/validphys/scripts/vp_nextfitruncard.py +++ b/validphys2/src/validphys/scripts/vp_nextfitruncard.py @@ -16,16 +16,16 @@ """ import argparse +import logging import os import pathlib import sys -import logging + import prompt_toolkit from reportengine import colors -from reportengine.compat import yaml - from validphys.api import API +from validphys.utils import yaml_safe # arguments for np.clip to enforce integrability. # key should be identical to runcard key, first inner dictionary can contain @@ -40,11 +40,10 @@ "t8": {"smallx": {"a_min": None, "a_max": 1.0}}, } + # Take command line arguments def process_args(): - parser = argparse.ArgumentParser( - description="Script to generate iterated fit runcard." 
- ) + parser = argparse.ArgumentParser(description="Script to generate iterated fit runcard.") parser.add_argument("input_fit", help="Name of input fit.") parser.add_argument( "output_dir", @@ -65,7 +64,7 @@ def process_args(): "Do not enforce any preprocessing constraints, which are chosen to " "ensure integrability. By default the following constraints are " f"used: {PREPROCESSING_LIMS}" - ) + ), ) args = parser.parse_args() return args @@ -125,7 +124,7 @@ def main(): preproc_lims = PREPROCESSING_LIMS log.info( "The following constraints will be used for preprocessing ranges, \n%s", - yaml.dump(preproc_lims), + yaml_safe.dump(preproc_lims, sys.stdout), ) else: # don't enforce any limits. @@ -134,9 +133,7 @@ def main(): updated_description = interactive_description(description) iterated_runcard_yaml = API.iterated_runcard_yaml( - fit=input_fit, - _updated_description=updated_description, - _flmap_np_clip_arg=preproc_lims, + fit=input_fit, _updated_description=updated_description, _flmap_np_clip_arg=preproc_lims ) # Write new runcard to file diff --git a/validphys2/src/validphys/scripts/vp_pdffromreplicas.py b/validphys2/src/validphys/scripts/vp_pdffromreplicas.py index e68df56dcc..4c87e8584a 100755 --- a/validphys2/src/validphys/scripts/vp_pdffromreplicas.py +++ b/validphys2/src/validphys/scripts/vp_pdffromreplicas.py @@ -27,13 +27,12 @@ import tempfile import pandas as pd -from reportengine import colors -from reportengine.compat import yaml +from reportengine import colors from validphys import lhaindex from validphys.lhio import new_pdf_from_indexes from validphys.loader import FallbackLoader - +from validphys.utils import yaml_safe log = logging.getLogger() log.setLevel(logging.INFO) @@ -53,9 +52,7 @@ def check_none_or_gt_one(value): try: ivalue = int(value) except ValueError as e: - raise argparse.ArgumentTypeError( - f"{value} cannot be interpreted as an integer." 
- ) from e + raise argparse.ArgumentTypeError(f"{value} cannot be interpreted as an integer.") from e if ivalue <= 0: raise argparse.ArgumentTypeError(f"{value} is an invalid positive int value.") return ivalue @@ -92,9 +89,7 @@ def main(): input_pdf = loader.check_pdf(args.input_pdf) if input_pdf.error_type != "replicas": - log.error( - "Error type of input PDF must be `replicas` not `%s`", input_pdf.error_type - ) + log.error("Error type of input PDF must be `replicas` not `%s`", input_pdf.error_type) sys.exit(1) if args.replicas > len(input_pdf) - 1: @@ -115,11 +110,7 @@ def main(): with tempfile.TemporaryDirectory() as f: try: new_pdf_from_indexes( - input_pdf, - indices, - set_name=output_name, - folder=pathlib.Path(f), - installgrid=True, + input_pdf, indices, set_name=output_name, folder=pathlib.Path(f), installgrid=True ) except FileExistsError: log.error( @@ -133,39 +124,30 @@ def main(): "PDFs in the LHAPDF format are required to have 2 replicas, copying " "replica 1 to replica 2" ) - base_name = str( - pathlib.Path(lhaindex.get_lha_datapath()) / output_name / output_name - ) + base_name = str(pathlib.Path(lhaindex.get_lha_datapath()) / output_name / output_name) - shutil.copyfile( - base_name + "_0001.dat", base_name + "_0002.dat", - ) + shutil.copyfile(base_name + "_0001.dat", base_name + "_0002.dat") # fixup info file - with open(base_name + ".info", "r") as f: - info_file = yaml.safe_load(f) + with open(base_name + ".info") as f: + info_file = yaml_safe.load(f) info_file["NumMembers"] = 3 with open(base_name + ".info", "w") as f: - yaml.dump(info_file, f) + yaml_safe.dump(info_file, f) # here we update old indices in case the user creates # the original_index_mapping.csv - indices = 2*indices + indices = 2 * indices if args.save_indices: index_file = ( - pathlib.Path(lhaindex.get_lha_datapath()) - / output_name - / "original_index_mapping.csv" + pathlib.Path(lhaindex.get_lha_datapath()) / output_name / "original_index_mapping.csv" ) log.info("Saving output PDF/input PDF replica index mapping to %s", index_file) with open(index_file, "w+") as f: pd.DataFrame( list(enumerate(indices, 1)), - columns=[ - f"{output_name} replica index", - f"{args.input_pdf} replica index", - ], + columns=[f"{output_name} replica index", f"{args.input_pdf} replica index"], ).to_csv(f, index=False) diff --git a/validphys2/src/validphys/scripts/vp_pdfrename.py b/validphys2/src/validphys/scripts/vp_pdfrename.py index 5fd0124cbf..68c9a3748d 100755 --- a/validphys2/src/validphys/scripts/vp_pdfrename.py +++ b/validphys2/src/validphys/scripts/vp_pdfrename.py @@ -21,9 +21,8 @@ import lhapdf from reportengine import colors -from reportengine.compat import yaml - from validphys.renametools import rename_pdf +from validphys.utils import yaml_rt # Taking command line arguments @@ -45,13 +44,9 @@ def process_args(): Quotations should be used for this field.""", ) parser.add_argument( - "--data-version", - type=int, - help="The data version to be added to the PDF .info file.", - ) - parser.add_argument( - "--index", help="The set index to be added to the PDF .info file." + "--data-version", type=int, help="The data version to be added to the PDF .info file." 
) + parser.add_argument("--index", help="The set index to be added to the PDF .info file.") parser.add_argument( "--reference", help="The reference to be added to the PDF .info file, usually an arXiv reference.", @@ -62,9 +57,7 @@ def process_args(): action="store_true", help="Place the output LHAPDF in the LHAPDF directory.", ) - parser.add_argument( - "-c", "--compress", action="store_true", help="Compress the resulting PDF." - ) + parser.add_argument("-c", "--compress", action="store_true", help="Compress the resulting PDF.") args = parser.parse_args() return args @@ -81,8 +74,7 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): infopath = pdf_path / f"{pdf_name}.info" with open(infopath) as f: - y = yaml.YAML() - res = y.load(f) + res = yaml_rt.load(f) # If a field entry is not provided, then we revert to the existing # field in pre-existing info file. @@ -102,12 +94,11 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): res["Reference"] = field_dict["reference"] with open(infopath, "w") as f: - y.default_flow_style = True - y.dump(res, f) + yaml_rt.dump(res, f) def compress(lhapdf_path: pathlib.Path): - """ Function to compress the resulting PDF. Dereferences are handled + """Function to compress the resulting PDF. Dereferences are handled in order to account for possible symbolic linking of grids. """ output = lhapdf_path.name + ".tar.gz" @@ -136,9 +127,7 @@ def main(): sys.exit(1) if not source_path.is_dir(): - log.error( - f"Could not find fit. Path '{source_path.absolute()}' is not a directory." - ) + log.error(f"Could not find fit. Path '{source_path.absolute()}' is not a directory.") sys.exit(1) with tempfile.TemporaryDirectory(dir=dest_path.parent) as tmp: @@ -156,6 +145,7 @@ def main(): if args.compress: from validphys.renametools import Spinner + log.info("Compressing output") with Spinner(): compress(dest_path) diff --git a/validphys2/src/validphys/scripts/wiki_upload.py b/validphys2/src/validphys/scripts/wiki_upload.py index bfc6eb1f12..3be7618b37 100644 --- a/validphys2/src/validphys/scripts/wiki_upload.py +++ b/validphys2/src/validphys/scripts/wiki_upload.py @@ -2,17 +2,16 @@ A more interactive version of vp_upload """ -#Note that the imports are done as late as possible to improve the speed of -#the command line. - -import sys -import pathlib -import os +# Note that the imports are done as late as possible to improve the speed of +# the command line. import logging +import os +import pathlib +import sys -import pygments from prompt_toolkit.shortcuts import prompt +import pygments from reportengine import colors from validphys.promptutils import confirm @@ -21,14 +20,17 @@ log.setLevel(logging.INFO) log.addHandler(colors.ColorHandler()) + def handle_single_file(filename): import tempfile + out = pathlib.Path(tempfile.mkdtemp(prefix='vp-upload')) filename = pathlib.Path(filename) p = out / filename.name p.symlink_to(filename.absolute()) return out, filename.name + def edit_settings(d): title = d.get('title', '') author = d.get('author', '') @@ -48,10 +50,12 @@ def edit_settings(d): kwinp = prompt("keywords: ", default=','.join(keywords)) d['keywords'] = [k.strip() for k in kwinp.split(',') if k] + def handle_meta_interactive(output): metapath = output / 'meta.yaml' - from reportengine.compat import yaml - #The yaml lexer is broken. Use something else. + from validphys.utils import yaml_safe + + # The yaml lexer is broken. Use something else. 
lex = pygments.lexers.get_lexer_by_name('pkgconfig')
     fmt = pygments.formatters.TerminalFormatter()
     if metapath.exists():
@@ -66,35 +70,37 @@ def handle_meta_interactive(output):
         edit = not confirm(msg, default=True)
 
         if edit:
-            d = yaml.load(content, yaml.RoundTripLoader)
+            d = yaml_safe.load(content)
         else:
             return
     else:
-        #We are making these the empty string, because prompt_toolkit doesn't
-        #support default=None.
-        d = {'title': '', 'author': '', 'keywords':''}
+        # We are making these the empty string, because prompt_toolkit doesn't
+        # support default=None.
+        d = {'title': '', 'author': '', 'keywords': ''}
 
     import io
+
     while True:
         edit_settings(d)
         print("Metadata:")
         s = io.StringIO()
-        yaml.dump(d, s, yaml.RoundTripDumper)
+        yaml_safe.dump(d, s)
         metastr = s.getvalue()
         print(pygments.highlight(metastr, lex, fmt))
         if confirm("Confirm?"):
             break
 
-    with open(metapath, 'w') as f: f.write(metastr)
+    with open(metapath, 'w') as f:
+        f.write(metastr)
 
+
 def main():
     import argparse
+
     parser = argparse.ArgumentParser(description="Upload output to the NNPDF server.")
     parser.add_argument("output", help="Folder to upload.")
     args = parser.parse_args()
@@ -118,7 +124,6 @@ def main():
     uploader = uploadutils.ReportUploader()
     upargs = output
 
-
     try:
         with uploader.upload_or_exit_context(upargs):
             handle_meta_interactive(upload_output)
diff --git a/validphys2/src/validphys/tests/photon/test_compute.py b/validphys2/src/validphys/tests/photon/test_compute.py
index dd846bdcf2..ff0b64710b 100644
--- a/validphys2/src/validphys/tests/photon/test_compute.py
+++ b/validphys2/src/validphys/tests/photon/test_compute.py
@@ -76,8 +76,7 @@ def test_photon():
 
     # load fiatlux
     with tempfile.NamedTemporaryFile(mode="w") as tmp:
-        with tmp.file as tmp_file:
-            tmp_file.write(yaml.dump(FIATLUX_DEFAULT))
+        yaml.dump(FIATLUX_DEFAULT, tmp)
         lux = fiatlux.FiatLux(tmp.name)
 
     alpha = Alpha(theory, fiatlux_default["q2_max"])
diff --git a/validphys2/src/validphys/tests/test_effexponents.py b/validphys2/src/validphys/tests/test_effexponents.py
index f069402d60..1944e5523c 100644
--- a/validphys2/src/validphys/tests/test_effexponents.py
+++ b/validphys2/src/validphys/tests/test_effexponents.py
@@ -1,10 +1,8 @@
-import pytest
-
-from reportengine.compat import yaml
 from validphys.api import API
 from validphys.loader import FallbackLoader as Loader
 from validphys.scripts.vp_nextfitruncard import PREPROCESSING_LIMS
 from validphys.tests.conftest import FIT, FIT_ITERATED
+from validphys.utils import yaml_safe
 
 
 def test_next_runcard():
@@ -23,10 +21,10 @@ def test_next_runcard():
     # We load it using the context manager because at_input has been modified
     # to load various keys that are not present in the actual runcard for
     # backwards compatibility
-    with open(l.check_fit(FIT_ITERATED).path / "filter.yml", "r") as f:
-        ite2_runcard = yaml.safe_load(f)
+    with open(l.check_fit(FIT_ITERATED).path / "filter.yml") as f:
+        ite2_runcard = yaml_safe.load(f)
 
-    predicted_ite2_runcard = yaml.safe_load(
+    predicted_ite2_runcard = yaml_safe.load(
         API.iterated_runcard_yaml(fit=FIT, _flmap_np_clip_arg=PREPROCESSING_LIMS)
     )
 
diff --git a/validphys2/src/validphys/tests/test_postfit.py b/validphys2/src/validphys/tests/test_postfit.py
index a3f30ac583..31f2ad6cdb 100644
--- a/validphys2/src/validphys/tests/test_postfit.py
+++ b/validphys2/src/validphys/tests/test_postfit.py
@@ -3,14 +3,15 @@
 
 Module for testing postfit. 
""" + import json -import subprocess as sp import os import shutil +import subprocess as sp from validphys.loader import FallbackLoader as Loader from validphys.tests.conftest import FIT -from reportengine.compat import yaml +from validphys.utils import yaml_safe def test_postfit(tmp): @@ -76,8 +77,8 @@ def test_postfit(tmp): # [File in PDF set, file in fit] files = [pdfsetpath / f"{TMPFIT}_{x:04d}.dat", postfitpath / f"replica_{x}/{TMPFIT}.dat"] for file in files: - with open(file, "r") as f: - data = yaml.safe_load_all(f) + with open(file) as f: + data = yaml_safe.load_all(f) metadata = next(data) repnos.add(metadata["FromMCReplica"]) assert ( @@ -87,8 +88,8 @@ def test_postfit(tmp): # Check that number of PDF members is written correctly infopath = postfitpath / f"{TMPFIT}/{TMPFIT}.info" - with open(infopath, "r") as f: - data = yaml.safe_load(f) + with open(infopath) as f: + data = yaml_safe.load(f) # Add one to nrep to account for replica 0 assert ( data["NumMembers"] == nrep + 1 @@ -96,7 +97,7 @@ def test_postfit(tmp): # Check that chi2 and arclength thresholds are recorded correctly vetopath = postfitpath / "veto_count.json" - with open(vetopath, "r") as f: + with open(vetopath) as f: veto_count = json.load(f) assert ( veto_count["chi2_threshold"] == chi2_threshold diff --git a/validphys2/src/validphys/tests/test_scalevariationtheoryids.py b/validphys2/src/validphys/tests/test_scalevariationtheoryids.py index 71ad3d4183..19ac62c83f 100644 --- a/validphys2/src/validphys/tests/test_scalevariationtheoryids.py +++ b/validphys2/src/validphys/tests/test_scalevariationtheoryids.py @@ -2,11 +2,9 @@ import importlib.resources as resources import pytest -from ruamel.yaml import YAML import validphys.scalevariations - -yaml = YAML() +from validphys.utils import yaml_safe def test_unique_theoryid_variations(): @@ -15,7 +13,7 @@ def test_unique_theoryid_variations(): """ file_path = resources.files(validphys.scalevariations).joinpath("scalevariationtheoryids.yaml") with file_path.open("r") as file: - data = yaml.load(file) + data = yaml_safe.load(file) thids = [k["theoryid"] for k in data["scale_variations_for"]] counter = Counter(thids) duplicates = [item for item, count in counter.items() if count > 1] diff --git a/validphys2/src/validphys/tests/test_theorydbutils.py b/validphys2/src/validphys/tests/test_theorydbutils.py index a72d86900d..c4349ac272 100644 --- a/validphys2/src/validphys/tests/test_theorydbutils.py +++ b/validphys2/src/validphys/tests/test_theorydbutils.py @@ -1,10 +1,10 @@ import pytest -from ruamel import yaml from validobj import ValidationError from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase, fetch_all, fetch_theory from validphys.api import API from validphys.loader import Loader +from validphys.utils import yaml_safe L = Loader() DBPATH = L.theorydb_folder @@ -33,7 +33,7 @@ def _dump_and_check_error(tdict, tmp, bad_number=999): """Dump theory dict to a file and load expecting an error""" tdict["ID"] = bad_number ofile = tmp / f"{bad_number}.yaml" - yaml.dump(tdict, ofile.open("w")) + yaml_safe.dump(tdict, ofile.open("w")) with pytest.raises(ValidationError): fetch_theory(tmp, bad_number) diff --git a/validphys2/src/validphys/uploadutils.py b/validphys2/src/validphys/uploadutils.py index 2420845282..f3eb486f69 100644 --- a/validphys2/src/validphys/uploadutils.py +++ b/validphys2/src/validphys/uploadutils.py @@ -24,9 +24,9 @@ from prompt_toolkit.completion import WordCompleter from reportengine.colors import t -from reportengine.compat import yaml from 
validphys.loader import Loader, RemoteLoader
 from validphys.renametools import Spinner
+from validphys.utils import yaml_safe
 
 log = logging.getLogger(__name__)
 
@@ -404,7 +404,7 @@ def interactive_meta(path):
 
     meta_dict = {"title": title, "author": author, "keywords": keywords}
     with open(path / "meta.yaml", "w") as stream:
-        yaml.safe_dump(meta_dict, stream)
+        yaml_safe.dump(meta_dict, stream)
 
 
 def check_input(path):
diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py
index 1c77fd5b5d..042d8b3925 100644
--- a/validphys2/src/validphys/utils.py
+++ b/validphys2/src/validphys/utils.py
@@ -4,6 +4,11 @@
 import tempfile
 
 import numpy as np
+from ruamel.yaml import YAML
+
+yaml_safe = YAML(typ='safe')
+yaml_rt = YAML(typ='rt')
+yaml_fast = YAML(typ='safe', pure=False)  # uses the C parser if available (not faster than yaml_safe)
 
 
 def generate_path_filtered_data(fit_path, setname):