From 0dffb82df3115bf58d2a77376748a318401b7278 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Mon, 25 Nov 2024 14:13:46 +0000 Subject: [PATCH 01/20] update for ruamel version, some issues still to be fixed --- conda-recipe/meta.yaml | 2 +- extra_tests/regression_checks.py | 4 +- n3fit/src/evolven3fit/evolve.py | 6 +- n3fit/src/evolven3fit/utils.py | 9 +-- n3fit/src/n3fit/io/writer.py | 4 +- n3fit/src/n3fit/scripts/n3fit_exec.py | 17 ++-- n3fit/src/n3fit/scripts/vp_setupfit.py | 17 ++-- .../n3fit/tests/regressions/quickcard_qed.yml | 2 +- n3fit/src/n3fit/tests/test_evolven3fit.py | 4 +- n3fit/src/n3fit/tests/test_fit.py | 6 +- nnpdf_data/nnpdf_data/__init__.py | 6 +- nnpdf_data/nnpdf_data/utils.py | 11 +-- pyproject.toml | 3 +- validphys2/serverscripts/index-reports.py | 77 +++++++++++-------- validphys2/src/validphys/commondataparser.py | 38 +++------ validphys2/src/validphys/config.py | 14 ++-- validphys2/src/validphys/core.py | 8 +- validphys2/src/validphys/coredata.py | 7 +- validphys2/src/validphys/eff_exponents.py | 34 +++++--- validphys2/src/validphys/filters.py | 8 +- validphys2/src/validphys/fitdata.py | 7 +- validphys2/src/validphys/lhaindex.py | 9 ++- validphys2/src/validphys/lhio.py | 9 ++- validphys2/src/validphys/loader.py | 16 ++-- validphys2/src/validphys/photon/compute.py | 3 +- validphys2/src/validphys/replica_selector.py | 13 ++-- .../src/validphys/scripts/vp_comparefits.py | 14 ++-- .../src/validphys/scripts/vp_deltachi2.py | 35 +++------ .../src/validphys/scripts/vp_hyperoptplot.py | 31 ++++---- .../validphys/scripts/vp_nextfitruncard.py | 20 ++--- .../validphys/scripts/vp_pdffromreplicas.py | 43 ++++------- .../src/validphys/scripts/vp_pdfrename.py | 30 +++----- .../src/validphys/scripts/wiki_upload.py | 38 +++++---- .../validphys/tests/photon/test_compute.py | 3 +- .../src/validphys/tests/test_effexponents.py | 10 +-- .../src/validphys/tests/test_postfit.py | 18 +++-- .../src/validphys/tests/test_theorydbutils.py | 3 +- validphys2/src/validphys/uploadutils.py | 6 +- 38 files changed, 290 insertions(+), 295 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index f88f3e7a0a..e364904f7e 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -44,7 +44,7 @@ requirements: - joblib - sphinx_rtd_theme >0.5 - sphinxcontrib-bibtex - - ruamel.yaml <0.18 + - ruamel.yaml >=0.15 test: requires: diff --git a/extra_tests/regression_checks.py b/extra_tests/regression_checks.py index 855c398fd5..b77f2c0cee 100644 --- a/extra_tests/regression_checks.py +++ b/extra_tests/regression_checks.py @@ -7,9 +7,11 @@ import subprocess as sp import pytest +from ruamel.yaml import YAML from n3fit.tests.test_fit import EXE, check_fit_results -from reportengine.compat import yaml + +yaml = YAML(typ='safe') REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regression_fits") diff --git a/n3fit/src/evolven3fit/evolve.py b/n3fit/src/evolven3fit/evolve.py index eb6e27960d..fe0bcbd5b3 100644 --- a/n3fit/src/evolven3fit/evolve.py +++ b/n3fit/src/evolven3fit/evolve.py @@ -8,10 +8,12 @@ from joblib import Parallel, delayed import numpy as np import psutil +from ruamel.yaml import YAML import eko from eko import basis_rotation, runner -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from . import eko_utils, utils @@ -164,7 +166,7 @@ def load_fit(usr_path): nnfitpath = usr_path / "nnfit" pdf_dict = {} for yaml_file in nnfitpath.glob(f"replica_*/{usr_path.name}.exportgrid"): - data = yaml.safe_load(yaml_file.read_text(encoding="UTF-8")) + data = yaml.load(yaml_file.read_text(encoding="UTF-8")) pdf_dict[yaml_file.parent.stem] = data return pdf_dict diff --git a/n3fit/src/evolven3fit/utils.py b/n3fit/src/evolven3fit/utils.py index 9f0b6ad9fd..700e95baec 100644 --- a/n3fit/src/evolven3fit/utils.py +++ b/n3fit/src/evolven3fit/utils.py @@ -2,9 +2,10 @@ import shutil import numpy as np +from ruamel.yaml import YAML from scipy.interpolate import interp1d -from reportengine.compat import yaml +yaml = YAML(typ='safe') from validphys.pdfbases import PIDS_DICT from .q2grids import Q2GRID_DEFAULT, Q2GRID_NNPDF40 @@ -57,7 +58,7 @@ def hasFlavor(self, pid): def read_runcard(usr_path): """Read the runcard and return the relevant information for evolven3fit""" - return yaml.safe_load((usr_path / "filter.yml").read_text(encoding="UTF-8")) + return yaml.load((usr_path / "filter.yml").read_text(encoding="UTF-8")) def get_theoryID_from_runcard(usr_path): @@ -99,9 +100,7 @@ def generate_q2grid(Q0, Qfin, Q_points, match_dict, nf0=None, legacy40=False): frac_of_point = np.log(match_scale / Q_ini) / np.log(Qfin / Q0) num_points = int(Q_points * frac_of_point) num_points_list.append(num_points) - grids.append( - np.geomspace(Q_ini**2, match_scale**2, num=num_points, endpoint=False) - ) + grids.append(np.geomspace(Q_ini**2, match_scale**2, num=num_points, endpoint=False)) Q_ini = match_scale num_points = Q_points - sum(num_points_list) grids.append(np.geomspace(Q_ini**2, Qfin**2, num=num_points)) diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 93842d23b4..1f31a06ad0 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -9,10 +9,12 @@ import logging import numpy as np +from ruamel.yaml import YAML import n3fit from n3fit import vpinterface -from reportengine.compat import yaml + +yaml = YAML(typ='safe') import validphys log = logging.getLogger(__name__) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 3f2d69559f..4664d0973b 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -9,10 +9,12 @@ import re import shutil import sys -import warnings + +from ruamel.yaml import YAML, error from reportengine import colors -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from reportengine.namespaces import NSList from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ @@ -108,15 +110,8 @@ class N3FitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore", yaml.error.MantissaNoDotYAML1_1Warning) - # We need to specify the older version 1.1 to support the - # older configuration files, which liked to use on/off for - # booleans. - # The floating point parsing yields warnings everywhere, which - # we suppress. - file_content = yaml.safe_load(o, version="1.1") - except yaml.error.YAMLError as e: + file_content = yaml.load(o) + except error.YAMLError as e: raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): raise ConfigError( diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 34de8dfea7..75312bea83 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -31,10 +31,12 @@ import re import shutil import sys -import warnings + +from ruamel.yaml import YAML, error from reportengine import colors -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ @@ -131,15 +133,8 @@ class SetupFitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): try: - with warnings.catch_warnings(): - warnings.simplefilter('ignore', yaml.error.MantissaNoDotYAML1_1Warning) - # We need to specify the older version 1.1 to support the - # older configuration files, which liked to use on/off for - # booleans. - # The floating point parsing yields warnings everywhere, which - # we suppress. - file_content = yaml.safe_load(o, version='1.1') - except yaml.error.YAMLError as e: + file_content = yaml.load(o) + except error.YAMLError as e: raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): raise ConfigError( diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml b/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml index 193edc8631..a4b4843bba 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml +++ b/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml @@ -33,7 +33,7 @@ trvlseed: 3 nnseed: 2 mcseed: 1 -load: "weights.weights.h5" +# load: "weights.weights.h5" separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer diff --git a/n3fit/src/n3fit/tests/test_evolven3fit.py b/n3fit/src/n3fit/tests/test_evolven3fit.py index 52799829f6..34e5de44dc 100644 --- a/n3fit/src/n3fit/tests/test_evolven3fit.py +++ b/n3fit/src/n3fit/tests/test_evolven3fit.py @@ -6,9 +6,11 @@ from evolven3fit import eko_utils, utils import numpy as np import pytest +from ruamel.yaml import YAML from eko import EKO, runner -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.api import API from validphys.pdfbases import PIDS_DICT diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index c6458e2d27..0e8b01fb4a 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -20,9 +20,11 @@ import h5py from numpy.testing import assert_allclose, assert_equal import pytest +from ruamel.yaml import YAML import n3fit -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.n3fit_data import replica_mcseed, replica_nnseed, replica_trvlseed log = logging.getLogger(__name__) @@ -45,7 +47,7 @@ def _load_json(info_file): def _load_exportgrid(exportgrid_file): """Loads the exportgrid file""" - return yaml.safe_load(exportgrid_file.read_text()) + return yaml.load(exportgrid_file.read_text()) def test_initialize_seeds(): diff --git a/nnpdf_data/nnpdf_data/__init__.py b/nnpdf_data/nnpdf_data/__init__.py index a6eeec892d..fccfa92ea6 100644 --- a/nnpdf_data/nnpdf_data/__init__.py +++ b/nnpdf_data/nnpdf_data/__init__.py @@ -1,9 +1,9 @@ from functools import lru_cache import pathlib -import ruamel.yaml as yaml +from ruamel.yaml import YAML -from ._version import __version__ +pass path_vpdata = pathlib.Path(__file__).parent path_commondata = path_vpdata / "commondata" @@ -12,7 +12,7 @@ _path_legacy_mapping = path_commondata / "dataset_names.yml" theory_cards = path_vpdata / "theory_cards" -_legacy_to_new_mapping_raw = yaml.YAML().load(_path_legacy_mapping) +_legacy_to_new_mapping_raw = YAML(typ='safe').load(_path_legacy_mapping) # Convert strings into a dictionary legacy_to_new_mapping = { k: ({"dataset": v} if isinstance(v, str) else v) for k, v in _legacy_to_new_mapping_raw.items() diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py index 33987134bd..64f762a281 100644 --- a/nnpdf_data/nnpdf_data/utils.py +++ b/nnpdf_data/nnpdf_data/utils.py @@ -1,12 +1,9 @@ import pathlib -import ruamel.yaml as yaml +from ruamel.yaml import YAML from validobj import ValidationError, parse_input -try: - Loader = yaml.CLoader -except AttributeError: - Loader = yaml.Loader +yaml = YAML(typ='safe') def parse_yaml_inp(input_yaml, spec): @@ -17,14 +14,14 @@ def parse_yaml_inp(input_yaml, spec): https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers """ input_yaml = pathlib.Path(input_yaml) - inp = yaml.load(input_yaml.read_text(encoding="utf-8"), Loader=Loader) + inp = yaml.load(input_yaml.read_text(encoding="utf-8")) try: return parse_input(inp, spec) except ValidationError as e: current_exc = e # In order to provide a more complete error information, use round_trip_load # to read the .yaml file again (insetad of using the CLoader) - current_inp = yaml.round_trip_load(input_yaml.open("r", encoding="utf-8")) + current_inp = yaml.load(input_yaml.open("r", encoding="utf-8")) error_text_lines = [] while current_exc: if hasattr(current_exc, 'wrong_field'): diff --git a/pyproject.toml b/pyproject.toml index d4f135d44c..de3464aa4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,10 +65,11 @@ matplotlib = "^3.9" pineappl = "^0.8.2" pandas = "*" numpy = "*" +"ruamel.yaml" = "^0.15" validobj = "*" prompt_toolkit = "*" # Reportengine needs to be installed from git -reportengine = { git = "https://github.com/NNPDF/reportengine" } +reportengine = { git = "https://github.com/NNPDF/reportengine", branch="update_yaml" } # Fit psutil = "*" tensorflow = "*" diff --git a/validphys2/serverscripts/index-reports.py b/validphys2/serverscripts/index-reports.py index 0c910e6b29..e36d590a1e 100644 --- a/validphys2/serverscripts/index-reports.py +++ b/validphys2/serverscripts/index-reports.py @@ -6,20 +6,24 @@ called meta.yaml in the report folder and finally the html attributes, in that order. """ -import pathlib + +from collections import ChainMap, defaultdict import datetime import json +import pathlib import re import sys import traceback -from collections import ChainMap, defaultdict -import ruamel_yaml as yaml from bs4 import BeautifulSoup -#TODO: Move the thumbnail logic somewhere -import skimage.transform -import skimage.io import numpy as np +from ruamel.yaml import YAML, error +import skimage.io + +# TODO: Move the thumbnail logic somewhere +import skimage.transform + +yaml = YAML(typ='safe') ROOT = '/home/nnpdf/validphys-reports' ROOT_URL = 'https://vp.nnpdf.science/' @@ -34,6 +38,7 @@ REQUIRED_FILE_METADATA = {'title', 'author', 'keywords'} + def meta_from_html(f): soup = BeautifulSoup(f, 'lxml') try: @@ -41,68 +46,73 @@ def meta_from_html(f): except Exception: title = None try: - author = soup.find('meta', {'name':'author'})['content'] + author = soup.find('meta', {'name': 'author'})['content'] except Exception: author = EMPTY try: - tagtext = soup.find('meta', {'name':'keywords'})['content'] + tagtext = soup.find('meta', {'name': 'keywords'})['content'] except Exception: tags = [] else: tags = re.split(r"\s*,\s*", tagtext) #'soup.title.string' doesn't - #return a strig but rather an object with the reference to - #the whole parse tree, causing a huge memory leak. + # return a strig but rather an object with the reference to + # the whole parse tree, causing a huge memory leak. return dict(title=str(title), author=author, keywords=tags) -class TagProps(): + +class TagProps: def __init__(self, count=0, last_timestamp=0): self.count = count self.last_timestamp = last_timestamp __slots__ = ('count', 'last_timestamp') + def meta_from_path(p): meta = ChainMap(DEFAULTS) - yaml_meta = p/'meta.yaml' + yaml_meta = p / 'meta.yaml' yaml_res = {} if yaml_meta.exists(): with yaml_meta.open() as f: try: - yaml_res = yaml.safe_load(f) - except yaml.YAMLError as e: + yaml_res = yaml.load(f) + except error.YAMLError as e: print(f"Error processing {yaml_meta}: {e}", file=sys.stderr) - index = p/'index.html' - #Only do the expensive HTML parsing if we actually need a key + index = p / 'index.html' + # Only do the expensive HTML parsing if we actually need a key if REQUIRED_FILE_METADATA - yaml_res.keys() and index.exists(): with index.open() as f: meta = meta.new_child(meta_from_html(f)) meta = meta.new_child(yaml_res) return meta + def make_single_thumbnail(f, shape=(100, 150)): img = skimage.io.imread(f) - res = skimage.transform.resize( - img, shape, anti_aliasing=True, mode='constant') + res = skimage.transform.resize(img, shape, anti_aliasing=True, mode='constant') return res + def make_4_img_thumbnail(paths, shape=(100, 150)): w, h = shape whalf, hhalf = w // 2, h // 2 positions = ( - (slice(0,whalf), slice(0,hhalf)), - (slice(whalf,w), slice(0,hhalf)), - (slice(0,whalf), slice(hhalf,h)), - (slice(whalf,w), slice(hhalf,h)) + (slice(0, whalf), slice(0, hhalf)), + (slice(whalf, w), slice(0, hhalf)), + (slice(0, whalf), slice(hhalf, h)), + (slice(whalf, w), slice(hhalf, h)), ) res = np.zeros((*shape, 4)) imgs = skimage.io.imread_collection(paths) for img, pos in zip(imgs, positions): res[pos] = skimage.transform.resize( - img, (whalf, hhalf), anti_aliasing=True, mode='constant') + img, (whalf, hhalf), anti_aliasing=True, mode='constant' + ) return res + def make_thumbnail(folder): folder = pathlib.Path(folder) pngs = sorted(folder.glob('*.png')) @@ -112,19 +122,20 @@ def make_thumbnail(folder): return make_single_thumbnail(pngs[0]) else: l = len(pngs) - imgs = pngs[:l-(l%4):l//4] + imgs = pngs[: l - (l % 4) : l // 4] return make_4_img_thumbnail(imgs) def thumbnail_tag(name): return f'{ROOT_URL}thumbnails/{name}"' + def handle_thumbnail(p): dest = (pathlib.Path(THUMBNAILS) / p.name).with_suffix('.png') name = dest.name if dest.exists(): return thumbnail_tag(name) - figures = (p / 'figures') + figures = p / 'figures' if figures.is_dir(): try: res = make_thumbnail(figures) @@ -137,12 +148,13 @@ def handle_thumbnail(p): return None return None + def register(p, emails): path_meta = meta_from_path(p) title, author, tags = path_meta['title'], path_meta['author'], path_meta['keywords'] url = ROOT_URL + p.name - #Use the timestamp for sorting and the string for displaying + # Use the timestamp for sorting and the string for displaying timestamp = p.stat().st_mtime date = datetime.date.fromtimestamp(timestamp).isoformat() if not title or not isinstance(title, str): @@ -155,9 +167,7 @@ def register(p, emails): if not isinstance(author, str): author = "" - emaillinks = ' '.join( - f'📧' for (url, title) in emails - ) + emaillinks = ' '.join(f'📧' for (url, title) in emails) titlelink = f'{title} {emaillinks}' @@ -188,18 +198,17 @@ def make_index(): timestamp = res[2][1] for k in newkeywords: props = keywords[k] - props.count+=1 + props.count += 1 props.last_timestamp = max(props.last_timestamp, timestamp) except: - print("Error processing folder", p,file=sys.stderr) + print("Error processing folder", p, file=sys.stderr) raise keylist = sorted(keywords.items(), key=lambda x: -x[1].last_timestamp) - keywordmap = [(k, v.count) for k,v in keylist] - + keywordmap = [(k, v.count) for k, v in keylist] with open(OUT, 'w') as f: - json.dump({'data':data, 'keywords':keywordmap}, f) + json.dump({'data': data, 'keywords': keywordmap}, f) if __name__ == '__main__': diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 7e4875d997..486422b147 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -41,36 +41,21 @@ import logging from operator import attrgetter from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Optional import numpy as np import pandas as pd +from ruamel.yaml import YAML from validobj import ValidationError, parse_input from validobj.custom import Parser from nnpdf_data import new_to_legacy_map, path_commondata from nnpdf_data.utils import parse_yaml_inp - -# We cannot use ruamel directly due to the ambiguity ruamel.yaml / ruamel_yaml -# of some versions which are pinned in some of the conda packages we use... -from reportengine.compat import yaml from validphys.coredata import KIN_NAMES, CommonData from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions from validphys.process_options import ValidProcess -try: - # If libyaml is available, use the C loader to speed up some of the read - # https://pyyaml.org/wiki/LibYAML - # libyaml is available for most linux distributions - Loader = yaml.CLoader -except AttributeError: - # fallback to the slow loader - Loader = yaml.Loader - - -def _quick_yaml_load(filepath): - return yaml.load(filepath.read_text(encoding="utf-8"), Loader=Loader) - +yaml = YAML(typ='rt') # JCM: # Some notes for developers @@ -230,7 +215,7 @@ class TheoryMeta: ------- >>> from validphys.commondataparser import TheoryMeta ... from validobj import parse_input - ... from reportengine.compat import yaml + ... from ruamel.yaml import YAML ... theory_raw = ''' ... FK_tables: ... - - fk1 @@ -238,7 +223,7 @@ class TheoryMeta: ... - fk3 ... operation: ratio ... ''' - ... theory = yaml.safe_load(theory_raw) + ... theory = YAML(typ='safe').load(theory_raw) ... parse_input(theory, TheoryMeta) TheoryMeta(FK_tables=[['fk1'], ['fk2', 'fk3']], operation='RATIO', shifts = None, conversion_factor=1.0, comment=None, normalization=None)) """ @@ -263,7 +248,7 @@ def parser(cls, yaml_file): """The yaml databases in the server use "operands" as key instead of "FK_tables" """ if not yaml_file.exists(): raise FileNotFoundError(yaml_file) - meta = yaml.safe_load(yaml_file.read_text()) + meta = yaml.load(yaml_file.read_text()) # Make sure the operations are upper-cased for compound-compatibility meta["operation"] = "NULL" if meta["operation"] is None else meta["operation"].upper() if "operands" in meta: @@ -309,7 +294,7 @@ class Variant: experiment: Optional[str] = None -ValidVariants = Dict[str, Variant] +ValidVariants = dict[str, Variant] ### Kinematic data @@ -351,7 +336,7 @@ class ValidKinematics: """ file: ValidPath - variables: Dict[str, ValidVariable] + variables: dict[str, ValidVariable] def get_label(self, var): """For the given variable, return the label as label (unit) @@ -523,7 +508,7 @@ def load_data_central(self): if self.is_nnpdf_special: data = np.zeros(self.ndata) else: - datayaml = _quick_yaml_load(self.path_data_central) + datayaml = yaml.load(self.path_data_central) data = datayaml["data_central"] if len(data) != self.ndata: @@ -552,8 +537,7 @@ def load_uncertainties(self): all_df = [] for ufile in self.paths_uncertainties: - uncyaml = _quick_yaml_load(ufile) - + uncyaml = yaml.load(ufile) mindex = pd.MultiIndex.from_tuples( [(k, v["treatment"], v["type"]) for k, v in uncyaml["definitions"].items()], names=["name", "treatment", "type"], @@ -589,7 +573,7 @@ def load_kinematics(self, fill_to_three=True, drop_minmax=True): a dataframe containing the kinematics """ kinematics_file = self.path_kinematics - kinyaml = _quick_yaml_load(kinematics_file) + kinyaml = yaml.load(kinematics_file) kin_dict = {} for bin_index, dbin in enumerate(kinyaml["bins"], start=1): diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 1c814af690..6651650c07 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -9,10 +9,12 @@ import pathlib import pandas as pd +from ruamel.yaml import YAML from nnpdf_data import legacy_to_new_map from reportengine import configparser, report -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from reportengine.configparser import ConfigError, _parse_func, element_of, record_from_defaults from reportengine.environment import Environment, EnvironmentError_ from reportengine.helputils import get_parser_type @@ -1289,7 +1291,7 @@ def load_default_default_filter_rules(self, spec): lock_token = "_filters.lock.yaml" try: - return yaml.safe_load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}")) + return yaml.load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}")) except FileNotFoundError as e: alternatives = [ el.strip(lock_token) @@ -1386,7 +1388,7 @@ def load_default_default_filter_settings(self, spec): lock_token = "_defaults.lock.yaml" try: - return yaml.safe_load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}")) + return yaml.load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}")) except FileNotFoundError as e: alternatives = alternatives = [ el.strip(lock_token) @@ -1663,7 +1665,7 @@ def produce_theoryids(self, t0id, point_prescription): This hard codes the theories needed for each prescription to avoid user error.""" th = t0id.id - lsv = yaml.safe_load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) + lsv = yaml.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) scalevarsfor_list = lsv["scale_variations_for"] # Allowed central theoryids @@ -1677,9 +1679,7 @@ def produce_theoryids(self, t0id, point_prescription): ) # Find scales that correspond to this point prescription - pp_scales_dict = yaml.safe_load( - read_text(validphys.scalevariations, "pointprescriptions.yaml") - ) + pp_scales_dict = yaml.load(read_text(validphys.scalevariations, "pointprescriptions.yaml")) try: scales = pp_scales_dict[point_prescription] diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index dc1d880513..ea7303f8e9 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -12,11 +12,13 @@ import re import numpy as np +from ruamel.yaml import YAML, error from nnpdf_data.theorydbutils import fetch_theory from reportengine import namespaces from reportengine.baseexceptions import AsInputError -from reportengine.compat import yaml + +yaml = YAML(typ='safe') # TODO: There is a bit of a circular dependency between filters.py and this. # Maybe move the cuts logic to its own module? @@ -713,8 +715,8 @@ def as_input(self): log.debug('Reading input from fit configuration %s', p) try: with p.open() as f: - d = yaml.safe_load(f) - except (yaml.YAMLError, FileNotFoundError) as e: + d = yaml.load(f) + except (error.YAMLError, FileNotFoundError) as e: raise AsInputError(str(e)) from e d['pdf'] = {'id': self.name, 'label': self.label} diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 46a1cab7c6..690a2a9623 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -9,8 +9,9 @@ import numpy as np import pandas as pd +from ruamel.yaml import YAML -from reportengine.compat import yaml +yaml = YAML(typ='safe') from validphys.utils import generate_path_filtered_data KIN_NAMES = ["kin1", "kin2", "kin3"] @@ -431,7 +432,7 @@ def systematic_errors(self, central_values=None): def export_data(self, buffer): """Exports the central data defined by this commondata instance to the given buffer""" ret = {"data_central": self.central_values.tolist()} - yaml.safe_dump(ret, buffer) + yaml.dump(ret, buffer) def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" @@ -460,7 +461,7 @@ def export_uncertainties(self, buffer): "type": "UNCORR", } ret = {"definitions": sorted_definitions, "bins": bins} - yaml.safe_dump(ret, buffer) + yaml.dump(ret, buffer) def export(self, folder_path): """Wrapper around export_data and export_uncertainties diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index 0837b06b3d..4d07e5f0c8 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -1,21 +1,22 @@ -# -*- coding: utf-8 -*- """ Tools for computing and plotting effective exponents. """ -from __future__ import generator_stop import logging import numbers import random +import tempfile import warnings import matplotlib as mpl import numpy as np import pandas as pd +from ruamel.yaml import YAML from reportengine import collect from reportengine.checks import check_positive -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from reportengine.figure import figuregen from reportengine.floatformatting import format_number, significant_digits from reportengine.table import table @@ -139,7 +140,7 @@ def get_title(self, parton_name): def get_ylabel(self, parton_name): if self.normalize_to is not None: - return "Ratio to {}".format(self.normalize_pdf.label) + return f"Ratio to {self.normalize_pdf.label}" else: return fr"$\{self.exponent}_e$ for ${parton_name}$" @@ -502,8 +503,8 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= """ (df_effexps,) = next_fit_eff_exps_table # Use round trip loader rather than safe_load in fit.as_input() - with open(fit.path / "filter.yml", "r") as f: - filtermap = yaml.load(f, yaml.RoundTripLoader) + with open(fit.path / "filter.yml") as f: + filtermap = yaml.load(f) previous_exponents = filtermap["fitting"]["basis"] basis = filtermap["fitting"]["fitbasis"] checked = check_basis(basis, None) @@ -524,7 +525,10 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= betas = np.clip(betas, **largex_args) previous_exponents[i]["smallx"] = [fmt(alpha) for alpha in alphas] previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] - return yaml.dump(filtermap, Dumper=yaml.RoundTripDumper) + with tempfile.NamedTemporaryFile() as fp: + yaml.dump(filtermap, fp.name) + yaml_string = fp.read() + return yaml_string def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_description=None): @@ -539,13 +543,17 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip ``` """ - filtermap = yaml.load(iterate_preprocessing_yaml, yaml.RoundTripLoader) + filtermap = yaml.load(iterate_preprocessing_yaml) # update description if necessary if _updated_description is not None: filtermap["description"] = _updated_description - return yaml.dump(filtermap, Dumper=yaml.RoundTripDumper) + with tempfile.NamedTemporaryFile() as fp: + yaml.dump(filtermap, fp.name) + yaml_string = fp.read() + + return yaml_string def iterated_runcard_yaml(fit, update_runcard_description_yaml): @@ -578,7 +586,7 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): ... f.write(yaml_output) """ - filtermap = yaml.load(update_runcard_description_yaml, yaml.RoundTripLoader) + filtermap = yaml.load(update_runcard_description_yaml) # iterate t0 filtermap["datacuts"]["t0pdfset"] = fit.name @@ -605,4 +613,8 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): if "fiatlux" in filtermap: filtermap['fiatlux']['luxset'] = fit.name - return yaml.dump(filtermap, Dumper=yaml.RoundTripDumper) + with tempfile.NamedTemporaryFile() as fp: + yaml.dump(filtermap, fp.name) + yaml_string = fp.read() + + return yaml_string diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 944d11c5cb..3260adee06 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -11,9 +11,11 @@ from typing import Union import numpy as np +from ruamel.yaml import YAML from reportengine.checks import check, make_check -from reportengine.compat import yaml + +yaml = YAML(typ='safe') import validphys.cuts from validphys.process_options import PROCESSES from validphys.utils import generate_path_filtered_data @@ -156,7 +158,7 @@ def default_filter_settings_input(): """Return a FilterDefaults dataclass with the default hardcoded filter settings. These are defined in ``defaults.yaml`` in the ``validphys.cuts`` module. """ - return FilterDefaults(**yaml.safe_load(read_text(validphys.cuts, "defaults.yaml"))) + return FilterDefaults(**yaml.load(read_text(validphys.cuts, "defaults.yaml"))) def default_filter_rules_input(): @@ -164,7 +166,7 @@ def default_filter_rules_input(): Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. """ - list_rules = yaml.safe_load(read_text(validphys.cuts, "filters.yaml")) + list_rules = yaml.load(read_text(validphys.cuts, "filters.yaml")) return tuple(FilterRule(**rule) for rule in list_rules) diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py index d87138a2e2..fa5ad8061e 100644 --- a/validphys2/src/validphys/fitdata.py +++ b/validphys2/src/validphys/fitdata.py @@ -1,6 +1,7 @@ """ Utilities for loading data from fit folders """ + from collections import OrderedDict, defaultdict, namedtuple from io import StringIO import json @@ -9,10 +10,12 @@ import numpy as np import pandas as pd +from ruamel.yaml import YAML from reportengine import collect from reportengine.checks import CheckError, make_argcheck -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from reportengine.floatformatting import ValueErrorTuple from reportengine.table import table from validphys import checks @@ -431,7 +434,7 @@ def _get_fitted_index(pdf, i): """Return the nnfit index for the replica i""" p = pdf.infopath.with_name(f'{pdf.name}_{i:04d}.dat') with open(p) as f: - it = yaml.safe_load_all(f) + it = yaml.load_all(f) metadata = next(it) return metadata['FromMCReplica'] diff --git a/validphys2/src/validphys/lhaindex.py b/validphys2/src/validphys/lhaindex.py index 9a10d66ff1..31a03a83ae 100644 --- a/validphys2/src/validphys/lhaindex.py +++ b/validphys2/src/validphys/lhaindex.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- #!/usr/bin/env python """ Created on Fri Jan 23 12:11:23 2015 @@ -13,7 +12,9 @@ from pathlib import Path import re -from reportengine.compat import yaml +from ruamel.yaml import YAML + +yaml = YAML(typ='safe') from validphys.lhapdf_compatibility import lhapdf _indexes_to_names = None @@ -123,10 +124,10 @@ def infofilename(name): raise FileNotFoundError(name + ".info") -@lru_cache() +@lru_cache def parse_info(name): with open(infofilename(name)) as infofile: - result = yaml.YAML(typ='safe', pure=True).load(infofile) + result = YAML(typ='safe', pure=True).load(infofile) return result diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index 92c767cccc..b0be198916 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -10,8 +10,9 @@ import numpy as np import pandas as pd +from ruamel.yaml import YAML -from reportengine.compat import yaml +yaml = YAML(typ='safe') from validphys import lhaindex from validphys.core import PDF @@ -28,7 +29,7 @@ def split_sep(f): def read_xqf_from_file(f): lines = split_sep(f) try: - (xtext, qtext, ftext) = [next(lines) for _ in range(3)] + (xtext, qtext, ftext) = (next(lines) for _ in range(3)) except StopIteration: return None xvals = np.fromstring(xtext, sep=" ") @@ -69,7 +70,7 @@ def load_replica(pdf, rep, kin_grids=None): path = osp.join(lhaindex.finddir(pdf_name), pdf_name + "_" + suffix + ".dat") - log.debug("Loading replica {rep} at {path}".format(rep=rep, path=path)) + log.debug(f"Loading replica {rep} at {path}") with open(path, 'rb') as inn: header = b"".join(split_sep(inn)) @@ -315,7 +316,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): # copy replica 0 shutil.copy(base / f'{pdf}_0000.dat', set_root / f"{set_name }_0000.dat") - with open(base / f'{pdf}.info', 'r') as inn, open(set_root / f'{set_name }.info', 'w') as out: + with open(base / f'{pdf}.info') as inn, open(set_root / f'{set_name }.info', 'w') as out: for l in inn.readlines(): if l.find("SetDesc:") >= 0: out.write(f"SetDesc: \"Hessian {pdf}_hessian\"\n") diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index 230c26b076..2b56e7f665 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -17,10 +17,12 @@ import urllib.parse as urls import requests +from ruamel.yaml import YAML from nnpdf_data import legacy_to_new_mapping, path_vpdata from reportengine import filefinder -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys import lhaindex from validphys.commondataparser import load_commondata_old, parse_new_metadata, parse_set_metadata from validphys.core import ( @@ -37,7 +39,6 @@ InternalCutsWrapper, PositivitySetSpec, TheoryIDSpec, - peek_commondata_metadata, ) from validphys.utils import generate_path_filtered_data, tempfile_cleaner @@ -132,7 +133,7 @@ def _get_nnpdf_profile(profile_path=None): the python prefix (``Path(sys.prefix)/"share"/"NNPDF"``) will be used """ - yaml_reader = yaml.YAML(typ='safe', pure=True) + yaml_reader = YAML(typ='safe', pure=True) home_config = pathlib.Path().home() / ".config" config_folder = pathlib.Path(os.environ.get("XDG_CONFIG_HOME", home_config)) / NNPDF_DIR @@ -423,7 +424,12 @@ def check_commondata( ) break # try new commondata format - old_path = fit.path / "filter" / legacy_name / f"filtered_uncertainties_{legacy_name}.yaml" + old_path = ( + fit.path + / "filter" + / legacy_name + / f"filtered_uncertainties_{legacy_name}.yaml" + ) if old_path.exists(): data_path = old_path.with_name(f"filtered_data_{legacy_name}.yaml") unc_path = old_path.with_name(f"filtered_uncertainties_{legacy_name}.yaml") @@ -533,7 +539,7 @@ def check_compound(self, theoryID, setname, cfac): raise CompoundNotFound(msg) # This is a little bit funny, but is the least amount of thinking... yaml_format = 'FK:\n' + re.sub('FK:', ' - ', txt) - data = yaml.safe_load(yaml_format) + data = yaml.load(yaml_format) # we have to split out 'FK_' the extension to get a name consistent # with everything else try: diff --git a/validphys2/src/validphys/photon/compute.py b/validphys2/src/validphys/photon/compute.py index 32ef57f388..6466c4ddb7 100644 --- a/validphys2/src/validphys/photon/compute.py +++ b/validphys2/src/validphys/photon/compute.py @@ -122,8 +122,7 @@ def __init__(self, theoryid, lux_params, replicas): alpha = Alpha(theory, fiatlux_runcard["q2_max"]) with tempfile.NamedTemporaryFile(mode="w") as tmp: - with tmp.file as tmp_file: - tmp_file.write(yaml.dump(fiatlux_runcard)) + yaml.dump(fiatlux_runcard, tmp) self.lux[replica] = fiatlux.FiatLux(tmp.name) # we have a dict but fiatlux wants a yaml file # TODO : once that fiatlux will allow dictionaries diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index e636d81531..1e1677b7f8 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -3,12 +3,16 @@ Tools for filtering replica sets based on criteria on the replicas. """ + import logging import re import shutil +from ruamel.yaml import YAML + from reportengine.checks import check, make_argcheck -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.core import PDF from validphys.renametools import rename_pdf from validphys.utils import tempfile_cleaner @@ -96,9 +100,8 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = #  Fixup the info file info_file = (temp_pdf / temp_pdf.name).with_suffix('.info') - with open(info_file, 'r') as stream: - yaml_obj = yaml.YAML() - info_yaml = yaml_obj.load(stream) + with open(info_file) as stream: + info_yaml = yaml.load(stream) info_yaml['NumMembers'] = new_nrep info_yaml['ErrorType'] += '+as' extra_desc = '; '.join( @@ -106,7 +109,7 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = ) info_yaml['SetDesc'] += f"; {extra_desc}" with open(info_file, 'w') as stream: - yaml_obj.dump(info_yaml, stream) + yaml.dump(info_yaml, stream) # Rename the base pdf to the final name rename_pdf(temp_pdf, pdf.name, target_name) diff --git a/validphys2/src/validphys/scripts/vp_comparefits.py b/validphys2/src/validphys/scripts/vp_comparefits.py index 7c79a7c05e..b15a079f09 100644 --- a/validphys2/src/validphys/scripts/vp_comparefits.py +++ b/validphys2/src/validphys/scripts/vp_comparefits.py @@ -1,18 +1,18 @@ -import sys -import os import logging +import os +import sys # TODO: Look into making these lazy imports import prompt_toolkit from prompt_toolkit.completion import WordCompleter +from ruamel.yaml import YAML -from reportengine.compat import yaml +yaml = YAML(typ='safe') from reportengine.colors import t - +from validphys import compareclosuretemplates, comparefittemplates from validphys.app import App from validphys.loader import RemoteLoader -from validphys import comparefittemplates, compareclosuretemplates -from validphys.promptutils import confirm, KeywordsWithCache +from validphys.promptutils import KeywordsWithCache, confirm log = logging.getLogger(__name__) @@ -241,7 +241,7 @@ def get_config(self): with open(self.args['config_yml']) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.safe_load(f) + c = yaml.load(f) c.update(self.complete_mapping()) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_deltachi2.py b/validphys2/src/validphys/scripts/vp_deltachi2.py index 8762ede5c3..cfb59941d9 100644 --- a/validphys2/src/validphys/scripts/vp_deltachi2.py +++ b/validphys2/src/validphys/scripts/vp_deltachi2.py @@ -2,27 +2,22 @@ import os import pwd -from reportengine.compat import yaml +from ruamel.yaml import YAML + +yaml = YAML(typ='safe') from validphys import deltachi2templates from validphys.app import App - log = logging.getLogger(__name__) class HyperoptPlotApp(App): def add_positional_arguments(self, parser): - """ Wrapper around argumentparser """ - parser.add_argument( - "fit", help="Name of the fit", - ) - parser.add_argument( - "hessian_pdfs", help="Name of the set of Hessian pdfs", - ) - parser.add_argument( - "--Q", help="Energy Scale in GeV", type=float, default=1.7, - ) + """Wrapper around argumentparser""" + parser.add_argument("fit", help="Name of the fit") + parser.add_argument("hessian_pdfs", help="Name of the set of Hessian pdfs") + parser.add_argument("--Q", help="Energy Scale in GeV", type=float, default=1.7) # Report meta data parser.add_argument( "--author", @@ -30,9 +25,7 @@ def add_positional_arguments(self, parser): type=str, default=pwd.getpwuid(os.getuid())[4].replace(",", ""), ) - parser.add_argument( - "--title", help="Add custom title to the report's meta data", type=str, - ) + parser.add_argument("--title", help="Add custom title to the report's meta data", type=str) parser.add_argument( "--keywords", help="Add keywords to the report's meta data. The keywords must be provided as a list", @@ -67,14 +60,8 @@ def complete_mapping(self): "normalize_to": fit, } - autosettings["decomposition"] = { - "normalize_to": hessian_pdfs, - "pdf": hessian_pdfs, - } - autosettings["MC_Hessian_compare"] = { - "pdfs": [hessian_pdfs, fit], - "normalize_to": fit, - } + autosettings["decomposition"] = {"normalize_to": hessian_pdfs, "pdf": hessian_pdfs} + autosettings["MC_Hessian_compare"] = {"pdfs": [hessian_pdfs, fit], "normalize_to": fit} return autosettings @@ -85,7 +72,7 @@ def get_config(self): with open(runcard) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.safe_load(f) + c = yaml.load(f) c.update(complete_mapping) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_hyperoptplot.py b/validphys2/src/validphys/scripts/vp_hyperoptplot.py index 1faa875070..82ce13a427 100644 --- a/validphys2/src/validphys/scripts/vp_hyperoptplot.py +++ b/validphys2/src/validphys/scripts/vp_hyperoptplot.py @@ -1,22 +1,23 @@ -from validphys.app import App -from validphys.loader import Loader, HyperscanNotFound +from ruamel.yaml import YAML + from validphys import hyperplottemplates -from reportengine.compat import yaml -import pwd -import os +from validphys.app import App +from validphys.loader import HyperscanNotFound, Loader +yaml = YAML(typ='safe') import logging +import os +import pwd log = logging.getLogger(__name__) class HyperoptPlotApp(App): def add_positional_arguments(self, parser): - """ Wrapper around argumentparser """ + """Wrapper around argumentparser""" # Hyperopt settings parser.add_argument( - "hyperopt_name", - help="Folder of the hyperopt fit to generate the report for", + "hyperopt_name", help="Folder of the hyperopt fit to generate the report for" ) parser.add_argument( "-l", @@ -73,16 +74,12 @@ def add_positional_arguments(self, parser): type=str, default=pwd.getpwuid(os.getuid())[4].replace(",", ""), ) - parser.add_argument( - "--title", - help="Add custom title to the report's meta data", - type=str, - ) + parser.add_argument("--title", help="Add custom title to the report's meta data", type=str) parser.add_argument( "--keywords", help="Add keywords to the report's meta data. The keywords must be provided as a list", type=list, - default=[] + default=[], ) args = parser.parse_args() @@ -104,7 +101,7 @@ def complete_mapping(self): hyperop_folder = hyperop_folder[:-1] with open(hyperopt_filter) as f: - filtercard = yaml.safe_load(f) + filtercard = yaml.load(f) folder_path = hyperop_folder index_slash = folder_path.rfind("/") + 1 @@ -127,7 +124,7 @@ def complete_mapping(self): "combine": args["combine"], "autofilter": args["autofilter"], "debug": args["debug"], - "loss_target": args["loss_target"] + "loss_target": args["loss_target"], } try: @@ -148,7 +145,7 @@ def get_config(self): with open(self.args['config_yml']) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.safe_load(f) + c = yaml.load(f) c.update(self.complete_mapping()) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_nextfitruncard.py b/validphys2/src/validphys/scripts/vp_nextfitruncard.py index ecf379e014..a3c793e02d 100644 --- a/validphys2/src/validphys/scripts/vp_nextfitruncard.py +++ b/validphys2/src/validphys/scripts/vp_nextfitruncard.py @@ -16,14 +16,17 @@ """ import argparse +import logging import os import pathlib import sys -import logging + import prompt_toolkit +from ruamel.yaml import YAML from reportengine import colors -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.api import API @@ -40,11 +43,10 @@ "t8": {"smallx": {"a_min": None, "a_max": 1.0}}, } + # Take command line arguments def process_args(): - parser = argparse.ArgumentParser( - description="Script to generate iterated fit runcard." - ) + parser = argparse.ArgumentParser(description="Script to generate iterated fit runcard.") parser.add_argument("input_fit", help="Name of input fit.") parser.add_argument( "output_dir", @@ -65,7 +67,7 @@ def process_args(): "Do not enforce any preprocessing constraints, which are chosen to " "ensure integrability. By default the following constraints are " f"used: {PREPROCESSING_LIMS}" - ) + ), ) args = parser.parse_args() return args @@ -125,7 +127,7 @@ def main(): preproc_lims = PREPROCESSING_LIMS log.info( "The following constraints will be used for preprocessing ranges, \n%s", - yaml.dump(preproc_lims), + print(yaml.dump(preproc_lims)), ) else: # don't enforce any limits. @@ -134,9 +136,7 @@ def main(): updated_description = interactive_description(description) iterated_runcard_yaml = API.iterated_runcard_yaml( - fit=input_fit, - _updated_description=updated_description, - _flmap_np_clip_arg=preproc_lims, + fit=input_fit, _updated_description=updated_description, _flmap_np_clip_arg=preproc_lims ) # Write new runcard to file diff --git a/validphys2/src/validphys/scripts/vp_pdffromreplicas.py b/validphys2/src/validphys/scripts/vp_pdffromreplicas.py index e68df56dcc..c53d4e615a 100755 --- a/validphys2/src/validphys/scripts/vp_pdffromreplicas.py +++ b/validphys2/src/validphys/scripts/vp_pdffromreplicas.py @@ -27,14 +27,16 @@ import tempfile import pandas as pd +from ruamel.yaml import YAML + from reportengine import colors -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys import lhaindex from validphys.lhio import new_pdf_from_indexes from validphys.loader import FallbackLoader - log = logging.getLogger() log.setLevel(logging.INFO) log.addHandler(colors.ColorHandler()) @@ -53,9 +55,7 @@ def check_none_or_gt_one(value): try: ivalue = int(value) except ValueError as e: - raise argparse.ArgumentTypeError( - f"{value} cannot be interpreted as an integer." - ) from e + raise argparse.ArgumentTypeError(f"{value} cannot be interpreted as an integer.") from e if ivalue <= 0: raise argparse.ArgumentTypeError(f"{value} is an invalid positive int value.") return ivalue @@ -92,9 +92,7 @@ def main(): input_pdf = loader.check_pdf(args.input_pdf) if input_pdf.error_type != "replicas": - log.error( - "Error type of input PDF must be `replicas` not `%s`", input_pdf.error_type - ) + log.error("Error type of input PDF must be `replicas` not `%s`", input_pdf.error_type) sys.exit(1) if args.replicas > len(input_pdf) - 1: @@ -115,11 +113,7 @@ def main(): with tempfile.TemporaryDirectory() as f: try: new_pdf_from_indexes( - input_pdf, - indices, - set_name=output_name, - folder=pathlib.Path(f), - installgrid=True, + input_pdf, indices, set_name=output_name, folder=pathlib.Path(f), installgrid=True ) except FileExistsError: log.error( @@ -133,16 +127,12 @@ def main(): "PDFs in the LHAPDF format are required to have 2 replicas, copying " "replica 1 to replica 2" ) - base_name = str( - pathlib.Path(lhaindex.get_lha_datapath()) / output_name / output_name - ) + base_name = str(pathlib.Path(lhaindex.get_lha_datapath()) / output_name / output_name) - shutil.copyfile( - base_name + "_0001.dat", base_name + "_0002.dat", - ) + shutil.copyfile(base_name + "_0001.dat", base_name + "_0002.dat") # fixup info file - with open(base_name + ".info", "r") as f: - info_file = yaml.safe_load(f) + with open(base_name + ".info") as f: + info_file = yaml.load(f) info_file["NumMembers"] = 3 with open(base_name + ".info", "w") as f: @@ -150,22 +140,17 @@ def main(): # here we update old indices in case the user creates # the original_index_mapping.csv - indices = 2*indices + indices = 2 * indices if args.save_indices: index_file = ( - pathlib.Path(lhaindex.get_lha_datapath()) - / output_name - / "original_index_mapping.csv" + pathlib.Path(lhaindex.get_lha_datapath()) / output_name / "original_index_mapping.csv" ) log.info("Saving output PDF/input PDF replica index mapping to %s", index_file) with open(index_file, "w+") as f: pd.DataFrame( list(enumerate(indices, 1)), - columns=[ - f"{output_name} replica index", - f"{args.input_pdf} replica index", - ], + columns=[f"{output_name} replica index", f"{args.input_pdf} replica index"], ).to_csv(f, index=False) diff --git a/validphys2/src/validphys/scripts/vp_pdfrename.py b/validphys2/src/validphys/scripts/vp_pdfrename.py index 5fd0124cbf..8f2816d47f 100755 --- a/validphys2/src/validphys/scripts/vp_pdfrename.py +++ b/validphys2/src/validphys/scripts/vp_pdfrename.py @@ -19,9 +19,11 @@ import tempfile import lhapdf +from ruamel.yaml import YAML from reportengine import colors -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.renametools import rename_pdf @@ -45,13 +47,9 @@ def process_args(): Quotations should be used for this field.""", ) parser.add_argument( - "--data-version", - type=int, - help="The data version to be added to the PDF .info file.", - ) - parser.add_argument( - "--index", help="The set index to be added to the PDF .info file." + "--data-version", type=int, help="The data version to be added to the PDF .info file." ) + parser.add_argument("--index", help="The set index to be added to the PDF .info file.") parser.add_argument( "--reference", help="The reference to be added to the PDF .info file, usually an arXiv reference.", @@ -62,9 +60,7 @@ def process_args(): action="store_true", help="Place the output LHAPDF in the LHAPDF directory.", ) - parser.add_argument( - "-c", "--compress", action="store_true", help="Compress the resulting PDF." - ) + parser.add_argument("-c", "--compress", action="store_true", help="Compress the resulting PDF.") args = parser.parse_args() return args @@ -81,8 +77,7 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): infopath = pdf_path / f"{pdf_name}.info" with open(infopath) as f: - y = yaml.YAML() - res = y.load(f) + res = yaml.load(f) # If a field entry is not provided, then we revert to the existing # field in pre-existing info file. @@ -102,12 +97,12 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): res["Reference"] = field_dict["reference"] with open(infopath, "w") as f: - y.default_flow_style = True - y.dump(res, f) + yaml.default_flow_style = True + yaml.dump(res, f) def compress(lhapdf_path: pathlib.Path): - """ Function to compress the resulting PDF. Dereferences are handled + """Function to compress the resulting PDF. Dereferences are handled in order to account for possible symbolic linking of grids. """ output = lhapdf_path.name + ".tar.gz" @@ -136,9 +131,7 @@ def main(): sys.exit(1) if not source_path.is_dir(): - log.error( - f"Could not find fit. Path '{source_path.absolute()}' is not a directory." - ) + log.error(f"Could not find fit. Path '{source_path.absolute()}' is not a directory.") sys.exit(1) with tempfile.TemporaryDirectory(dir=dest_path.parent) as tmp: @@ -156,6 +149,7 @@ def main(): if args.compress: from validphys.renametools import Spinner + log.info("Compressing output") with Spinner(): compress(dest_path) diff --git a/validphys2/src/validphys/scripts/wiki_upload.py b/validphys2/src/validphys/scripts/wiki_upload.py index bfc6eb1f12..d4414c404f 100644 --- a/validphys2/src/validphys/scripts/wiki_upload.py +++ b/validphys2/src/validphys/scripts/wiki_upload.py @@ -2,17 +2,16 @@ A more interactive version of vp_upload """ -#Note that the imports are done as late as possible to improve the speed of -#the command line. - -import sys -import pathlib -import os +# Note that the imports are done as late as possible to improve the speed of +# the command line. import logging +import os +import pathlib +import sys -import pygments from prompt_toolkit.shortcuts import prompt +import pygments from reportengine import colors from validphys.promptutils import confirm @@ -21,14 +20,17 @@ log.setLevel(logging.INFO) log.addHandler(colors.ColorHandler()) + def handle_single_file(filename): import tempfile + out = pathlib.Path(tempfile.mkdtemp(prefix='vp-upload')) filename = pathlib.Path(filename) p = out / filename.name p.symlink_to(filename.absolute()) return out, filename.name + def edit_settings(d): title = d.get('title', '') author = d.get('author', '') @@ -48,10 +50,13 @@ def edit_settings(d): kwinp = prompt("keywords: ", default=','.join(keywords)) d['keywords'] = [k.strip() for k in kwinp.split(',') if k] + def handle_meta_interactive(output): metapath = output / 'meta.yaml' - from reportengine.compat import yaml - #The yaml lexer is broken. Use something else. + from ruamel.yaml import YAML + + yaml = YAML(typ='safe') + # The yaml lexer is broken. Use something else. lex = pygments.lexers.get_lexer_by_name('pkgconfig') fmt = pygments.formatters.TerminalFormatter() if metapath.exists(): @@ -66,35 +71,37 @@ def handle_meta_interactive(output): edit = not confirm(msg, default=True) if edit: - d = yaml.load(content, yaml.RoundTripLoader) + d = yaml.load(content) else: return else: - #We are making these the empty string, because prompt_toolkit doesn't - #support default=None. - d = {'title': '', 'author': '', 'keywords':''} + # We are making these the empty string, because prompt_toolkit doesn't + # support default=None. + d = {'title': '', 'author': '', 'keywords': ''} import io + while True: edit_settings(d) print("Metadata:") s = io.StringIO() - yaml.dump(d, s, yaml.RoundTripDumper) + yaml.dump(d, s) metastr = s.getvalue() print(pygments.highlight(metastr, lex, fmt)) if confirm("Confirm?"): break - with open(metapath, 'w') as f: f.write(metastr) + def main(): import argparse + parser = argparse.ArgumentParser(description="Upload output to the NNPDF server.") parser.add_argument("output", help="Folder to upload.") args = parser.parse_args() @@ -118,7 +125,6 @@ def main(): uploader = uploadutils.ReportUploader() upargs = output - try: with uploader.upload_or_exit_context(upargs): handle_meta_interactive(upload_output) diff --git a/validphys2/src/validphys/tests/photon/test_compute.py b/validphys2/src/validphys/tests/photon/test_compute.py index dd846bdcf2..ff0b64710b 100644 --- a/validphys2/src/validphys/tests/photon/test_compute.py +++ b/validphys2/src/validphys/tests/photon/test_compute.py @@ -76,8 +76,7 @@ def test_photon(): # load fiatlux with tempfile.NamedTemporaryFile(mode="w") as tmp: - with tmp.file as tmp_file: - tmp_file.write(yaml.dump(FIATLUX_DEFAULT)) + yaml.dump(FIATLUX_DEFAULT, tmp) lux = fiatlux.FiatLux(tmp.name) alpha = Alpha(theory, fiatlux_default["q2_max"]) diff --git a/validphys2/src/validphys/tests/test_effexponents.py b/validphys2/src/validphys/tests/test_effexponents.py index f069402d60..0221111224 100644 --- a/validphys2/src/validphys/tests/test_effexponents.py +++ b/validphys2/src/validphys/tests/test_effexponents.py @@ -1,6 +1,6 @@ -import pytest +from ruamel.yaml import YAML -from reportengine.compat import yaml +yaml = YAML(typ='safe') from validphys.api import API from validphys.loader import FallbackLoader as Loader from validphys.scripts.vp_nextfitruncard import PREPROCESSING_LIMS @@ -23,10 +23,10 @@ def test_next_runcard(): # We load it using the context manager because at_input has been modified # to load various keys that are not present in the actual runcard for # backwards compatibility - with open(l.check_fit(FIT_ITERATED).path / "filter.yml", "r") as f: - ite2_runcard = yaml.safe_load(f) + with open(l.check_fit(FIT_ITERATED).path / "filter.yml") as f: + ite2_runcard = yaml.load(f) - predicted_ite2_runcard = yaml.safe_load( + predicted_ite2_runcard = yaml.load( API.iterated_runcard_yaml(fit=FIT, _flmap_np_clip_arg=PREPROCESSING_LIMS) ) diff --git a/validphys2/src/validphys/tests/test_postfit.py b/validphys2/src/validphys/tests/test_postfit.py index a3f30ac583..43807b6cca 100644 --- a/validphys2/src/validphys/tests/test_postfit.py +++ b/validphys2/src/validphys/tests/test_postfit.py @@ -3,14 +3,18 @@ Module for testing postfit. """ + import json -import subprocess as sp import os import shutil +import subprocess as sp + +from ruamel.yaml import YAML from validphys.loader import FallbackLoader as Loader from validphys.tests.conftest import FIT -from reportengine.compat import yaml + +yaml = YAML(typ='safe') def test_postfit(tmp): @@ -76,8 +80,8 @@ def test_postfit(tmp): # [File in PDF set, file in fit] files = [pdfsetpath / f"{TMPFIT}_{x:04d}.dat", postfitpath / f"replica_{x}/{TMPFIT}.dat"] for file in files: - with open(file, "r") as f: - data = yaml.safe_load_all(f) + with open(file) as f: + data = yaml.load_all(f) metadata = next(data) repnos.add(metadata["FromMCReplica"]) assert ( @@ -87,8 +91,8 @@ def test_postfit(tmp): # Check that number of PDF members is written correctly infopath = postfitpath / f"{TMPFIT}/{TMPFIT}.info" - with open(infopath, "r") as f: - data = yaml.safe_load(f) + with open(infopath) as f: + data = yaml.load(f) # Add one to nrep to account for replica 0 assert ( data["NumMembers"] == nrep + 1 @@ -96,7 +100,7 @@ def test_postfit(tmp): # Check that chi2 and arclength thresholds are recorded correctly vetopath = postfitpath / "veto_count.json" - with open(vetopath, "r") as f: + with open(vetopath) as f: veto_count = json.load(f) assert ( veto_count["chi2_threshold"] == chi2_threshold diff --git a/validphys2/src/validphys/tests/test_theorydbutils.py b/validphys2/src/validphys/tests/test_theorydbutils.py index a72d86900d..22c78fe87c 100644 --- a/validphys2/src/validphys/tests/test_theorydbutils.py +++ b/validphys2/src/validphys/tests/test_theorydbutils.py @@ -1,5 +1,5 @@ import pytest -from ruamel import yaml +from ruamel.yaml import YAML from validobj import ValidationError from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase, fetch_all, fetch_theory @@ -8,6 +8,7 @@ L = Loader() DBPATH = L.theorydb_folder +yaml = YAML(typ='safe') def test_fetch_theory(): diff --git a/validphys2/src/validphys/uploadutils.py b/validphys2/src/validphys/uploadutils.py index 2420845282..dd6cbe02fd 100644 --- a/validphys2/src/validphys/uploadutils.py +++ b/validphys2/src/validphys/uploadutils.py @@ -22,9 +22,11 @@ import prompt_toolkit from prompt_toolkit.completion import WordCompleter +from ruamel.yaml import YAML from reportengine.colors import t -from reportengine.compat import yaml + +yaml = YAML(typ='safe') from validphys.loader import Loader, RemoteLoader from validphys.renametools import Spinner @@ -404,7 +406,7 @@ def interactive_meta(path): meta_dict = {"title": title, "author": author, "keywords": keywords} with open(path / "meta.yaml", "w") as stream: - yaml.safe_dump(meta_dict, stream) + yaml.dump(meta_dict, stream) def check_input(path): From 03b942ddc4f789b210b6c8dd79bdb3f1e6e37d3c Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Mon, 25 Nov 2024 14:24:48 +0000 Subject: [PATCH 02/20] remove ruamel.yaml pin --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index de3464aa4b..eef39b0c7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ matplotlib = "^3.9" pineappl = "^0.8.2" pandas = "*" numpy = "*" -"ruamel.yaml" = "^0.15" +"ruamel.yaml" = "*" validobj = "*" prompt_toolkit = "*" # Reportengine needs to be installed from git From 57860e02c3442b57e7de91d4598f2eaa75c2d723 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Mon, 25 Nov 2024 14:33:43 +0000 Subject: [PATCH 03/20] uncomment load weights in quickard_qed --- n3fit/src/n3fit/tests/regressions/quickcard_qed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml b/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml index a4b4843bba..193edc8631 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml +++ b/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml @@ -33,7 +33,7 @@ trvlseed: 3 nnseed: 2 mcseed: 1 -# load: "weights.weights.h5" +load: "weights.weights.h5" separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer From 3236706b751d2a71163ab529b91f3be30c70cffe Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Mon, 25 Nov 2024 17:09:17 +0000 Subject: [PATCH 04/20] move error to be raised one step later.. --- validphys2/src/validphys/coredata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 690a2a9623..b2f68713b0 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -11,7 +11,7 @@ import pandas as pd from ruamel.yaml import YAML -yaml = YAML(typ='safe') +yaml = YAML(typ='rt') from validphys.utils import generate_path_filtered_data KIN_NAMES = ["kin1", "kin2", "kin3"] From 2dc8c4168780918ea270939d29c94c5a83d384b5 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Mon, 25 Nov 2024 17:10:41 +0000 Subject: [PATCH 05/20] remove useless pass statement --- nnpdf_data/nnpdf_data/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nnpdf_data/nnpdf_data/__init__.py b/nnpdf_data/nnpdf_data/__init__.py index fccfa92ea6..60e7e0fb61 100644 --- a/nnpdf_data/nnpdf_data/__init__.py +++ b/nnpdf_data/nnpdf_data/__init__.py @@ -3,8 +3,6 @@ from ruamel.yaml import YAML -pass - path_vpdata = pathlib.Path(__file__).parent path_commondata = path_vpdata / "commondata" From 2aa8aa8f1784bd0cd9204738145b8401c28a77b3 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Mon, 25 Nov 2024 17:54:48 +0000 Subject: [PATCH 06/20] pass stream to yaml.dump, not a path as string --- validphys2/src/validphys/eff_exponents.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index 4d07e5f0c8..4b621648d9 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -526,7 +526,7 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= previous_exponents[i]["smallx"] = [fmt(alpha) for alpha in alphas] previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] with tempfile.NamedTemporaryFile() as fp: - yaml.dump(filtermap, fp.name) + yaml.dump(filtermap, fp) yaml_string = fp.read() return yaml_string @@ -550,7 +550,7 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip filtermap["description"] = _updated_description with tempfile.NamedTemporaryFile() as fp: - yaml.dump(filtermap, fp.name) + yaml.dump(filtermap, fp) yaml_string = fp.read() return yaml_string @@ -614,7 +614,7 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): filtermap['fiatlux']['luxset'] = fit.name with tempfile.NamedTemporaryFile() as fp: - yaml.dump(filtermap, fp.name) + yaml.dump(filtermap, fp) yaml_string = fp.read() return yaml_string From 0a840e35b74a4397ed4bc867669bfc4b262e2e2c Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 15:49:33 +0000 Subject: [PATCH 07/20] correctly yaml.dump dict to file --- validphys2/src/validphys/commondataparser.py | 2 +- validphys2/src/validphys/eff_exponents.py | 13 +++++++++---- validphys2/src/validphys/lhaindex.py | 2 +- validphys2/src/validphys/tests/test_effexponents.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 486422b147..cb1f58a5d3 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -55,7 +55,7 @@ from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions from validphys.process_options import ValidProcess -yaml = YAML(typ='rt') +yaml = YAML(typ='safe', pure=False) # JCM: # Some notes for developers diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index 4b621648d9..db23a251ef 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -4,6 +4,7 @@ import logging import numbers +from pathlib import Path import random import tempfile import warnings @@ -16,7 +17,8 @@ from reportengine import collect from reportengine.checks import check_positive -yaml = YAML(typ='safe') +yaml = YAML(typ='rt') +yaml.default_flow_style = False from reportengine.figure import figuregen from reportengine.floatformatting import format_number, significant_digits from reportengine.table import table @@ -526,7 +528,8 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= previous_exponents[i]["smallx"] = [fmt(alpha) for alpha in alphas] previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] with tempfile.NamedTemporaryFile() as fp: - yaml.dump(filtermap, fp) + path = Path(fp.name) + yaml.dump(filtermap, path) yaml_string = fp.read() return yaml_string @@ -550,7 +553,8 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip filtermap["description"] = _updated_description with tempfile.NamedTemporaryFile() as fp: - yaml.dump(filtermap, fp) + path = Path(fp.name) + yaml.dump(filtermap, path) yaml_string = fp.read() return yaml_string @@ -614,7 +618,8 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): filtermap['fiatlux']['luxset'] = fit.name with tempfile.NamedTemporaryFile() as fp: - yaml.dump(filtermap, fp) + path = Path(fp.name) + yaml.dump(filtermap, path) yaml_string = fp.read() return yaml_string diff --git a/validphys2/src/validphys/lhaindex.py b/validphys2/src/validphys/lhaindex.py index 31a03a83ae..5d3db810c5 100644 --- a/validphys2/src/validphys/lhaindex.py +++ b/validphys2/src/validphys/lhaindex.py @@ -14,7 +14,7 @@ from ruamel.yaml import YAML -yaml = YAML(typ='safe') +yaml = YAML(typ='safe', pure=False) from validphys.lhapdf_compatibility import lhapdf _indexes_to_names = None diff --git a/validphys2/src/validphys/tests/test_effexponents.py b/validphys2/src/validphys/tests/test_effexponents.py index 0221111224..afdbdd4d60 100644 --- a/validphys2/src/validphys/tests/test_effexponents.py +++ b/validphys2/src/validphys/tests/test_effexponents.py @@ -1,6 +1,6 @@ from ruamel.yaml import YAML -yaml = YAML(typ='safe') +yaml = YAML(typ='rt') from validphys.api import API from validphys.loader import FallbackLoader as Loader from validphys.scripts.vp_nextfitruncard import PREPROCESSING_LIMS From 2e68972e92775e89326a192eed31387dd93a5e2c Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 16:09:56 +0000 Subject: [PATCH 08/20] fix(?) mc2hessian test --- validphys2/src/validphys/eff_exponents.py | 3 +-- validphys2/src/validphys/lhio.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index db23a251ef..a49b77ed6c 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -18,7 +18,6 @@ from reportengine.checks import check_positive yaml = YAML(typ='rt') -yaml.default_flow_style = False from reportengine.figure import figuregen from reportengine.floatformatting import format_number, significant_digits from reportengine.table import table @@ -529,7 +528,7 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) - yaml.dump(filtermap, path) + yaml.dump(filtermap, path, default_flow_style=False) yaml_string = fp.read() return yaml_string diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index b0be198916..f42af5afad 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -30,7 +30,7 @@ def read_xqf_from_file(f): lines = split_sep(f) try: (xtext, qtext, ftext) = (next(lines) for _ in range(3)) - except StopIteration: + except RuntimeError: return None xvals = np.fromstring(xtext, sep=" ") qvals = np.fromstring(qtext, sep=" ") From e897201c10ff39b1691d750d68e629641583c256 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 16:12:07 +0000 Subject: [PATCH 09/20] pass default_flow_style at header level isntead of function --- validphys2/src/validphys/eff_exponents.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index a49b77ed6c..db23a251ef 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -18,6 +18,7 @@ from reportengine.checks import check_positive yaml = YAML(typ='rt') +yaml.default_flow_style = False from reportengine.figure import figuregen from reportengine.floatformatting import format_number, significant_digits from reportengine.table import table @@ -528,7 +529,7 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) - yaml.dump(filtermap, path, default_flow_style=False) + yaml.dump(filtermap, path) yaml_string = fp.read() return yaml_string From 42633380ba6cf32f0a893fb4fe3c694f1c9ff1c0 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 16:35:39 +0000 Subject: [PATCH 10/20] prevent file from closing while writing to buffer --- validphys2/src/validphys/coredata.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index b2f68713b0..cd5cf895b9 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -11,7 +11,7 @@ import pandas as pd from ruamel.yaml import YAML -yaml = YAML(typ='rt') +yaml = YAML(typ='safe') from validphys.utils import generate_path_filtered_data KIN_NAMES = ["kin1", "kin2", "kin3"] @@ -474,6 +474,8 @@ def export(self, folder_path): data_path = folder_path / data_path.name unc_path = folder_path / unc_path.name # Export data and uncertainties - self.export_data(data_path.open("w", encoding="utf-8")) - self.export_uncertainties(unc_path.open("w", encoding="utf-8")) + with open(data_path, "w") as file: + self.export_data(file) + with open(unc_path, "w") as file: + self.export_uncertainties(file) return data_path, unc_path From c7d9c720314f10178782fa2687d924d46c847648 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 16:50:01 +0000 Subject: [PATCH 11/20] fix vp-nextfitruncard --- validphys2/src/validphys/eff_exponents.py | 6 +++--- validphys2/src/validphys/scripts/vp_nextfitruncard.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index db23a251ef..eba319ab5f 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -530,7 +530,7 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) yaml.dump(filtermap, path) - yaml_string = fp.read() + yaml_string = fp.read().decode("utf-8") return yaml_string @@ -555,7 +555,7 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) yaml.dump(filtermap, path) - yaml_string = fp.read() + yaml_string = fp.read().decode("utf-8") return yaml_string @@ -620,6 +620,6 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) yaml.dump(filtermap, path) - yaml_string = fp.read() + yaml_string = fp.read().decode("utf-8") return yaml_string diff --git a/validphys2/src/validphys/scripts/vp_nextfitruncard.py b/validphys2/src/validphys/scripts/vp_nextfitruncard.py index a3c793e02d..5cf48092bc 100644 --- a/validphys2/src/validphys/scripts/vp_nextfitruncard.py +++ b/validphys2/src/validphys/scripts/vp_nextfitruncard.py @@ -127,7 +127,7 @@ def main(): preproc_lims = PREPROCESSING_LIMS log.info( "The following constraints will be used for preprocessing ranges, \n%s", - print(yaml.dump(preproc_lims)), + yaml.dump(preproc_lims, sys.stdout), ) else: # don't enforce any limits. From f06d78aa56551743018981ec86d1ea3140083fec Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 18:33:55 +0000 Subject: [PATCH 12/20] clean up yaml imports --- extra_tests/regression_checks.py | 6 ++---- n3fit/src/evolven3fit/evolve.py | 6 ++---- n3fit/src/evolven3fit/utils.py | 5 ++--- n3fit/src/n3fit/io/writer.py | 6 ++---- n3fit/src/n3fit/scripts/n3fit_exec.py | 7 +++---- n3fit/src/n3fit/scripts/vp_setupfit.py | 7 +++---- n3fit/src/n3fit/tests/test_evolven3fit.py | 6 ++---- n3fit/src/n3fit/tests/test_fit.py | 6 ++---- nnpdf_data/nnpdf_data/__init__.py | 5 +++-- nnpdf_data/nnpdf_data/theory.py | 3 --- nnpdf_data/nnpdf_data/utils.py | 8 +++----- validphys2/serverscripts/index-reports.py | 6 +++--- validphys2/src/validphys/commondataparser.py | 12 +++++------- validphys2/src/validphys/config.py | 12 ++++++------ validphys2/src/validphys/core.py | 8 +++----- validphys2/src/validphys/coredata.py | 8 +++----- validphys2/src/validphys/eff_exponents.py | 17 +++++++---------- validphys2/src/validphys/filters.py | 9 +++------ validphys2/src/validphys/fitdata.py | 6 ++---- validphys2/src/validphys/lhaindex.py | 6 ++---- validphys2/src/validphys/lhio.py | 5 ++--- validphys2/src/validphys/loader.py | 12 ++++-------- validphys2/src/validphys/replica_selector.py | 10 +++------- .../src/validphys/scripts/vp_comparefits.py | 5 ++--- .../src/validphys/scripts/vp_deltachi2.py | 7 ++----- .../src/validphys/scripts/vp_hyperoptplot.py | 14 ++++++-------- .../src/validphys/scripts/vp_nextfitruncard.py | 7 ++----- .../src/validphys/scripts/vp_pdffromreplicas.py | 9 +++------ .../src/validphys/scripts/vp_pdfrename.py | 11 ++++------- validphys2/src/validphys/scripts/wiki_upload.py | 7 +++---- .../src/validphys/tests/test_effexponents.py | 8 +++----- validphys2/src/validphys/tests/test_postfit.py | 9 +++------ .../tests/test_scalevariationtheoryids.py | 6 ++---- .../src/validphys/tests/test_theorydbutils.py | 5 ++--- validphys2/src/validphys/uploadutils.py | 6 ++---- validphys2/src/validphys/utils.py | 5 +++++ 36 files changed, 106 insertions(+), 169 deletions(-) diff --git a/extra_tests/regression_checks.py b/extra_tests/regression_checks.py index b77f2c0cee..e5976d54dc 100644 --- a/extra_tests/regression_checks.py +++ b/extra_tests/regression_checks.py @@ -7,11 +7,9 @@ import subprocess as sp import pytest -from ruamel.yaml import YAML from n3fit.tests.test_fit import EXE, check_fit_results - -yaml = YAML(typ='safe') +from validphys.utils import yaml_safe REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regression_fits") @@ -39,7 +37,7 @@ def test_regression_fit(tmp_path, runcard, replica, regenerate): runcard_file = REGRESSION_FOLDER / runcard_name shutil.copy(runcard_file, tmp_path) - runcard_info = yaml.load(runcard_file.read_text()) + runcard_info = yaml_safe.load(runcard_file.read_text()) if (wname := runcard_info.get("load")) is not None: shutil.copy(REGRESSION_FOLDER / wname, tmp_path) diff --git a/n3fit/src/evolven3fit/evolve.py b/n3fit/src/evolven3fit/evolve.py index fe0bcbd5b3..fb10a4db84 100644 --- a/n3fit/src/evolven3fit/evolve.py +++ b/n3fit/src/evolven3fit/evolve.py @@ -8,12 +8,10 @@ from joblib import Parallel, delayed import numpy as np import psutil -from ruamel.yaml import YAML import eko from eko import basis_rotation, runner - -yaml = YAML(typ='safe') +from validphys.utils import yaml_safe from . import eko_utils, utils @@ -166,7 +164,7 @@ def load_fit(usr_path): nnfitpath = usr_path / "nnfit" pdf_dict = {} for yaml_file in nnfitpath.glob(f"replica_*/{usr_path.name}.exportgrid"): - data = yaml.load(yaml_file.read_text(encoding="UTF-8")) + data = yaml_safe.load(yaml_file.read_text(encoding="UTF-8")) pdf_dict[yaml_file.parent.stem] = data return pdf_dict diff --git a/n3fit/src/evolven3fit/utils.py b/n3fit/src/evolven3fit/utils.py index 700e95baec..cdd649d972 100644 --- a/n3fit/src/evolven3fit/utils.py +++ b/n3fit/src/evolven3fit/utils.py @@ -2,11 +2,10 @@ import shutil import numpy as np -from ruamel.yaml import YAML from scipy.interpolate import interp1d -yaml = YAML(typ='safe') from validphys.pdfbases import PIDS_DICT +from validphys.utils import yaml_safe from .q2grids import Q2GRID_DEFAULT, Q2GRID_NNPDF40 @@ -58,7 +57,7 @@ def hasFlavor(self, pid): def read_runcard(usr_path): """Read the runcard and return the relevant information for evolven3fit""" - return yaml.load((usr_path / "filter.yml").read_text(encoding="UTF-8")) + return yaml_safe.load((usr_path / "filter.yml").read_text(encoding="UTF-8")) def get_theoryID_from_runcard(usr_path): diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index 1f31a06ad0..053fc6d229 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -9,13 +9,11 @@ import logging import numpy as np -from ruamel.yaml import YAML import n3fit from n3fit import vpinterface - -yaml = YAML(typ='safe') import validphys +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -624,4 +622,4 @@ def storefit(pdf_object, replica, out_path, theory): } with open(out_path, "w") as fs: - yaml.dump(data, fs) + yaml_safe.dump(data, fs) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 4664d0973b..58942b32f4 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -10,15 +10,14 @@ import shutil import sys -from ruamel.yaml import YAML, error +from ruamel.yaml import error from reportengine import colors - -yaml = YAML(typ='safe') from reportengine.namespaces import NSList from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ from validphys.core import FitSpec +from validphys.utils import yaml_safe N3FIT_FIXED_CONFIG = dict(use_cuts='internal', use_t0=True, actions_=[]) @@ -110,7 +109,7 @@ class N3FitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): try: - file_content = yaml.load(o) + file_content = yaml_safe.load(o) except error.YAMLError as e: raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 75312bea83..0739963cc1 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -32,13 +32,12 @@ import shutil import sys -from ruamel.yaml import YAML, error +from ruamel.yaml import error from reportengine import colors - -yaml = YAML(typ='safe') from validphys.app import App from validphys.config import Config, ConfigError, Environment, EnvironmentError_ +from validphys.utils import yaml_safe SETUPFIT_FIXED_CONFIG = dict( actions_=[ @@ -133,7 +132,7 @@ class SetupFitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): try: - file_content = yaml.load(o) + file_content = yaml_safe.load(o) except error.YAMLError as e: raise ConfigError(f"Failed to parse yaml file: {e}") if not isinstance(file_content, dict): diff --git a/n3fit/src/n3fit/tests/test_evolven3fit.py b/n3fit/src/n3fit/tests/test_evolven3fit.py index 34e5de44dc..34039cb8ba 100644 --- a/n3fit/src/n3fit/tests/test_evolven3fit.py +++ b/n3fit/src/n3fit/tests/test_evolven3fit.py @@ -6,13 +6,11 @@ from evolven3fit import eko_utils, utils import numpy as np import pytest -from ruamel.yaml import YAML from eko import EKO, runner - -yaml = YAML(typ='safe') from validphys.api import API from validphys.pdfbases import PIDS_DICT +from validphys.utils import yaml_safe REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regressions") log = logging.getLogger(__name__) @@ -26,7 +24,7 @@ def assert_sorted(arr, title): def check_lhapdf_info(info_path): """Check the LHAPDF info file is correct""" - info = yaml.load(info_path.open("r", encoding="utf-8")) + info = yaml_safe.load(info_path.open("r", encoding="utf-8")) alphas_qs = info["AlphaS_Qs"] alphas = info["AlphaS_Vals"] diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index 0e8b01fb4a..0610250843 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -20,12 +20,10 @@ import h5py from numpy.testing import assert_allclose, assert_equal import pytest -from ruamel.yaml import YAML import n3fit - -yaml = YAML(typ='safe') from validphys.n3fit_data import replica_mcseed, replica_nnseed, replica_trvlseed +from validphys.utils import yaml_safe log = logging.getLogger(__name__) REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regressions") @@ -47,7 +45,7 @@ def _load_json(info_file): def _load_exportgrid(exportgrid_file): """Loads the exportgrid file""" - return yaml.load(exportgrid_file.read_text()) + return yaml_safe.load(exportgrid_file.read_text()) def test_initialize_seeds(): diff --git a/nnpdf_data/nnpdf_data/__init__.py b/nnpdf_data/nnpdf_data/__init__.py index 60e7e0fb61..f12d3e0abc 100644 --- a/nnpdf_data/nnpdf_data/__init__.py +++ b/nnpdf_data/nnpdf_data/__init__.py @@ -1,7 +1,7 @@ from functools import lru_cache import pathlib -from ruamel.yaml import YAML +import yaml path_vpdata = pathlib.Path(__file__).parent path_commondata = path_vpdata / "commondata" @@ -10,7 +10,8 @@ _path_legacy_mapping = path_commondata / "dataset_names.yml" theory_cards = path_vpdata / "theory_cards" -_legacy_to_new_mapping_raw = YAML(typ='safe').load(_path_legacy_mapping) +with open(_path_legacy_mapping) as file: + _legacy_to_new_mapping_raw = yaml.load(file, yaml.Loader) # Convert strings into a dictionary legacy_to_new_mapping = { k: ({"dataset": v} if isinstance(v, str) else v) for k, v in _legacy_to_new_mapping_raw.items() diff --git a/nnpdf_data/nnpdf_data/theory.py b/nnpdf_data/nnpdf_data/theory.py index 558ca57306..2ce555f37e 100644 --- a/nnpdf_data/nnpdf_data/theory.py +++ b/nnpdf_data/nnpdf_data/theory.py @@ -4,12 +4,9 @@ """ import dataclasses -from functools import lru_cache import logging from typing import Literal, Optional -from .utils import parse_yaml_inp - DEPRECATED_KEYS = ["MaxNfAs", "SxRes", "SxOrd" "EScaleVar", "Qedref", "global_nx"] log = logging.getLogger(__name__) diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py index 64f762a281..ad7f75e4c7 100644 --- a/nnpdf_data/nnpdf_data/utils.py +++ b/nnpdf_data/nnpdf_data/utils.py @@ -1,9 +1,7 @@ import pathlib -from ruamel.yaml import YAML from validobj import ValidationError, parse_input - -yaml = YAML(typ='safe') +import yaml def parse_yaml_inp(input_yaml, spec): @@ -14,14 +12,14 @@ def parse_yaml_inp(input_yaml, spec): https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers """ input_yaml = pathlib.Path(input_yaml) - inp = yaml.load(input_yaml.read_text(encoding="utf-8")) + inp = yaml.load(input_yaml.read_text(encoding="utf-8"), yaml.Loader) try: return parse_input(inp, spec) except ValidationError as e: current_exc = e # In order to provide a more complete error information, use round_trip_load # to read the .yaml file again (insetad of using the CLoader) - current_inp = yaml.load(input_yaml.open("r", encoding="utf-8")) + current_inp = yaml.load(input_yaml.open("r", encoding="utf-8"), yaml.Loader) error_text_lines = [] while current_exc: if hasattr(current_exc, 'wrong_field'): diff --git a/validphys2/serverscripts/index-reports.py b/validphys2/serverscripts/index-reports.py index e36d590a1e..a48aebae62 100644 --- a/validphys2/serverscripts/index-reports.py +++ b/validphys2/serverscripts/index-reports.py @@ -17,13 +17,13 @@ from bs4 import BeautifulSoup import numpy as np -from ruamel.yaml import YAML, error +from ruamel.yaml import error import skimage.io # TODO: Move the thumbnail logic somewhere import skimage.transform -yaml = YAML(typ='safe') +from validphys.utils import yaml_safe ROOT = '/home/nnpdf/validphys-reports' ROOT_URL = 'https://vp.nnpdf.science/' @@ -77,7 +77,7 @@ def meta_from_path(p): if yaml_meta.exists(): with yaml_meta.open() as f: try: - yaml_res = yaml.load(f) + yaml_res = yaml_safe.load(f) except error.YAMLError as e: print(f"Error processing {yaml_meta}: {e}", file=sys.stderr) index = p / 'index.html' diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index cb1f58a5d3..b63dce6d97 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -45,7 +45,6 @@ import numpy as np import pandas as pd -from ruamel.yaml import YAML from validobj import ValidationError, parse_input from validobj.custom import Parser @@ -54,8 +53,7 @@ from validphys.coredata import KIN_NAMES, CommonData from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions from validphys.process_options import ValidProcess - -yaml = YAML(typ='safe', pure=False) +from validphys.utils import yaml_fast # JCM: # Some notes for developers @@ -248,7 +246,7 @@ def parser(cls, yaml_file): """The yaml databases in the server use "operands" as key instead of "FK_tables" """ if not yaml_file.exists(): raise FileNotFoundError(yaml_file) - meta = yaml.load(yaml_file.read_text()) + meta = yaml_fast.load(yaml_file.read_text()) # Make sure the operations are upper-cased for compound-compatibility meta["operation"] = "NULL" if meta["operation"] is None else meta["operation"].upper() if "operands" in meta: @@ -508,7 +506,7 @@ def load_data_central(self): if self.is_nnpdf_special: data = np.zeros(self.ndata) else: - datayaml = yaml.load(self.path_data_central) + datayaml = yaml_fast.load(self.path_data_central) data = datayaml["data_central"] if len(data) != self.ndata: @@ -537,7 +535,7 @@ def load_uncertainties(self): all_df = [] for ufile in self.paths_uncertainties: - uncyaml = yaml.load(ufile) + uncyaml = yaml_fast.load(ufile) mindex = pd.MultiIndex.from_tuples( [(k, v["treatment"], v["type"]) for k, v in uncyaml["definitions"].items()], names=["name", "treatment", "type"], @@ -573,7 +571,7 @@ def load_kinematics(self, fill_to_three=True, drop_minmax=True): a dataframe containing the kinematics """ kinematics_file = self.path_kinematics - kinyaml = yaml.load(kinematics_file) + kinyaml = yaml_fast.load(kinematics_file) kin_dict = {} for bin_index, dbin in enumerate(kinyaml["bins"], start=1): diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 6651650c07..10e4f009f5 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -9,12 +9,9 @@ import pathlib import pandas as pd -from ruamel.yaml import YAML from nnpdf_data import legacy_to_new_map from reportengine import configparser, report - -yaml = YAML(typ='safe') from reportengine.configparser import ConfigError, _parse_func, element_of, record_from_defaults from reportengine.environment import Environment, EnvironmentError_ from reportengine.helputils import get_parser_type @@ -50,6 +47,7 @@ ) from validphys.plotoptions.core import get_info import validphys.scalevariations +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -1291,7 +1289,7 @@ def load_default_default_filter_rules(self, spec): lock_token = "_filters.lock.yaml" try: - return yaml.load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}")) + return yaml_safe.load(read_text(validphys.cuts.lockfiles, f"{spec}{lock_token}")) except FileNotFoundError as e: alternatives = [ el.strip(lock_token) @@ -1665,7 +1663,7 @@ def produce_theoryids(self, t0id, point_prescription): This hard codes the theories needed for each prescription to avoid user error.""" th = t0id.id - lsv = yaml.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) + lsv = yaml_safe.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) scalevarsfor_list = lsv["scale_variations_for"] # Allowed central theoryids @@ -1679,7 +1677,9 @@ def produce_theoryids(self, t0id, point_prescription): ) # Find scales that correspond to this point prescription - pp_scales_dict = yaml.load(read_text(validphys.scalevariations, "pointprescriptions.yaml")) + pp_scales_dict = yaml_safe.load( + read_text(validphys.scalevariations, "pointprescriptions.yaml") + ) try: scales = pp_scales_dict[point_prescription] diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index ea7303f8e9..ceb0343feb 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -12,14 +12,12 @@ import re import numpy as np -from ruamel.yaml import YAML, error +from ruamel.yaml import error from nnpdf_data.theorydbutils import fetch_theory from reportengine import namespaces from reportengine.baseexceptions import AsInputError -yaml = YAML(typ='safe') - # TODO: There is a bit of a circular dependency between filters.py and this. # Maybe move the cuts logic to its own module? from validphys import filters, lhaindex @@ -28,7 +26,7 @@ from validphys.hyperoptplot import HyperoptTrial from validphys.lhapdfset import LHAPDFSet from validphys.tableloader import parse_exp_mat -from validphys.utils import experiments_to_dataset_inputs +from validphys.utils import experiments_to_dataset_inputs, yaml_safe log = logging.getLogger(__name__) @@ -715,7 +713,7 @@ def as_input(self): log.debug('Reading input from fit configuration %s', p) try: with p.open() as f: - d = yaml.load(f) + d = yaml_safe.load(f) except (error.YAMLError, FileNotFoundError) as e: raise AsInputError(str(e)) from e d['pdf'] = {'id': self.name, 'label': self.label} diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index cd5cf895b9..371f9837b1 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -9,10 +9,8 @@ import numpy as np import pandas as pd -from ruamel.yaml import YAML -yaml = YAML(typ='safe') -from validphys.utils import generate_path_filtered_data +from validphys.utils import generate_path_filtered_data, yaml_safe KIN_NAMES = ["kin1", "kin2", "kin3"] log = logging.getLogger(__name__) @@ -432,7 +430,7 @@ def systematic_errors(self, central_values=None): def export_data(self, buffer): """Exports the central data defined by this commondata instance to the given buffer""" ret = {"data_central": self.central_values.tolist()} - yaml.dump(ret, buffer) + yaml_safe.dump(ret, buffer) def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" @@ -461,7 +459,7 @@ def export_uncertainties(self, buffer): "type": "UNCORR", } ret = {"definitions": sorted_definitions, "bins": bins} - yaml.dump(ret, buffer) + yaml_safe.dump(ret, buffer) def export(self, folder_path): """Wrapper around export_data and export_uncertainties diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py index eba319ab5f..00ac155479 100644 --- a/validphys2/src/validphys/eff_exponents.py +++ b/validphys2/src/validphys/eff_exponents.py @@ -12,13 +12,9 @@ import matplotlib as mpl import numpy as np import pandas as pd -from ruamel.yaml import YAML from reportengine import collect from reportengine.checks import check_positive - -yaml = YAML(typ='rt') -yaml.default_flow_style = False from reportengine.figure import figuregen from reportengine.floatformatting import format_number, significant_digits from reportengine.table import table @@ -27,6 +23,7 @@ from validphys.pdfbases import Basis, check_basis import validphys.pdfgrids as pdfgrids from validphys.pdfplots import BandPDFPlotter, PDFPlotter +from validphys.utils import yaml_rt log = logging.getLogger(__name__) @@ -506,7 +503,7 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= (df_effexps,) = next_fit_eff_exps_table # Use round trip loader rather than safe_load in fit.as_input() with open(fit.path / "filter.yml") as f: - filtermap = yaml.load(f) + filtermap = yaml_rt.load(f) previous_exponents = filtermap["fitting"]["basis"] basis = filtermap["fitting"]["fitbasis"] checked = check_basis(basis, None) @@ -529,7 +526,7 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg= previous_exponents[i]["largex"] = [fmt(beta) for beta in betas] with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) - yaml.dump(filtermap, path) + yaml_rt.dump(filtermap, path) yaml_string = fp.read().decode("utf-8") return yaml_string @@ -546,7 +543,7 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip ``` """ - filtermap = yaml.load(iterate_preprocessing_yaml) + filtermap = yaml_rt.load(iterate_preprocessing_yaml) # update description if necessary if _updated_description is not None: @@ -554,7 +551,7 @@ def update_runcard_description_yaml(iterate_preprocessing_yaml, _updated_descrip with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) - yaml.dump(filtermap, path) + yaml_rt.dump(filtermap, path) yaml_string = fp.read().decode("utf-8") return yaml_string @@ -590,7 +587,7 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): ... f.write(yaml_output) """ - filtermap = yaml.load(update_runcard_description_yaml) + filtermap = yaml_rt.load(update_runcard_description_yaml) # iterate t0 filtermap["datacuts"]["t0pdfset"] = fit.name @@ -619,7 +616,7 @@ def iterated_runcard_yaml(fit, update_runcard_description_yaml): with tempfile.NamedTemporaryFile() as fp: path = Path(fp.name) - yaml.dump(filtermap, path) + yaml_rt.dump(filtermap, path) yaml_string = fp.read().decode("utf-8") return yaml_string diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 3260adee06..df698ca34d 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -11,14 +11,11 @@ from typing import Union import numpy as np -from ruamel.yaml import YAML from reportengine.checks import check, make_check - -yaml = YAML(typ='safe') import validphys.cuts from validphys.process_options import PROCESSES -from validphys.utils import generate_path_filtered_data +from validphys.utils import generate_path_filtered_data, yaml_safe log = logging.getLogger(__name__) @@ -158,7 +155,7 @@ def default_filter_settings_input(): """Return a FilterDefaults dataclass with the default hardcoded filter settings. These are defined in ``defaults.yaml`` in the ``validphys.cuts`` module. """ - return FilterDefaults(**yaml.load(read_text(validphys.cuts, "defaults.yaml"))) + return FilterDefaults(**yaml_safe.load(read_text(validphys.cuts, "defaults.yaml"))) def default_filter_rules_input(): @@ -166,7 +163,7 @@ def default_filter_rules_input(): Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. """ - list_rules = yaml.load(read_text(validphys.cuts, "filters.yaml")) + list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml")) return tuple(FilterRule(**rule) for rule in list_rules) diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py index fa5ad8061e..8bd4c5eae0 100644 --- a/validphys2/src/validphys/fitdata.py +++ b/validphys2/src/validphys/fitdata.py @@ -10,17 +10,15 @@ import numpy as np import pandas as pd -from ruamel.yaml import YAML from reportengine import collect from reportengine.checks import CheckError, make_argcheck - -yaml = YAML(typ='safe') from reportengine.floatformatting import ValueErrorTuple from reportengine.table import table from validphys import checks from validphys.core import PDF from validphys.plotoptions.core import get_info +from validphys.utils import yaml_safe # TODO: Add more stuff here as needed for postfit LITERAL_FILES = ['chi2exps.log'] @@ -434,7 +432,7 @@ def _get_fitted_index(pdf, i): """Return the nnfit index for the replica i""" p = pdf.infopath.with_name(f'{pdf.name}_{i:04d}.dat') with open(p) as f: - it = yaml.load_all(f) + it = yaml_safe.load_all(f) metadata = next(it) return metadata['FromMCReplica'] diff --git a/validphys2/src/validphys/lhaindex.py b/validphys2/src/validphys/lhaindex.py index 5d3db810c5..6b4ee00db0 100644 --- a/validphys2/src/validphys/lhaindex.py +++ b/validphys2/src/validphys/lhaindex.py @@ -12,10 +12,8 @@ from pathlib import Path import re -from ruamel.yaml import YAML - -yaml = YAML(typ='safe', pure=False) from validphys.lhapdf_compatibility import lhapdf +from validphys.utils import yaml_safe _indexes_to_names = None _names_to_indexes = None @@ -127,7 +125,7 @@ def infofilename(name): @lru_cache def parse_info(name): with open(infofilename(name)) as infofile: - result = YAML(typ='safe', pure=True).load(infofile) + result = yaml_safe.load(infofile) return result diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index f42af5afad..1f5584e815 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -10,11 +10,10 @@ import numpy as np import pandas as pd -from ruamel.yaml import YAML -yaml = YAML(typ='safe') from validphys import lhaindex from validphys.core import PDF +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -327,7 +326,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): else: out.write(l) if extra_fields is not None: - yaml.dump(extra_fields, out, default_flow_style=False) + yaml_safe.dump(extra_fields, out, default_flow_style=False) _headers, grids = load_all_replicas(pdf) result = (big_matrix(grids).dot(V)).add(grids[0], axis=0) diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index 2b56e7f665..e8312b6c10 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -17,12 +17,9 @@ import urllib.parse as urls import requests -from ruamel.yaml import YAML from nnpdf_data import legacy_to_new_mapping, path_vpdata from reportengine import filefinder - -yaml = YAML(typ='safe') from validphys import lhaindex from validphys.commondataparser import load_commondata_old, parse_new_metadata, parse_set_metadata from validphys.core import ( @@ -40,7 +37,7 @@ PositivitySetSpec, TheoryIDSpec, ) -from validphys.utils import generate_path_filtered_data, tempfile_cleaner +from validphys.utils import generate_path_filtered_data, tempfile_cleaner, yaml_safe log = logging.getLogger(__name__) NNPDF_DIR = "NNPDF" @@ -133,14 +130,13 @@ def _get_nnpdf_profile(profile_path=None): the python prefix (``Path(sys.prefix)/"share"/"NNPDF"``) will be used """ - yaml_reader = YAML(typ='safe', pure=True) home_config = pathlib.Path().home() / ".config" config_folder = pathlib.Path(os.environ.get("XDG_CONFIG_HOME", home_config)) / NNPDF_DIR # Set all default values profile_content = pkgutil.get_data("validphys", "nnprofile_default.yaml") - profile_dict = yaml_reader.load(profile_content) + profile_dict = yaml_safe.load(profile_content) # including the data_path to the validphys package profile_dict.setdefault("data_path", path_vpdata) @@ -157,7 +153,7 @@ def _get_nnpdf_profile(profile_path=None): if profile_path is not None: with open(profile_path, encoding="utf-8") as f: - profile_entries = yaml_reader.load(f) + profile_entries = yaml_safe.load(f) if profile_entries is not None: profile_dict.update(profile_entries) @@ -539,7 +535,7 @@ def check_compound(self, theoryID, setname, cfac): raise CompoundNotFound(msg) # This is a little bit funny, but is the least amount of thinking... yaml_format = 'FK:\n' + re.sub('FK:', ' - ', txt) - data = yaml.load(yaml_format) + data = yaml_safe.load(yaml_format) # we have to split out 'FK_' the extension to get a name consistent # with everything else try: diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 1e1677b7f8..5eaca0c9cb 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -8,14 +8,10 @@ import re import shutil -from ruamel.yaml import YAML - from reportengine.checks import check, make_argcheck - -yaml = YAML(typ='safe') from validphys.core import PDF from validphys.renametools import rename_pdf -from validphys.utils import tempfile_cleaner +from validphys.utils import tempfile_cleaner, yaml_safe log = logging.getLogger(__name__) @@ -101,7 +97,7 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = info_file = (temp_pdf / temp_pdf.name).with_suffix('.info') with open(info_file) as stream: - info_yaml = yaml.load(stream) + info_yaml = yaml_safe.load(stream) info_yaml['NumMembers'] = new_nrep info_yaml['ErrorType'] += '+as' extra_desc = '; '.join( @@ -109,7 +105,7 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = ) info_yaml['SetDesc'] += f"; {extra_desc}" with open(info_file, 'w') as stream: - yaml.dump(info_yaml, stream) + yaml_safe.dump(info_yaml, stream) # Rename the base pdf to the final name rename_pdf(temp_pdf, pdf.name, target_name) diff --git a/validphys2/src/validphys/scripts/vp_comparefits.py b/validphys2/src/validphys/scripts/vp_comparefits.py index b15a079f09..eb2251038a 100644 --- a/validphys2/src/validphys/scripts/vp_comparefits.py +++ b/validphys2/src/validphys/scripts/vp_comparefits.py @@ -5,14 +5,13 @@ # TODO: Look into making these lazy imports import prompt_toolkit from prompt_toolkit.completion import WordCompleter -from ruamel.yaml import YAML -yaml = YAML(typ='safe') from reportengine.colors import t from validphys import compareclosuretemplates, comparefittemplates from validphys.app import App from validphys.loader import RemoteLoader from validphys.promptutils import KeywordsWithCache, confirm +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -241,7 +240,7 @@ def get_config(self): with open(self.args['config_yml']) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.load(f) + c = yaml_safe.load(f) c.update(self.complete_mapping()) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_deltachi2.py b/validphys2/src/validphys/scripts/vp_deltachi2.py index cfb59941d9..41ca8543eb 100644 --- a/validphys2/src/validphys/scripts/vp_deltachi2.py +++ b/validphys2/src/validphys/scripts/vp_deltachi2.py @@ -2,12 +2,9 @@ import os import pwd -from ruamel.yaml import YAML - -yaml = YAML(typ='safe') - from validphys import deltachi2templates from validphys.app import App +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -72,7 +69,7 @@ def get_config(self): with open(runcard) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.load(f) + c = yaml_safe.load(f) c.update(complete_mapping) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_hyperoptplot.py b/validphys2/src/validphys/scripts/vp_hyperoptplot.py index 82ce13a427..0d767f2aa8 100644 --- a/validphys2/src/validphys/scripts/vp_hyperoptplot.py +++ b/validphys2/src/validphys/scripts/vp_hyperoptplot.py @@ -1,13 +1,11 @@ -from ruamel.yaml import YAML +import logging +import os +import pwd from validphys import hyperplottemplates from validphys.app import App from validphys.loader import HyperscanNotFound, Loader - -yaml = YAML(typ='safe') -import logging -import os -import pwd +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -101,7 +99,7 @@ def complete_mapping(self): hyperop_folder = hyperop_folder[:-1] with open(hyperopt_filter) as f: - filtercard = yaml.load(f) + filtercard = yaml_safe.load(f) folder_path = hyperop_folder index_slash = folder_path.rfind("/") + 1 @@ -145,7 +143,7 @@ def get_config(self): with open(self.args['config_yml']) as f: # TODO: Ideally this would load round trip but needs # to be fixed in reportengine. - c = yaml.load(f) + c = yaml_safe.load(f) c.update(self.complete_mapping()) return self.config_class(c, environment=self.environment) diff --git a/validphys2/src/validphys/scripts/vp_nextfitruncard.py b/validphys2/src/validphys/scripts/vp_nextfitruncard.py index 5cf48092bc..03eeeddb6a 100644 --- a/validphys2/src/validphys/scripts/vp_nextfitruncard.py +++ b/validphys2/src/validphys/scripts/vp_nextfitruncard.py @@ -22,13 +22,10 @@ import sys import prompt_toolkit -from ruamel.yaml import YAML from reportengine import colors - -yaml = YAML(typ='safe') - from validphys.api import API +from validphys.utils import yaml_safe # arguments for np.clip to enforce integrability. # key should be identical to runcard key, first inner dictionary can contain @@ -127,7 +124,7 @@ def main(): preproc_lims = PREPROCESSING_LIMS log.info( "The following constraints will be used for preprocessing ranges, \n%s", - yaml.dump(preproc_lims, sys.stdout), + yaml_safe.dump(preproc_lims, sys.stdout), ) else: # don't enforce any limits. diff --git a/validphys2/src/validphys/scripts/vp_pdffromreplicas.py b/validphys2/src/validphys/scripts/vp_pdffromreplicas.py index c53d4e615a..4c87e8584a 100755 --- a/validphys2/src/validphys/scripts/vp_pdffromreplicas.py +++ b/validphys2/src/validphys/scripts/vp_pdffromreplicas.py @@ -27,15 +27,12 @@ import tempfile import pandas as pd -from ruamel.yaml import YAML from reportengine import colors - -yaml = YAML(typ='safe') - from validphys import lhaindex from validphys.lhio import new_pdf_from_indexes from validphys.loader import FallbackLoader +from validphys.utils import yaml_safe log = logging.getLogger() log.setLevel(logging.INFO) @@ -132,11 +129,11 @@ def main(): shutil.copyfile(base_name + "_0001.dat", base_name + "_0002.dat") # fixup info file with open(base_name + ".info") as f: - info_file = yaml.load(f) + info_file = yaml_safe.load(f) info_file["NumMembers"] = 3 with open(base_name + ".info", "w") as f: - yaml.dump(info_file, f) + yaml_safe.dump(info_file, f) # here we update old indices in case the user creates # the original_index_mapping.csv diff --git a/validphys2/src/validphys/scripts/vp_pdfrename.py b/validphys2/src/validphys/scripts/vp_pdfrename.py index 8f2816d47f..ee1e0c2f7f 100755 --- a/validphys2/src/validphys/scripts/vp_pdfrename.py +++ b/validphys2/src/validphys/scripts/vp_pdfrename.py @@ -19,13 +19,10 @@ import tempfile import lhapdf -from ruamel.yaml import YAML from reportengine import colors - -yaml = YAML(typ='safe') - from validphys.renametools import rename_pdf +from validphys.utils import yaml_safe # Taking command line arguments @@ -77,7 +74,7 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): infopath = pdf_path / f"{pdf_name}.info" with open(infopath) as f: - res = yaml.load(f) + res = yaml_safe.load(f) # If a field entry is not provided, then we revert to the existing # field in pre-existing info file. @@ -97,8 +94,8 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): res["Reference"] = field_dict["reference"] with open(infopath, "w") as f: - yaml.default_flow_style = True - yaml.dump(res, f) + yaml_safe.default_flow_style = True + yaml_safe.dump(res, f) def compress(lhapdf_path: pathlib.Path): diff --git a/validphys2/src/validphys/scripts/wiki_upload.py b/validphys2/src/validphys/scripts/wiki_upload.py index d4414c404f..3be7618b37 100644 --- a/validphys2/src/validphys/scripts/wiki_upload.py +++ b/validphys2/src/validphys/scripts/wiki_upload.py @@ -53,9 +53,8 @@ def edit_settings(d): def handle_meta_interactive(output): metapath = output / 'meta.yaml' - from ruamel.yaml import YAML + from validphys.utils import yaml_safe - yaml = YAML(typ='safe') # The yaml lexer is broken. Use something else. lex = pygments.lexers.get_lexer_by_name('pkgconfig') fmt = pygments.formatters.TerminalFormatter() @@ -71,7 +70,7 @@ def handle_meta_interactive(output): edit = not confirm(msg, default=True) if edit: - d = yaml.load(content) + d = yaml_safe.load(content) else: return @@ -88,7 +87,7 @@ def handle_meta_interactive(output): print("Metadata:") s = io.StringIO() - yaml.dump(d, s) + yaml_safe.dump(d, s) metastr = s.getvalue() print(pygments.highlight(metastr, lex, fmt)) diff --git a/validphys2/src/validphys/tests/test_effexponents.py b/validphys2/src/validphys/tests/test_effexponents.py index afdbdd4d60..1944e5523c 100644 --- a/validphys2/src/validphys/tests/test_effexponents.py +++ b/validphys2/src/validphys/tests/test_effexponents.py @@ -1,10 +1,8 @@ -from ruamel.yaml import YAML - -yaml = YAML(typ='rt') from validphys.api import API from validphys.loader import FallbackLoader as Loader from validphys.scripts.vp_nextfitruncard import PREPROCESSING_LIMS from validphys.tests.conftest import FIT, FIT_ITERATED +from validphys.utils import yaml_safe def test_next_runcard(): @@ -24,9 +22,9 @@ def test_next_runcard(): # to load various keys that are not present in the actual runcard for # backwards compatibility with open(l.check_fit(FIT_ITERATED).path / "filter.yml") as f: - ite2_runcard = yaml.load(f) + ite2_runcard = yaml_safe.load(f) - predicted_ite2_runcard = yaml.load( + predicted_ite2_runcard = yaml_safe.load( API.iterated_runcard_yaml(fit=FIT, _flmap_np_clip_arg=PREPROCESSING_LIMS) ) diff --git a/validphys2/src/validphys/tests/test_postfit.py b/validphys2/src/validphys/tests/test_postfit.py index 43807b6cca..31f2ad6cdb 100644 --- a/validphys2/src/validphys/tests/test_postfit.py +++ b/validphys2/src/validphys/tests/test_postfit.py @@ -9,12 +9,9 @@ import shutil import subprocess as sp -from ruamel.yaml import YAML - from validphys.loader import FallbackLoader as Loader from validphys.tests.conftest import FIT - -yaml = YAML(typ='safe') +from validphys.utils import yaml_safe def test_postfit(tmp): @@ -81,7 +78,7 @@ def test_postfit(tmp): files = [pdfsetpath / f"{TMPFIT}_{x:04d}.dat", postfitpath / f"replica_{x}/{TMPFIT}.dat"] for file in files: with open(file) as f: - data = yaml.load_all(f) + data = yaml_safe.load_all(f) metadata = next(data) repnos.add(metadata["FromMCReplica"]) assert ( @@ -92,7 +89,7 @@ def test_postfit(tmp): # Check that number of PDF members is written correctly infopath = postfitpath / f"{TMPFIT}/{TMPFIT}.info" with open(infopath) as f: - data = yaml.load(f) + data = yaml_safe.load(f) # Add one to nrep to account for replica 0 assert ( data["NumMembers"] == nrep + 1 diff --git a/validphys2/src/validphys/tests/test_scalevariationtheoryids.py b/validphys2/src/validphys/tests/test_scalevariationtheoryids.py index 71ad3d4183..19ac62c83f 100644 --- a/validphys2/src/validphys/tests/test_scalevariationtheoryids.py +++ b/validphys2/src/validphys/tests/test_scalevariationtheoryids.py @@ -2,11 +2,9 @@ import importlib.resources as resources import pytest -from ruamel.yaml import YAML import validphys.scalevariations - -yaml = YAML() +from validphys.utils import yaml_safe def test_unique_theoryid_variations(): @@ -15,7 +13,7 @@ def test_unique_theoryid_variations(): """ file_path = resources.files(validphys.scalevariations).joinpath("scalevariationtheoryids.yaml") with file_path.open("r") as file: - data = yaml.load(file) + data = yaml_safe.load(file) thids = [k["theoryid"] for k in data["scale_variations_for"]] counter = Counter(thids) duplicates = [item for item, count in counter.items() if count > 1] diff --git a/validphys2/src/validphys/tests/test_theorydbutils.py b/validphys2/src/validphys/tests/test_theorydbutils.py index 22c78fe87c..c4349ac272 100644 --- a/validphys2/src/validphys/tests/test_theorydbutils.py +++ b/validphys2/src/validphys/tests/test_theorydbutils.py @@ -1,14 +1,13 @@ import pytest -from ruamel.yaml import YAML from validobj import ValidationError from nnpdf_data.theorydbutils import TheoryNotFoundInDatabase, fetch_all, fetch_theory from validphys.api import API from validphys.loader import Loader +from validphys.utils import yaml_safe L = Loader() DBPATH = L.theorydb_folder -yaml = YAML(typ='safe') def test_fetch_theory(): @@ -34,7 +33,7 @@ def _dump_and_check_error(tdict, tmp, bad_number=999): """Dump theory dict to a file and load expecting an error""" tdict["ID"] = bad_number ofile = tmp / f"{bad_number}.yaml" - yaml.dump(tdict, ofile.open("w")) + yaml_safe.dump(tdict, ofile.open("w")) with pytest.raises(ValidationError): fetch_theory(tmp, bad_number) diff --git a/validphys2/src/validphys/uploadutils.py b/validphys2/src/validphys/uploadutils.py index dd6cbe02fd..f3eb486f69 100644 --- a/validphys2/src/validphys/uploadutils.py +++ b/validphys2/src/validphys/uploadutils.py @@ -22,13 +22,11 @@ import prompt_toolkit from prompt_toolkit.completion import WordCompleter -from ruamel.yaml import YAML from reportengine.colors import t - -yaml = YAML(typ='safe') from validphys.loader import Loader, RemoteLoader from validphys.renametools import Spinner +from validphys.utils import yaml_safe log = logging.getLogger(__name__) @@ -406,7 +404,7 @@ def interactive_meta(path): meta_dict = {"title": title, "author": author, "keywords": keywords} with open(path / "meta.yaml", "w") as stream: - yaml.dump(meta_dict, stream) + yaml_safe.dump(meta_dict, stream) def check_input(path): diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py index 1c77fd5b5d..89706611f0 100644 --- a/validphys2/src/validphys/utils.py +++ b/validphys2/src/validphys/utils.py @@ -4,6 +4,11 @@ import tempfile import numpy as np +from ruamel.yaml import YAML + +yaml_safe = YAML(typ='safe') +yaml_rt = YAML(typ='rt') +yaml_fast = YAML(typ='safe', pure=False) # uses CLoader if available def generate_path_filtered_data(fit_path, setname): From 310d9d0827e5210bde488f6ca494b27b1be4a460 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 18:37:19 +0000 Subject: [PATCH 13/20] replace ruamel with pyyaml in nnpdf_data --- nnpdf_data/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml index 68e272ccb7..839026413c 100644 --- a/nnpdf_data/pyproject.toml +++ b/nnpdf_data/pyproject.toml @@ -33,7 +33,7 @@ include = [ [tool.poetry.dependencies] python = "^3.9" -"ruamel.yaml" = "<0.18" +pyyaml = "*" # Required to run filters: `filter_files_dependencies` pandas = {version = "*", optional = true} numpy = {version = "*", optional = true} From abfc5eb9af45730aa2fa52ca585f36bd49268af1 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 3 Dec 2024 19:04:34 +0000 Subject: [PATCH 14/20] use yaml_rt for .info file --- validphys2/src/validphys/lhio.py | 3 ++- validphys2/src/validphys/scripts/vp_pdfrename.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index 1f5584e815..d0d8f9d861 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -326,7 +326,8 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): else: out.write(l) if extra_fields is not None: - yaml_safe.dump(extra_fields, out, default_flow_style=False) + yaml_safe.default_flow_style = False + yaml_safe.dump(extra_fields, out) _headers, grids = load_all_replicas(pdf) result = (big_matrix(grids).dot(V)).add(grids[0], axis=0) diff --git a/validphys2/src/validphys/scripts/vp_pdfrename.py b/validphys2/src/validphys/scripts/vp_pdfrename.py index ee1e0c2f7f..68c9a3748d 100755 --- a/validphys2/src/validphys/scripts/vp_pdfrename.py +++ b/validphys2/src/validphys/scripts/vp_pdfrename.py @@ -22,7 +22,7 @@ from reportengine import colors from validphys.renametools import rename_pdf -from validphys.utils import yaml_safe +from validphys.utils import yaml_rt # Taking command line arguments @@ -74,7 +74,7 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): infopath = pdf_path / f"{pdf_name}.info" with open(infopath) as f: - res = yaml_safe.load(f) + res = yaml_rt.load(f) # If a field entry is not provided, then we revert to the existing # field in pre-existing info file. @@ -94,8 +94,7 @@ def fixup_ref(pdf_path: pathlib.Path, field_dict): res["Reference"] = field_dict["reference"] with open(infopath, "w") as f: - yaml_safe.default_flow_style = True - yaml_safe.dump(res, f) + yaml_rt.dump(res, f) def compress(lhapdf_path: pathlib.Path): From 3a09960b7449f97e9ac37eecd7451a42575ceb4a Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Wed, 4 Dec 2024 11:16:53 +0000 Subject: [PATCH 15/20] use yaml_rt to deal with error messages, otherwise yaml_fast --- nnpdf_data/nnpdf_data/utils.py | 11 +++++++---- nnpdf_data/pyproject.toml | 2 +- pyproject.toml | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py index ad7f75e4c7..be7350d021 100644 --- a/nnpdf_data/nnpdf_data/utils.py +++ b/nnpdf_data/nnpdf_data/utils.py @@ -1,7 +1,10 @@ import pathlib +from ruamel.yaml import YAML from validobj import ValidationError, parse_input -import yaml + +yaml_rt = YAML(typ="rt") +yaml_fast = YAML(typ="safe", pure=False) def parse_yaml_inp(input_yaml, spec): @@ -12,14 +15,14 @@ def parse_yaml_inp(input_yaml, spec): https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers """ input_yaml = pathlib.Path(input_yaml) - inp = yaml.load(input_yaml.read_text(encoding="utf-8"), yaml.Loader) + inp = yaml_fast.load(input_yaml.read_text(encoding="utf-8")) try: return parse_input(inp, spec) except ValidationError as e: current_exc = e - # In order to provide a more complete error information, use round_trip_load + # In order to provide a more complete error information, use round trip # to read the .yaml file again (insetad of using the CLoader) - current_inp = yaml.load(input_yaml.open("r", encoding="utf-8"), yaml.Loader) + current_inp = yaml_rt.load(input_yaml.open("r", encoding="utf-8")) error_text_lines = [] while current_exc: if hasattr(current_exc, 'wrong_field'): diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml index 839026413c..9c07b22be7 100644 --- a/nnpdf_data/pyproject.toml +++ b/nnpdf_data/pyproject.toml @@ -33,7 +33,7 @@ include = [ [tool.poetry.dependencies] python = "^3.9" -pyyaml = "*" +"ruamel.yaml" = "^0.15" # Required to run filters: `filter_files_dependencies` pandas = {version = "*", optional = true} numpy = {version = "*", optional = true} diff --git a/pyproject.toml b/pyproject.toml index eef39b0c7a..de3464aa4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ matplotlib = "^3.9" pineappl = "^0.8.2" pandas = "*" numpy = "*" -"ruamel.yaml" = "*" +"ruamel.yaml" = "^0.15" validobj = "*" prompt_toolkit = "*" # Reportengine needs to be installed from git From 75491436ede960ff22bae79354fe240832070f46 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Wed, 4 Dec 2024 11:17:36 +0000 Subject: [PATCH 16/20] update inline comment --- nnpdf_data/nnpdf_data/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py index be7350d021..89d8dad8f8 100644 --- a/nnpdf_data/nnpdf_data/utils.py +++ b/nnpdf_data/nnpdf_data/utils.py @@ -27,7 +27,7 @@ def parse_yaml_inp(input_yaml, spec): while current_exc: if hasattr(current_exc, 'wrong_field'): wrong_field = current_exc.wrong_field - # Mappings compping from ``round_trip_load`` have an + # Mappings compping from yaml_rt have an # ``lc`` attribute that gives a tuple of # ``(line_number, column)`` for a given item in # the mapping. From 0e0741a5cc36261b9add9a5bee1edb212cdb3957 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Wed, 4 Dec 2024 11:47:07 +0000 Subject: [PATCH 17/20] make test pass --- nnpdf_data/pyproject.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml index 9c07b22be7..11609e12ed 100644 --- a/nnpdf_data/pyproject.toml +++ b/nnpdf_data/pyproject.toml @@ -33,7 +33,7 @@ include = [ [tool.poetry.dependencies] python = "^3.9" -"ruamel.yaml" = "^0.15" +"ruamel.yaml" = "*" # Required to run filters: `filter_files_dependencies` pandas = {version = "*", optional = true} numpy = {version = "*", optional = true} diff --git a/pyproject.toml b/pyproject.toml index de3464aa4b..eef39b0c7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ matplotlib = "^3.9" pineappl = "^0.8.2" pandas = "*" numpy = "*" -"ruamel.yaml" = "^0.15" +"ruamel.yaml" = "*" validobj = "*" prompt_toolkit = "*" # Reportengine needs to be installed from git From 0622c49305a286c0b251fe19964459dcfddd6416 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Wed, 4 Dec 2024 13:05:03 +0000 Subject: [PATCH 18/20] use CLoader in nnpdf_data utils --- nnpdf_data/nnpdf_data/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nnpdf_data/nnpdf_data/utils.py b/nnpdf_data/nnpdf_data/utils.py index 89d8dad8f8..b07fe4f399 100644 --- a/nnpdf_data/nnpdf_data/utils.py +++ b/nnpdf_data/nnpdf_data/utils.py @@ -2,9 +2,13 @@ from ruamel.yaml import YAML from validobj import ValidationError, parse_input +import yaml yaml_rt = YAML(typ="rt") -yaml_fast = YAML(typ="safe", pure=False) +try: + Loader = yaml.CLoader +except AttributeError: + Loader = yaml.Loader def parse_yaml_inp(input_yaml, spec): @@ -15,7 +19,7 @@ def parse_yaml_inp(input_yaml, spec): https://validobj.readthedocs.io/en/latest/examples.html#yaml-line-numbers """ input_yaml = pathlib.Path(input_yaml) - inp = yaml_fast.load(input_yaml.read_text(encoding="utf-8")) + inp = yaml.load(input_yaml.read_text(encoding="utf-8"), Loader=Loader) try: return parse_input(inp, spec) except ValidationError as e: From 418ecf21351968c8243d3a6218027a17be7185e5 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Wed, 4 Dec 2024 13:18:02 +0000 Subject: [PATCH 19/20] don't say yaml_fast uses CLoader --- validphys2/src/validphys/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py index 89706611f0..042d8b3925 100644 --- a/validphys2/src/validphys/utils.py +++ b/validphys2/src/validphys/utils.py @@ -8,7 +8,7 @@ yaml_safe = YAML(typ='safe') yaml_rt = YAML(typ='rt') -yaml_fast = YAML(typ='safe', pure=False) # uses CLoader if available +yaml_fast = YAML(typ='safe', pure=False) # uses Cparser if available (not faster than yaml_safe) def generate_path_filtered_data(fit_path, setname): From 2e4096d9fccdabd7b5d983abd1ad40dade52cce5 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Wed, 4 Dec 2024 14:37:28 +0000 Subject: [PATCH 20/20] point reportengine to master again --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index eef39b0c7a..e9f4ce5d9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ numpy = "*" validobj = "*" prompt_toolkit = "*" # Reportengine needs to be installed from git -reportengine = { git = "https://github.com/NNPDF/reportengine", branch="update_yaml" } +reportengine = { git = "https://github.com/NNPDF/reportengine" } # Fit psutil = "*" tensorflow = "*"