From 92c5352ce6682dbd2c3c291a3807c4afcdf2335b Mon Sep 17 00:00:00 2001 From: Dominik1123 Date: Thu, 24 Jan 2019 01:08:45 +0100 Subject: [PATCH] Provide interface for registering custom file types for token instrumentation File types are stored in a global dict; registering a new type requires specifying its suffix as well as its comment characters. --- README.md | 1 + nevergrad/instrumentation/__init__.py | 2 +- nevergrad/instrumentation/folderfunction.py | 9 ++---- nevergrad/instrumentation/instantiate.py | 31 +++++++++++++------ nevergrad/instrumentation/test_instantiate.py | 23 +++++++++++--- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 49e3f456a..985979eaa 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,7 @@ Some important things to note: - using `FolderFunction` argument `clean_copy=True` will copy your folder so that tempering with it during optimization will run different versions of your code. - under the hood, with or without `clean_copy=True`, when calling the function, `FolderFunction` will create symlink copy of the initial folder, remove the files that have tokens, and create new ones with appropriate values. Symlinks are used in order to avoid duplicating large projects, but they have some drawbacks, see next point ;) - one can add a compilation step to `FolderFunction` (the compilation just has to be included in the script). However, be extra careful that if the initial folder contains some build files, they could be modified by the compilation step, because of the symlinks. Make sure that during compilation, you remove the build symlinks first! **This feature has not been fool proofed yet!!!** + - the following external file types are registered by default: `[".c", ".h", ".cpp", ".hpp", ".py", ".m"]`. Custom file types can be registered using `instrumentation.register_file_type` by providing the relevant file suffix as well as the characters that indicate a comment. 
diff --git a/nevergrad/instrumentation/__init__.py b/nevergrad/instrumentation/__init__.py index 8788bc41e..46deb039d 100644 --- a/nevergrad/instrumentation/__init__.py +++ b/nevergrad/instrumentation/__init__.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. from .folderfunction import FolderFunction -from .instantiate import InstrumentedFunction +from .instantiate import InstrumentedFunction, register_file_type from . import variables from .variables import Instrumentation from .utils import TemporaryDirectoryCopy diff --git a/nevergrad/instrumentation/folderfunction.py b/nevergrad/instrumentation/folderfunction.py index 7f83f8587..ebcb5c0d7 100644 --- a/nevergrad/instrumentation/folderfunction.py +++ b/nevergrad/instrumentation/folderfunction.py @@ -28,8 +28,6 @@ class FolderFunction: # should derive from BaseFunction? clean_copy: bool whether to create an initial clean temporary copy of the folder in order to avoid versioning problems (instantiations are lightweight symlinks in any case) - extension: tuple - list of extensions for files to parametrize (files with dftokens) Returns ------- @@ -51,14 +49,11 @@ class FolderFunction: # should derive from BaseFunction? 
""" # pylint: disable=too-many-arguments - def __init__(self, folder: Union[Path, str], command: List[str], verbose: bool = False, clean_copy: bool = False, - extensions: Optional[List[str]] = None) -> None: - if extensions is None: - extensions = [".py", "m", ".cpp", ".hpp", ".c", ".h"] + def __init__(self, folder: Union[Path, str], command: List[str], verbose: bool = False, clean_copy: bool = False) -> None: self.command = command self.verbose = verbose self.postprocessings = [get_last_line_as_float] - self.instrumented_folder = InstrumentedFolder(folder, extensions=extensions, clean_copy=clean_copy) + self.instrumented_folder = InstrumentedFolder(folder, clean_copy=clean_copy) self.last_full_output: Optional[str] = None @property diff --git a/nevergrad/instrumentation/instantiate.py b/nevergrad/instrumentation/instantiate.py index 7491b5f7e..0becde050 100644 --- a/nevergrad/instrumentation/instantiate.py +++ b/nevergrad/instrumentation/instantiate.py @@ -19,6 +19,20 @@ BIG_NUMBER = 3000 LINETOKEN = "@nevergrad" + "@" # Do not trigger an error when parsing this file... 
+FILE_TYPES = { + ".c": dict(comment="//"), + ".py": dict(comment="#"), + ".m": dict(comment="%"), +} +FILE_TYPES[".h"] = FILE_TYPES[".hpp"] = FILE_TYPES[".cpp"] = FILE_TYPES[".c"] + + +def register_file_type(suffix: str, comment_chars: str): + """Register a new file type to be used for token instrumentation by providing the relevant file suffix as well as + the characters that indicate a comment.""" + if not suffix.startswith("."): + suffix = f".{suffix}" + FILE_TYPES[suffix] = {"comment": comment_chars} def symlink_folder_tree(folder: Union[Path, str], shadow_folder: Union[Path, str]) -> None: @@ -35,10 +49,11 @@ def symlink_folder_tree(folder: Union[Path, str], shadow_folder: Union[Path, str def uncomment_line(line: str, extension: str) -> str: - comment_chars = {x: "//" for x in [".cpp", ".hpp", ".c", ".h"]} - comment_chars.update({".py": r"#", ".m": r"%"}) + if extension not in FILE_TYPES: + raise RuntimeError(f'Unknown file type: {extension}\nDid you register it using {register_file_type.__name__}?') + comment_chars = FILE_TYPES[extension]["comment"] pattern = r'^(?P *)' - pattern += r'(?P' + comment_chars[extension] + r" *" + LINETOKEN + r" *)" + pattern += r'(?P' + comment_chars + r" *" + LINETOKEN + r" *)" pattern += r'(?P.*)' lineseg = re.search(pattern, line) if lineseg is not None: @@ -58,7 +73,7 @@ def __init__(self, filepath: Path) -> None: text = f.read() if "NG_" in text and LINETOKEN in text: # assuming there is a token somewhere lines = text.splitlines() - ext = filepath.suffix + ext = filepath.suffix.lower() lines = [(l if LINETOKEN not in l else uncomment_line(l, ext)) for l in lines] text = "\n".join(lines) self.text, self.variables = utils.replace_tokens_by_placeholders(text) @@ -102,8 +117,6 @@ class InstrumentedFolder: # should derive from base function? clean_copy: bool whether to create an initial clean temporary copy of the folder in order to avoid versioning problems (instantiations are lightweight symlinks in any case). 
- extensions: list - extensions of the instrumented files which must be instantiated Caution ------- @@ -114,18 +127,16 @@ class InstrumentedFolder: # should derive from base function? variable to a shared directory """ - def __init__(self, folder: Union[Path, str], clean_copy: bool = False, extensions: Optional[List[str]] = None) -> None: + def __init__(self, folder: Union[Path, str], clean_copy: bool = False) -> None: self._clean_copy = None self.folder = Path(folder).expanduser().absolute() assert self.folder.exists(), "{folder} does not seem to exist" if clean_copy: self._clean_copy = utils.TemporaryDirectoryCopy(str(folder)) self.folder = self._clean_copy.copyname - if extensions is None: - extensions = [".py", "m", ".cpp", ".hpp", ".c", ".h"] self.instrumented_files: List[InstrumentedFile] = [] for fp in self.folder.glob("**/*"): # TODO filter out all hidden files - if fp.is_file() and fp.suffix in extensions: + if fp.is_file() and fp.suffix.lower() in FILE_TYPES: instru_f = InstrumentedFile(fp) if instru_f.dimension: self.instrumented_files.append(instru_f) diff --git a/nevergrad/instrumentation/test_instantiate.py b/nevergrad/instrumentation/test_instantiate.py index aa876e86c..3651fe5d0 100644 --- a/nevergrad/instrumentation/test_instantiate.py +++ b/nevergrad/instrumentation/test_instantiate.py @@ -24,6 +24,13 @@ def test_symlink_folder_tree() -> None: @genty.genty class InstantiationTests(TestCase): + def _test_uncomment_line(self, line: str, ext: str, expected: str) -> None: + if isinstance(expected, str): + output = instantiate.uncomment_line(line, ext) + np.testing.assert_equal(output, expected) + else: + np.testing.assert_raises(expected, instantiate.uncomment_line, line, ext) + # CAREFUL: avoid triggering errors if the module parses itself... 
# Note: 'bidule' is French for dummy widget @genty.genty_dataset( # type: ignore @@ -33,13 +40,19 @@ class InstantiationTests(TestCase): bad_python=(" // @" + "nevergrad@ bidule", ".py", RuntimeError), cpp=(f" //{LINETOKEN}bidule", ".cpp", " bidule"), matlab=(f"%{LINETOKEN}bidule", ".m", "bidule"), + unknown=(f"// {LINETOKEN} bidule", ".unknown", RuntimeError), ) def test_uncomment_line(self, line: str, ext: str, expected: str) -> None: - if isinstance(expected, str): - output = instantiate.uncomment_line(line, ext) - np.testing.assert_equal(output, expected) - else: - np.testing.assert_raises(expected, instantiate.uncomment_line, line, ext) + self._test_uncomment_line(line, ext, expected) + + @genty.genty_dataset( + custom=(f"// {LINETOKEN} bidule", ".custom", "//", "bidule"), + wrong_comment_chars=(f"// {LINETOKEN} bidule", ".custom", "#", RuntimeError), + ) + def test_uncomment_line_custom_file_type(self, line: str, ext: str, comment: str, expected: str) -> None: + instantiate.register_file_type(ext, comment) + self._test_uncomment_line(line, ext, expected) + del instantiate.FILE_TYPES[ext] @genty.genty_dataset( # type: ignore with_clean_copy=(True,),