From 44532208bb82596ce49178f273e46bff9acaadee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my?= Date: Wed, 2 Jan 2019 09:43:52 +0100 Subject: [PATCH] Added guidelines to add algorithms (#14) --- README.md | 4 +- docs/adding_an_algorithm.md | 100 +++++++++++++++ nevergrad/benchmark/experiments.py | 151 +--------------------- nevergrad/benchmark/frozenexperiments.py | 154 +++++++++++++++++++++++ nevergrad/benchmark/xpbase.py | 4 + nevergrad/common/test_testing.py | 8 ++ nevergrad/common/testing.py | 55 +++++++- 7 files changed, 326 insertions(+), 150 deletions(-) create mode 100644 docs/adding_an_algorithm.md create mode 100644 nevergrad/benchmark/frozenexperiments.py diff --git a/README.md b/README.md index 1d3fa1206..321214332 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ An *ask and tell* interface is also available. The 3 key methods for this interf - `ask`: suggest a point on which to evaluate the function to optimize. - `tell`: for updated the optimizer with the value of the function at a given point. - `provide_recommendation`: returns the point the algorithms considers the best. -For most optimization algorithms in the platform, they can be called in arbitrary order - asynchronous optimization is OK. +For most optimization algorithms in the platform, they can be called in arbitrary order - asynchronous optimization is OK. Some algorithms (with class attribute `no_parallelization=True`) however do not support this. Here is a simpler example in the sequential case (this is what happens in the `optimize` method for `num_workers=1`): ```python @@ -116,7 +116,7 @@ Example (please note that `nevergrad` needs to be cloned in your working directo ``` python -m nevergrad.benchmark additional_experiment --imports=nevergrad/benchmark/additional/example.py ``` -See the [example file](nevergrad/benchmark/additional/example.py) to understand more precisely how functions/optimizers/experiments are specified.
You can also submit a pull request to add your code directly in `nevergrad`. +See the [example file](nevergrad/benchmark/additional/example.py) to understand more precisely how functions/optimizers/experiments are specified. You can also submit a pull request to add your code directly in `nevergrad`. In this case, please refer to these [guidelines](docs/adding_an_algorithm.md). Functions used for the experiments must derive from `nevergrad.functions.BaseFunction`. This abstract class helps you set up a description of your function settings through the `get_summary` method, which is called to create the columns of the data file produced by the experiments. See the docstrings for more information, and [functionlib.py](nevergrad/functions/functionlib.py) and [example.py](nevergrad/benchmark/additional/example.py) for examples. diff --git a/docs/adding_an_algorithm.md b/docs/adding_an_algorithm.md new file mode 100644 index 000000000..0eab4a83e --- /dev/null +++ b/docs/adding_an_algorithm.md @@ -0,0 +1,100 @@ +# Adding an algorithm + +These guidelines are for people who want to add an algorithm to `nevergrad`. Feel free to update them if you find them unclear or think they should evolve. + +## Where to add the algorithm? + +All optimizers are implemented in the `nevergrad.optimization` subpackage, and all optimizer classes are available either in the `nevergrad.optimization.optimizerlib` module, or through the optimizer registry: `nevergrad.optimization.registry`. + +Implementations are however spread into several files: +- [optimizerlib.py](../nevergrad/optimization/optimizerlib.py): this is the default file, where most algorithms are implemented. It also imports optimizers from all other files. +- [oneshot.py](../nevergrad/optimization/oneshot.py): this is where one-shot optimizers are implemented +- [differentialevolution.py](../nevergrad/optimization/differentialevolution.py): this is where evolutionary algorithms are implemented.
+- [recastlib.py](../nevergrad/optimization/recastlib.py): this is where we implement ask & tell versions of existing Python implementations which do not follow this pattern. The underlying class which helps spawn a subprocess to run the existing implementation into is in [recaster.py](../nevergrad/optimization/recaster.py). Hopefully, you won't need this. + +If you implement one new algorithm and if this algorithm is not one-shot/evolutionary/recast, you should implement it into [optimizerlib.py](../nevergrad/optimization/optimizerlib.py). If you implement a whole family of algorithms, you are welcome to create a new corresponding file. +Still, this structure is not final, it is bound to evolve and you are welcome to amend it. + + +## How to implement it? + +### Base class features + +All algorithms derive from a base class named `Optimizer` and are registered through a decorator. The implementation of the base class is [here](../nevergrad/optimization/base.py). +This base class implements the `ask` and `tell` interface. + +It records all evaluated points through the `archive` attribute, which is of type: +``` +Dict[Tuple[float,...], Value] +``` +The key tuple is the point location, and `Value` is a class with attributes: +- `count`: number of evaluations at this point. +- `mean`: mean value of the evaluations at this point. +- `variance`: variance of the evaluations at this point. + +For more details, see the implementation in [utils.py](../nevergrad/optimization/utils.py). + +Through the archive, you can therefore access most useful information about past evaluations. + +The base `Optimizer` class also tracks the best optimistic and pessimistic points through the `current_bests` attribute which is of type: +``` +Dict[str, Point] +``` +The key string is either `optimistic` or `pessimistic`, and the `Point` value is a `Value` with an additional `x` attribute, recording the location of the point.
+ + +### Methods and attributes + +4 methods are designed to be overridden: +- `__init__`: for the initialization of your algorithm +- `_internal_ask`: to fetch the next point to be evaluated. This function is the only one that is absolutely required to be overridden. The default `ask` method calls this method (please do not override the default `ask`). +- `_internal_tell`: to update your algorithm with the new point. The default `tell` method calls this internal method after updating the archive (see paragraph above), please do not override it. +- `_internal_provide_recommendation`: to provide the final recommendation. By default, the recommendation is the pessimistic best point. + +If the algorithm is not able to handle parallelization (if `ask` cannot be called multiple times consecutively), the `no_parallelization` **class attribute** must be set to `True`. + + +### Seeding + +Seeding plays an important part in the significance and reproducibility of the algorithm benchmarking. For this to work, it is however important to **avoid seeding from inside** the algorithm. Indeed: +- we expect stochastic algorithms to be actually stochastic, if we set a seed inside the implementation this assumption is broken. +- we need the randomness to obtain relevant statistics when benchmarking the algorithms on deterministic functions. +- we can seed anyway from **outside** when we need it. This is what happens in the benchmarks: in this case we do want each independent run to be repeatable. + +For consistency and simplicity's sake, please prefer `numpy`'s random generator over the standard one. +Also, we consider that instantiating a random generator for each optimizer and using it afterwards is a good practice: +```python +self._rng = np.random.RandomState() +``` +Indeed, it makes the optimizer independent of other uses of the default random generator. However, this is not a constraint, and most algorithms currently do not follow such practice.
+ +A unit test automatically makes sure that all optimizers have repeatable behaviors on a simple test case when seeded from outside (see below). + + +## How to test it + +You are welcome to add tests if you want to make sure your implementation is correct. It is however not required since some tests are run on all registered algorithms. They will test two features: +- that all algorithms are able to find the optimum of a simple 2-variable quadratic fitness function. +- that running the algorithms twice after setting a seed leads to the exact same recommendation. This is useful to make sure we will get repeatability in the benchmarks. + +To run these tests, you can use: +``` +nosetests nevergrad/optimization/test_optimizerlib.py +``` + +The repeatability test will however crash the first time you run it, since no value for the recommendation of your algorithm exists. This is automatically added when running the tests, and if everything goes well the second time you run them, it means everything is fine. You will see in your diff that an additional line was added to a file containing all expected recommendations. + +If for any reason one of these tests is not suitable for your algorithm, we'll discuss this in the pull request and decide on the appropriate workaround. + +## How to benchmark it + +Benchmarks are implemented in two files [experiments.py](../nevergrad/benchmark/experiments.py) and [frozenexperiments.py](../nevergrad/benchmark/frozenexperiments.py). +While the former can be freely modified (benchmarks will be regularly added and removed), the latter file implements experiments which should not be modified when adding an algorithm, because they are used in tests, or for reproducibility of published results. + +Providing some benchmark results along your pull requests will highlight the interest of your algorithm. It is however not required. For now, there is no standard approach for benchmarking your algorithm.
You can implement your own benchmark, or copy an existing one and add your algorithm. Feel free to propose other solutions. + +### How benchmarks are implemented + +A benchmark is made of many `Experiment` instances. An `Experiment` is basically the combination of a test function, and settings for the optimization (optimizer, budget, etc...). + +Benchmarks are specified using a generator of `Experiment` instances. See examples in [experiments.py](../nevergrad/benchmark/experiments.py). If you want to make sure your benchmark is perfectly reproducible, you will need to be careful of properly seeding the functions and/or the experiments. diff --git a/nevergrad/benchmark/experiments.py b/nevergrad/benchmark/experiments.py index c97cd3743..e4eee060c 100644 --- a/nevergrad/benchmark/experiments.py +++ b/nevergrad/benchmark/experiments.py @@ -4,37 +4,16 @@ # LICENSE file in the root directory of this source tree. from typing import Iterator, Optional -import numpy as np from ..functions import ArtificialFunction -from ..common import decorators from .. import optimization from .xpbase import Experiment from .xpbase import create_seed_generator +from .xpbase import registry +# register all frozen experiments +from . import frozenexperiments # pylint:disable=unused-import # pylint: disable=stop-iteration-return -registry = decorators.Registry() - - -@registry.register -def basic(seed: Optional[int] = None) -> Iterator[Experiment]: - """Test settings - """ - seedg = create_seed_generator(seed) - function = ArtificialFunction(name="sphere", block_dimension=2, noise_level=1) - np.random.seed(seed) # seed before initializing the function! 
- function.initialize() # initialization uses randomness - return iter([Experiment(function, optimizer_name="OnePlusOne", num_workers=2, budget=4, seed=next(seedg))]) - - -@registry.register -def repeated_basic(seed: Optional[int] = None) -> Iterator[Experiment]: - """Test settings - """ - seedg = create_seed_generator(seed) - return (next(basic(next(seedg))) for _ in range(10)) - - @registry.register def discrete(seed: Optional[int] = None) -> Iterator[Experiment]: # prepare list of parameters to sweep for independent variables @@ -53,22 +32,6 @@ def discrete(seed: Optional[int] = None) -> Iterator[Experiment]: yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) -@registry.register -def small_discrete(seed: Optional[int] = None) -> Iterator[Experiment]: - # prepare list of parameters to sweep for independent variables - seedg = create_seed_generator(seed) - names = ["hardonemax5", "hardjump5", "hardleadingones5"] - optims = sorted(x for x, y in optimization.registry.items() if "iscrete" in x and "epea" not in x and "DE" not in x - and "SSNEA" not in x) - functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) - for name in names for bd in [30] for uv_factor in [5, 10] for n_blocks in [1]] - # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) - for func in functions: - for optim in optims: - for budget in [100, 400, 700, 1000, 1300, 1600, 1900, 2200, 2500, 2800, 3000]: # , 10000]: - yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) - - @registry.register def minidoe(seed: Optional[int] = None) -> Iterator[Experiment]: # prepare list of parameters to sweep for independent variables @@ -85,124 +48,18 @@ def minidoe(seed: Optional[int] = None) -> Iterator[Experiment]: yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, 
seed=next(seedg)) -@registry.register -def illcond(seed: Optional[int] = None) -> Iterator[Experiment]: - """All optimizers on ill cond problems - """ - seedg = create_seed_generator(seed) - for budget in [500, 1000, 2000, 4000]: - for optim in ["SQP", "DE", "CMA", "PSO", "RotationInvariantDE", "NelderMead"]: - for rotation in [True, False]: - for name in ["ellipsoid", "cigar"]: - function = ArtificialFunction(name=name, rotation=rotation, block_dimension=100) - yield Experiment(function, optim, budget=budget, seed=next(seedg)) - - -@registry.register -def compabasedillcond(seed: Optional[int] = None) -> Iterator[Experiment]: - """All optimizers on ill cond problems - """ - seedg = create_seed_generator(seed) - for budget in [500, 1000, 2000, 4000, 8000]: - for optim in ["DE", "CMA", "PSO", "BPRotationInvariantDE", "RotationInvariantDE", - "AlmostRotationInvariantDE", "AlmostRotationInvariantDEAndBigPop"]: - for rotation in [True, False]: - for name in ["ellipsoid", "cigar"]: - function = ArtificialFunction(name=name, rotation=rotation, block_dimension=30) - yield Experiment(function, optim, budget=budget, seed=next(seedg)) - - -@registry.register -def noise(seed: Optional[int] = None) -> Iterator[Experiment]: - """All optimizers on ill cond problems - """ - seedg = create_seed_generator(seed) - optims = sorted(x for x, y in optimization.registry.items() - if ("TBPSA" in x or "ois" in x or "CMA" in x or "epea" in x) and "iscr" not in x) - for budget in [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000 ]: - for optim in optims: - for rotation in [True, False]: - for name in ["sphere", "cigar", "sphere4"]: - function = ArtificialFunction(name=name, rotation=rotation, block_dimension=20, noise_level=10) - yield Experiment(function, optim, budget=budget, seed=next(seedg)) - - -@registry.register -def doe_dim4(seed: Optional[int] = None) -> Iterator[Experiment]: # Here, QR performs best, then Random, then LHS, then Cauchy. 
- # prepare list of parameters to sweep for independent variables - seedg = create_seed_generator(seed) - names = ["sphere"] # n for n in ArtificialFunction.list_sorted_function_names() if "sphere" in n] - optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) - functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) - for name in names for bd in [4] for uv_factor in [0] for n_blocks in [1]] - # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) - for func in functions: - for optim in optims: - for budget in [30, 100, 3000, 10000]: - # duplicate -> each Experiment has different randomness - yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) - - @registry.register def doe_dim10(seed: Optional[int] = None) -> Iterator[Experiment]: # LHS performs best, followed by QR and random # nearly equally (Hammersley better than random, Halton not clearly; scrambling improves results). 
# prepare list of parameters to sweep for independent variables - seedg = create_seed_generator(seed) names = ["sphere"] - optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) - functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) - for name in names for bd in [10] for uv_factor in [0] for n_blocks in [1]] - # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) - for func in functions: - for optim in optims: - for budget in [30, 100, 3000, 10000]: - # duplicate -> each Experiment has different randomness - yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) - - -@registry.register -def dim10_smallbudget(seed: Optional[int] = None) -> Iterator[Experiment]: - # prepare list of parameters to sweep for independent variables seedg = create_seed_generator(seed) - names = ["sphere"] optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) for name in names for bd in [10] for uv_factor in [0] for n_blocks in [1]] # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) for func in functions: for optim in optims: - for budget in [4, 8, 16, 32]: - # duplicate -> each Experiment has different randomness - yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) - - -@registry.register -def dim10_select_two_features(seed: Optional[int] = None) -> Iterator[Experiment]: # 2 variables matter - Scrambled Hammersley rules. 
- # prepare list of parameters to sweep for independent variables - seedg = create_seed_generator(seed) - names = ["sphere"] - optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) - functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) - for name in names for bd in [2] for uv_factor in [5] for n_blocks in [1]] - # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) - for func in functions: - for optim in optims: - for budget in [4, 8, 16, 32]: - # duplicate -> each Experiment has different randomness - yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) - - -@registry.register -def dim10_select_one_feature(seed: Optional[int] = None) -> Iterator[Experiment]: # One and only one variable matters - LHS wins. - # prepare list of parameters to sweep for independent variables - seedg = create_seed_generator(seed) - names = ["sphere"] - optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) - functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) - for name in names for bd in [1] for uv_factor in [10] for n_blocks in [1]] - # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) - for func in functions: - for optim in optims: - for budget in [8, 10, 12, 14, 16, 18, 20]: + for budget in [30, 100, 3000, 10000]: # duplicate -> each Experiment has different randomness yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) diff --git a/nevergrad/benchmark/frozenexperiments.py b/nevergrad/benchmark/frozenexperiments.py new file mode 100644 index 000000000..ea0cdccd3 --- /dev/null +++ 
b/nevergrad/benchmark/frozenexperiments.py @@ -0,0 +1,154 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Iterator, Optional +import numpy as np +from .. import optimization +from ..functions import ArtificialFunction +from .xpbase import registry +from .xpbase import create_seed_generator +from .xpbase import Experiment +# pylint: disable=stop-iteration-return + + +@registry.register +def basic(seed: Optional[int] = None) -> Iterator[Experiment]: + """Test settings + """ + seedg = create_seed_generator(seed) + function = ArtificialFunction(name="sphere", block_dimension=2, noise_level=1) + np.random.seed(seed) # seed before initializing the function! + function.initialize() # initialization uses randomness + return iter([Experiment(function, optimizer_name="OnePlusOne", num_workers=2, budget=4, seed=next(seedg))]) + + +@registry.register +def repeated_basic(seed: Optional[int] = None) -> Iterator[Experiment]: + """Test settings + """ + seedg = create_seed_generator(seed) + return (next(basic(next(seedg))) for _ in range(10)) + + +@registry.register +def small_discrete(seed: Optional[int] = None) -> Iterator[Experiment]: + # prepare list of parameters to sweep for independent variables + seedg = create_seed_generator(seed) + names = ["hardonemax5", "hardjump5", "hardleadingones5"] + optims = sorted(x for x, y in optimization.registry.items() if "iscrete" in x and "epea" not in x and "DE" not in x + and "SSNEA" not in x) + functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) + for name in names for bd in [30] for uv_factor in [5, 10] for n_blocks in [1]] + # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) + for func in functions: + for optim in 
optims: + for budget in [100, 400, 700, 1000, 1300, 1600, 1900, 2200, 2500, 2800, 3000]: # , 10000]: + yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) + + +@registry.register +def illcond(seed: Optional[int] = None) -> Iterator[Experiment]: + """All optimizers on ill cond problems + """ + seedg = create_seed_generator(seed) + for budget in [500, 1000, 2000, 4000]: + for optim in ["SQP", "DE", "CMA", "PSO", "RotationInvariantDE", "NelderMead"]: + for rotation in [True, False]: + for name in ["ellipsoid", "cigar"]: + function = ArtificialFunction(name=name, rotation=rotation, block_dimension=100) + yield Experiment(function, optim, budget=budget, seed=next(seedg)) + + +@registry.register +def compabasedillcond(seed: Optional[int] = None) -> Iterator[Experiment]: + """All optimizers on ill cond problems + """ + seedg = create_seed_generator(seed) + for budget in [500, 1000, 2000, 4000, 8000]: + for optim in ["DE", "CMA", "PSO", "BPRotationInvariantDE", "RotationInvariantDE", + "AlmostRotationInvariantDE", "AlmostRotationInvariantDEAndBigPop"]: + for rotation in [True, False]: + for name in ["ellipsoid", "cigar"]: + function = ArtificialFunction(name=name, rotation=rotation, block_dimension=30) + yield Experiment(function, optim, budget=budget, seed=next(seedg)) + + +@registry.register +def noise(seed: Optional[int] = None) -> Iterator[Experiment]: + """All optimizers on ill cond problems + """ + seedg = create_seed_generator(seed) + optims = sorted(x for x, y in optimization.registry.items() + if ("TBPSA" in x or "ois" in x or "CMA" in x or "epea" in x) and "iscr" not in x) + for budget in [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000]: + for optim in optims: + for rotation in [True, False]: + for name in ["sphere", "cigar", "sphere4"]: + function = ArtificialFunction(name=name, rotation=rotation, block_dimension=20, noise_level=10) + yield Experiment(function, optim, budget=budget, seed=next(seedg)) + + 
+@registry.register +def dim10_smallbudget(seed: Optional[int] = None) -> Iterator[Experiment]: + # prepare list of parameters to sweep for independent variables + seedg = create_seed_generator(seed) + names = ["sphere"] + optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) + functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) + for name in names for bd in [10] for uv_factor in [0] for n_blocks in [1]] + # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) + for func in functions: + for optim in optims: + for budget in [4, 8, 16, 32]: + # duplicate -> each Experiment has different randomness + yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) + + +@registry.register +def dim10_select_two_features(seed: Optional[int] = None) -> Iterator[Experiment]: # 2 variables matter - Scrambled Hammersley rules. 
+ # prepare list of parameters to sweep for independent variables + seedg = create_seed_generator(seed) + names = ["sphere"] + optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) + functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) + for name in names for bd in [2] for uv_factor in [5] for n_blocks in [1]] + # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) + for func in functions: + for optim in optims: + for budget in [4, 8, 16, 32]: + # duplicate -> each Experiment has different randomness + yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) + + +@registry.register +def dim10_select_one_feature(seed: Optional[int] = None) -> Iterator[Experiment]: # One and only one variable matters - LHS wins. + # prepare list of parameters to sweep for independent variables + seedg = create_seed_generator(seed) + names = ["sphere"] + optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) + functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) + for name in names for bd in [1] for uv_factor in [10] for n_blocks in [1]] + # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) + for func in functions: + for optim in optims: + for budget in [8, 10, 12, 14, 16, 18, 20]: + # duplicate -> each Experiment has different randomness + yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) + + +@registry.register +def doe_dim4(seed: Optional[int] = None) -> Iterator[Experiment]: # Here, QR performs best, then Random, then LHS, then Cauchy. 
+ # prepare list of parameters to sweep for independent variables + seedg = create_seed_generator(seed) + names = ["sphere"] # n for n in ArtificialFunction.list_sorted_function_names() if "sphere" in n] + optims = sorted(x for x, y in optimization.registry.items() if y.one_shot and "arg" not in x and "mal" not in x) + functions = [ArtificialFunction(name, block_dimension=bd, num_blocks=n_blocks, useless_variables=bd * uv_factor * n_blocks) + for name in names for bd in [4] for uv_factor in [0] for n_blocks in [1]] + # functions are not initialized and duplicated at yield time, they will be initialized in the experiment (no need to seed here) + for func in functions: + for optim in optims: + for budget in [30, 100, 3000, 10000]: + # duplicate -> each Experiment has different randomness + yield Experiment(func.duplicate(), optim, budget=budget, num_workers=1, seed=next(seedg)) diff --git a/nevergrad/benchmark/xpbase.py b/nevergrad/benchmark/xpbase.py index a16a91a80..903a9c285 100644 --- a/nevergrad/benchmark/xpbase.py +++ b/nevergrad/benchmark/xpbase.py @@ -10,11 +10,15 @@ import traceback from typing import Dict, Union, Callable, Any, Optional, Iterator import numpy as np +from ..common import decorators from ..functions import BaseFunction from ..optimization import base from ..optimization.optimizerlib import registry as optimizer_registry +registry = decorators.Registry() + + class CallCounter: """Simple wrapper which counts the number of calls to a function. diff --git a/nevergrad/common/test_testing.py b/nevergrad/common/test_testing.py index fb4b7a5b6..1cf59e795 100644 --- a/nevergrad/common/test_testing.py +++ b/nevergrad/common/test_testing.py @@ -3,6 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
+from pathlib import Path from unittest import TestCase import genty import numpy as np @@ -34,3 +35,10 @@ def test_assert_set_equal(self, estimate: testing.Iterable, message: str) -> Non def test_printed_assert_equal() -> None: testing.printed_assert_equal(0, 0) np.testing.assert_raises(AssertionError, testing.printed_assert_equal, 0, 1) + + +def test_assert_markdown_links_not_broken() -> None: + folder = Path(__file__).parents[2].expanduser().absolute() + assert (folder / "README.md").exists(), f"Wrong root folder: {folder}" + assert testing._get_all_markdown_links(folder), "There should be at least one hyperlink!" + testing.assert_markdown_links_not_broken(folder) diff --git a/nevergrad/common/testing.py b/nevergrad/common/testing.py index d235169c1..af15f3899 100644 --- a/nevergrad/common/testing.py +++ b/nevergrad/common/testing.py @@ -3,7 +3,9 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Iterable, Any +import re +from pathlib import Path +from typing import Iterable, Any, Union, List import numpy as np @@ -32,3 +34,54 @@ def printed_assert_equal(actual: Any, desired: Any, err_msg: str = '') -> None: print("\n" + "# " * 12 + "DEBUG MESSAGE " + "# " * 12) print(f"Expected: {desired}\nbut got: {actual}") raise e + + +def assert_markdown_links_not_broken(folder: Union[str, Path]) -> None: + """Asserts that all relative hyperlinks are valid in markdown files of the folder + and its subfolders. + + Note + ---- + http hyperlinks are not tested. 
+ """ + links = _get_all_markdown_links(folder) + broken = [l for l in links if not l.exists()] + if broken: + text = "\n - ".join([str(l) for l in broken]) + raise AssertionError(f"Broken markdown links:\n - {text}") + + +class _MarkdownLink: + """Handle to a markdown link, for easy existence test and printing + (external links are not tested) + """ + + def __init__(self, folder: Path, filepath: Path, string: str, link: str) -> None: + self._folder = folder + self._filepath = filepath + self._string = string + self._link = link + + def exists(self) -> bool: + if self._link.startswith("http"): # consider it exists + return True + fullpath = self._folder / self._filepath.parent / self._link + return fullpath.exists() + + def __repr__(self) -> str: + return f"{self._link} ({self._string}) from file {self._filepath}" + + +def _get_all_markdown_links(folder: Union[str, Path]) -> List[_MarkdownLink]: + """Returns a list of all existing markdown links + """ + pattern = re.compile(r"\[(?P<string>.+?)\]\((?P<link>.+?)\)") + folder = Path(folder).expanduser().absolute() + links = [] + for rfilepath in folder.glob("**/*.md"): + filepath = folder / rfilepath + with filepath.open("r") as f: + text = f.read() + for match in pattern.finditer(text): + links.append(_MarkdownLink(folder, rfilepath, match.group("string"), match.group("link"))) + return links