From 2fe1ccfb1668f124b1490f46a718c2e535afdfa9 Mon Sep 17 00:00:00 2001 From: Nikola Jajcay Date: Mon, 30 Nov 2020 12:24:39 +0100 Subject: [PATCH 1/6] initial test python3.9 --- .github/workflows/pythontesting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythontesting.yml b/.github/workflows/pythontesting.yml index 3471923..10b786b 100644 --- a/.github/workflows/pythontesting.yml +++ b/.github/workflows/pythontesting.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest] - python-version: [3.6, 3.7, 3.8] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 From 124dd8e0f6d265e2222f44250bd530fb2cfbd1f6 Mon Sep 17 00:00:00 2001 From: Nikola Jajcay Date: Fri, 18 Dec 2020 15:17:18 +0100 Subject: [PATCH 2/6] run testing on all branches --- .github/workflows/pythontesting.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/pythontesting.yml b/.github/workflows/pythontesting.yml index 10b786b..23f103e 100644 --- a/.github/workflows/pythontesting.yml +++ b/.github/workflows/pythontesting.yml @@ -5,11 +5,7 @@ name: pytest on: push: - branches: - - "*" pull_request: - branches: - - "*" jobs: build: From 08abce488ce75ee6feed7030f1ea1d52b769b9dd Mon Sep 17 00:00:00 2001 From: Nikola Jajcay Date: Tue, 18 May 2021 12:43:14 +0200 Subject: [PATCH 3/6] explicitly install hdf5 for github actions for macos --- .github/workflows/pythontesting.yml | 6 ++++++ README.md | 2 +- gpso/callbacks.py | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pythontesting.yml b/.github/workflows/pythontesting.yml index 23f103e..f0eca45 100644 --- a/.github/workflows/pythontesting.yml +++ b/.github/workflows/pythontesting.yml @@ -22,6 +22,12 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + - name: Install MacOS dependencies + if: startsWith(matrix.os, 'macos') + run: | + set -e + brew update + brew install 
hdf5 c-blosc - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/README.md b/README.md index e4e1c2e..f54807c 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Example of ternary partition tree after optimisation. ## Installation -`GPSO` package is tested and should run without any problems on python versions 3.6 -- 3.8. +`GPSO` package is tested and should run without any problems on python versions 3.6 -- 3.9. ### One-liner For those who want to optimise right away just diff --git a/gpso/callbacks.py b/gpso/callbacks.py index 47c6b94..52cbe8a 100644 --- a/gpso/callbacks.py +++ b/gpso/callbacks.py @@ -141,7 +141,7 @@ def run(self, optimiser): if self.first_update: ckpt = tf.train.Checkpoint( model=optimiser.gp_surr.gpflow_model, - evluations=self.n_evals, + evaluations=self.n_evals, ) self.manager = tf.train.CheckpointManager( ckpt, self.path, max_to_keep=self.max_to_keep @@ -150,4 +150,4 @@ def run(self, optimiser): self.first_update = False else: saved_to = self.manager.save() - logging.debug(f"Checkppoint saved to {saved_to}") + logging.debug(f"Checkpoint saved to {saved_to}") From 0f6194a00392c82cc7996f03f326079667e3cde3 Mon Sep 17 00:00:00 2001 From: Nikola Jajcay Date: Tue, 18 May 2021 12:58:59 +0200 Subject: [PATCH 4/6] typos in comments etc --- gpso/param_space.py | 5 +++-- gpso/plotting.py | 2 +- gpso/saving_helper.py | 8 ++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/gpso/param_space.py b/gpso/param_space.py index 47db007..c447399 100644 --- a/gpso/param_space.py +++ b/gpso/param_space.py @@ -257,7 +257,8 @@ def _replace_normed_coord(self, index, new_coord): def ternary_split(self): """ Parcellate this leaf using the ternary split function along the largest - dimension. For discussion why ternary is the best choise see the paper. + dimension. For the discussion on why ternary is the best choice, see the + paper. Hadida, J., Sotiropoulos, S. N., Abeysuriya, R. G., Woolrich, M. 
W., & Jbabdi, S. (2018). Bayesian Optimisation of Large-Scale Biophysical @@ -436,7 +437,7 @@ def normalise_coords(self, orig_coords): def denormalise_coords(self, normed_coords): """ - Deormalise normed coordinates. + Denormalise normed coordinates. :param normed_coords: normed coordinates in the parameter space as [n points x ndim] diff --git a/gpso/plotting.py b/gpso/plotting.py index 7eea35f..d73bc7a 100644 --- a/gpso/plotting.py +++ b/gpso/plotting.py @@ -48,7 +48,7 @@ def plot_ternary_tree( :type param_space: `gpso.param_space.ParameterSpace` :param cmap: colormap for scores :type cmap: str - :param cmap_limits: limits for the colormapping of the scores, if None will + :param cmap_limits: limits for the color mapping of the scores, if None will be inferred from the data :param center_root_node: whether to center root node within the figure - graph might become less readable diff --git a/gpso/saving_helper.py b/gpso/saving_helper.py index e3ae68c..57d6dfb 100644 --- a/gpso/saving_helper.py +++ b/gpso/saving_helper.py @@ -25,7 +25,7 @@ def __init__(self, filename, extras=None): :param filename: filename for the HDF file :type filename: str :param extras: extras to write as a group into HDF file, e.g. 
default - parameters of the model, which are not subject to optimisaion + parameters of the model, which are not subject to optimisation :type extras: dict|None """ if not filename.endswith(H5_EXT): @@ -54,7 +54,7 @@ def save_runs(self, result, score, parameters): :param result: result(s) of the run, can be single result or multiple results (same parameters, different results, typically valid for stochastic systems) - :type result: any pytables supported + pd.DataFrame|list of thereof + :type result: any `pytables` supported + pd.DataFrame|list of thereof :param score: score(s) of the run :type score: float|list[float] :param parameters: parameters for this particular run(s) @@ -100,7 +100,7 @@ def _write_result(self, group, result, score): :param group: group to write to :type group: `tables.group.Group` :param result: result to write - :type result: any pytables supported + pd.DataFrame + :type result: any `pytables` supported + pd.DataFrame :param score: score to write :type score: float """ @@ -138,7 +138,7 @@ def table_reader(filename): :param filename: filename of the HDF file :type filename: str - :return: results, scores, parameters and extras if present; if mutliple + :return: results, scores, parameters and extras if present; if multiple runs per parameter set, that item is a list itself :rtype: (list,list,list,dict|None) """ From fe85c73de56a3110fdeecbcc67f532c434488692 Mon Sep 17 00:00:00 2001 From: Nikola Jajcay Date: Tue, 18 May 2021 13:01:16 +0200 Subject: [PATCH 5/6] remark for macos + python3.9 to the readme --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f54807c..add42d8 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,13 @@ Example of ternary partition tree after optimisation. `GPSO` package is tested and should run without any problems on python versions 3.6 -- 3.9. +### Note on python3.9 with macOS +Installing `pytables` might give you hdf5 errors. 
If this is the case, please do +```bash +brew install hdf5 c-blosc +``` +and all should work like a charm afterwards. + ### One-liner For those who want to optimise right away just ```bash From 725aace631f22100feb453bb22c66aba8ab8ee23 Mon Sep 17 00:00:00 2001 From: Nikola Jajcay Date: Tue, 18 May 2021 13:02:29 +0200 Subject: [PATCH 6/6] readme markdown formatting --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index add42d8..725f0fd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ [![Build Status](https://github.com/jajcayn/pygpso/workflows/pytest/badge.svg)](https://github.com/jajcayn/pygpso/actions) ![](https://img.shields.io/github/v/release/jajcayn/pygpso) [![codecov](https://codecov.io/gh/jajcayn/pygpso/branch/master/graph/badge.svg)](https://codecov.io/gh/jajcayn/pygpso) [![PyPI license](https://img.shields.io/pypi/l/pygpso.svg)](https://pypi.python.org/pypi/pygpso/) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jajcayn/pygpso.git/master?filepath=examples) [![DOI](https://zenodo.org/badge/236983676.svg)](https://zenodo.org/badge/latestdoi/236983676) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) # pyGPSO + *Optimise anything (but mainly large-scale biophysical models) using Gaussian Processes surrogate* `pyGPSO` is a python package for Gaussian-Processes Surrogate Optimisation. GPSO is a Bayesian optimisation method designed to cope with costly, high-dimensional, non-convex problems by switching between exploration of the parameter space (using partition tree) and exploitation of the gathered knowledge (by training the surrogate function using Gaussian Processes regression). The motivation for this method stems from the optimisation of large-scale biophysical models in neuroscience when the modelled data should match the experimental one. 
This package leverages [`GPFlow`](https://github.com/GPflow/GPflow) for training and predicting the Gaussian Processes surrogate. @@ -24,21 +25,29 @@ Example of ternary partition tree after optimisation. `GPSO` package is tested and should run without any problems on python versions 3.6 -- 3.9. ### Note on python3.9 with macOS + Installing `pytables` might give you hdf5 errors. If this is the case, please do + ```bash brew install hdf5 c-blosc ``` + and all should work like a charm afterwards. ### One-liner + For those who want to optimise right away just + ```bash pip install pygpso ``` + and go ahead! Make sure to check example notebooks in [the **examples** directory](examples/) to see how it works and what it can do. Or, alternatively, you can run interactive notebooks in binder: [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jajcayn/pygpso.git/master?filepath=examples) ### Go proper + When you are the type of girl or guy who likes to install packages properly, start by cloning (or forking) this repository, then installing all the dependencies and finally install the package itself + ```bash git clone https://github.com/jajcayn/pygpso cd pygpso/ @@ -47,15 +56,19 @@ pip install -r requirements.txt pip install -r requirements_optional.txt pip install . ``` + Don't forget to test! + ```bash pytest ``` ## Usage + A guide on how to optimise and what can be done using this package is given as jupyter notebooks in [the **examples** directory](examples/). You can also try them out live thanks to binder: [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jajcayn/pygpso.git/master?filepath=examples). 
The basic idea is to initialise the parameter space in which the optimisation is to be run and then iteratively dig deeper and evaluate the objective function when necessary + ```python from gpso import ParameterSpace, GPSOptimiser @@ -79,6 +92,7 @@ best_point = opt.run(objective_function) The package also offers plotting functions for visualising the results. Again, those are documented and showcased in [the **examples** directory](examples/). ### Notes + Gaussian Processes regression uses normalised coordinates within the bounds [0, 1]. All normalisation and de-normalisation is done automatically, however when you want to call `predict_y` on GPR model, do not forget to pass normalised coordinates. The normalisation is handled by `sklearn.MinMaxScaler` and `ParameterSpace` instance offers a convenience functions for this: `ParameterSpace.normalise_coords(orig_coords)` and `ParameterSpace.denormalise_coords(normed_coords)`. Plotting of the ternary tree (`gpso.plotting.plot_ternary_tree()`) requires `igraph` package, whose layout function is exploited. If you want to see the resulting beautiful tree, please install `python-igraph`. @@ -86,12 +100,15 @@ Plotting of the ternary tree (`gpso.plotting.plot_ternary_tree()`) requires `igr Support of saver (for saving models run, e.g. timeseries along with the optimisation) is provided by `PyTables` (and `pandas` if you're saving results to `DataFrame`s). ## Known bugs and future improvements + * saving of GP surrogate is now hacky, as `GPFlow` supports only saving model for future prediction but AFAIK they cannot be trained anymore, since the information on kernels and mean-functions are not saved (only the trained weights in the computational graph). Thus, `pyGPSO` still relies on hacky saving to `pkl` files and recreating kernels and mean-function on-the-go when loading from saved. ## Final notes + When you encounter a bug or have any idea for an improvement, please open an issue and/or contact me. 
When using this package in publications, please cite the original Jonathan's paper for the methodology as + ```bibtex @article{hadida2018bayesian, title={Bayesian Optimisation of Large-Scale Biophysical Networks}, @@ -103,4 +120,5 @@ When using this package in publications, please cite the original Jonathan's pap publisher={Elsevier} } ``` + and acknowledge the usage of this software via its DOI: [![DOI](https://zenodo.org/badge/236983676.svg)](https://zenodo.org/badge/latestdoi/236983676). After clicking, you will see citation data.