diff --git a/.gitignore b/.gitignore index 2a45ed72..f71e81e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.pyc .vscode +*.DS_Store +build diff --git a/.travis.yml b/.travis.yml index 3d9c07ef..d4f03437 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,10 @@ language: python sudo: required dist: xenial +env: + global: + - MPLBACKEND=agg + python: - "2.7" - "3.5" diff --git a/AUTHORS b/AUTHORS index 672262b7..c48f9fa2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -33,4 +33,6 @@ Chronological list of authors 2019 - Victoria Lim (@vlim) - Hyungro Lee (@lee212) - - Mohammad S. Barhaghi (@msoroush) \ No newline at end of file + - Mohammad S. Barhaghi (@msoroush) +2020 + - Zhiyi Wu (@xiki-tempula) \ No newline at end of file diff --git a/CHANGES b/CHANGES index 98638667..b95aa851 100644 --- a/CHANGES +++ b/CHANGES @@ -15,11 +15,12 @@ The rules for this file: ------------------------------------------------------------------------------ -??/??/2020 wehs7661, dotsdl +??/??/2020 wehs7661, dotsdl, xiki-tempula * 0.?.? Enhancements + - Allow the overlap matrix of the MBAR estimator to be plotted. (PR #107) Deprecations diff --git a/docs/api_principles.rst b/docs/api_principles.rst index e644fb37..e0c6d73c 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -1,5 +1,5 @@ API principles -============ +============== The following is an overview over the guiding principles and ideas that underpin the API of alchemlyb. diff --git a/docs/conf.py b/docs/conf.py index 517695f4..28d24289 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,8 +50,10 @@ # General information about the project. project = u'alchemlyb' -author = u'David Dotson and contributors' -copyright = u'2017-2019, ' + author +author = u'''David Dotson, Ian Kenney, Oliver Beckstein, Shuai Liu, +Travis Jensen, Bryce Allen, Dominik Wille, Victoria Lim, Hyungro Lee, +Mohammad S. Barhaghi, Zhiyi Wu''' +copyright = u'2017-2020, ' + author # The version info for the project you're documenting, acts as replacement for diff --git a/docs/images/O_MBAR.png b/docs/images/O_MBAR.png new file mode 100644 index 00000000..65effe53 Binary files /dev/null and b/docs/images/O_MBAR.png differ diff --git a/docs/index.rst b/docs/index.rst index da8ef91e..e199ee0b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -60,6 +60,7 @@ Contributions are very welcome. If you have bug reports or feature requests or q parsing preprocessing estimators + visualisation .. toctree:: :maxdepth: 1 diff --git a/docs/visualisation.rst b/docs/visualisation.rst new file mode 100644 index 00000000..e7a03336 --- /dev/null +++ b/docs/visualisation.rst @@ -0,0 +1,39 @@ +Visualisation of the results +============================ +It is quite often that the user want to visualise the results to gain +confidence on the computed free energy. **alchemlyb** provides various +visualisation tools to help user to judge the estimate. + +.. _plot_overlap_matrix: + +Overlap Matrix of the MBAR +-------------------------- +The accuracy of the :class:`~alchemlyb.estimators.MBAR` estimator depends on +the overlap between different lambda states. The overlap matrix from the +:class:`~alchemlyb.estimators.MBAR` estimator could be plotted using +:func:`~alchemlyb.visualisation.plot_mbar_overlap_matrix` to check +the degree of overlap. It is recommended that there should be at least +**0.03** [Klimovich2015]_ overlap between neighboring states. :: + + >>> import pandas as pd + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.estimators import MBAR + + >>> bz = load_benzene().data + >>> u_nk_coul = pd.concat([extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']]) + >>> mbar_coul = MBAR() + >>> mbar_coul.fit(u_nk_coul) + + >>> from alchemlyb.visualisation import plot_mbar_overlap_matrix + >>> ax = plot_mbar_overlap_matrix(mbar_coul.overlap_matrix) + >>> ax.figure.savefig('O_MBAR.pdf', bbox_inches='tight', pad_inches=0.0) + + +Will give a plot looks like this + +.. image:: images/O_MBAR.png + +.. [Klimovich2015] Klimovich, P.V., Shirts, M.R. & Mobley, D.L. Guidelines for + the analysis of free energy calculations. J Comput Aided Mol Des 29, 397–411 + (2015). https://doi.org/10.1007/s10822-015-9840-9 diff --git a/docs/visualisation/alchemlyb.visualisation.plot_mbar_overlap_matrix.rst b/docs/visualisation/alchemlyb.visualisation.plot_mbar_overlap_matrix.rst new file mode 100644 index 00000000..5323d1a9 --- /dev/null +++ b/docs/visualisation/alchemlyb.visualisation.plot_mbar_overlap_matrix.rst @@ -0,0 +1,18 @@ +.. _plot_overlap: + +Plot Overlap Matrix from MBAR +============================= + +The function :func:`~alchemlyb.visualisation.plot_mbar_overlap_matrix` allows +the user to plot the overlap matrix from +:attr:`~alchemlyb.estimators.MBAR.overlap_matrix`. The user can pass +:class:`matplotlib.axes.Axes` into the function to have the overlap maxtrix +drawn on a specific axes. The user could also specify a list of lambda states +to be skipped when labelling the states. + +Please check :ref:`How to plot MBAR overlap matrix ` for +usage. + +API Reference +------------- +.. autofunction:: alchemlyb.visualisation.plot_mbar_overlap_matrix \ No newline at end of file diff --git a/setup.py b/setup.py index 0e3e3531..eb689366 100755 --- a/setup.py +++ b/setup.py @@ -29,5 +29,5 @@ license='BSD', long_description=open('README.rst').read(), tests_require = ['pytest', 'alchemtest'], - install_requires=['numpy', 'pandas>=0.23.0', 'pymbar>=3.0.5', 'scipy', 'scikit-learn'] + install_requires=['numpy', 'pandas>=0.23.0', 'pymbar>=3.0.5', 'scipy', 'scikit-learn', 'matplotlib'] ) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index 6d3b4e36..dd8127ae 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -45,6 +45,9 @@ class MBAR(BaseEstimator): states_ : list Lambda states for which free energy differences were obtained. + overlap_matrix: numpy.matrix + The overlap matrix. + """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, @@ -98,3 +101,19 @@ def fit(self, u_nk): def predict(self, u_ln): pass + + @property + def overlap_matrix(self): + r"""MBAR overlap matrix. + + The estimated state overlap matrix :math:`O_{ij}` is an estimate of the probability + of observing a sample from state :math:`i` in state :math:`j`. + + The :attr:`overlap_matrix` is computed on-the-fly. Assign it to a variable if + you plan to re-use it. + + See Also + --------- + pymbar.mbar.MBAR.computeOverlap + """ + return self._mbar.computeOverlap()['matrix'] diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py new file mode 100644 index 00000000..d482caad --- /dev/null +++ b/src/alchemlyb/tests/test_visualisation.py @@ -0,0 +1,27 @@ +import matplotlib +import pandas as pd + +from alchemtest.gmx import load_benzene +from alchemlyb.parsing.gmx import extract_u_nk +from alchemlyb.estimators import MBAR +from alchemlyb.visualisation.mbar_matrix import plot_mbar_overlap_matrix + +def test_plot_mbar_omatrix(): + '''Just test if the plot runs''' + bz = load_benzene().data + u_nk_coul = pd.concat([extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']]) + mbar_coul = MBAR() + mbar_coul.fit(u_nk_coul) + + assert isinstance(plot_mbar_overlap_matrix(mbar_coul.overlap_matrix), + matplotlib.axes.Axes) + assert isinstance(plot_mbar_overlap_matrix(mbar_coul.overlap_matrix, [1,]), + matplotlib.axes.Axes) + + # Bump up coverage + overlap_maxtrix = mbar_coul.overlap_matrix + overlap_maxtrix[0,0] = 0.0025 + overlap_maxtrix[-1, -1] = 0.9975 + assert isinstance(plot_mbar_overlap_matrix(overlap_maxtrix), + matplotlib.axes.Axes) + diff --git a/src/alchemlyb/visualisation/__init__.py b/src/alchemlyb/visualisation/__init__.py new file mode 100644 index 00000000..4fc73051 --- /dev/null +++ b/src/alchemlyb/visualisation/__init__.py @@ -0,0 +1 @@ +from .mbar_matrix import plot_mbar_overlap_matrix \ No newline at end of file diff --git a/src/alchemlyb/visualisation/mbar_matrix.py b/src/alchemlyb/visualisation/mbar_matrix.py new file mode 100644 index 00000000..682b8555 --- /dev/null +++ b/src/alchemlyb/visualisation/mbar_matrix.py @@ -0,0 +1,95 @@ +"""Functions for Plotting the overlay matrix for the MBAR estimator. + +To assess the quality of the MBAR estimator, the overlap matrix between +the lambda states can be computed and the more overlap is observed between +the states, the more reliable the estimate is. One way of accessing the +quality of the overlap matrix is by plotting it. + +The code for producing the overlap matrix plot is taken from +: `Alchemical Analysis `_. + +""" +from __future__ import division + +import matplotlib.pyplot as plt +import numpy as np + +def plot_mbar_overlap_matrix(matrix, skip_lambda_index=[], ax=None): + '''Plot the MBAR overlap matrix. + + Parameters + ---------- + matrix : numpy.matrix + DataFrame of the overlap matrix obtained from + :attr:`~alchemlyb.estimators.MBAR.overlap_matrix` + skip_lambda_index : List + list of lambda indices to be omitted from plotting process. + Default: []. + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. If ax=None, + a new axes will be generated. + + Returns + ------- + matplotlib.axes.Axes + An axes with the overlap matrix drawn. + + Notes + ----- + The code is taken and modified from + : `Alchemical Analysis `_ + + ''' + # Compute the size of the figure, if ax is not given. + max_prob = matrix.max() + size = len(matrix) + if ax is None: + fig, ax = plt.subplots(figsize=(size / 2, size / 2)) + ax.set_xticks([]) + ax.set_yticks([]) + ax.axis('off') + for i in range(size): + if i != 0: + ax.axvline(x=i, ls='-', lw=0.5, color='k', alpha=0.25) + ax.axhline(y=i, ls='-', lw=0.5, color='k', alpha=0.25) + for j in range(size): + if matrix[j, i] < 0.005: + ii = '' + elif matrix[j, i] > 0.995: + ii = '1.00' + else: + ii = ("{:.2f}".format(matrix[j, i])[1:]) + alf = matrix[j, i] / max_prob + ax.fill_between([i, i + 1], [size - j, size - j], [size - (j + 1), size - (j + 1)], color='k', alpha=alf) + ax.annotate(ii, xy=(i, j), xytext=(i + 0.5, size - (j + 0.5)), size=8, textcoords='data', va='center', + ha='center', color=('k' if alf < 0.5 else 'w')) + + if skip_lambda_index: + ks = [int(l) for l in skip_lambda_index] + ks = np.delete(np.arange(size + len(ks)), ks) + else: + ks = range(size) + for i in range(size): + ax.annotate(ks[i], xy=(i + 0.5, 1), xytext=(i + 0.5, size + 0.5), size=10, textcoords=('data', 'data'), + va='center', ha='center', color='k') + ax.annotate(ks[i], xy=(-0.5, size - (size - 0.5)), xytext=(-0.5, size - (i + 0.5)), size=10, textcoords=('data', 'data'), + va='center', ha='center', color='k') + ax.annotate('$\lambda$', xy=(-0.5, size - (size - 0.5)), xytext=(-0.5, size + 0.5), size=10, textcoords=('data', 'data'), + va='center', ha='center', color='k') + ax.plot([0, size], [0, 0], 'k-', lw=4.0, solid_capstyle='butt') + ax.plot([size, size], [0, size], 'k-', lw=4.0, solid_capstyle='butt') + ax.plot([0, 0], [0, size], 'k-', lw=2.0, solid_capstyle='butt') + ax.plot([0, size], [size, size], 'k-', lw=2.0, solid_capstyle='butt') + + cx = np.repeat(range(size + 1), 2) + cy = sorted(np.repeat(range(size + 1), 2), reverse=True) + ax.plot(cx[2:-1], cy[1:-2], 'k-', lw=2.0) + ax.plot(np.array(cx[2:-3]) + 1, cy[1:-4], 'k-', lw=2.0) + ax.plot(cx[1:-2], np.array(cy[:-3]) - 1, 'k-', lw=2.0) + ax.plot(cx[1:-4], np.array(cy[:-5]) - 2, 'k-', lw=2.0) + + ax.set_xlim(-1, size) + ax.set_ylim(0, size + 1) + return ax + +