diff --git a/mdpow/fep.py b/mdpow/fep.py index 74c7558a..a98781fb 100644 --- a/mdpow/fep.py +++ b/mdpow/fep.py @@ -129,11 +129,18 @@ import warnings import numpy +import pandas as pd import scipy.integrate +from scipy import constants import numkit.integration import numkit.timeseries +from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk +from alchemlyb.estimators import TI, BAR, MBAR +from alchemlyb.parsing.gmx import _extract_dataframe +from alchemlyb.preprocessing.subsampling import statistical_inefficiency + import gromacs, gromacs.utilities try: import gromacs.setup @@ -141,6 +148,7 @@ raise ImportError("Gromacs installation not found, source GMXRC?") from gromacs.utilities import asiterable, AttributeDict, in_dir, openany from numkit.observables import QuantityWithError +from glob import glob import logging logger = logging.getLogger('mdpow.fep') @@ -168,6 +176,10 @@ def kJ_to_kcal(x): """Convert a energy in kJ to kcal.""" return x / 4.184 +def kBT_to_kJ(x, T): + """Convert a energy in kBT to kJ/mol.""" + return x * constants.N_A*constants.k*T*1e-3 + class FEPschedule(AttributeDict): """Describe mdp parameter choices as key - value pairs. @@ -289,6 +301,12 @@ class Gsolv(Journalled): #: :meth:`Simulation.get_protocol` and :class:`restart.Journal` protocols = ["setup", "fep_run"] + #: Estimators in alchemlyb + estimators = {'TI': {'extract': extract_dHdl, 'estimator': TI}, + 'BAR': {'extract': extract_u_nk, 'estimator': BAR}, + 'MBAR': {'extract': extract_u_nk, 'estimator': MBAR} + } + # TODO: initialize from default cfg schedules_default = {'coulomb': FEPschedule(name='coulomb', @@ -373,6 +391,13 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs) the value set in a loaded pickle file. [``False``] *stride* collect every *stride* data line, see :meth:`Gsolv.collect` [1] + *start* + Start frame of data analyzed in every fep window. + *stop* + Stop frame of data analyzed in every fep window. + *SI* + Set to ``True`` if you want to perform statistical inefficiency + to preprocess the data. *kwargs* other undocumented arguments (see source for the moment) @@ -461,6 +486,9 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs) # for analysis self.stride = kwargs.pop('stride', 1) + self.start = kwargs.pop('start', 0) + self.stop = kwargs.pop('stop', None) + self.SI = kwargs.pop('SI', False) # other variables #: Results from the analysis @@ -689,11 +717,12 @@ def dgdl_edr(self, *args): could be found """ - fn = os.path.join(*args + (self.deffnm + '.edr',)) - if not os.path.exists(fn): - logger.error("Missing dgdl.edr file %(fn)r.", vars()) - raise IOError(errno.ENOENT, "Missing dgdl.edr file", fn) - return fn + pattern = os.path.join(*args + (self.deffnm + '*.edr',)) + edrs = glob(pattern) + if not edrs: + logger.error("Missing dgdl.edr file %(pattern)r.", vars()) + raise IOError(errno.ENOENT, "Missing dgdl.edr file", pattern) + return [os.path.abspath(i) for i in edrs] def dgdl_tpr(self, *args): """Return filename of the dgdl TPR file. @@ -715,6 +744,24 @@ def dgdl_tpr(self, *args): raise IOError(errno.ENOENT, "Missing TPR file", fn) return fn + def dgdl_total_edr(self, *args, **kwargs): + """Return filename of the combined dgdl EDR file. + + :Arguments: + *args* + joins the arguments into a path and adds the default + filename for the dvdl file + + :Keywords: + *total_edr_name* + Name of the user defined total edr file. + + :Returns: path to total EDR + + """ + total_edr_name = kwargs.get("total_edr_name", "total.edr") + fn = os.path.join(*args + (total_edr_name,)) + return fn def convert_edr(self): """Convert EDR files to compressed XVG files.""" @@ -725,10 +772,13 @@ def convert_edr(self): edr_files = [self.dgdl_edr(self.wdir(component, l)) for l in lambdas] tpr_files = [self.dgdl_tpr(self.wdir(component, l)) for l in lambdas] for tpr, edr in zip(tpr_files, edr_files): - deffnm = os.path.splitext(edr)[0] - xvgfile = deffnm + ".xvg" # hack - logger.info(" {0} --> {1}".format(edr, xvgfile)) - gromacs.g_energy(s=tpr, f=edr, odh=xvgfile) + dirct = os.path.abspath(os.path.dirname(tpr)) + total_edr = self.dgdl_total_edr(dirct) + logger.info(" {0} --> {1}".format('edrs', total_edr)) + gromacs.eneconv(f=edr, o=total_edr) + xvgfile = os.path.join(dirct, self.deffnm + ".xvg") # hack + logger.info(" {0} --> {1}".format(total_edr, xvgfile)) + gromacs.g_energy(s=tpr, f=total_edr, odh=xvgfile) def collect(self, stride=None, autosave=True, autocompress=True): """Collect dV/dl from output. @@ -953,6 +1003,87 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): self.logger_DeltaA0() return self.results.DeltaA.Gibbs + def collect_alchemlyb(self, start=0, stop=None, stride=None, autosave=True, autocompress=True): + extract = self.estimators[self.method]['extract'] + + if autocompress: + # must be done before adding to results.xvg or we will not find the file later + self.compress_dgdl_xvg() + + logger.info("[%(dirname)s] Finding dgdl xvg files, reading with " + "stride=%(stride)d permissive=%(permissive)r." % vars(self)) + for component, lambdas in self.lambdas.items(): + val = [] + for l in lambdas: + xvg_file = self.dgdl_xvg(self.wdir(component, l)) + xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride] + if self.SI: + ts = _extract_dataframe(xvg_file).iloc[start:stop:stride] + ts = pd.DataFrame({'time': ts.iloc[:,0], 'dhdl': ts.iloc[:,1]}) + ts = ts.set_index('time') + xvg_df = statistical_inefficiency(xvg_df, series=ts, conservative=True) + val.append(xvg_df) + self.results.xvg[component] = (numpy.array(lambdas), pd.concat(val)) + + if autosave: + self.save() + + def analyze_alchemlyb(self, start=0, stop=None, stride=None, force=False, autosave=True): + stride = stride or self.stride + start = start or self.start + stop = stop or self.stop + + if self.method in ['TI', 'BAR', 'MBAR']: + estimator = self.estimators[self.method]['estimator'] + else: + errmsg = "The method is not supported." + logger.error(errmsg) + raise ValueError(errmsg) + + if force or not self.has_dVdl(): + try: + self.collect_alchemlyb(start, stop, stride, autosave=False) + except IOError as err: + if err.errno == errno.ENOENT: + self.convert_edr() + self.collect_alchemlyb(start, stop, stride, autosave=False) + else: + logger.exception() + raise + else: + logger.info("Analyzing stored data.") + + # total free energy difference at const P (all simulations are done in NPT) + GibbsFreeEnergy = QuantityWithError(0,0) + + for component, (lambdas, xvgs) in self.results.xvg.items(): + result = estimator().fit(xvgs) + if self.method == 'BAR': + DeltaA = QuantityWithError(0,0) + a_s= numpy.diagonal(result.delta_f_, offset=1) + da_s = numpy.diagonal(result.d_delta_f_, offset=1) + for a, da in zip(a_s, da_s): + DeltaA += QuantityWithError(a, da) + self.results.DeltaA[component] = kBT_to_kJ(DeltaA, self.Temperature) + else: + a = result.delta_f_.loc[0.00, 1.00] + da = result.d_delta_f_.loc[0.00, 1.00] + self.results.DeltaA[component] = kBT_to_kJ(QuantityWithError(a, da), self.Temperature) + GibbsFreeEnergy += self.results.DeltaA[component] # error propagation is automagic! + + # hydration free energy Delta A = -(Delta A_coul + Delta A_vdw) + GibbsFreeEnergy *= -1 + self.results.DeltaA.Gibbs = GibbsFreeEnergy + + if autosave: + self.save() + + self.logger_DeltaA0() + return self.results.DeltaA.Gibbs + + if autosave: + self.save() + def write_DeltaA0(self, filename, mode='w'): """Write free energy components to a file. @@ -1087,6 +1218,7 @@ def choose_script_from(scripts): def __repr__(self): return "%s(filename=%r)" % (self.__class__.__name__, self.filename) + class Ghyd(Gsolv): """Sets up and analyses MD to obtain the hydration free energy of a solute.""" solvent_default = "water" @@ -1138,7 +1270,7 @@ class Gwoct(Goct): """ solvent_default = "wetoctanol" dirname_default = os.path.join(Gsolv.topdir_default, solvent_default) - + def p_transfer(G1, G2, **kwargs): """Compute partition coefficient from two :class:`Gsolv` objects. diff --git a/mdpow/tests/test_analysis_alchemlyb.py b/mdpow/tests/test_analysis_alchemlyb.py new file mode 100644 index 00000000..b27cbd13 --- /dev/null +++ b/mdpow/tests/test_analysis_alchemlyb.py @@ -0,0 +1,149 @@ +import os.path +import pytest +import py.path + +import yaml +import pybol + +from numpy.testing import assert_array_almost_equal + +from six.moves import cPickle as pickle + +import mdpow.fep + +from pkg_resources import resource_filename +RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources')) +MANIFEST = RESOURCES.join("manifest.yml") + +def fix_manifest(topdir): + """Create a temporary manifest with a custom `path`. + + Fix manifest in a local temporary copy in existing dir topdir + where the `path` is an absolute path to our "states" directory. We + use `pkg_resources.resource_filename` to anchor the path. + + Arguments + --------- + topdir : py.path.local + existing temporary directory (as provided by, for instance, + `pytest.tmpdir`) + + Returns + ------- + new_manifest : py.path.local + Path to the new manifest. + + Example + ------- + Use as :: + + new_manifest = fix_manifest(tmpdir) + m = pybol.Manifest(new_manifest.strpath) + + """ + manifest = yaml.load(MANIFEST.open()) + # simple heuristic: last element of the recorded manifest::path is the name + # of the states directory, typically 'states' (from .../testing_resources/states) + manifest['path'] = RESOURCES.join(os.path.basename(manifest['path'])).strpath + new_manifest = topdir.join("local_manifest.yml") + yaml.dump(manifest, stream=new_manifest.open("w")) + return new_manifest + + +# session scope if read-only use + +@pytest.fixture(scope="function") +def fep_benzene_directory(tmpdir_factory): + topdir = tmpdir_factory.mktemp('analysis') + m = pybol.Manifest(fix_manifest(topdir).strpath) + m.assemble('FEP', topdir.strpath) + return topdir.join("benzene") + +class TestAnalyze(object): + def get_Gsolv(self, pth): + gsolv = pth.join("FEP", "water", "Gsolv.fep") + G = pickle.load(gsolv.open()) + # patch paths + G.basedir = pth.strpath + G.filename = gsolv.strpath + return G + + @pytest.mark.parametrize('method, Gibbs, coulomb, vdw', [ + ('TI', + (-3.901068, 0.550272), + (8.417035, 0.22289), + (-4.515967, 0.50311)), + ('BAR', + (-4.091241, 0.385413), + (8.339705, 0.166802), + (-4.248463, 0.347449)), + ('MBAR', + (-6.793117, 0.475149), + (8.241836, 0.219235), + (-1.448719, 0.421548)) + ]) + def test_estimator_alchemlyb(self, fep_benzene_directory, method, + Gibbs, coulomb, vdw): + G = self.get_Gsolv(fep_benzene_directory) + G.SI = False + G.method = method + G.start = 0 + G.stop = None + # ensure conversion EDR to XVG.bz2; if the fixture is session scoped + # then other workers will pick up these files. Make sure that only one + # runs convert because there is no file locking, if in doubt, make + # fep_benzene_directory locally scoped + G.convert_edr() + try: + G.analyze_alchemlyb(force=True, autosave=False) + except IOError as err: + raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename)) + DeltaA = G.results.DeltaA + assert_array_almost_equal(DeltaA.Gibbs.astuple(), Gibbs, + decimal=6) + assert_array_almost_equal(DeltaA.coulomb.astuple(), coulomb, + decimal=6) + assert_array_almost_equal(DeltaA.vdw.astuple(), vdw, + decimal=6) + + def test_SI(self, fep_benzene_directory): + G = self.get_Gsolv(fep_benzene_directory) + G.SI = True + G.method = 'TI' + G.start = 0 + G.stop = None + G.convert_edr() + try: + G.analyze_alchemlyb(force=True, autosave=False) + except IOError as err: + raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename)) + DeltaA = G.results.DeltaA + assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-2.908885, 2.175976), + decimal=6) + assert_array_almost_equal(DeltaA.coulomb.astuple(), (7.755779, 0.531481), + decimal=6) + assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.846894, 2.110071), + decimal=6) + + def test_start_stop_stride(self, fep_benzene_directory): + G = self.get_Gsolv(fep_benzene_directory) + G.SI = False + G.method = 'TI' + G.start = 10 + G.stride = 2 + G.stop = 200 + G.convert_edr() + try: + G.analyze_alchemlyb(force=True, autosave=False) + except IOError as err: + raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format( + err.strerror, err.filename)) + DeltaA = G.results.DeltaA + assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-3.318109, 0.905128), + decimal=6) + assert_array_almost_equal(DeltaA.coulomb.astuple(), (8.146806, 0.348866), + decimal=6) + assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.828696, 0.835195), + decimal=6) diff --git a/mdpow/tests/test_fep.py b/mdpow/tests/test_fep.py index a88fa26f..81ed75fc 100644 --- a/mdpow/tests/test_fep.py +++ b/mdpow/tests/test_fep.py @@ -1,5 +1,6 @@ import numpy as np from numpy.testing import assert_array_almost_equal, assert_almost_equal +from scipy import constants import mdpow import mdpow.fep @@ -18,6 +19,10 @@ def test_kcal_to_kJ(): def test_kJ_to_kcal(): assert_almost_equal(mdpow.fep.kJ_to_kcal(41.84), 10.0) +def test_kBT_to_kJ(): + ref = constants.N_A*constants.k*1e-3 + assert_almost_equal(mdpow.fep.kBT_to_kJ(1, 1), ref) + class TestFEPschedule(object): reference = { 'VDW': diff --git a/setup.py b/setup.py index 82ad7559..eb7870fa 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,8 @@ 'numkit', 'six', 'mdanalysis', + 'alchemlyb', + 'pandas' ], #setup_requires=['pytest-runner',], tests_require=['pytest', 'pybol', 'py'],