Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update fep with alchemlyb and multiple edr files input #132

Merged
merged 17 commits into from
Mar 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 142 additions & 10 deletions mdpow/fep.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,26 @@
import warnings

import numpy
import pandas as pd
orbeckst marked this conversation as resolved.
Show resolved Hide resolved

import scipy.integrate
from scipy import constants
import numkit.integration
import numkit.timeseries

from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk
from alchemlyb.estimators import TI, BAR, MBAR
from alchemlyb.parsing.gmx import _extract_dataframe
from alchemlyb.preprocessing.subsampling import statistical_inefficiency

import gromacs, gromacs.utilities
try:
import gromacs.setup
except (ImportError, OSError):
raise ImportError("Gromacs installation not found, source GMXRC?")
from gromacs.utilities import asiterable, AttributeDict, in_dir, openany
from numkit.observables import QuantityWithError
from glob import glob

import logging
logger = logging.getLogger('mdpow.fep')
Expand Down Expand Up @@ -168,6 +176,10 @@ def kJ_to_kcal(x):
"""Convert a energy in kJ to kcal."""
return x / 4.184

def kBT_to_kJ(x, T):
"""Convert a energy in kBT to kJ/mol."""
return x * constants.N_A*constants.k*T*1e-3


class FEPschedule(AttributeDict):
"""Describe mdp parameter choices as key - value pairs.
Expand Down Expand Up @@ -289,6 +301,12 @@ class Gsolv(Journalled):
#: :meth:`Simulation.get_protocol` and :class:`restart.Journal`
protocols = ["setup", "fep_run"]

#: Estimators in alchemlyb
estimators = {'TI': {'extract': extract_dHdl, 'estimator': TI},
'BAR': {'extract': extract_u_nk, 'estimator': BAR},
'MBAR': {'extract': extract_u_nk, 'estimator': MBAR}
}

# TODO: initialize from default cfg
schedules_default = {'coulomb':
FEPschedule(name='coulomb',
Expand Down Expand Up @@ -373,6 +391,13 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs)
the value set in a loaded pickle file. [``False``]
*stride*
collect every *stride* data line, see :meth:`Gsolv.collect` [1]
*start*
Start frame of data analyzed in every fep window.
*stop*
Stop frame of data analyzed in every fep window.
*SI*
Set to ``True`` if you want to perform statistical inefficiency
to preprocess the data.
*kwargs*
other undocumented arguments (see source for the moment)

Expand Down Expand Up @@ -461,6 +486,9 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs)

# for analysis
self.stride = kwargs.pop('stride', 1)
self.start = kwargs.pop('start', 0)
self.stop = kwargs.pop('stop', None)
self.SI = kwargs.pop('SI', False)
orbeckst marked this conversation as resolved.
Show resolved Hide resolved

# other variables
#: Results from the analysis
Expand Down Expand Up @@ -689,11 +717,12 @@ def dgdl_edr(self, *args):
could be found

"""
fn = os.path.join(*args + (self.deffnm + '.edr',))
if not os.path.exists(fn):
logger.error("Missing dgdl.edr file %(fn)r.", vars())
raise IOError(errno.ENOENT, "Missing dgdl.edr file", fn)
return fn
pattern = os.path.join(*args + (self.deffnm + '*.edr',))
edrs = glob(pattern)
if not edrs:
logger.error("Missing dgdl.edr file %(pattern)r.", vars())
raise IOError(errno.ENOENT, "Missing dgdl.edr file", pattern)
return [os.path.abspath(i) for i in edrs]

def dgdl_tpr(self, *args):
"""Return filename of the dgdl TPR file.
Expand All @@ -715,6 +744,24 @@ def dgdl_tpr(self, *args):
raise IOError(errno.ENOENT, "Missing TPR file", fn)
return fn

def dgdl_total_edr(self, *args, **kwargs):
"""Return filename of the combined dgdl EDR file.

:Arguments:
*args*
joins the arguments into a path and adds the default
filename for the dvdl file

:Keywords:
*total_edr_name*
Name of the user defined total edr file.

:Returns: path to total EDR

"""
total_edr_name = kwargs.get("total_edr_name", "total.edr")
fn = os.path.join(*args + (total_edr_name,))
return fn

def convert_edr(self):
"""Convert EDR files to compressed XVG files."""
Expand All @@ -725,10 +772,13 @@ def convert_edr(self):
edr_files = [self.dgdl_edr(self.wdir(component, l)) for l in lambdas]
tpr_files = [self.dgdl_tpr(self.wdir(component, l)) for l in lambdas]
for tpr, edr in zip(tpr_files, edr_files):
deffnm = os.path.splitext(edr)[0]
xvgfile = deffnm + ".xvg" # hack
logger.info(" {0} --> {1}".format(edr, xvgfile))
gromacs.g_energy(s=tpr, f=edr, odh=xvgfile)
dirct = os.path.abspath(os.path.dirname(tpr))
total_edr = self.dgdl_total_edr(dirct)
logger.info(" {0} --> {1}".format('edrs', total_edr))
gromacs.eneconv(f=edr, o=total_edr)
xvgfile = os.path.join(dirct, self.deffnm + ".xvg") # hack
logger.info(" {0} --> {1}".format(total_edr, xvgfile))
gromacs.g_energy(s=tpr, f=total_edr, odh=xvgfile)

def collect(self, stride=None, autosave=True, autocompress=True):
"""Collect dV/dl from output.
Expand Down Expand Up @@ -953,6 +1003,87 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000):
self.logger_DeltaA0()
return self.results.DeltaA.Gibbs

def collect_alchemlyb(self, start=0, stop=None, stride=None, autosave=True, autocompress=True):
extract = self.estimators[self.method]['extract']

if autocompress:
# must be done before adding to results.xvg or we will not find the file later
self.compress_dgdl_xvg()

logger.info("[%(dirname)s] Finding dgdl xvg files, reading with "
"stride=%(stride)d permissive=%(permissive)r." % vars(self))
for component, lambdas in self.lambdas.items():
val = []
for l in lambdas:
xvg_file = self.dgdl_xvg(self.wdir(component, l))
xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride]
if self.SI:
orbeckst marked this conversation as resolved.
Show resolved Hide resolved
ts = _extract_dataframe(xvg_file).iloc[start:stop:stride]
ts = pd.DataFrame({'time': ts.iloc[:,0], 'dhdl': ts.iloc[:,1]})
ts = ts.set_index('time')
xvg_df = statistical_inefficiency(xvg_df, series=ts, conservative=True)
val.append(xvg_df)
self.results.xvg[component] = (numpy.array(lambdas), pd.concat(val))

if autosave:
self.save()

def analyze_alchemlyb(self, start=0, stop=None, stride=None, force=False, autosave=True):
stride = stride or self.stride
start = start or self.start
stop = stop or self.stop

if self.method in ['TI', 'BAR', 'MBAR']:
estimator = self.estimators[self.method]['estimator']
else:
errmsg = "The method is not supported."
logger.error(errmsg)
raise ValueError(errmsg)

if force or not self.has_dVdl():
try:
self.collect_alchemlyb(start, stop, stride, autosave=False)
except IOError as err:
if err.errno == errno.ENOENT:
self.convert_edr()
self.collect_alchemlyb(start, stop, stride, autosave=False)
else:
logger.exception()
raise
else:
logger.info("Analyzing stored data.")

# total free energy difference at const P (all simulations are done in NPT)
GibbsFreeEnergy = QuantityWithError(0,0)

for component, (lambdas, xvgs) in self.results.xvg.items():
result = estimator().fit(xvgs)
if self.method == 'BAR':
DeltaA = QuantityWithError(0,0)
a_s= numpy.diagonal(result.delta_f_, offset=1)
da_s = numpy.diagonal(result.d_delta_f_, offset=1)
for a, da in zip(a_s, da_s):
DeltaA += QuantityWithError(a, da)
self.results.DeltaA[component] = kBT_to_kJ(DeltaA, self.Temperature)
else:
a = result.delta_f_.loc[0.00, 1.00]
da = result.d_delta_f_.loc[0.00, 1.00]
self.results.DeltaA[component] = kBT_to_kJ(QuantityWithError(a, da), self.Temperature)
GibbsFreeEnergy += self.results.DeltaA[component] # error propagation is automagic!

# hydration free energy Delta A = -(Delta A_coul + Delta A_vdw)
GibbsFreeEnergy *= -1
self.results.DeltaA.Gibbs = GibbsFreeEnergy

if autosave:
self.save()

self.logger_DeltaA0()
return self.results.DeltaA.Gibbs

if autosave:
self.save()

def write_DeltaA0(self, filename, mode='w'):
"""Write free energy components to a file.

Expand Down Expand Up @@ -1087,6 +1218,7 @@ def choose_script_from(scripts):
def __repr__(self):
return "%s(filename=%r)" % (self.__class__.__name__, self.filename)


class Ghyd(Gsolv):
"""Sets up and analyses MD to obtain the hydration free energy of a solute."""
solvent_default = "water"
Expand Down Expand Up @@ -1138,7 +1270,7 @@ class Gwoct(Goct):
"""
solvent_default = "wetoctanol"
dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)


def p_transfer(G1, G2, **kwargs):
"""Compute partition coefficient from two :class:`Gsolv` objects.
Expand Down
149 changes: 149 additions & 0 deletions mdpow/tests/test_analysis_alchemlyb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import os.path
import pytest
import py.path

import yaml
import pybol

from numpy.testing import assert_array_almost_equal

from six.moves import cPickle as pickle

import mdpow.fep

from pkg_resources import resource_filename
RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources'))
MANIFEST = RESOURCES.join("manifest.yml")

def fix_manifest(topdir):
"""Create a temporary manifest with a custom `path`.

Fix manifest in a local temporary copy in existing dir topdir
where the `path` is an absolute path to our "states" directory. We
use `pkg_resources.resource_filename` to anchor the path.

Arguments
---------
topdir : py.path.local
existing temporary directory (as provided by, for instance,
`pytest.tmpdir`)

Returns
-------
new_manifest : py.path.local
Path to the new manifest.

Example
-------
Use as ::

new_manifest = fix_manifest(tmpdir)
m = pybol.Manifest(new_manifest.strpath)

"""
manifest = yaml.load(MANIFEST.open())
# simple heuristic: last element of the recorded manifest::path is the name
# of the states directory, typically 'states' (from .../testing_resources/states)
manifest['path'] = RESOURCES.join(os.path.basename(manifest['path'])).strpath
new_manifest = topdir.join("local_manifest.yml")
yaml.dump(manifest, stream=new_manifest.open("w"))
return new_manifest


# session scope if read-only use

@pytest.fixture(scope="function")
def fep_benzene_directory(tmpdir_factory):
topdir = tmpdir_factory.mktemp('analysis')
m = pybol.Manifest(fix_manifest(topdir).strpath)
m.assemble('FEP', topdir.strpath)
return topdir.join("benzene")

class TestAnalyze(object):
def get_Gsolv(self, pth):
gsolv = pth.join("FEP", "water", "Gsolv.fep")
G = pickle.load(gsolv.open())
# patch paths
G.basedir = pth.strpath
G.filename = gsolv.strpath
return G

@pytest.mark.parametrize('method, Gibbs, coulomb, vdw', [
('TI',
(-3.901068, 0.550272),
(8.417035, 0.22289),
(-4.515967, 0.50311)),
('BAR',
(-4.091241, 0.385413),
(8.339705, 0.166802),
(-4.248463, 0.347449)),
('MBAR',
(-6.793117, 0.475149),
(8.241836, 0.219235),
(-1.448719, 0.421548))
])
def test_estimator_alchemlyb(self, fep_benzene_directory, method,
Gibbs, coulomb, vdw):
G = self.get_Gsolv(fep_benzene_directory)
G.SI = False
G.method = method
G.start = 0
G.stop = None
# ensure conversion EDR to XVG.bz2; if the fixture is session scoped
# then other workers will pick up these files. Make sure that only one
# runs convert because there is no file locking, if in doubt, make
# fep_benzene_directory locally scoped
G.convert_edr()
try:
G.analyze_alchemlyb(force=True, autosave=False)
except IOError as err:
raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format(
err.strerror, err.filename))
DeltaA = G.results.DeltaA
assert_array_almost_equal(DeltaA.Gibbs.astuple(), Gibbs,
decimal=6)
assert_array_almost_equal(DeltaA.coulomb.astuple(), coulomb,
decimal=6)
assert_array_almost_equal(DeltaA.vdw.astuple(), vdw,
decimal=6)

def test_SI(self, fep_benzene_directory):
G = self.get_Gsolv(fep_benzene_directory)
G.SI = True
G.method = 'TI'
G.start = 0
G.stop = None
G.convert_edr()
try:
G.analyze_alchemlyb(force=True, autosave=False)
except IOError as err:
raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format(
err.strerror, err.filename))
DeltaA = G.results.DeltaA
assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-2.908885, 2.175976),
decimal=6)
assert_array_almost_equal(DeltaA.coulomb.astuple(), (7.755779, 0.531481),
decimal=6)
assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.846894, 2.110071),
decimal=6)

def test_start_stop_stride(self, fep_benzene_directory):
G = self.get_Gsolv(fep_benzene_directory)
G.SI = False
G.method = 'TI'
G.start = 10
G.stride = 2
G.stop = 200
G.convert_edr()
try:
G.analyze_alchemlyb(force=True, autosave=False)
except IOError as err:
raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format(
err.strerror, err.filename))
DeltaA = G.results.DeltaA
assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-3.318109, 0.905128),
decimal=6)
assert_array_almost_equal(DeltaA.coulomb.astuple(), (8.146806, 0.348866),
decimal=6)
assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.828696, 0.835195),
decimal=6)
Loading