Skip to content

Commit

Permalink
Update fep with alchemlyb and multiple edr files input (#132)
Browse files Browse the repository at this point in the history
* Add eneconv to combine multiple edr files

* use glob to gather all files

* add dgdl_total_edr

* add alchemlyb dependece

* add collect and analyze functions for alchemlyb estimators

* add function to convert a energy from kBT to kJ/mol

* add test for kBT_to_kJ

* Add error calculation for BAR method.
fix unit problem of VDW and Coulomb components

* add test for analyze_alchemlyb

* add pandas dependece

* remove duplicate check for method

* update SI test with consevative=True

* kBT_to_kJ() now always returns a copy

* return a copy instead of changing it

* add test for start stop stride

Co-authored-by: Oliver Beckstein <[email protected]>
  • Loading branch information
VOD555 and orbeckst authored Mar 20, 2021
1 parent 544e338 commit 97f0cf6
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 10 deletions.
152 changes: 142 additions & 10 deletions mdpow/fep.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,26 @@
import warnings

import numpy
import pandas as pd

import scipy.integrate
from scipy import constants
import numkit.integration
import numkit.timeseries

from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk
from alchemlyb.estimators import TI, BAR, MBAR
from alchemlyb.parsing.gmx import _extract_dataframe
from alchemlyb.preprocessing.subsampling import statistical_inefficiency

import gromacs, gromacs.utilities
try:
import gromacs.setup
except (ImportError, OSError):
raise ImportError("Gromacs installation not found, source GMXRC?")
from gromacs.utilities import asiterable, AttributeDict, in_dir, openany
from numkit.observables import QuantityWithError
from glob import glob

import logging
logger = logging.getLogger('mdpow.fep')
Expand Down Expand Up @@ -168,6 +176,10 @@ def kJ_to_kcal(x):
"""Convert a energy in kJ to kcal."""
return x / 4.184

def kBT_to_kJ(x, T):
"""Convert a energy in kBT to kJ/mol."""
return x * constants.N_A*constants.k*T*1e-3


class FEPschedule(AttributeDict):
"""Describe mdp parameter choices as key - value pairs.
Expand Down Expand Up @@ -289,6 +301,12 @@ class Gsolv(Journalled):
#: :meth:`Simulation.get_protocol` and :class:`restart.Journal`
protocols = ["setup", "fep_run"]

#: Estimators in alchemlyb
estimators = {'TI': {'extract': extract_dHdl, 'estimator': TI},
'BAR': {'extract': extract_u_nk, 'estimator': BAR},
'MBAR': {'extract': extract_u_nk, 'estimator': MBAR}
}

# TODO: initialize from default cfg
schedules_default = {'coulomb':
FEPschedule(name='coulomb',
Expand Down Expand Up @@ -373,6 +391,13 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs)
the value set in a loaded pickle file. [``False``]
*stride*
collect every *stride* data line, see :meth:`Gsolv.collect` [1]
*start*
Start frame of data analyzed in every fep window.
*stop*
Stop frame of data analyzed in every fep window.
*SI*
Set to ``True`` if you want to perform statistical inefficiency
to preprocess the data.
*kwargs*
other undocumented arguments (see source for the moment)
Expand Down Expand Up @@ -461,6 +486,9 @@ def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs)

# for analysis
self.stride = kwargs.pop('stride', 1)
self.start = kwargs.pop('start', 0)
self.stop = kwargs.pop('stop', None)
self.SI = kwargs.pop('SI', False)

# other variables
#: Results from the analysis
Expand Down Expand Up @@ -689,11 +717,12 @@ def dgdl_edr(self, *args):
could be found
"""
fn = os.path.join(*args + (self.deffnm + '.edr',))
if not os.path.exists(fn):
logger.error("Missing dgdl.edr file %(fn)r.", vars())
raise IOError(errno.ENOENT, "Missing dgdl.edr file", fn)
return fn
pattern = os.path.join(*args + (self.deffnm + '*.edr',))
edrs = glob(pattern)
if not edrs:
logger.error("Missing dgdl.edr file %(pattern)r.", vars())
raise IOError(errno.ENOENT, "Missing dgdl.edr file", pattern)
return [os.path.abspath(i) for i in edrs]

def dgdl_tpr(self, *args):
"""Return filename of the dgdl TPR file.
Expand All @@ -715,6 +744,24 @@ def dgdl_tpr(self, *args):
raise IOError(errno.ENOENT, "Missing TPR file", fn)
return fn

def dgdl_total_edr(self, *args, **kwargs):
"""Return filename of the combined dgdl EDR file.
:Arguments:
*args*
joins the arguments into a path and adds the default
filename for the dvdl file
:Keywords:
*total_edr_name*
Name of the user defined total edr file.
:Returns: path to total EDR
"""
total_edr_name = kwargs.get("total_edr_name", "total.edr")
fn = os.path.join(*args + (total_edr_name,))
return fn

def convert_edr(self):
"""Convert EDR files to compressed XVG files."""
Expand All @@ -725,10 +772,13 @@ def convert_edr(self):
edr_files = [self.dgdl_edr(self.wdir(component, l)) for l in lambdas]
tpr_files = [self.dgdl_tpr(self.wdir(component, l)) for l in lambdas]
for tpr, edr in zip(tpr_files, edr_files):
deffnm = os.path.splitext(edr)[0]
xvgfile = deffnm + ".xvg" # hack
logger.info(" {0} --> {1}".format(edr, xvgfile))
gromacs.g_energy(s=tpr, f=edr, odh=xvgfile)
dirct = os.path.abspath(os.path.dirname(tpr))
total_edr = self.dgdl_total_edr(dirct)
logger.info(" {0} --> {1}".format('edrs', total_edr))
gromacs.eneconv(f=edr, o=total_edr)
xvgfile = os.path.join(dirct, self.deffnm + ".xvg") # hack
logger.info(" {0} --> {1}".format(total_edr, xvgfile))
gromacs.g_energy(s=tpr, f=total_edr, odh=xvgfile)

def collect(self, stride=None, autosave=True, autocompress=True):
"""Collect dV/dl from output.
Expand Down Expand Up @@ -953,6 +1003,87 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000):
self.logger_DeltaA0()
return self.results.DeltaA.Gibbs

def collect_alchemlyb(self, start=0, stop=None, stride=None, autosave=True, autocompress=True):
extract = self.estimators[self.method]['extract']

if autocompress:
# must be done before adding to results.xvg or we will not find the file later
self.compress_dgdl_xvg()

logger.info("[%(dirname)s] Finding dgdl xvg files, reading with "
"stride=%(stride)d permissive=%(permissive)r." % vars(self))
for component, lambdas in self.lambdas.items():
val = []
for l in lambdas:
xvg_file = self.dgdl_xvg(self.wdir(component, l))
xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride]
if self.SI:
ts = _extract_dataframe(xvg_file).iloc[start:stop:stride]
ts = pd.DataFrame({'time': ts.iloc[:,0], 'dhdl': ts.iloc[:,1]})
ts = ts.set_index('time')
xvg_df = statistical_inefficiency(xvg_df, series=ts, conservative=True)
val.append(xvg_df)
self.results.xvg[component] = (numpy.array(lambdas), pd.concat(val))

if autosave:
self.save()

def analyze_alchemlyb(self, start=0, stop=None, stride=None, force=False, autosave=True):
stride = stride or self.stride
start = start or self.start
stop = stop or self.stop

if self.method in ['TI', 'BAR', 'MBAR']:
estimator = self.estimators[self.method]['estimator']
else:
errmsg = "The method is not supported."
logger.error(errmsg)
raise ValueError(errmsg)

if force or not self.has_dVdl():
try:
self.collect_alchemlyb(start, stop, stride, autosave=False)
except IOError as err:
if err.errno == errno.ENOENT:
self.convert_edr()
self.collect_alchemlyb(start, stop, stride, autosave=False)
else:
logger.exception()
raise
else:
logger.info("Analyzing stored data.")

# total free energy difference at const P (all simulations are done in NPT)
GibbsFreeEnergy = QuantityWithError(0,0)

for component, (lambdas, xvgs) in self.results.xvg.items():
result = estimator().fit(xvgs)
if self.method == 'BAR':
DeltaA = QuantityWithError(0,0)
a_s= numpy.diagonal(result.delta_f_, offset=1)
da_s = numpy.diagonal(result.d_delta_f_, offset=1)
for a, da in zip(a_s, da_s):
DeltaA += QuantityWithError(a, da)
self.results.DeltaA[component] = kBT_to_kJ(DeltaA, self.Temperature)
else:
a = result.delta_f_.loc[0.00, 1.00]
da = result.d_delta_f_.loc[0.00, 1.00]
self.results.DeltaA[component] = kBT_to_kJ(QuantityWithError(a, da), self.Temperature)
GibbsFreeEnergy += self.results.DeltaA[component] # error propagation is automagic!

# hydration free energy Delta A = -(Delta A_coul + Delta A_vdw)
GibbsFreeEnergy *= -1
self.results.DeltaA.Gibbs = GibbsFreeEnergy

if autosave:
self.save()

self.logger_DeltaA0()
return self.results.DeltaA.Gibbs

if autosave:
self.save()

def write_DeltaA0(self, filename, mode='w'):
"""Write free energy components to a file.
Expand Down Expand Up @@ -1087,6 +1218,7 @@ def choose_script_from(scripts):
def __repr__(self):
return "%s(filename=%r)" % (self.__class__.__name__, self.filename)


class Ghyd(Gsolv):
"""Sets up and analyses MD to obtain the hydration free energy of a solute."""
solvent_default = "water"
Expand Down Expand Up @@ -1138,7 +1270,7 @@ class Gwoct(Goct):
"""
solvent_default = "wetoctanol"
dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)


def p_transfer(G1, G2, **kwargs):
"""Compute partition coefficient from two :class:`Gsolv` objects.
Expand Down
149 changes: 149 additions & 0 deletions mdpow/tests/test_analysis_alchemlyb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import os.path
import pytest
import py.path

import yaml
import pybol

from numpy.testing import assert_array_almost_equal

from six.moves import cPickle as pickle

import mdpow.fep

from pkg_resources import resource_filename
RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources'))
MANIFEST = RESOURCES.join("manifest.yml")

def fix_manifest(topdir):
"""Create a temporary manifest with a custom `path`.
Fix manifest in a local temporary copy in existing dir topdir
where the `path` is an absolute path to our "states" directory. We
use `pkg_resources.resource_filename` to anchor the path.
Arguments
---------
topdir : py.path.local
existing temporary directory (as provided by, for instance,
`pytest.tmpdir`)
Returns
-------
new_manifest : py.path.local
Path to the new manifest.
Example
-------
Use as ::
new_manifest = fix_manifest(tmpdir)
m = pybol.Manifest(new_manifest.strpath)
"""
manifest = yaml.load(MANIFEST.open())
# simple heuristic: last element of the recorded manifest::path is the name
# of the states directory, typically 'states' (from .../testing_resources/states)
manifest['path'] = RESOURCES.join(os.path.basename(manifest['path'])).strpath
new_manifest = topdir.join("local_manifest.yml")
yaml.dump(manifest, stream=new_manifest.open("w"))
return new_manifest


# session scope if read-only use

@pytest.fixture(scope="function")
def fep_benzene_directory(tmpdir_factory):
topdir = tmpdir_factory.mktemp('analysis')
m = pybol.Manifest(fix_manifest(topdir).strpath)
m.assemble('FEP', topdir.strpath)
return topdir.join("benzene")

class TestAnalyze(object):
def get_Gsolv(self, pth):
gsolv = pth.join("FEP", "water", "Gsolv.fep")
G = pickle.load(gsolv.open())
# patch paths
G.basedir = pth.strpath
G.filename = gsolv.strpath
return G

@pytest.mark.parametrize('method, Gibbs, coulomb, vdw', [
('TI',
(-3.901068, 0.550272),
(8.417035, 0.22289),
(-4.515967, 0.50311)),
('BAR',
(-4.091241, 0.385413),
(8.339705, 0.166802),
(-4.248463, 0.347449)),
('MBAR',
(-6.793117, 0.475149),
(8.241836, 0.219235),
(-1.448719, 0.421548))
])
def test_estimator_alchemlyb(self, fep_benzene_directory, method,
Gibbs, coulomb, vdw):
G = self.get_Gsolv(fep_benzene_directory)
G.SI = False
G.method = method
G.start = 0
G.stop = None
# ensure conversion EDR to XVG.bz2; if the fixture is session scoped
# then other workers will pick up these files. Make sure that only one
# runs convert because there is no file locking, if in doubt, make
# fep_benzene_directory locally scoped
G.convert_edr()
try:
G.analyze_alchemlyb(force=True, autosave=False)
except IOError as err:
raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format(
err.strerror, err.filename))
DeltaA = G.results.DeltaA
assert_array_almost_equal(DeltaA.Gibbs.astuple(), Gibbs,
decimal=6)
assert_array_almost_equal(DeltaA.coulomb.astuple(), coulomb,
decimal=6)
assert_array_almost_equal(DeltaA.vdw.astuple(), vdw,
decimal=6)

def test_SI(self, fep_benzene_directory):
G = self.get_Gsolv(fep_benzene_directory)
G.SI = True
G.method = 'TI'
G.start = 0
G.stop = None
G.convert_edr()
try:
G.analyze_alchemlyb(force=True, autosave=False)
except IOError as err:
raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format(
err.strerror, err.filename))
DeltaA = G.results.DeltaA
assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-2.908885, 2.175976),
decimal=6)
assert_array_almost_equal(DeltaA.coulomb.astuple(), (7.755779, 0.531481),
decimal=6)
assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.846894, 2.110071),
decimal=6)

def test_start_stop_stride(self, fep_benzene_directory):
G = self.get_Gsolv(fep_benzene_directory)
G.SI = False
G.method = 'TI'
G.start = 10
G.stride = 2
G.stop = 200
G.convert_edr()
try:
G.analyze_alchemlyb(force=True, autosave=False)
except IOError as err:
raise AssertionError("Failed to convert edr to xvg: {0}: {1}".format(
err.strerror, err.filename))
DeltaA = G.results.DeltaA
assert_array_almost_equal(DeltaA.Gibbs.astuple(), (-3.318109, 0.905128),
decimal=6)
assert_array_almost_equal(DeltaA.coulomb.astuple(), (8.146806, 0.348866),
decimal=6)
assert_array_almost_equal(DeltaA.vdw.astuple(), (-4.828696, 0.835195),
decimal=6)
Loading

0 comments on commit 97f0cf6

Please sign in to comment.