Merge pull request #185 from Becksteinlab/fix-mdpow-solvationenergy

fix mdpow solvationenergy and p_transfer()
Becksteinlab · Aug 5, 2021 · 8afad4b · 8afad4b
2 parents a31003c + f14a3bc
commit 8afad4b
Show file tree

Hide file tree

Showing 9 changed files with 154 additions and 38 deletions.
diff --git a/CHANGES b/CHANGES
@@ -13,6 +13,12 @@ Fixes
 
 * restrict alchemlyb to <0.5.0 for Python 2 (otherwise the installation fails
   due to an unsatisfiable pandas dependency) (#180)
+* fix failure of mdpow-solvation script "AttributeError: 'AttributeDict' object
+  has no attribute 'Gibbs'" (#180)
+* fix fep.p_transfer() only analyzing the second input Gsolv (would lead to an
+  error like "results.DeltaA does not contain 'Gibbs'") (PR #185)
+* ensure that pickle files created under Python 2 are also readable under
+  Python 3  
 
 
 2021-08-02    0.7.0

diff --git a/mdpow/fep.py b/mdpow/fep.py
@@ -3,7 +3,7 @@
 # mdpow: fep.py
 # Copyright (c) 2010 Oliver Beckstein
 
-"""
+r"""
 :mod:`mdpow.fep` -- Calculate free energy of solvation
 ======================================================
 
@@ -272,7 +272,7 @@ def __deepcopy__(self, memo):
         return x
 
 class Gsolv(Journalled):
-    """Simulations to calculate and analyze the solvation free energy.
+    r"""Simulations to calculate and analyze the solvation free energy.
 
     :math:`\Delta A` is computed from the decharging and the
     decoupling step. With our choice of ``lambda=0`` being the fully
@@ -1029,6 +1029,11 @@ def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000):
         return self.results.DeltaA.Gibbs
 
     def collect_alchemlyb(self, SI=True, start=0, stop=None, stride=None, autosave=True, autocompress=True):
+        """Collect the data files using alchemlyb.
+
+        Unlike :meth:`collect`, this method can subsample with the
+        statistical inefficiency (parameter `SI`).
+        """
         extract = self.estimators[self.method]['extract']
 
         if autocompress:
@@ -1062,6 +1067,13 @@ def collect_alchemlyb(self, SI=True, start=0, stop=None, stride=None, autosave=T
             self.save()
 
     def analyze_alchemlyb(self, SI=True, start=0, stop=None, stride=None, force=False, autosave=True):
+        """Compute the free energy from the simulation data with alchemlyb.
+
+        Unlike :meth:`analyze`, the MBAR estimator is available (in
+        addition to TI). Note that SI should be enabled for meaningful
+        error estimates.
+
+        """
         stride = stride or self.stride
         start = start or self.start
         stop = stop or self.stop
@@ -1400,20 +1412,21 @@ def p_transfer(G1, G2, **kwargs):
                 logger.error(errmsg)
                 raise ValueError(errmsg)
 
-        if kwargs['force'] or (not hasattr(G.results.DeltaA, 'Gibbs')):
+        logger.info("The solvent is %s .", G.solvent_type)
+        if kwargs['force'] or 'Gibbs' not in G.results.DeltaA:
             # write out the settings when the analysis is performed
-            logger.info("The solvent is %s .", G.solvent_type)
             logger.info("Estimator is %s.", estimator)
             logger.info("Free energy calculation method is %s.", G.method)
 
-    if estimator == 'mdpow':
-        G.analyze(**G_kwargs)
-    elif estimator == 'alchemlyb':
-        if G_kwargs['SI']:
-            logger.info("Statistical inefficiency analysis will be performed.")
+            if estimator == 'mdpow':
+                G_kwargs.pop('SI', None)  # G.analyze() does not know SI
+                G.analyze(**G_kwargs)
+            elif estimator == 'alchemlyb':
+                logger.info("Statistical inefficiency analysis will %s be performed." %
+                            ("" if G_kwargs['SI'] else "not"))
+                G.analyze_alchemlyb(**G_kwargs)
         else:
-            logger.info("Statistical inefficiency analysis won't be performed.")
-        G.analyze_alchemlyb(**G_kwargs)
+            logger.info("Using already calculated free energy DeltaA")
 
     # x.Gibbs are QuantityWithError so they do error propagation
     transferFE = G2.results.DeltaA.Gibbs - G1.results.DeltaA.Gibbs

diff --git a/mdpow/restart.py b/mdpow/restart.py
@@ -249,6 +249,10 @@ def load(self, filename=None):
 
         If no *filename* was supplied then the filename is taken from the
         attribute :attr:`~Journalled.filename`.
+
+        .. versionchanged:: 0.7.1
+           Can read pickle files with either Python 2.7 or 3.x, regardless of
+           the Python version that created the pickle.
         """
         if filename is None:
             try:
@@ -260,7 +264,15 @@ def load(self, filename=None):
                 errmsg = "Neither filename nor default filename provided to load from."
                 logger.error(errmsg)
                 raise ValueError(errmsg)
+
+        # Do not remove this code when dropping Py 2.7 support as it is needed to
+        # be able to read old data files with Python 3 MDPOW.
         with open(filename, 'rb') as f:
-            instance = pickle.load(f)
+            try:
+                instance = pickle.load(f)
+            except UnicodeDecodeError:
+                logger.debug("Reading old Python 2 Pickle file %(filename)r" % vars())
+                instance = pickle.load(f, encoding='latin1')
+
         self.__dict__.update(instance.__dict__)
         logger.debug("Instance loaded from %(filename)r" % vars())
diff --git a/mdpow/tests/__init__.py b/mdpow/tests/__init__.py
@@ -1 +1,17 @@
 # tests for MDPOW
+
+import py.path
+from pkg_resources import resource_filename
+
+
+RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources'))
+
+MOLECULES = {
+    "benzene": RESOURCES.join("molecules", "benzene"),
+}
+STATES = {
+    "FEP": RESOURCES.join("states", "FEP"),
+    "base": RESOURCES.join("states", "base"),
+    "md_npt": RESOURCES.join("states", "FEP"),
+}
+CONFIGURATIONS = RESOURCES.join("test_configurations")
diff --git a/mdpow/tests/test_analysis.py b/mdpow/tests/test_analysis.py
@@ -1,10 +1,8 @@
 from __future__ import absolute_import
 
 import os.path
-import sys
 
 import pytest
-import py.path
 
 import yaml
 import pybol
@@ -16,8 +14,8 @@
 import numkit
 import mdpow.fep
 
-from pkg_resources import resource_filename
-RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources'))
+from . import RESOURCES
+
 MANIFEST = RESOURCES.join("manifest.yml")
 
 def fix_manifest(topdir):
@@ -31,7 +29,7 @@ def fix_manifest(topdir):
     ---------
     topdir : py.path.local
         existing temporary directory (as provided by, for instance,
-        `pytest.tmpdir`)
+       `pytest.tmpdir`)
 
     Returns
     -------
@@ -67,12 +65,9 @@ def fep_benzene_directory(tmpdir_factory):
 class TestAnalyze(object):
     def get_Gsolv(self, pth):
         gsolv = pth.join("FEP", "water", "Gsolv.fep")
-        if sys.version_info.major == 2:
-            G = pickle.load(gsolv.open())
-        elif sys.version_info.major == 3:
-            # Needed to read old pickle files
-            with open(gsolv, 'rb') as f:
-                G = pickle.load(f, encoding='latin1')
+        # Loading only works with magic code in restart.Journalled.load()
+        # so that Pickles produced with Python 2 can also be read with 3:
+        G = mdpow.fep.Ghyd(filename=gsolv.strpath)
         # patch paths
         G.basedir = pth.strpath
         G.filename = gsolv.strpath

diff --git a/mdpow/tests/test_analysis_alchemlyb.py b/mdpow/tests/test_analysis_alchemlyb.py
@@ -1,10 +1,8 @@
 from __future__ import absolute_import
 
 import os.path
-import sys
 
 import pytest
-import py.path
 
 import yaml
 import pybol
@@ -16,8 +14,8 @@
 
 import mdpow.fep
 
-from pkg_resources import resource_filename
-RESOURCES = py.path.local(resource_filename(__name__, 'testing_resources'))
+from . import RESOURCES
+
 MANIFEST = RESOURCES.join("manifest.yml")
 
 def fix_manifest(topdir):
@@ -31,7 +29,7 @@ def fix_manifest(topdir):
     ---------
     topdir : py.path.local
         existing temporary directory (as provided by, for instance,
-        `pytest.tmpdir`)
+       `pytest.tmpdir`)
 
     Returns
     -------
@@ -67,16 +65,12 @@ def fep_benzene_directory(tmpdir_factory):
 class TestAnalyze(object):
     def get_Gsolv(self, pth):
         gsolv = pth.join("FEP", "water", "Gsolv.fep")
-        # Needed to load old pickle files in python 3
-        if sys.version_info.major >= 3:
-            with open(gsolv, 'rb') as f:
-                G = pickle.load(f, encoding='latin1')
-                # patch paths
-        elif sys.version_info.major == 2:
-            G = pickle.load(gsolv.open())
+        # Loading only works with magic code in restart.Journalled.load()
+        # so that Pickles produced with Python 2 can also be read with 3:
+        G = mdpow.fep.Ghyd(filename=gsolv.strpath)
+        # patch paths
         G.basedir = pth.strpath
         G.filename = gsolv.strpath
-
         return G
 
     @pytest.mark.parametrize('method, Gibbs, coulomb, vdw', [

diff --git a/mdpow/tests/test_fep.py b/mdpow/tests/test_fep.py
@@ -1,7 +1,6 @@
 from __future__ import absolute_import
 
 import sys
-from six.moves import reload_module
 
 import pytest
 

diff --git a/mdpow/tests/test_fep_analysis.py b/mdpow/tests/test_fep_analysis.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import
+
+import pytest
+
+import os.path
+import shutil
+import copy
+
+import mdpow.fep
+
+from . import STATES
+
+@pytest.fixture
+def FEP_dir(tmpdir):
+    name = STATES['FEP'].basename  # a py.path.local
+    fepdir = tmpdir.join(name)     # a py.path.local
+    shutil.copytree(STATES['FEP'].strpath, fepdir.strpath)
+    assert os.path.isdir(fepdir.strpath)
+    return fepdir
+
+def setup_Ghyd(fepdir):
+    basedir = fepdir.join("benzene")
+    gsolv = basedir.join("FEP", "water", "Gsolv.fep")
+    G = mdpow.fep.Ghyd(filename=gsolv.strpath)
+    # patch paths
+    G.basedir = basedir.strpath
+    G.filename = gsolv.strpath
+    return G
+
+
+@pytest.fixture
+def Ghyd(FEP_dir):
+    return setup_Ghyd(FEP_dir)
+
+@pytest.fixture
+def Ghyd_other(FEP_dir):
+    return setup_Ghyd(FEP_dir)
+
+def test_load_Ghyd(Ghyd):
+    assert isinstance(Ghyd, mdpow.fep.Ghyd)
+
+@pytest.mark.parametrize("kwargs", (
+    {},
+    {'SI': True, 'estimator': 'alchemlyb', 'method': 'TI'},
+    {'SI': False, 'estimator': 'alchemlyb', 'method': 'MBAR', 'force': False},
+    {'SI': False, 'estimator': 'mdpow', 'method': 'TI', 'force': True},
+    ),
+    ids=["defaults",
+         "SI=True, estimator='alchemlyb', method='TI'",
+         "SI=False, estimator='alchemlyb', method='MBAR', force=False",
+         "SI=False, estimator='mdpow', method='TI', force=True",
+         ]
+    )
+def test_p_transfer(Ghyd, Ghyd_other, kwargs):
+    """Test transfer water <-> water with same data."""
+    G1 = Ghyd
+    G2 = Ghyd_other
+
+    transferFE, logPow = mdpow.fep.p_transfer(G1, G2, **kwargs)
+
+    assert transferFE == pytest.approx(0.0)
+    assert logPow == pytest.approx(0.0)
+
+def test_p_transfer_wrong_method(Ghyd, Ghyd_other):
+    """Test transfer water <-> water with same data."""
+    G1 = Ghyd
+    G2 = Ghyd_other
+
+    with pytest.raises(ValueError,
+                       match="Method MBAR is not implemented in MDPOW, use estimator='alchemlyb'"):
+        mdpow.fep.p_transfer(G1, G2, estimator="mdpow", method="MBAR")
+
+
+def test_pOW_error(Ghyd, Ghyd_other):
+    with pytest.raises(ValueError):
+        mdpow.fep.pOW(Ghyd, Ghyd_other)
+
+def test_pCW_error(Ghyd, Ghyd_other):
+    with pytest.raises(ValueError):
+        mdpow.fep.pCW(Ghyd, Ghyd_other)
+
diff --git a/scripts/mdpow-solvationenergy b/scripts/mdpow-solvationenergy
@@ -132,7 +132,7 @@ def run_gsolv(solvent, directory, **kwargs):
         kwargs.setdefault('SI', gsolv.SI)
 
     # make sure that we have data
-    if kwargs['force'] or (not hasattr(gsolv.results.DeltaA, 'Gibbs')):
+    if kwargs['force'] or 'Gibbs' not in gsolv.results.DeltaA:
         logger.info("[%(directory)s] Forcing (re)analysis of hydration free energy data", vars())
         # write out the settings when the analysis is performed
         logger.info("Estimator is %s.", estimator)