[coordinates] new implementation featuring streaming data in chunks (…

…mdtraj backend).
gph82 · Feb 22, 2015 · 8705b73 · 8705b73
2 parents 593fdaa + 036e96a
commit 8705b73
Show file tree

Hide file tree

Showing 70 changed files with 3,681 additions and 2,742 deletions.
diff --git a/.gitignore b/.gitignore
@@ -23,3 +23,4 @@ Untitled*.ipynb
 # generated from MANIFEST.in
 MANIFEST
 doc/source/api/generated/
+.ipynb_checkpoints
diff --git a/.project b/.project
@@ -10,6 +10,16 @@
 			<arguments>
 			</arguments>
 		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.ui.externaltools.ExternalToolBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+				<dictionary>
+					<key>LaunchConfigHandle</key>
+					<value>&lt;project&gt;/.externalToolBuilders/build_extensions.launch</value>
+				</dictionary>
+			</arguments>
+		</buildCommand>
 	</buildSpec>
 	<natures>
 		<nature>org.python.pydev.pythonNature</nature>

diff --git a/.travis.yml b/.travis.yml
@@ -6,9 +6,9 @@ python:
 env:
   global:
     - PATH=$HOME/miniconda/bin:$HOME/miniconda/envs/ci/bin/:$PATH
-    - common_py_deps="pip nose cython jpype1"
+    - common_py_deps="pip nose cython jpype1 mdtraj scikit-learn"
     - doc_deps="ipython sphinx matplotlib numpydoc pyzmq"
-    - deps="scipy=0.11 numpy=1.7"
+    - deps="scipy numpy"
   matrix:
     secure: "byk9bmnGvP3qDfpYvPKX4909KeS6pJQtfW+GkSsuHy4vnp++gu2IsXC/CJeCB0r7hpoRp7Z+XlOtYmJLvb585LZmGaqIs5LKs1DimJniAg5anpywOnaXodspeFcz6UWtLlIAQQS3SvP9SXvvrlIiF8IwXqWfEewtoGbpiCj3dEo="
 #    - deps="scipy=0.11 numpy=1.7 cython"
@@ -23,7 +23,7 @@ before_install:
 - deactivate # travis python venv
 - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O mc.sh -o /dev/null
 - bash mc.sh -b
-- conda create -q --yes -n ci -c https://conda.binstar.org/marscher 
+- conda create -q --yes -n ci -c https://conda.binstar.org/omnia
         python=$TRAVIS_PYTHON_VERSION $deps $common_py_deps 
 - source activate ci
 - python -c "import scipy; print scipy.__version__; print scipy.__numpy_version__"
@@ -33,6 +33,8 @@ install:
 - python setup.py install
 
 script:
+- printf "[Java]\nstartup=True" > ~/pyemma.cfg
+- cat ~/pyemma.cfg
 - python setup.py test 
 
 after_success:

diff --git a/doc/source/api/coordinates.transform.tica.rst b/doc/source/api/coordinates.transform.tica.rst
diff --git a/doc/source/api/index.rst b/doc/source/api/index.rst
@@ -2,16 +2,16 @@
 
 Coordinates
 ===========
-The *coordinates* package implements common transformations used in
-Markov state modeling, like RMSD, TICA etc.
+The *coordinates* package contains tools to select features from MD trajectories
+and to assign them to a discrete state space, which is later used in Markov
+modeling.
 
 .. toctree::
    :maxdepth: 1
 
    coordinates.io
    coordinates.transform
    coordinates.clustering
-   coordinates.tica
 
 Markov State Models
 ===================

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -41,6 +41,17 @@
     #'sphinx.ext.viewcode'
 ]
 
+# for edit on github button
+html_context = {
+    'github_user': 'markovmodel',
+    'display_github': True,
+    'github_repo': 'PyEMMA',
+    'github_version': 'devel',
+    'conf_py_path': '/doc/',
+    'html_logo': 'logo-200px.png',
+    'base_url': 'http://pythonhosted.org/pyEMMA'
+}
+
 # Add any paths that contain templates here, relative to this directory.
 #templates_path = ['_templates']
 
@@ -286,20 +297,22 @@
 # -----------------------------------------------------------------------------
 
 import glob
-autosummary_generate = glob.glob("*.rst")
-autosummary_generate += glob.glob("api/*.rst")
+
+autosummary_generate = True
+autodoc_default_flags = ['members', 'inherited-members']
 
 # see https://github.com/numpy/numpydoc/issues/5
 numpydoc_class_members_toctree = False
+numpydoc_show_class_members = False
 
 # intersphinx for linking to other api's
 intersphinx_mapping = {
     'http://docs.python.org/': None,
     'http://docs.scipy.org/doc/numpy': None,
     'http://docs.scipy.org/doc/scipy/reference/': None,
     'http://matplotlib.sourceforge.net/': None,
+    'http://mdtraj.org/latest/': None,
 }
-#autodoc_default_flags = []
 
 # todo list extension
 todo_include_todos = True

diff --git a/meta.yaml b/meta.yaml
diff --git a/pyemma/__init__.py b/pyemma/__init__.py
@@ -1,12 +1,12 @@
 r"""
-==========================================
-    Emma2 - Emma's Markov Model Algorithms
-==========================================
+=======================================
+PyEMMA - Emma's Markov Model Algorithms
+=======================================
 """
 from . import coordinates
 from . import msm
 from . import util
 
 from ._version import get_versions
 __version__ = get_versions()['version']
-del get_versions
+del get_versions
diff --git a/pyemma/coordinates/__init__.py b/pyemma/coordinates/__init__.py
@@ -12,6 +12,4 @@
    :toctree: generated/
 
 """
-from . import transform
-from . import io
-from . import clustering
+from .api import *
diff --git a/pyemma/coordinates/acf.py b/pyemma/coordinates/acf.py
@@ -0,0 +1,106 @@
+import numpy as np
+import sys
+
+__author__ = 'Fabian Paul'
+__all__ = ['acf']
+
+
+def acf(trajs, stride=1, max_lag=None, subtract_mean=True, normalize=True, mean=None):
+    '''Computes the (combined) autocorrelation function of multiple trajectories.
+
+       Parameters
+       ----------
+       trajs : (*,N) ndarray or list of (*,N) ndarrays
+         the observable trajectories, N is the number of observables;
+         1-d arrays count as one observable. The input is never modified.
+       stride : int (default = 1)
+         only take every n'th frame from trajs
+       max_lag : int (default = maximum trajectory length / stride)
+         only compute acf up to this lag time
+       subtract_mean : bool (default = True)
+         subtract trajectory mean before computing acfs
+       normalize : bool (default = True)
+         divide acf by the variance such that acf[0,:]==1
+       mean : (N) ndarray (optional)
+         if subtract_mean is True, you can give the trajectory mean
+         so this function doesn't have to compute it again
+
+       Returns
+       -------
+       acf : (L,N) ndarray
+           autocorrelation functions for all observables, where L is the
+           longest strided trajectory length, capped at max_lag if given
+
+       Raises
+       ------
+       Exception
+           if trajs is empty, a trajectory is not 1- or 2-dimensional, or
+           the trajectories differ in their number of observables
+
+       Note
+       ----
+       The computation uses FFT (with zero-padding) and is done in memory (RAM).
+       A warning is written to stderr for trajectories shorter than max_lag.
+    '''
+    if not isinstance(trajs, list):
+        trajs = [trajs]
+    if not trajs:
+        raise Exception('trajs must contain at least one trajectory')
+
+    # bring every trajectory into (frames, observables) shape
+    mytrajs = []
+    for i, traj in enumerate(trajs):
+        traj = np.asarray(traj)
+        if traj.ndim == 1:
+            traj = traj.reshape((traj.shape[0], 1))
+        elif traj.ndim != 2:
+            raise Exception(
+                'Unexpected number of dimensions in trajectory number %d' % i)
+        # always checked here, so broadcasting can never hide a mismatch later
+        if mytrajs and traj.shape[1] != mytrajs[0].shape[1]:
+            raise Exception(('number of order parameters in trajectory number %d differs ' +
+                             'from the number found in previous trajectories.') % i)
+        mytrajs.append(traj)
+    trajs = mytrajs
+    n_obs = trajs[0].shape[1]
+
+    assert stride > 0, 'stride must be > 0'
+    assert max_lag is None or max_lag > 0, 'max_lag must be > 0'
+
+    if subtract_mean and mean is None:
+        # mean over all (unstrided) frames; float division avoids truncation
+        n_samples = sum(traj.shape[0] for traj in trajs)
+        mean = sum(traj.sum(axis=0) for traj in trajs) / float(n_samples)
+
+    # strided views; one accumulator row per lag that any trajectory can supply
+    trajs = [traj[::stride] for traj in trajs]
+    n_lags = max(traj.shape[0] for traj in trajs)
+    if max_lag is not None:
+        n_lags = min(n_lags, max_lag)
+    acf = np.zeros((n_lags, n_obs))
+    N = np.zeros(n_lags)  # number of samples for every tau
+
+    for i, data in enumerate(trajs):
+        if subtract_mean:
+            # out-of-place on purpose: data is a view of the caller's array
+            data = data - mean
+        l = data.shape[0]
+        fft = np.fft.fft(data, n=2 ** int(np.ceil(np.log2(l * 2 - 1))), axis=0)
+        acftraj = np.fft.ifft(fft * np.conjugate(fft), axis=0).real
+        # throw away acf data for long lag times (and negative lag times)
+        k = min(l, n_lags)
+        acftraj = acftraj[:k, :]
+        if max_lag is not None and max_lag > l:
+            sys.stderr.write(
+                'Warning: trajectory number %d is shorter than maximum lag.\n' % i)
+        # lag t is supported by l - t samples of this trajectory
+        acf[:k, :] += acftraj
+        N[:k] += np.arange(l, l - k, -1)
+
+    # divide by number of samples
+    acf /= N[:, np.newaxis]
+
+    if normalize:
+        acf /= acf[0, :].copy()
+
+    return acf