From 7942eac3305b7fde5708fffb257740d06afde7bd Mon Sep 17 00:00:00 2001 From: jouno Date: Sun, 22 Feb 2015 17:26:18 +0100 Subject: [PATCH 1/3] [coordinates] bugfix in util/subsample (stride now works across multiple chunks) --- pyemma/coordinates/util/stat.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pyemma/coordinates/util/stat.py b/pyemma/coordinates/util/stat.py index 17c59556d..566d77adb 100644 --- a/pyemma/coordinates/util/stat.py +++ b/pyemma/coordinates/util/stat.py @@ -39,8 +39,22 @@ def subsample(transform, dimensions, stride=1): ''' trajs = [np.zeros((0, len(dimensions))) for _ in xrange(transform.number_of_trajectories())] + last_i = -1 for i, chunk in transform: - trajs[i] = np.concatenate((trajs[i], chunk[::stride, dimensions])) + if i != last_i: + t_0 = 0 + t_next = 0 + last_i = i + size = chunk.shape[0] + if t_next-t_0 < size: + block = chunk[t_next-t_0::stride, dimensions] + trajs[i] = np.concatenate((trajs[i], block)) + n_out = (size - (t_next-t_0) - 1)//stride + 1 + assert block.shape[0] == n_out + else: + n_out = 0 + t_0 += size + t_next += stride*n_out return trajs From 265a9b1957a6e6439ad8585f98e779ad1e24f392 Mon Sep 17 00:00:00 2001 From: jouno Date: Sun, 22 Feb 2015 17:35:10 +0100 Subject: [PATCH 2/3] [coordinates] bugfix PCA (subtract mean) --- pyemma/coordinates/transform/pca.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyemma/coordinates/transform/pca.py b/pyemma/coordinates/transform/pca.py index 07b103a85..ce5b69587 100644 --- a/pyemma/coordinates/transform/pca.py +++ b/pyemma/coordinates/transform/pca.py @@ -146,5 +146,6 @@ def map(self, X): :param X: the input data :return: the projected data """ - Y = np.dot(X, self.R[:, 0:self.output_dimension]) + X_meanfree = X - self.mu + Y = np.dot(X_meanfree, self.R[:, 0:self.output_dimension]) return Y From a0254fb63bcdb393310fcd0f0e594e33f8d62725 Mon Sep 17 00:00:00 2001 From: jouno Date: Sun, 22 Feb 2015 20:59:50 +0100 Subject: 
[PATCH 3/3] [coordinates] more fixes in Tica and FeatureReader, will we ever get this right? --- pyemma/coordinates/io/feature_reader.py | 2 +- pyemma/coordinates/transform/tica.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pyemma/coordinates/io/feature_reader.py b/pyemma/coordinates/io/feature_reader.py index 455d1d580..117e68bfd 100644 --- a/pyemma/coordinates/io/feature_reader.py +++ b/pyemma/coordinates/io/feature_reader.py @@ -270,7 +270,7 @@ def next_chunk(self, lag=0): self.t += chunk.xyz.shape[0] - if (self.t >= self.trajectory_length(self.curr_itraj) and + if (self.t + lag >= self.trajectory_length(self.curr_itraj) and self.curr_itraj < len(self.trajfiles) - 1): log.debug('closing current trajectory "%s"' % self.trajfiles[self.curr_itraj]) diff --git a/pyemma/coordinates/transform/tica.py b/pyemma/coordinates/transform/tica.py index 1f124b9a1..51856744e 100644 --- a/pyemma/coordinates/transform/tica.py +++ b/pyemma/coordinates/transform/tica.py @@ -162,9 +162,13 @@ def param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj, if ipass == 1: X_meanfree = X - self.mu - Y_meanfree = Y - self.mu self.cov += np.dot(X_meanfree.T, X_meanfree) - self.cov_tau += np.dot(X_meanfree.T, Y_meanfree) + fake_data = max(t+X.shape[0]-self.trajectory_length(itraj)+self.lag,0) + end = X.shape[0]-fake_data + if end > 0: + X_meanfree = X[0:end] - self.mu + Y_meanfree = Y[0:end] - self.mu + self.cov_tau += np.dot(X_meanfree.T, Y_meanfree) if last_chunk: return True # finished! @@ -175,7 +179,7 @@ def param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj, def param_finish(self): # norm self.cov /= self.N - 1 - self.cov_tau /= self.N - self.lag - 1 + self.cov_tau /= self.N - self.lag*self.number_of_trajectories() - 1 # symmetrize covariance matrices self.cov = self.cov + self.cov.T