diff --git a/pyemma/coordinates/io/feature_reader.py b/pyemma/coordinates/io/feature_reader.py index 455d1d580..117e68bfd 100644 --- a/pyemma/coordinates/io/feature_reader.py +++ b/pyemma/coordinates/io/feature_reader.py @@ -270,7 +270,7 @@ def next_chunk(self, lag=0): self.t += chunk.xyz.shape[0] - if (self.t >= self.trajectory_length(self.curr_itraj) and + if (self.t + lag >= self.trajectory_length(self.curr_itraj) and self.curr_itraj < len(self.trajfiles) - 1): log.debug('closing current trajectory "%s"' % self.trajfiles[self.curr_itraj]) diff --git a/pyemma/coordinates/transform/pca.py b/pyemma/coordinates/transform/pca.py index 07b103a85..ce5b69587 100644 --- a/pyemma/coordinates/transform/pca.py +++ b/pyemma/coordinates/transform/pca.py @@ -146,5 +146,6 @@ def map(self, X): :param X: the input data :return: the projected data """ - Y = np.dot(X, self.R[:, 0:self.output_dimension]) + X_meanfree = X - self.mu + Y = np.dot(X_meanfree, self.R[:, 0:self.output_dimension]) return Y diff --git a/pyemma/coordinates/transform/tica.py b/pyemma/coordinates/transform/tica.py index 1f124b9a1..51856744e 100644 --- a/pyemma/coordinates/transform/tica.py +++ b/pyemma/coordinates/transform/tica.py @@ -162,9 +162,13 @@ def param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj, if ipass == 1: X_meanfree = X - self.mu - Y_meanfree = Y - self.mu self.cov += np.dot(X_meanfree.T, X_meanfree) - self.cov_tau += np.dot(X_meanfree.T, Y_meanfree) + fake_data = max(t+X.shape[0]-self.trajectory_length(itraj)+self.lag,0) + end = X.shape[0]-fake_data + if end > 0: + X_meanfree = X[0:end] - self.mu + Y_meanfree = Y[0:end] - self.mu + self.cov_tau += np.dot(X_meanfree.T, Y_meanfree) if last_chunk: return True # finished! @@ -175,7 +179,7 @@ def param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj, def param_finish(self): # norm self.cov /= self.N - 1 - self.cov_tau /= self.N - self.lag - 1 + self.cov_tau /= self.N - self.lag*self.number_of_trajectories() - 1 # symmetrize covariance matrices self.cov = self.cov + self.cov.T diff --git a/pyemma/coordinates/util/stat.py b/pyemma/coordinates/util/stat.py index 17c59556d..566d77adb 100644 --- a/pyemma/coordinates/util/stat.py +++ b/pyemma/coordinates/util/stat.py @@ -39,8 +39,22 @@ def subsample(transform, dimensions, stride=1): ''' trajs = [np.zeros((0, len(dimensions))) for _ in xrange(transform.number_of_trajectories())] + last_i = -1 for i, chunk in transform: - trajs[i] = np.concatenate((trajs[i], chunk[::stride, dimensions])) + if i != last_i: + t_0 = 0 + t_next = 0 + last_i = i + size = chunk.shape[0] + if t_next-t_0 < size: + block = chunk[t_next-t_0::stride, dimensions] + trajs[i] = np.concatenate((trajs[i], block)) + n_out = (size - (t_next-t_0) - 1)//stride + 1 + assert block.shape[0] == n_out + else: + n_out = 0 + t_0 += size + t_next += stride*n_out return trajs