Skip to content

Commit

Permalink
Merge pull request markovmodel#82 from fabian-paul/feature_coortransf…
Browse files Browse the repository at this point in the history
…orm_fix_subsample

[coordinates] bugfix in util/subsample (stride now works across multiple chunks)
  • Loading branch information
marscher committed Feb 23, 2015
2 parents 69eb582 + a0254fb commit 358edf6
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pyemma/coordinates/io/feature_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def next_chunk(self, lag=0):

self.t += chunk.xyz.shape[0]

- if (self.t >= self.trajectory_length(self.curr_itraj) and
+ if (self.t + lag >= self.trajectory_length(self.curr_itraj) and
self.curr_itraj < len(self.trajfiles) - 1):
log.debug('closing current trajectory "%s"'
% self.trajfiles[self.curr_itraj])
Expand Down
3 changes: 2 additions & 1 deletion pyemma/coordinates/transform/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,6 @@ def map(self, X):
:param X: the input data
:return: the projected data
"""
- Y = np.dot(X, self.R[:, 0:self.output_dimension])
+ X_meanfree = X - self.mu
+ Y = np.dot(X_meanfree, self.R[:, 0:self.output_dimension])
return Y
10 changes: 7 additions & 3 deletions pyemma/coordinates/transform/tica.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,13 @@ def param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj,

if ipass == 1:
X_meanfree = X - self.mu
- Y_meanfree = Y - self.mu
self.cov += np.dot(X_meanfree.T, X_meanfree)
- self.cov_tau += np.dot(X_meanfree.T, Y_meanfree)
+ fake_data = max(t+X.shape[0]-self.trajectory_length(itraj)+self.lag,0)
+ end = X.shape[0]-fake_data
+ if end > 0:
+ X_meanfree = X[0:end] - self.mu
+ Y_meanfree = Y[0:end] - self.mu
+ self.cov_tau += np.dot(X_meanfree.T, Y_meanfree)

if last_chunk:
return True # finished!
Expand All @@ -175,7 +179,7 @@ def param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj,
def param_finish(self):
# norm
self.cov /= self.N - 1
- self.cov_tau /= self.N - self.lag - 1
+ self.cov_tau /= self.N - self.lag*self.number_of_trajectories() - 1

# symmetrize covariance matrices
self.cov = self.cov + self.cov.T
Expand Down
16 changes: 15 additions & 1 deletion pyemma/coordinates/util/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,22 @@ def subsample(transform, dimensions, stride=1):
'''
trajs = [np.zeros((0, len(dimensions)))
for _ in xrange(transform.number_of_trajectories())]
+ last_i = -1
for i, chunk in transform:
- trajs[i] = np.concatenate((trajs[i], chunk[::stride, dimensions]))
+ if i != last_i:
+ t_0 = 0
+ t_next = 0
+ last_i = i
+ size = chunk.shape[0]
+ if t_next-t_0 < size:
+ block = chunk[t_next-t_0::stride, dimensions]
+ trajs[i] = np.concatenate((trajs[i], block))
+ n_out = (size - (t_next-t_0) - 1)//stride + 1
+ assert block.shape[0] == n_out
+ else:
+ n_out = 0
+ t_0 += size
+ t_next += stride*n_out
return trajs


Expand Down

0 comments on commit 358edf6

Please sign in to comment.