forked from FeeLab/seqNMF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.m
147 lines (134 loc) · 5.64 KB
/
demo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
% DESCRIPTION: Demo code for running seqNMF on simulated and real data,
% including how to test significance of each factor on held-out data, and
% how to select lambda
%
% ------------------------------------------------------------------------
% Andrew Bahle and Emily Mackevicius 1.26.2018
%
% See paper:
% https://www.biorxiv.org/content/early/2018/03/02/273128
%% Generate some synthetic data
number_of_seqences = 2;
T = 3000; % length of data to generate
Nneurons = [10;10]; % number of neurons in each sequence
Dt = 3.*ones(number_of_seqences,1); % gap between each member of the sequence
NeuronNoise = 0.001; % probability of added noise in each bin
SeqNoiseTime = zeros(number_of_seqences,1); % Jitter parameter = 0%
SeqNoiseNeuron = 1.*ones(number_of_seqences,1); % Participation parameter = 100%
X = generate_data(T,Nneurons,Dt,NeuronNoise,SeqNoiseTime,SeqNoiseNeuron,0,0,0,0,0);
%% Fit with seqNMF
K = 5;
L = 50;
lambda =.002;
shg; clf
display('Running seqNMF on simulated data (2 simulated sequences + noise)')
[W,H] = seqNMF(X,'K',K, 'L', L,'lambda', lambda);
%% Look at factors
figure; SimpleWHPlot(W,H); title('SeqNMF reconstruction')
figure; SimpleWHPlot(W,H,X); title('SeqNMF factors, with raw data')
%% load example HVC calcium imaging data (from 6991FirstFewDaysForBatch)
clear all
display('Attempting to load MackeviciusData from seqNMF repository')
load MackeviciusData
display('loaded data')
%% break data into training set and test set
splitN = floor(size(NEURAL,2)*.75);
splitS = floor(size(SONG,2)*.75);
trainNEURAL = NEURAL(:,1:splitN);
trainSONG = SONG(:,1:splitS);
testNEURAL = NEURAL(:,(splitN+1):end);
testSONG = SONG(:,(splitS+1):end);
%% plot one example factorization
rng(235); % fixed rng seed for reproduceability
X = trainNEURAL;
K = 10;
L = 2/3; % units of seconds
Lneural = ceil(L*VIDEOfs);
Lsong = ceil(L*SONGfs);
shg
display('Running seqNMF on real neural data (from songbird HVC, recorded by Emily Mackevicius, Fee Lab)')
[W, H, ~,loadings,power]= seqNMF(X,'K',K,'L',Lneural,...
'lambdaL1W', .1, 'lambda', .005, 'maxiter', 100, 'showPlot', 1,...
'lambdaOrthoW', 0);
p = .05; % desired p value for factors
display('Testing significance of factors on held-out data')
[pvals,is_significant] = test_significance(testNEURAL,W,p);
W = W(:,is_significant,:);
H = H(is_significant,:);
% plot, sorting neurons by latency within each factor
[max_factor, L_sort, max_sort, hybrid] = helper.ClusterByFactor(W(:,:,:),1);
indSort = hybrid(:,3);
tstart = 180; % plot data starting at this timebin
figure; WHPlot(W(indSort,:,:),H(:,tstart:end), X(indSort,tstart:end), ...
0,trainSONG(:,floor(tstart*SONGfs/VIDEOfs):end))
title('Significant seqNMF factors, with raw data')
figure; WHPlot(W(indSort,:,:),H(:,tstart:end), ...
helper.reconstruct(W(indSort,:,:),H(:,tstart:end)),...
0,trainSONG(:,floor(tstart*SONGfs/VIDEOfs):end))
title('SeqNMF reconstruction')
%% Procedure for choosing lambda
nLambdas = 20; % increase if you're patient
K = 10;
X = trainNEURAL;
lambdas = sort([logspace(-1,-5,nLambdas)], 'ascend');
loadings = [];
regularization = [];
cost = [];
for li = 1:length(lambdas)
[N,T] = size(X);
[W, H, ~,loadings(li,:),power]= seqNMF(X,'K',K,'L',Lneural,...
'lambdaL1W', .1, 'lambda', lambdas(li), 'maxiter', 100, 'showPlot', 0);
[cost(li),regularization(li),~] = helper.get_seqNMF_cost(X,W,H);
display(['Testing lambda ' num2str(li) '/' num2str(length(lambdas))])
end
%% plot costs as a function of lambda
windowSize = 3;
b = (1/windowSize)*ones(1,windowSize);
a = 1;
Rs = filtfilt(b,a,regularization);
minRs = prctile(regularization,10); maxRs= prctile(regularization,90);
Rs = (Rs-minRs)/(maxRs-minRs);
R = (regularization-minRs)/(maxRs-minRs);
Cs = filtfilt(b,a,cost);
minCs = prctile(cost,10); maxCs = prctile(cost,90);
Cs = (Cs -minCs)/(maxCs-minCs);
C = (cost -minCs)/(maxCs-minCs);
clf; hold on
plot(lambdas,Rs, 'b')
plot(lambdas,Cs,'r')
scatter(lambdas, R, 'b', 'markerfacecolor', 'flat');
scatter(lambdas, C, 'r', 'markerfacecolor', 'flat');
xlabel('Lambda'); ylabel('Cost (au)')
set(legend('Correlation cost', 'Reconstruction cost'), 'Box', 'on')
set(gca, 'xscale', 'log', 'ytick', [], 'color', 'none')
set(gca,'color','none','tickdir','out','ticklength', [0.025, 0.025])
%% chose lambda=.005; run multiple times, see number of sig factors
loadings = [];
pvals = [];
is_significant = [];
X = trainNEURAL;
nIter = 20; % increase if patient
display('Running seqNMF multiple times for lambda=0.005')
for iteri = 1:nIter
[W, H, ~,loadings(iteri,:),power]= seqNMF(X,'K',K,'L',Lneural,...
'lambdaL1W', .1, 'lambda', .005, 'maxiter', 100, 'showPlot', 0);
p = .05;
[pvals(iteri,:),is_significant(iteri,:)] = test_significance(testNEURAL,W,p);
W = W(:,is_significant(iteri,:)==1,:);
H = H(is_significant(iteri,:)==1,:);
[max_factor, L_sort, max_sort, hybrid] = helper.ClusterByFactor(W(:,:,:),1);
indSort = hybrid(:,3);
tstart = 300;
clf; WHPlot(W(indSort,:,:),H(:,tstart:end), X(indSort,tstart:end), 0,trainSONG(:,floor(tstart*SONGfs/VIDEOfs):end))
display(['seqNMF run ' num2str(iteri) '/' num2str(nIter)])
end
figure; hold on
h = histogram(sum(is_significant,2), 'edgecolor', 'w', 'facecolor', .7*[1 1 1]);
h.BinCounts = h.BinCounts/sum(h.BinCounts)*100;
xlim([0 10]);
xlabel('# significant factors')
ylabel('% seqNMF runs')
%% Plot factor-triggered song examples and rastors
addpath(genpath('misc_elm'));
figure; HTriggeredSpec(H,trainSONG,VIDEOfs,SONGfs,Lsong);
figure; HTriggeredRaster(H,trainNEURAL(indSort,:),Lneural);