-
Notifications
You must be signed in to change notification settings - Fork 3
/
RUN_clustering_KMEANS.py
98 lines (81 loc) · 3.63 KB
/
RUN_clustering_KMEANS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# -*- coding: utf-8 -*-
"""
Created on Fri May 27 16:44:10 2016
@author: sa1013
"""
"""
Kmeans clustering to learn a brain parcellation from rest fMRI
====================================================================
We use spatially-constrained Ward-clustering to create a set of
parcels. These parcels are particularly interesting for creating a
'compressed' representation of the data, replacing the data in the fMRI
images by mean on the parcellation.
"""
import warnings
import time
import os
import numpy as np
from scipy.io import loadmat, savemat
from scipy import sparse
from sklearn.cluster import KMeans
from sklearn import decomposition
warnings.filterwarnings("ignore")
Ks = [25, 35, 50, 75, 100]
#Ks.extend(range(100,260,25))
alpha = 1.0 # just functional
sessions = '12'
hems = 'LR'
mat = loadmat('/vol/medic02/users/sk1712/subjectIDs100_set1.mat')
subjectIDs = mat['subjectIDs']
# See http://stackoverflow.com/questions/32857029/python-scikit-learn-pca-explained-variance-ratio-cutoff
# for the explanation of variance. Apparently, using k=500 explaines almost 50% of the variance.
### Kmeans ######################################################################
print "Generating parcellations with K-means"
print "Running script for {0} parcels".format(Ks)
for hem in hems:
for i, sID in enumerate(subjectIDs):
subjectID = str(sID[0])
root = '/vol/vipdata/data/HCP100/'
print "Subject: {0}, ID: {1}".format(i,subjectID)
getFrom = root + subjectID + '/processed/'
#mat = loadmat(getFrom + subjectID + '_N_' + hem + '.mat');
#neigh = mat['N']
#sN = sparse.csr_matrix(neigh)
saveTo = getFrom + 'networks/'
if not os.path.exists(saveTo):
os.makedirs(saveTo)
for s in sessions: # sessions
print "Session: {0}".format(s)
mat = loadmat(getFrom + subjectID + '_dtseries_fix_' + s + '_normalized_corrected_' + hem + '.mat')
dtseries = mat['dtseries1'] if s is '1' else mat['dtseries2']
mat = loadmat(getFrom + subjectID + '_midthickness_vertices_' + hem + '.mat')
coords = mat['vertices']
mat = loadmat(getFrom + subjectID + '_atlasroi_cdata_' + hem + '.mat')
cdata = mat['cdata']
ind = [i for i, j in enumerate(cdata) if j > 0]
xyz = coords[ind,:]
# PCA
pca = decomposition.PCA(n_components=510)
dtseries_reduced = pca.fit_transform(dtseries)
print "Explained variance {0:.2f}%".format(100*pca.explained_variance_ratio_.cumsum()[-1])
merged = dtseries_reduced
#merged = np.column_stack((alpha*dtseries_reduced,(1-alpha)*xyz))
print merged.shape
if hem == 'L':
sNshape = 29696
elif hem == 'R':
sNshape = 29716
wardLabels = np.zeros((sNshape,len(Ks)))
start = time.time()
for k, K in enumerate(Ks):
print "{0} -> K: {1}".format(k,K)
# K-MEANS
model = KMeans(n_clusters=K, max_iter=100,
n_init=10, n_jobs=-1)
fitted = model.fit(merged)
wardLabels[:,k] = fitted.labels_ + 1
print "Elapsed time {0} seconds" \
.format((time.time() - start))
saveName = saveTo + subjectID + '_wardLabels_alpha' + str(int(alpha*100)) + '_' + s + '_' + hem
print "Saved as {0}".format(saveName)
savemat(saveName + '.mat', {'wardLabels':wardLabels})