-
Notifications
You must be signed in to change notification settings - Fork 0
/
MeanShiftCluster.m
120 lines (98 loc) · 5.54 KB
/
MeanShiftCluster.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
function [clustCent,data2cluster,cluster2dataCell] = MeanShiftCluster(dataPts,bandWidth,plotFlag)
%perform MeanShift Clustering of data using a flat kernel
%
% ---INPUT---
% dataPts - input data, (numDim x numPts)
% bandWidth - is bandwidth parameter (scalar)
% plotFlag - display output if 2 or 3 D (logical)
% ---OUTPUT---
% clustCent - is locations of cluster centers (numDim x numClust)
% data2cluster - for every data point which cluster it belongs to (numPts)
% cluster2dataCell - for every cluster which points are in it (numClust)
%
% Bryan Feldman 02/24/06
% MeanShift first appears in
% K. Funkunaga and L.D. Hosteler, "The Estimation of the Gradient of a
% Density Function, with Applications in Pattern Recognition"
%*** Check input ****
if nargin < 2
error('no bandwidth specified')
end
if nargin < 3
plotFlag = false;
end
%**** Initialize stuff ***
[numDim,numPts] = size(dataPts);
numClust = 0;
bandSq = bandWidth^2;
initPtInds = 1:numPts;
maxPos = max(dataPts,[],2); %biggest size in each dimension
minPos = min(dataPts,[],2); %smallest size in each dimension
boundBox = maxPos-minPos; %bounding box size
sizeSpace = norm(boundBox); %indicator of size of data space
stopThresh = 1e-3*bandWidth; %when mean has converged
clustCent = []; %center of clust
beenVisitedFlag = zeros(1,numPts,'uint8'); %track if a points been seen already
numInitPts = numPts; %number of points to posibaly use as initilization points
clusterVotes = zeros(1,numPts,'uint16'); %used to resolve conflicts on cluster membership
while numInitPts
tempInd = ceil( (numInitPts-1e-6)*rand); %pick a random seed point
stInd = initPtInds(tempInd); %use this point as start of mean
myMean = dataPts(:,stInd); % intilize mean to this points location
myMembers = []; % points that will get added to this cluster
thisClusterVotes = zeros(1,numPts,'uint16'); %used to resolve conflicts on cluster membership
while 1 %loop untill convergence
sqDistToAll = sum((repmat(myMean,1,numPts) - dataPts).^2); %dist squared from mean to all points still active
inInds = find(sqDistToAll < bandSq); %points within bandWidth
thisClusterVotes(inInds) = thisClusterVotes(inInds)+1; %add a vote for all the in points belonging to this cluster
myOldMean = myMean; %save the old mean
myMean = mean(dataPts(:,inInds),2); %compute the new mean
myMembers = [myMembers inInds]; %add any point within bandWidth to the cluster
beenVisitedFlag(myMembers) = 1; %mark that these points have been visited
%*** plot stuff ****
if plotFlag
figure(12345),clf,hold on
if numDim == 2
plot(dataPts(1,:),dataPts(2,:),'.')
plot(dataPts(1,myMembers),dataPts(2,myMembers),'ms')
plot(myMean(1),myMean(2),'go')
plot(myOldMean(1),myOldMean(2),'rd')
pause(.1)
end
end
%**** if mean doesn't move much stop this cluster ***
if norm(myMean-myOldMean) < stopThresh
%check for merge posibilities
mergeWith = 0;
for cN = 1:numClust
distToOther = norm(myMean-clustCent(:,cN)); %distance from posible new clust max to old clust max
if distToOther < bandWidth/2 %if its within bandwidth/2 merge new and old
mergeWith = cN;
break;
end
end
if mergeWith > 0 % something to merge
clustCent(:,mergeWith) = 0.5*(myMean+clustCent(:,mergeWith)); %record the max as the mean of the two merged (I know biased towards new ones)
%clustMembsCell{mergeWith} = unique([clustMembsCell{mergeWith} myMembers]); %record which points inside
clusterVotes(mergeWith,:) = clusterVotes(mergeWith,:) + thisClusterVotes; %add these votes to the merged cluster
else %its a new cluster
numClust = numClust+1; %increment clusters
clustCent(:,numClust) = myMean; %record the mean
%clustMembsCell{numClust} = myMembers; %store my members
clusterVotes(numClust,:) = thisClusterVotes;
end
break;
end
end
initPtInds = find(beenVisitedFlag == 0); %we can initialize with any of the points not yet visited
numInitPts = length(initPtInds); %number of active points in set
end
[val,data2cluster] = max(clusterVotes,[],1); %a point belongs to the cluster with the most votes
%*** If they want the cluster2data cell find it for them
if nargout > 2
cluster2dataCell = cell(numClust,1);
for cN = 1:numClust
myMembers = find(data2cluster == cN);
cluster2dataCell{cN} = myMembers;
end
end