forked from mwgeurts/libra
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcvMcd.m
executable file
·167 lines (142 loc) · 4.35 KB
/
cvMcd.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
function result = cvMcd(data,kmax,resMCD,h)
%CVMCD calculates the robust cross-validated PRESS (predicted residual error sum of squares)
% curve for the MCD method in a fast way.
%
% Input arguments:
% data : the full data set
% kmax : the maximal number of components to be considered (mostly kmax = p).
% resMCD : the result of mcdcov(data,'plots',0,'factor',1)
% h : the quantile used in MCD.
%
% output:
% result.press : vector of length kmax with the press values
% result.weights : the weights for all observations
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust.html
%
% Written by Sanne Engelen
% Last Update: 01/07/2004
% Some initialisations:
n = size(data,1);
p = size(data,2);
r = rank(data);
Pk = [];
Lk = [];
teller_if_lus = 0;
if nargin < 4
alfa = 0.75;
h=floor(2*floor((n+p+1)/2)-n+2*(n-floor((n+p+1)/2))*alfa);
end
outWeights = weightscvMcd(data,r,kmax,resMCD,h);
w_min = outWeights.w_min;
Hopt = resMCD.Hsubsets.Hopt;
inputH0.H0 = Hopt;
Tfull = mean(data(Hopt,:));
Sfull = cov(data(Hopt,:));
for i = 1:n
% deciding which index should be removed from H0.
inputH0.same = 0;
if isempty(find(inputH0.H0 == i))
inputH0.j = h;
if teller_if_lus >= 1
inputH0.same = 1;
end
teller_if_lus = teller_if_lus + 1;
else
inputH0.j = find(inputH0.H0 == i);
end
% assigning the input variables:
inputFull.T = Tfull;
inputFull.S = Sfull;
if ~inputH0.same
res = removeObsMcd(data,i,inputH0,inputFull);
end
if (isempty(find(inputH0.H0 == i))) & (teller_if_lus == 1)
resfixed = res;
end
if isempty(find(inputH0.H0 == i)) & (teller_if_lus ~= 1)
res = resfixed;
end
P_min_i = res.P_min_i;
L_min_i = res.L_min_i;
mu_min_i = res.mu_min_i;
for k = 1:kmax
clear Pk Lk;
Pk = P_min_i(:,1:k);
Lk = L_min_i(1:k,1:k);
Xhoedk_min_i(i,(k-1)*p + 1:k*p) = (data(i,:) - mu_min_i)*Pk*Pk' + mu_min_i;
if k~=r
odk(i,k) = norm(data(i,:) - Xhoedk_min_i(i,(k-1)*p + 1:k*p));
else
odk(i,k) = 0;
end
end
end
for k = 1:kmax
press_min(k) = 1/sum(w_min)*w_min*odk(:,k).^2;
end
result.press = press_min;
result.weights = outWeights;
%----------------------------------------------------------------------------------
function out = weightscvMcd(data,r,kmax,resMCD,h)
% computes the weights used to calculate the robust PRESS values.
%
% input:
% data : the whole data
% r : the rank of the data
% kmax : the maximal number of components to be considered
% resMCD : the result of mcdcov(data,'plots',0,'factor',1)
% h : the number of observations on which the computations are based.
%
% output:
% out.w_min : the weights computed by taken the minimum over all k
% Some initialisations:
n = size(data,1);
p = size(data,2);
Pk = [];
Lk = [];
Tik = [];
[P,L] = eig(resMCD.cov);
[L,I] = greatsort(diag(L));
P = P(:,I);
for k = 1:kmax
Pk = P(:,1:k);
if h==n
Lk=L(1:k);
else
Lk = chi2inv(h/n,k)/chi2inv(h/n,kmax/2)*L(1:k);% with correction for the factor
end
muk = resMCD.center;
Xhoedk(:,(k-1)*p + 1:k*p) = (data - repmat(muk,n,1))*Pk*Pk' + repmat(muk,n,1);
Tk = (data - repmat(muk,n,1))*Pk;
for i =1:n
% defining the sd for the observation that is left:
sdk(i,k) = sqrt(mahalanobis(Tk(i,:),zeros(1,k),'cov',diag(Lk)));
% defining the od for the observation that is left:
if k~=r
odk(i,k) = norm(data(i,:) - Xhoedk(i,(k-1)*p + 1:k*p));
else
odk(i,k) = 1;
end
end
% defining weights for odk and sdk:
if k~=r
[m,s]=unimcd(odk(:,k).^(2/3),h);
cutoff(k)=sqrt(norminv(0.975,m,s).^3);
wod(:,k) = (odk(:,k) <= cutoff(k));
else
cutoff(k)= 0;
wod(:,k) = 1;
end
wsd(:,k) = (sdk(:,k) <= sqrt(chi2inv(0.975,k)));
end
% determine the weights for every observation:
wk = wsd & wod;
if size(wk,1) == 1 | size(wk,2) == 1
w_min = wk';
else
w_min = min(wk');
end
out.w_min = w_min;