-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathStatisticalTesting.R
67 lines (50 loc) · 2.1 KB
/
StatisticalTesting.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#Read validation sample data
d<-read.csv(file="Data/Stratifiedrandomsample.csv",header=T)
#Compute errors and squared errors
d$eKED <- d$SOM_A_hori - d$KED
d$eRF <- d$SOM_A_hori - d$RF
d$e2KED <- d$eKED^2
d$e2RF <- d$eRF^2
#Compute stratum sample sizes
(nh<-tapply(d$SOM_A_hori,INDEX=d$stratum,FUN=length))
#Read sizes of strata
strata<-read.csv(file="Data/StrataSize.csv",header=T)
names(strata)[1] <- "stratum"
#Compute stratum weights
Nh <- strata$Number.of.Pixels
wh <- Nh/sum(Nh)
#Estimate population Mean Error (ME) for RF
meRF_h<-tapply(d$eRF,INDEX=d$stratum,FUN=mean)
(meRF<-sum(wh*meRF_h))
#Estimate standard error of estimated mean error
#Note that in stratum 2 there is only 1 point
#A solution is to collapse strata 2 and 1 (strata 1 and 2 are similar geological units)
#Collapse strata with 1 point only
levels<-sort(unique(d$stratum))
collapsedstrata<-c(1,1,2,3,4,5,6,7)
lut <- data.frame(stratum = levels, collapsedstrata)
d <- merge(x = d, y = lut)
(strata <- merge(x = strata, y = lut))
#Estimate sample sizes and total number of pixels per collapsed stratum
nhc<-tapply(d$SOM_A_hori,INDEX=d$collapsedstrata,FUN=length)
Nhc<-tapply(strata$Number.of.Pixels,INDEX=strata$collapsedstrata,FUN=sum)
#Compute collapsed stratum weights and estimate standard error of estimated ME
whc<-Nhc/sum(Nhc)
vareRF_h<-tapply(d$eRF,INDEX=d$collapsedstrata,FUN=var)
(sdmeRF<-sqrt(sum(whc^2*vareRF_h/nhc)))
#t test of hypothesis ME = 0 (no bias, systematic error = 0)
t <- meRF/sdmeRF
lowertail <- (t<0)
df <- nrow(d) - length(unique(d$collapsedstrata))
(p <- 2*pt(t,df=df,lower.tail=lowertail))
#Paired t test of hypothesis MSE(KED) = MSE(RF)
#Compute for each validation point differences of squared errors (paired differences)
d$dife2 <- d$e2KED-d$e2RF
mdife2_h<-tapply(d$dife2,INDEX=d$stratum,FUN=mean)
(mdife2<-sum(wh*mdife2_h))
vardife2_h<-tapply(d$dife2,INDEX=d$collapsedstrata,FUN=var)
(sdmdife2<-sqrt(sum(whc^2*vardife2_h/nhc)))
t <- mdife2/sdmdife2
lowertail <- (t<0)
df <- nrow(d) - length(unique(d$collapsedstrata))
(p <- 2*pt(t,df=df,lower.tail=lowertail))