-
Notifications
You must be signed in to change notification settings - Fork 0
/
Variable_samp_size_fig.R
104 lines (65 loc) · 3.68 KB
/
Variable_samp_size_fig.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
load("rivsys.RData")
library(hierfstat)
##unequal sample sizes
#repl 1
ss<-sample(rep(c(1:5*2,20,40),each=2),replace=FALSE)
FsM.10kl.niv1<-lapply(dos.g2k,function(x) {a<-rep(1:np-1,c(ss))*50+unlist(lapply(ss,function(x) sample(50,size=x)));hierfstat::fs.dosage(x[a,],pop=rep(1:np,ss))$FsM})
tmp<-lapply(FsM.10kl.niv1,function(x) (x-ebeta[[20]])^2)
Rmses.10kl.niv1<-matrix(numeric(np^2),ncol=np)
for (i in 1:20) Rmses.10kl.niv1<-Rmses.10kl.niv1+tmp[[i]]
Rmses.10kl.niv1<-(Rmses.10kl.niv1/20)^.5
#repl 2
ss<-sample(rep(c(1:5*2,20,40),each=2),replace=FALSE)
FsM.10kl.niv2<-lapply(dos.g2k,function(x) {a<-rep(1:np-1,c(ss))*50+unlist(lapply(ss,function(x) sample(50,size=x)));hierfstat::fs.dosage(x[a,],pop=rep(1:np,ss))$FsM})
tmp<-lapply(FsM.10kl.niv2,function(x) (x-ebeta[[20]])^2)
Rmses.10kl.niv2<-matrix(numeric(np^2),ncol=np)
for (i in 1:20) Rmses.10kl.niv2<-Rmses.10kl.niv2+tmp[[i]]
Rmses.10kl.niv2<-(Rmses.10kl.niv2/20)^.5
##subsampling populations
#rep1
rs1<-sample(14,size=7)
FsM.10kl.rs1<-lapply(FsM.10kl,function(x) {tmp<-mean(hierfstat::mat2vec(x[rs1,rs1]));(x[rs1,rs1]-tmp)/(1-tmp)})
#rep 2
rs2<-sample(14,size=7)
FsM.10kl.rs2<-lapply(FsM.10kl,function(x) {tmp<-mean(hierfstat::mat2vec(x[rs2,rs2]));(x[rs2,rs2]-tmp)/(1-tmp)})
#rep 3
rs3<-sample(14,size=7)
FsM.10kl.rs3<-lapply(FsM.10kl,function(x) {tmp<-mean(hierfstat::mat2vec(x[rs3,rs3]));(x[rs3,rs3]-tmp)/(1-tmp)})
##################################################
##assumes ebeta[[20]], the expected values for FST in the river system after 2000 generations loaded in the environment
png("Subsamp_ind_pop_500x500.png",width=500,height=500)
par(mfrow=c(2,3))
plot(ebeta[[20]],FsM.10kl.niv1[[1]],pch=16,col="red",ylim=range(unlist(FsM.10kl.niv1)),xlab="E[FST]",ylab="FST",main="");abline(c(0,1))
for (i in 2:20) points(ebeta[[20]],FsM.10kl.niv1[[i]],pch=16,col="red")
fig_label("A",cex=2)
plot(ebeta[[20]],FsM.10kl.niv2[[1]],pch=16,col="red",ylim=range(unlist(FsM.10kl.niv2)),xlab="E[FST]",ylab="FST",main="");abline(c(0,1))
for (i in 2:20) points(ebeta[[20]],FsM.10kl.niv2[[i]],pch=16,col="red")
fig_label("B",cex=2)
##assumes Rmses for 10k loci and 50, 20, 10, 5 and 2 individuals loaded in the environment
boxplot(as.vector(Rmses.10kl),as.vector(Rmses.10kl.ni20),as.vector(Rmses.10kl.ni10),as.vector(Rmses.10kl.ni5),as.vector(Rmses.10kl.ni2),as.vector(Rmses.10kl.niv1),as.vector(Rmses.10kl.niv2),ylab="RMSEs",names=c("50 i","20 i","10 i","5 i","2 i","var i","var i"),las=2,xlab="sampling scheme")
fig_label("C",cex=2)
tmp1<-ebeta[[20]][rs1,rs1]
tmp2<-mean(hierfstat::mat2vec(tmp1))
tmp1<-(tmp1-tmp2)/(1-tmp2)
plot(tmp1,FsM.10kl.rs1[[1]],pch=16,col="red",ylim=range(unlist(FsM.10kl.rs1)),xlab="E[FST]",ylab="FST",main="");abline(c(0,1))
for (i in 2:20) points(tmp1,FsM.10kl.rs1[[i]],pch=16,col="red")
fig_label("D",cex=2)
tmp1<-ebeta[[20]][rs2,rs2]
tmp2<-mean(hierfstat::mat2vec(tmp1))
tmp1<-(tmp1-tmp2)/(1-tmp2)
plot(tmp1,FsM.10kl.rs2[[1]],pch=16,col="red",ylim=range(unlist(FsM.10kl.rs2)),xlab="E[FST]",ylab="FST",main="");abline(c(0,1))
for (i in 2:20) points(tmp1,FsM.10kl.rs2[[i]],pch=16,col="red")
fig_label("E",cex=2)
tmp1<-ebeta[[20]][rs3,rs3]
tmp2<-mean(hierfstat::mat2vec(tmp1))
tmp1<-(tmp1-tmp2)/(1-tmp2)
plot(tmp1,FsM.10kl.rs3[[1]],pch=16,col="red",ylim=range(unlist(FsM.10kl.rs3)),xlab="E[FST]",ylab="FST",main="");abline(c(0,1))
for (i in 2:20) points(tmp1,FsM.10kl.rs3[[i]],pch=16,col="red")
fig_label("F",cex=2)
par(mfrow=c(1,1))
dev.off()
#################################
#plot(ebeta[[20]],FsM.10kl[[1]],pch=16,col="red",ylim=range(unlist(FsM.10kl)),xlab="E[FST]",ylab="FST",main="");abline(c(0,1))
#for (i in 2:20) points(ebeta[[20]],FsM.10kl[[i]],pch=16,col="red")
#corrplot(FsM.10kl[[1]],is.corr=FALSE)
#corrplot(FsM.10kl.niv[[1]],is.corr=FALSE)