-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSVM
54 lines (46 loc) · 1.42 KB
/
SVM
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Dependencies: mlbench ships the BreastCancer data set, e1071 provides svm().
# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at the first use.
# install.packages("mlbench")
library(mlbench)
library(e1071)

# Load the data and drop the Id column so that the `Class ~ .` formula used
# for the models does not treat the sample identifier as a predictor.
data("BreastCancer")
BC <- subset(BreastCancer, select = -Id)

# Count present (FALSE) versus missing (TRUE) cells, then keep only the rows
# without any missing value.
table(is.na(BC))
BC <- na.omit(BC)
# Divide the data into a training set (70%) and a test set (30%).
# The seed makes this split, and everything that follows, reproducible.
set.seed(1)
# floor() makes the truncation of the fractional sample size explicit.
train <- sample(nrow(BC), floor(nrow(BC) * 0.7))

# cross = 10 additionally runs 10-fold cross-validation on the training rows;
# summary() prints those accuracies together with the model description.
model <- svm(Class ~ ., data = BC[train, ], cross = 10, scale = TRUE)
summary(model)

# Hold-out accuracy: share of test rows whose predicted class equals the true
# class. The label is read by name rather than by column position, so the
# result does not silently change if columns are added, dropped or reordered.
ypred <- predict(model, BC[-train, ])
mean(ypred == BC$Class[-train])
# Fit an SVM on a random subset of `data` and score it on the remaining rows.
#
# data        data frame with a factor column `Class`; it is expected to be
#             free of missing values (the script passes the cleaned `BC`).
# train_frac  fraction of the rows used for training, strictly between 0 and 1.
#
# Returns the hold-out accuracy, i.e. the proportion of held-out rows whose
# predicted class equals the true class.
#
# svm() is called without `cross`: the cross-validation accuracies were never
# read in the repeated runs, so requesting them only multiplied the fitting
# cost of every iteration.
holdout_accuracy <- function(data, train_frac) {
  n_rows <- nrow(data)
  n_train <- floor(n_rows * train_frac)
  # Both the training and the test part must contain at least one row.
  stopifnot(n_train >= 1, n_train < n_rows)

  train <- sample(n_rows, n_train)
  model <- svm(Class ~ ., data = data[train, ], scale = TRUE)
  ypred <- predict(model, data[-train, ])
  mean(ypred == data$Class[-train])
}

# Repeat the 70/30 split 50 times to see how much the accuracy depends on
# which rows happen to land in the test set.
n_reps <- 50
accuracy <- vapply(
  seq_len(n_reps),
  function(i) holdout_accuracy(BC, 0.7),
  numeric(1)
)
mean(accuracy)
sd(accuracy)

# Sweep the training fraction from 50% to 95% in 5% steps. For every fraction
# the split is repeated `n_reps` times; mean_t and sd_t hold the mean and the
# standard deviation of the resulting accuracies, one entry per fraction.
train_fracs <- seq(from = 0.5, to = 0.95, by = 0.05)
mean_t <- numeric(length(train_fracs))
sd_t <- numeric(length(train_fracs))
for (k in seq_along(train_fracs)) {
  sweep_accuracy <- vapply(
    seq_len(n_reps),
    function(i) holdout_accuracy(BC, train_fracs[k]),
    numeric(1)
  )
  mean_t[k] <- mean(sweep_accuracy)
  sd_t[k] <- sd(sweep_accuracy)
}
library(ggplot2)

# Mean hold-out accuracy for each training fraction. The x values repeat the
# sequence of fractions that was swept when mean_t was filled, so the two
# vectors line up element by element.
split_results <- data.frame(
  train_frac = seq(from = 0.5, to = 0.95, by = 0.05),
  mean_accuracy = mean_t
)
ggplot(split_results, aes(x = train_frac, y = mean_accuracy)) +
  geom_point() +
  ggtitle("Mean accuracy v. Data split") +
  labs(x = "Training fraction", y = "Mean accuracy")