-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfitModels.R
109 lines (88 loc) · 2.39 KB
/
fitModels.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Path ----
# Directory that holds the input .RData files and receives the saved models.
# NOTE(review): machine-specific absolute path; adjust it before running on
# another machine.
mypath <- "C:/Users/ildefons/vrije/ildefons"

# Attach the modelling packages with library() so that a missing package
# stops the script right here. require() would only return FALSE with a
# warning and the failure would surface later as an unknown function.
library(randomForest)
library(gbm)
#####################################
# normal-1
#####################################
# Fits a random forest (rf1) and a gradient boosted model (gbm1) on the
# `n1.norm` table. `n1.norm` is presumably created by load() from
# n1norm.RData -- verify against the script that writes that file.
library(randomForest)
library(gbm)

fname <- file.path(mypath, "n1norm.RData")
load(fname)
train.norm <- n1.norm

# The last column is used as the label; the predictors are column 37 up to
# the third-from-last column.
ycol <- ncol(train.norm)
# Guard: with fewer than 39 columns 37:(ycol - 2) would count backwards and
# silently select the wrong columns.
stopifnot(ycol >= 39L)
feature_cols <- 37:(ycol - 2)
# cbind() keeps a 2-d object even when a single predictor column is selected.
X.train <- cbind(train.norm[, feature_cols])
colnames(X.train) <- paste0("v", seq_len(ncol(X.train)))

# Training rows: every row labelled 1 plus a seeded random sample of at most
# 50000 rows labelled 0.
myrows <- which(train.norm[, ycol] == 1)
other <- which(train.norm[, ycol] == 0)
n_neg <- min(50000L, length(other))
if (n_neg < 50000L) {
  warning("only ", n_neg, " rows with label 0 available; using all of them",
          call. = FALSE)
}
set.seed(1234)
# Index through sample.int() instead of sample(other, ...): same random draw
# for length(other) > 1, but no "sample from 1:x" surprise when `other` has
# exactly one element.
rows0 <- other[sample.int(length(other), n_neg)]
myrows <- c(myrows, rows0)

# Model: random forest
# NOTE(review): the label is passed through unchanged; if it is numeric 0/1
# randomForest() fits a regression forest -- confirm that this is intended.
set.seed(1234)
rf1 <- randomForest(
  x = X.train[myrows, , drop = FALSE],
  y = train.norm[myrows, ycol],
  importance = FALSE,
  ntree = 400,
  do.trace = TRUE,
  nodesize = 450
)

# Model: gbm
# gbm() takes a formula, so predictors and label go into one data frame.
train.gbm <- data.frame(cbind(X.train[myrows, , drop = FALSE],
                              y = train.norm[myrows, ycol]))

GBM_NTREES <- 400
GBM_SHRINKAGE <- 0.05
GBM_DEPTH <- 6
GBM_MINOBS <- 400

# Build the GBM model. The seed is reset so the bagging draws do not depend
# on how much randomness the random forest consumed.
set.seed(1234)
gbm1 <- gbm(
  y ~ .,
  data = train.gbm,
  distribution = "gaussian",
  n.trees = GBM_NTREES,
  shrinkage = GBM_SHRINKAGE,
  interaction.depth = GBM_DEPTH,
  n.minobsinnode = GBM_MINOBS,
  verbose = TRUE,
  # cv.folds = 1,
  bag.fraction = 0.5,  # subsampling fraction, 0.5 is probably best
  train.fraction = 1
)
#####################################
# normal-2
#####################################
# Fits a random forest (rf2) and a gradient boosted model (gbm2) on the
# `n2.norm` table. `n2.norm` is presumably created by load() from
# n2norm.RData -- verify against the script that writes that file.
library(randomForest)
library(gbm)

fname <- file.path(mypath, "n2norm.RData")
load(fname)
train.norm <- n2.norm

# The last column is used as the label; the predictors are column 37 up to
# the third-from-last column.
ycol <- ncol(train.norm)
# Guard: with fewer than 39 columns 37:(ycol - 2) would count backwards and
# silently select the wrong columns.
stopifnot(ycol >= 39L)
feature_cols <- 37:(ycol - 2)
# cbind() keeps a 2-d object even when a single predictor column is selected.
X.train <- cbind(train.norm[, feature_cols])
colnames(X.train) <- paste0("v", seq_len(ncol(X.train)))

# Training rows: every row labelled 1 plus a seeded random sample of at most
# 50000 rows labelled 0.
myrows <- which(train.norm[, ycol] == 1)
other <- which(train.norm[, ycol] == 0)
n_neg <- min(50000L, length(other))
if (n_neg < 50000L) {
  warning("only ", n_neg, " rows with label 0 available; using all of them",
          call. = FALSE)
}
set.seed(1234)
# Index through sample.int() instead of sample(other, ...): same random draw
# for length(other) > 1, but no "sample from 1:x" surprise when `other` has
# exactly one element.
rows0 <- other[sample.int(length(other), n_neg)]
myrows <- c(myrows, rows0)

# Model: random forest
# NOTE(review): the label is passed through unchanged; if it is numeric 0/1
# randomForest() fits a regression forest -- confirm that this is intended.
set.seed(1234)
rf2 <- randomForest(
  x = X.train[myrows, , drop = FALSE],
  y = train.norm[myrows, ycol],
  importance = FALSE,
  ntree = 400,
  do.trace = TRUE,
  nodesize = 450
)

# Model: gbm
# gbm() takes a formula, so predictors and label go into one data frame.
train.gbm <- data.frame(cbind(X.train[myrows, , drop = FALSE],
                              y = train.norm[myrows, ycol]))

GBM_NTREES <- 400
GBM_SHRINKAGE <- 0.05
GBM_DEPTH <- 6
GBM_MINOBS <- 400

# Build the GBM model. The seed is reset so the bagging draws do not depend
# on how much randomness the random forest consumed.
set.seed(1234)
gbm2 <- gbm(
  y ~ .,
  data = train.gbm,
  distribution = "gaussian",
  n.trees = GBM_NTREES,
  shrinkage = GBM_SHRINKAGE,
  interaction.depth = GBM_DEPTH,
  n.minobsinnode = GBM_MINOBS,
  verbose = TRUE,
  # cv.folds = 1,
  bag.fraction = 0.5,  # subsampling fraction, 0.5 is probably best
  train.fraction = 1
)
# Persist the four fitted models (two random forests, two GBMs) together in
# a single .RData file inside `mypath`.
fname <- file.path(mypath, "mymodels.RData")
save(list = c("rf1", "gbm1", "rf2", "gbm2"), file = fname)