Skip to content

Commit

Permalink
update models
Browse files: browse the repository at this point in the history
Updated the xgboost and lightgbm models to reduce overfitting
  • Loading branch information
PaoloBnn committed Sep 20, 2019
1 parent 4c90d6b commit 7ce6fab
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 17 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Retip
Type: Package
Title: Retention Time Prediction for Metabolomics
Version: 0.5.3
Version: 0.5.4
Authors@R: c(
person("Paolo", "Bonini", , "[email protected]", c("aut", "cre")),
person("Tobias", "Kind", , "[email protected]",role = "aut"),
Expand Down
5 changes: 3 additions & 2 deletions R/model_lighgbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ valids <- list(test = dtest)
params <- list(objective = "regression", metric = "rmse")

# building cross validation model
modelcv <- lightgbm::lgb.cv(params, dtrain, nrounds=5000,nfold = 10, valids,verbose = 1, early_stopping_rounds = 1000, record = TRUE, eval_freq = 1L,stratified = TRUE)
modelcv <- lightgbm::lgb.cv(params, dtrain, nrounds=5000,nfold = 10, valids,verbose = 1, early_stopping_rounds = 1000, record = TRUE, eval_freq = 1L,stratified = TRUE,max_depth=4,max_leaf=20,max_bin=100)

# select the best iter in cross validation
best.iter <- modelcv$best_iter
Expand All @@ -42,10 +42,11 @@ best.iter <- modelcv$best_iter
params <- list(objective = "regression_l2",metric = "rmse")

# building final model
model <- lightgbm::lgb.train(params, dtrain, nrounds=best.iter, valids,verbose = 0, early_stopping_rounds =1000, record = TRUE, eval_freq = 1L)
model <- lightgbm::lgb.train(params, dtrain, nrounds=best.iter, valids,verbose = 0, early_stopping_rounds =1000, record = TRUE, eval_freq = 1L,max_depth=4,max_leaf=20,max_bin=100)

print(paste0("End training"))


return(model)

}
Expand Down
26 changes: 12 additions & 14 deletions R/model_xgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,24 @@ fit.xgboost <- function(x){
cv.ctrl <-caret::trainControl(method = "cv",number = 10)

# These are the tune grid parameters
xgb.grid <- base::expand.grid(nrounds=c(100,200,300,400,500,600,700),
max_depth = c(5),
eta = c(0.025,0.05),
gamma = c(0.01),
colsample_bytree = c(0.75),
subsample = c(0.50),
min_child_weight = c(0))
xgb.grid <- base::expand.grid(nrounds=c(300,400,500,600,700,800,1000),
max_depth = c(2,3,4,5),
eta = c(0.01,0.02),
gamma = c(1),
colsample_bytree = c(0.5),
subsample = c(0.5),
min_child_weight = c(10))

print("Computing model Xgboost ... Please wait ...")

# Model training using the above parameters
set.seed(101)
model_xgb <-caret::train(RT ~.,
data=x,
method="xgbTree",
metric = "RMSE",
trControl=cv.ctrl,
tuneGrid=xgb.grid,
tuneLength = 14)

data=x,
method="xgbTree",
metric = "RMSE",
trControl=cv.ctrl,
tuneGrid=xgb.grid)



Expand Down

0 comments on commit 7ce6fab

Please sign in to comment.