From 7ce6fab8f81b2c39222fb7c5009ffd43f30a8985 Mon Sep 17 00:00:00 2001
From: Paolo Bonini <45519144+PaoloBnn@users.noreply.github.com>
Date: Fri, 20 Sep 2019 19:26:16 +0200
Subject: [PATCH] update models

Updated the xgboost and lightgbm models to reduce overfitting: the
xgboost tuning grid now favors slower learning rates and shallower,
more regularized trees, and the lightgbm calls constrain tree depth,
leaf count, and bin count.
---
 DESCRIPTION       |  2 +-
 R/model_lighgbm.R |  5 +++--
 R/model_xgboost.R | 26 ++++++++++++--------------
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a1d569b..6b5ecde 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: Retip
 Type: Package
 Title: Retention Time Prediction for Metabolomics
-Version: 0.5.3
+Version: 0.5.4
 Authors@R: c(
     person("Paolo", "Bonini", , "pb@ngalab.com", c("aut", "cre")),
     person("Tobias", "Kind", , "tkind@ucdavis.edu",role = "aut"),
diff --git a/R/model_lighgbm.R b/R/model_lighgbm.R
index e289150..f9d155f 100644
--- a/R/model_lighgbm.R
+++ b/R/model_lighgbm.R
@@ -33,7 +33,7 @@ valids <- list(test = dtest)
 params <- list(objective = "regression", metric = "rmse")
 
 # building cross validation model
-modelcv <- lightgbm::lgb.cv(params, dtrain, nrounds=5000,nfold = 10, valids,verbose = 1, early_stopping_rounds = 1000, record = TRUE, eval_freq = 1L,stratified = TRUE)
+modelcv <- lightgbm::lgb.cv(params, dtrain, nrounds=5000,nfold = 10, valids,verbose = 1, early_stopping_rounds = 1000, record = TRUE, eval_freq = 1L,stratified = TRUE,max_depth=4,max_leaf=20,max_bin=100)
 
 # select the best iter in cross validation
 best.iter <- modelcv$best_iter
@@ -42,10 +42,11 @@ best.iter <- modelcv$best_iter
 params <- list(objective = "regression_l2",metric = "rmse")
 
 # building final model
-model <- lightgbm::lgb.train(params, dtrain, nrounds=best.iter, valids,verbose = 0, early_stopping_rounds =1000, record = TRUE, eval_freq = 1L)
+model <- lightgbm::lgb.train(params, dtrain, nrounds=best.iter, valids,verbose = 0, early_stopping_rounds =1000, record = TRUE, eval_freq = 1L,max_depth=4,max_leaf=20,max_bin=100)
 
 print(paste0("End training"))
 
+
 return(model)
 }
diff --git a/R/model_xgboost.R b/R/model_xgboost.R
index fc82be4..5ba0d99 100644
--- a/R/model_xgboost.R
+++ b/R/model_xgboost.R
@@ -13,26 +13,24 @@ fit.xgboost <- function(x){
 
   cv.ctrl <-caret::trainControl(method = "cv",number = 10)
 
   # These are the tune grid parameters
-  xgb.grid <- base::expand.grid(nrounds=c(100,200,300,400,500,600,700),
-                                max_depth = c(5),
-                                eta = c(0.025,0.05),
-                                gamma = c(0.01),
-                                colsample_bytree = c(0.75),
-                                subsample = c(0.50),
-                                min_child_weight = c(0))
+  xgb.grid <- base::expand.grid(nrounds=c(300,400,500,600,700,800,1000),
+                                max_depth = c(2,3,4,5),
+                                eta = c(0.01,0.02),
+                                gamma = c(1),
+                                colsample_bytree = c(0.5),
+                                subsample = c(0.5),
+                                min_child_weight = c(10))
 
   print("Computing model Xgboost ... Please wait ...")
 
   # Model training using the above parameters
   set.seed(101)
   model_xgb <-caret::train(RT ~.,
-                          data=x,
-                          method="xgbTree",
-                          metric = "RMSE",
-                          trControl=cv.ctrl,
-                          tuneGrid=xgb.grid,
-                          tuneLength = 14)
-
+                           data=x,
+                           method="xgbTree",
+                           metric = "RMSE",
+                           trControl=cv.ctrl,
+                           tuneGrid=xgb.grid)
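
The new LightGBM arguments (max_depth=4, max_leaf=20, max_bin=100) cap tree
depth, leaf count, and histogram resolution, limiting model complexity;
max_leaf is a documented alias of LightGBM's num_leaves. A minimal standalone
sketch of the constrained cross-validation call follows, on synthetic data:
the matrix, response, and round counts are illustrative stand-ins, not part
of the patch.

# Sketch only: synthetic regression data standing in for the package's
# molecular-descriptor matrix.
library(lightgbm)

set.seed(101)
x <- matrix(rnorm(200 * 5), ncol = 5)
y <- x[, 1] + rnorm(200, sd = 0.1)
dtrain <- lightgbm::lgb.Dataset(x, label = y)

# Same complexity constraints as the patch; max_leaf is an alias of
# num_leaves, and max_bin coarsens the feature histograms.
params <- list(objective = "regression", metric = "rmse",
               max_depth = 4, max_leaf = 20, max_bin = 100)

# Cross-validate to find the best iteration, as R/model_lighgbm.R does
# (smaller nrounds and early stopping than the patch, for brevity).
modelcv <- lightgbm::lgb.cv(params, dtrain, nrounds = 500, nfold = 10,
                            early_stopping_rounds = 100, verbose = 1)
print(modelcv$best_iter)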
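
The retuned xgboost grid searches more boosting rounds at slower learning
rates (eta 0.01-0.02 instead of 0.025-0.05) over shallower trees, with
stronger regularization via gamma = 1, min_child_weight = 10, and 50%
row/column subsampling. A minimal sketch of the same caret grid search
outside the package: the toy data frame's RT column matches the formula in
R/model_xgboost.R, while the descriptor columns d1-d3 are placeholders.

# Sketch only: toy data frame standing in for the package's training set.
library(caret)
library(xgboost)

set.seed(101)
toy <- data.frame(RT = runif(200, 1, 15),
                  d1 = rnorm(200), d2 = rnorm(200), d3 = rnorm(200))
toy$RT <- toy$RT + 2 * toy$d1   # give the grid search a signal to fit

cv.ctrl <- caret::trainControl(method = "cv", number = 10)

# Same grid as the patch: more rounds, slower eta, shallower trees,
# stronger regularization than the previous defaults.
xgb.grid <- base::expand.grid(nrounds = c(300, 400, 500, 600, 700, 800, 1000),
                              max_depth = c(2, 3, 4, 5),
                              eta = c(0.01, 0.02),
                              gamma = c(1),
                              colsample_bytree = c(0.5),
                              subsample = c(0.5),
                              min_child_weight = c(10))

model_xgb <- caret::train(RT ~ ., data = toy, method = "xgbTree",
                          metric = "RMSE", trControl = cv.ctrl,
                          tuneGrid = xgb.grid)
print(model_xgb$bestTune)   # cross-validated winner from the grid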