Skip to content

Commit

Permalink
Merge pull request #31 from boost-R/refactor_noncyc
Browse files Browse the repository at this point in the history
Refactor noncyclical fitting code
  • Loading branch information
hofnerb authored Apr 4, 2017
2 parents 4f21eb6 + 6309410 commit bb9a1da
Show file tree
Hide file tree
Showing 12 changed files with 292 additions and 463 deletions.
3 changes: 3 additions & 0 deletions R/AAA.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
### the variance of the negative gradient
options(gamboostLSS_stab_ngrad = FALSE)
}

# get rid of NOTEs in R CMD check for "undefined global functions or variables"
globalVariables(c("ngradient", "y", "combined_risk"))
2 changes: 1 addition & 1 deletion R/cvrisk.nc_mboostLSS.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ plot.nc_cvriskLSS <- function(x, type = "lines",
main = attr(x, "type"),
...) {
if (type != "lines")
warning("Only ", sQuote('type = "lines"'), " supported for non-cyclical fitting")
warning("Only ", sQuote('type = "lines"'), " supported for noncyclical fitting")
plot.cvriskLSS(x = x, type = "lines", xlab = xlab, ylab = ylab,
ylim = ylim, main = main, ...)
}
Expand Down
20 changes: 10 additions & 10 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,20 @@ rescale_weights <- function(w) {

## helper function in a modified version based on mboost_2.2-3
## print trace of boosting iterations
do_trace <- function(current, mstart, risk,
linebreak = options("width")$width / 2, mstop = 1000) {
current <- current - mstart
do_trace <- function(current, risk, mstart,
linebreak = round(options("width")$width / 2), mstop = 1000) {

current <- current - mstart

if (current != mstop) {
if ((current - 1) %/% linebreak == (current - 1) / linebreak) {
if (current %% linebreak == 1) {
mchr <- formatC(current + mstart, format = "d",
width = nchar(mstop) + 1, big.mark = "'")
cat(paste("[", mchr, "] ",sep = ""))
} else {
if ((current %/% linebreak != current / linebreak)) {
cat(".")
} else {
cat(" -- risk:", risk[current + mstart], "\n")
}
}
cat(".")
if (current %% linebreak == 0) {
cat(" -- risk:", risk[current + mstart], "\n")
}
} else {
cat("\nFinal risk:", risk[current + mstart], "\n")
Expand Down
538 changes: 188 additions & 350 deletions R/mboostLSS.R

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions R/methods.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Methods

"[.mboostLSS" <- function(x, i, return = TRUE, ...) {
stopifnot((length(i) == 1 | length(i) == length(x)) && i > 0)
stopifnot(length(i) == 1 | length(i) == length(x))
attr(x, "subset")(i)
if (return) return(x)
invisible(NULL)
Expand Down Expand Up @@ -455,8 +455,7 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER,
if (nsel >= q)
break
}
#this changes nothing for method = "cycling" but fixes mstop for
#method = "inner" or "outer"
#this changes nothing for method = "cyclic" but fixes mstop for method = "noncyclic"
mstop <- check(mstop, "mstop", names(x))
## complete paths
if (any(sapply(xs, length) < mstop)) {
Expand All @@ -476,7 +475,7 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER,
ret <- unlist(ret)

## compute selection paths
#merging for method cycling
#merging for method cyclic
if(!inherits(x, "nc_mboostLSS")){
sequences <- lapply(1:length(xs), function(i) {
res <- matrix(FALSE, nrow = length(nms[[i]]), ncol = mstop[[i]])
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ For installation instructions see below.
Instructions on how to use `gamboostLSS` can be found here:
- [gamboostLSS tutorial](https://www.jstatsoft.org/article/view/v074i01).

Details on the noncyclical fitting method can be found here:
- [noncyclical fitting](https://arxiv.org/abs/1611.10171); This is a preleminary version currently under review.

## Issues & Feature Requests

Expand Down
12 changes: 2 additions & 10 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,13 @@ install:
- ps: Bootstrap

# Adapt as necessary starting from here

# environment:
# matrix:
# - TEST_DIR: patch
# - TEST_DIR: pkg

before_build:
# - cp ../travis-tool.sh ./travis-tool.sh
# - cp travis-tool.sh.cmd %TEST_DIR%/travis-tool.sh.cmd
# - cd %TEST_DIR%
- bash -c "echo '^travis-tool\.sh\.cmd$' >> .Rbuildignore"
# - dir

build_script:
- travis-tool.sh install_deps
- travis-tool.sh install_github hofnerb/stabs
- travis-tool.sh install_github boost-R/mboost

test_script:
- travis-tool.sh run_tests
Expand Down
20 changes: 20 additions & 0 deletions inst/CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,24 @@ citEntry(
)


citEntry(
entry= "TechReport",
title = "Stability selection for component-wise gradient boosting in multiple dimensions",
author = personList(as.person("Janek Thomas"),
as.person("Andreas Mayr"),
as.person("Bernd Bischl"),
as.person("Matthias Schmid"),
as.person("Adam Smith"),
as.person("Benjamin Hofner")),
year = "2016",
institution = "ArXiv",
header = "To cite the noncyclical fitting method of 'gamboostLSS' use:",
url = "https://arxiv.org/abs/1611.10171",
textVersion =
paste("Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016).",
"Stability selection for component-wise gradient boosting in multiple dimensions.",
"arXiv preprint arXiv:1611.10171.")
)


citFooter('\nUse ', sQuote('toBibtex(citation("gamboostLSS"))'), ' to extract BibTeX references.')
9 changes: 4 additions & 5 deletions man/cvrisk.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE,
bootstrap samples.
}
\item{grid}{
If the model was fitted with \code{method = "cycling"}, grid is
If the model was fitted with \code{method = "cyclic"}, grid is
a matrix of stopping parameters the empirical risk is to be evaluated for.
Each row represents a parameter combination. The number of columns must be
equal to the number of parameters of the GAMLSS family. Per default,
equal to the number of parameters of the GAMLSS family. Per default,
\code{make.grid(mstop(object))} is used.

Otherwise (i.e., for \code{method = "inner"} or \code{method = "outer"}) grid
Otherwise (i.e., for \code{method = "noncyclic"}) grid
is a vector of mstop values. Per default all steps up to the intial stopping
iteration, i.e., \code{1:mstop(object)} are used.
}
Expand Down Expand Up @@ -132,8 +132,7 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE,
fold to fold. The heatmap shows only the average risk but in a nicer
fashion.

For the noncyclical fitting methods (i.e. \code{method = "inner"} or
\code{method = "outer"}) only the line plot for exists.
For the \code{method = "noncyclic"} only the line plot for exists.

Hofner et al. (2015) provide a detailed description of
cross-validation for \code{\link{gamboostLSS}} models and show a
Expand Down
33 changes: 20 additions & 13 deletions man/mboostLSS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@
}
\usage{
mboostLSS(formula, data = list(), families = GaussianLSS(),
control = boost_control(), weights = NULL, method = c("cycling", "inner", "outer"), ...)
control = boost_control(), weights = NULL,
method = c("cyclic", "noncyclic"), ...)
glmboostLSS(formula, data = list(), families = GaussianLSS(),
control = boost_control(), weights = NULL, method = c("cycling", "inner", "outer"), ...)
control = boost_control(), weights = NULL,
method = c("cyclic", "noncyclic"), ...)
gamboostLSS(formula, data = list(), families = GaussianLSS(),
control = boost_control(), weights = NULL, method = c("cycling", "inner", "outer"), ...)
control = boost_control(), weights = NULL,
method = c("cyclic", "noncyclic"), ...)
blackboostLSS(formula, data = list(), families = GaussianLSS(),
control = boost_control(), weights = NULL,method = c("cycling", "inner", "outer"), ...)
control = boost_control(), weights = NULL,
method = c("cyclic", "noncyclic"), ...)

## fit function:
mboostLSS_fit(formula, data = list(), families = GaussianLSS(),
Expand Down Expand Up @@ -58,9 +62,9 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(),
\item{call}{ used to forward the call from \code{mboostLSS},
\code{glmboostLSS}, \code{gamboostLSS} and \code{blackboostLSS}.
This argument should not be directly specified by users!}
\item{method}{ fitting method, currently three methods are supported:
\code{"cycling"}, \code{"inner"} and \code{"outer"}. The latter two require a
one dimensional \code{mstop} value.}
\item{method}{ fitting method, currently two methods are supported:
\code{"cyclic"} and \code{"noncycli"}. The latter two requires a one dimensional \code{mstop}
value.}
\item{\dots}{Further arguments to be passed to \code{mboostLSS_fit}.
In \code{mboostLSS_fit}, \code{\dots} represent further arguments to be
passed to \code{\link{mboost}} and \code{\link{mboost_fit}}. So
Expand Down Expand Up @@ -99,7 +103,7 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(),
function without \code{LSS}. For further possible arguments see
these functions as well as \code{\link{mboost_fit}}.

For \code{method = "cycling"} it is possible to specify one or
For \code{method = "cyclic"} it is possible to specify one or
multiple \code{mstop} and \code{nu} values via
\code{\link{boost_control}}. In the case of one single value, this
value is used for all distribution parameters of the GAMLSS model.
Expand All @@ -113,12 +117,11 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(),
specify, e.g., \code{mstop = 100} via \code{\link{boost_control}}. For
more-dimensional stopping, one can specify, e.g., \code{mstop =
list(mu = 100, sigma = 200)} (see examples).
If \code{method} is set to \code{"inner"} or \code{"outer"}, \code{mstop} has
If \code{method} is set to \code{"noncyclic"}, \code{mstop} has
to be a one dimensional integer. Instead of cycling through all distribution
parameters, in each iteration only the best baselearner is used. For
\code{"inner"} one baselearner of every parameter is selected via RSS, the
distribution parameter is then chosen via the loss. For \code{"outer"} the
selection is solely based on the (outer) loss.
parameters, in each iteration only the best baselearner is used. One baselearner of every
parameter is selected via RSS, the distribution parameter is then chosen via the loss.
For details on the noncyclic fitting method see Thomas et. al. (2016).

To (potentially) stabilize the model estimation by standardizing the
negative gradients one can use the argument \code{stabilization} of
Expand Down Expand Up @@ -152,6 +155,10 @@ Statistical Society, Series C (Applied Statistics), 54, 507-554.

Buehlmann, P. and Hothorn, T. (2007), Boosting algorithms: Regularization,
prediction and model fitting. Statistical Science, 22(4), 477--505.

Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016),
Stability selection for component-wise gradient boosting in multiple dimensions.
arXiv preprint arXiv:1611.10171.
}

\seealso{
Expand Down
2 changes: 1 addition & 1 deletion man/stabsel.mboostLSS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ dat <- data.frame(x1, x2, x3, x4, x5, x6, y)
### linear model with y ~ . for both components: 400 boosting iterations
model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat,
control = boost_control(mstop = 400),
center = TRUE, method = "inner")
center = TRUE, method = "noncyclic")

\donttest{### Do not test the following code per default on CRAN as it takes some time to run:

Expand Down
Loading

0 comments on commit bb9a1da

Please sign in to comment.