Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch multi-core to parallel package #151

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pkg/BayesFactor/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ Authors@R: c(person("Richard D.", "Morey", role = c("aut", "cre", "cph"), email
person("Tahira", "Jamil", role = c("ctb","cph"), email = "[email protected]"),
person("Simon", "Urbanek", role = c("ctb", "cph"), email = "[email protected]"),
person("Karl", "Forner", role = c("ctb", "cph"), email = "[email protected]"),
person("Alexander", "Ly", role = c("ctb", "cph"), email = "[email protected]"))
person("Alexander", "Ly", role = c("ctb", "cph"), email = "[email protected]"),
person("Marius", "Barth", role = c("ctb", "cph"), email = "[email protected]", comment = c(orcid = "0000-0002-3421-6665")))
Description: A suite of functions for computing
various Bayes factors for simple designs, including contingency tables,
one- and two-sample designs, one-way designs, general ANOVA designs, and
Expand All @@ -28,10 +29,9 @@ Imports:
MatrixModels,
Rcpp (>= 0.11.2),
methods,
hypergeo
hypergeo,
parallel
Suggests:
doMC,
foreach,
testthat,
knitr,
markdown,
Expand All @@ -44,5 +44,5 @@ URL: https://richarddmorey.github.io/BayesFactor/
BugReports: https://github.com/richarddmorey/BayesFactor/issues
LazyLoad: yes
LinkingTo: Rcpp (>= 0.11.2), RcppEigen (>= 0.3.2.2.0)
RoxygenNote: 7.2.3
RoxygenNote: 7.1.1
Encoding: UTF-8
69 changes: 47 additions & 22 deletions pkg/BayesFactor/R/anovaBF.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@
##' @param rscaleRandom prior scale for standardized random effects
##' @param rscaleEffects A named vector of prior settings for individual factors,
##' overriding rscaleFixed and rscaleRandom. Values are scales, names are factor names.
##' @param multicore if \code{TRUE} use multiple cores through the \code{doMC}
##' package. Unavailable on Windows.
##' @param multicore if \code{TRUE}, use multiple cores through the \pkg{parallel}
##' package. Only takes effect on Unix-alikes; on Windows, the models are
##' computed on a single core.
##' @param method approximation method, if needed. See \code{\link{nWayAOV}} for
##' details.
##' @param noSample if \code{TRUE}, do not sample, instead returning NA.
Expand Down Expand Up @@ -175,29 +175,55 @@ anovaBF <-
models <- lapply(models, addRandomModelPart, dataTypes = dataTypes)
models <- c(models, addRandomModelPart(fmla, dataTypes, null=TRUE))
}

if(multicore){

# On Windows, the parallel package can only create PSOCK clusters;
# these seem to hamper performance (compared to single-core operation)
if(isTRUE(multicore && .Platform$OS.type == "unix")){
message("Note: Progress bars and callbacks are suppressed when running multicore.")
if(!requireNamespace("doMC", quietly = TRUE)){
stop("Required package (doMC) missing for multicore functionality.")
# Create a cluster if and only if no default cluster is available
if(is.null(parallel::getDefaultCluster())) {
cl <- parallel::makeForkCluster(nnodes = getOption("mc.cores", default = parallel::detectCores()))
on.exit(parallel::stopCluster(cl))
} else {
cl <- parallel::getDefaultCluster()
}

doMC::registerDoMC()
if(foreach::getDoParWorkers()==1){
warning("Multicore specified, but only using 1 core. Set options(cores) to something >1.")

# first split models into batches (to reduce cross-talk between workers)
n_workers <- length(cl)
idx <- rep(
seq_len(n_workers),
each = ceiling(length(models)/n_workers),
length.out = length(models)
)

nested_models <- vector(mode = "list", length = n_workers)
for (i in seq_along(nested_models)) {
nested_models[[i]] <- models[idx == i]
}

bfs <- foreach::"%dopar%"(
foreach::foreach(gIndex=models, .options.multicore=mcoptions),
lmBF(gIndex,data = data, whichRandom = whichRandom,
rscaleFixed = rscaleFixed, rscaleRandom = rscaleRandom,
rscaleEffects = rscaleEffects, iterations = iterations,
method=method, progress=FALSE, noSample = noSample)
)

# then use clusterMap() for each batch
bfs <- unlist(parallel::clusterMap(
cl = cl,
nested_models,
MoreArgs = list(
FUN = lmBF,
data = data,
whichRandom = whichRandom,
rscaleFixed = rscaleFixed,
rscaleRandom = rscaleRandom,
rscaleEffects = rscaleEffects,
iterations = iterations,
method = method,
progress = FALSE,
noSample = noSample
),
fun = lapply,
SIMPLIFY = TRUE
))

}else{ # Single core
checkCallback(callback,as.integer(0))
bfs = NULL
bfs <- vector(mode = "list", length = length(models))
myCallback <- function(prgs){
frac <- (i - 1 + prgs/1000)/length(models)
ret <- callback(frac*1000)
Expand All @@ -208,14 +234,13 @@ anovaBF <-
}else{
pb = NULL
}
for(i in 1:length(models)){
oneModel <- lmBF(models[[i]],data = data, whichRandom = whichRandom,
for(i in seq_along(models)){
bfs[[i]] <- lmBF(models[[i]], data = data, whichRandom = whichRandom,
rscaleFixed = rscaleFixed, rscaleRandom = rscaleRandom,
rscaleEffects = rscaleEffects, iterations = iterations,
progress = FALSE, method = method,
noSample=noSample,callback=myCallback)
if(inherits(pb,"txtProgressBar")) setTxtProgressBar(pb, i)
bfs = c(bfs,oneModel)
}
if(inherits(pb,"txtProgressBar")) close(pb)
checkCallback(callback,as.integer(1000))
Expand Down
56 changes: 39 additions & 17 deletions pkg/BayesFactor/R/generalTestBF.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
##' @param rscaleCont prior scale for standardized slopes
##' @param rscaleEffects A named vector of prior settings for individual factors,
##' overriding rscaleFixed and rscaleRandom. Values are scales, names are factor names.
##' @param multicore if \code{TRUE} use multiple cores through the \code{doMC}
##' package. Unavailable on Windows.
##' @param multicore if \code{TRUE}, use multiple cores through the \pkg{parallel}
##' package. Only takes effect on Unix-alikes; on Windows, the models are
##' computed on a single core.
##' @param method approximation method, if needed. See \code{\link{nWayAOV}} for
##' details.
##' @param noSample if \code{TRUE}, do not sample, instead returning NA.
Expand Down Expand Up @@ -93,24 +93,46 @@ generalTestBF <-
"The maximum can be increased by changing ",
"options('BFMaxModels').")

if(multicore){
# On Windows, the parallel package can only create PSOCK clusters;
# these seem to hamper performance (compared to single-core operation)
if(isTRUE(multicore && .Platform$OS.type == "unix")){
message("Note: Progress bars and callbacks are suppressed when running multicore.")
if(!requireNamespace("doMC", quietly = TRUE)){
stop("Required package (doMC) missing for multicore functionality.")
# Create a cluster only if no default cluster is available
if(is.null(parallel::getDefaultCluster())) {
cl <- parallel::makeForkCluster(nnodes = getOption("mc.cores", default = parallel::detectCores()))
on.exit(parallel::stopCluster(cl))
} else {
cl <- parallel::getDefaultCluster()
}

doMC::registerDoMC()
if(foreach::getDoParWorkers()==1){
warning("Multicore specified, but only using 1 core. Set options(cores) to something >1.")

# first split models into batches (to reduce cross-talk between workers)
n_workers <- length(cl)
idx <- rep(
seq_len(n_workers),
each = ceiling(length(models)/n_workers),
length.out = length(models)
)

nested_models <- vector(mode = "list", length = n_workers)
for (i in seq_along(nested_models)) {
nested_models[[i]] <- models[idx == i]
}

bfs <- foreach::"%dopar%"(
foreach::foreach(gIndex=models, .options.multicore=mcoptions),
lmBF(gIndex,data = data, whichRandom = whichRandom,
rscaleFixed = rscaleFixed, rscaleRandom = rscaleRandom,
rscaleCont = rscaleCont, rscaleEffects = rscaleEffects, iterations = iterations, method=method,
progress=FALSE,noSample=noSample)
)

# then use clusterMap() for each batch
bfs <- unlist(parallel::clusterMap(
cl = cl,
nested_models,
MoreArgs = list(
FUN = lmBF,
data = data,
whichRandom = whichRandom,
rscaleFixed = rscaleFixed, rscaleRandom = rscaleRandom,
rscaleEffects = rscaleEffects, iterations = iterations,
method=method, progress=FALSE, noSample = noSample
),
fun = lapply,
SIMPLIFY = TRUE)
)
}else{ # Single core
checkCallback(callback,as.integer(0))
bfs = NULL
Expand Down
46 changes: 35 additions & 11 deletions pkg/BayesFactor/R/methods-BFBayesFactor.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,45 @@ setValidity("BFBayesFactor", function(object){
setMethod("recompute", "BFBayesFactor", function(x, progress = getOption('BFprogress', interactive()), multicore = FALSE, callback = function(...) as.integer(0), ...){

modelList = c(x@numerator,x@denominator)

if(multicore){

# On Windows, the parallel package can only create PSOCK clusters;
# these seem to hamper performance (compared to single-core operation)
if(isTRUE(multicore && .Platform$OS.type == "unix")){
callback = function(...) as.integer(0)
message("Note: Progress bars and callbacks are suppressed when running multicore.")
if( !suppressMessages( requireNamespace("doMC", quietly = TRUE) ) ){
stop("Required package (doMC) missing for multicore functionality.")
}
doMC::registerDoMC()
if(foreach::getDoParWorkers()==1){
warning("Multicore specified, but only using 1 core. Set options(cores) to something >1.")
# Create a cluster if and only if no default cluster is available
if(is.null(parallel::getDefaultCluster())) {
cl <- parallel::makeForkCluster(nnodes = getOption("mc.cores", default = parallel::detectCores()))
on.exit(parallel::stopCluster(cl))
} else {
cl <- parallel::getDefaultCluster()
}
bfs = foreach::"%dopar%"(
foreach::foreach(gIndex=modelList, .options.multicore=mcoptions),
compare(numerator = gIndex, data = x@data, ...)

# first split models into batches (to reduce cross-talk between workers)
n_workers <- length(cl)
idx <- rep(
seq_len(n_workers),
each = ceiling(length(modelList)/n_workers),
length.out = length(modelList)
)

nested_models <- vector(mode = "list", length = n_workers)
for (i in seq_along(nested_models)) {
nested_models[[i]] <- modelList[idx == i]
}

# then use clusterMap() for each batch
bfs <- unlist(parallel::clusterMap(
cl = cl,
nested_models,
MoreArgs = list(
FUN = compare,
data = x@data,
...
),
fun = lapply,
SIMPLIFY = TRUE,
))

}else{ # No multicore
checkCallback(callback,as.integer(0))
Expand Down
4 changes: 2 additions & 2 deletions pkg/BayesFactor/man/anovaBF.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pkg/BayesFactor/man/generalTestBF.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.