diff --git a/DESCRIPTION b/DESCRIPTION index 71ee923..26e4fbc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Description: In computationally demanding analysis projects, 'clustermq' by Schubert (2019) ), and 'batchtools' by Lang, Bischl, and Surmann (2017). . -Version: 0.0.6.9011 +Version: 0.0.7 License: MIT + file LICENSE URL: https://wlandau.github.io/crew.aws.batch/, https://github.com/wlandau/crew.aws.batch diff --git a/NEWS.md b/NEWS.md index 08c4088..aa6a6cc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# crew.aws.batch 0.0.6.9011 (development) +# crew.aws.batch 0.0.7 * Send both cancellation and termination requests to end jobs. * Fix launcher bug/typo where parameters were supplied to container overrides. @@ -7,6 +7,7 @@ * Support `options_metrics`. * Reduce argument clutter with `crew_options_aws_batch()`. Supports direct inputs for CPUs, GPUs, and memory without having to specify a complicated `containerOverrides` list. * Sanitize job names. +* Use `crashes_error` from `crew`. # crew.aws.batch 0.0.6 diff --git a/R/crew_controller_aws_batch.R b/R/crew_controller_aws_batch.R index 91ae6ce..90fad8d 100644 --- a/R/crew_controller_aws_batch.R +++ b/R/crew_controller_aws_batch.R @@ -41,7 +41,7 @@ crew_controller_aws_batch <- function( reset_packages = FALSE, reset_options = FALSE, garbage_collection = FALSE, - launch_max = 5L, + crashes_error = 5L, processes = NULL, r_arguments = c("--no-save", "--no-restore"), options_metrics = crew::crew_options_metrics(), @@ -88,7 +88,7 @@ crew_controller_aws_batch <- function( reset_packages = reset_packages, reset_options = reset_options, garbage_collection = garbage_collection, - launch_max = launch_max, + crashes_error = crashes_error, tls = tls, processes = processes, r_arguments = r_arguments, diff --git a/R/crew_launcher_aws_batch.R b/R/crew_launcher_aws_batch.R index f8e2e1d..2a49c43 100644 --- a/R/crew_launcher_aws_batch.R +++ b/R/crew_launcher_aws_batch.R @@ -82,7 +82,7 @@ crew_launcher_aws_batch <- function( reset_packages = FALSE, reset_options = FALSE, garbage_collection = FALSE, - launch_max = 5L, + crashes_error = 5L, tls = crew::crew_tls(mode = "automatic"), processes = NULL, r_arguments = c("--no-save", "--no-restore"), @@ -137,7 +137,7 @@ crew_launcher_aws_batch <- function( reset_packages = reset_packages, reset_options = reset_options, garbage_collection = garbage_collection, - launch_max = launch_max, + crashes_error = crashes_error, tls = tls, processes = processes, r_arguments = r_arguments, @@ -216,7 +216,7 @@ crew_class_launcher_aws_batch <- R6::R6Class( #' @param reset_packages See [crew_launcher_aws_batch()]. #' @param reset_options See [crew_launcher_aws_batch()]. #' @param garbage_collection See [crew_launcher_aws_batch()]. - #' @param launch_max See [crew_launcher_aws_batch()]. + #' @param crashes_error See [crew_launcher_aws_batch()]. #' @param tls See [crew_launcher_aws_batch()]. #' @param processes See [crew_launcher_aws_batch()]. #' @param r_arguments See [crew_launcher_aws_batch()]. @@ -235,7 +235,7 @@ crew_class_launcher_aws_batch <- R6::R6Class( reset_packages = NULL, reset_options = NULL, garbage_collection = NULL, - launch_max = NULL, + crashes_error = NULL, tls = NULL, processes = NULL, r_arguments = NULL, @@ -255,7 +255,7 @@ crew_class_launcher_aws_batch <- R6::R6Class( reset_packages = reset_packages, reset_options = reset_options, garbage_collection = garbage_collection, - launch_max = launch_max, + crashes_error = crashes_error, tls = tls, processes = processes, r_arguments = r_arguments, diff --git a/man/crew_class_launcher_aws_batch.Rd b/man/crew_class_launcher_aws_batch.Rd index 41145df..39382b9 100644 --- a/man/crew_class_launcher_aws_batch.Rd +++ b/man/crew_class_launcher_aws_batch.Rd @@ -115,7 +115,7 @@ Abstract launcher constructor. reset_packages = NULL, reset_options = NULL, garbage_collection = NULL, - launch_max = NULL, + crashes_error = NULL, tls = NULL, processes = NULL, r_arguments = NULL, @@ -151,7 +151,7 @@ Abstract launcher constructor. \item{\code{garbage_collection}}{See \code{\link[=crew_launcher_aws_batch]{crew_launcher_aws_batch()}}.} -\item{\code{launch_max}}{See \code{\link[=crew_launcher_aws_batch]{crew_launcher_aws_batch()}}.} +\item{\code{crashes_error}}{See \code{\link[=crew_launcher_aws_batch]{crew_launcher_aws_batch()}}.} \item{\code{tls}}{See \code{\link[=crew_launcher_aws_batch]{crew_launcher_aws_batch()}}.} diff --git a/man/crew_controller_aws_batch.Rd b/man/crew_controller_aws_batch.Rd index 884b3ef..fcf219b 100644 --- a/man/crew_controller_aws_batch.Rd +++ b/man/crew_controller_aws_batch.Rd @@ -24,7 +24,7 @@ crew_controller_aws_batch( reset_packages = FALSE, reset_options = FALSE, garbage_collection = FALSE, - launch_max = 5L, + crashes_error = 5L, processes = NULL, r_arguments = c("--no-save", "--no-restore"), options_metrics = crew::crew_options_metrics(), @@ -128,15 +128,14 @@ because packages sometimes rely on options they set at loading time.} \item{garbage_collection}{\code{TRUE} to run garbage collection between tasks, \code{FALSE} to skip.} -\item{launch_max}{Positive integer of length 1, maximum allowed -consecutive launch attempts which do not complete any tasks. -Enforced on a worker-by-worker basis. -The futile launch count resets to back 0 -for each worker that completes a task. -It is recommended to set \code{launch_max} above 0 -because sometimes workers are unproductive under perfectly ordinary -circumstances. But \code{launch_max} should still be small enough -to detect errors in the underlying platform.} +\item{crashes_error}{Positive integer scalar. If a worker exits +\code{crashes_error} times in a row without completing all its assigned +tasks, then the launcher throws an informative error. +The reason for \code{crashes_error} +is to avoid an infinite loop where a task crashes a worker +(through a segfault, maxing out memory, etc) but the worker always +relaunches. To monitor the resources of \code{crew} workers, +please see \url{https://wlandau.github.io/crew/articles/logging.html}.} \item{processes}{\code{NULL} or positive integer of length 1, number of local processes to diff --git a/man/crew_launcher_aws_batch.Rd b/man/crew_launcher_aws_batch.Rd index ab57242..ab20b9b 100644 --- a/man/crew_launcher_aws_batch.Rd +++ b/man/crew_launcher_aws_batch.Rd @@ -17,7 +17,7 @@ crew_launcher_aws_batch( reset_packages = FALSE, reset_options = FALSE, garbage_collection = FALSE, - launch_max = 5L, + crashes_error = 5L, tls = crew::crew_tls(mode = "automatic"), processes = NULL, r_arguments = c("--no-save", "--no-restore"), @@ -98,15 +98,14 @@ because packages sometimes rely on options they set at loading time.} \item{garbage_collection}{\code{TRUE} to run garbage collection between tasks, \code{FALSE} to skip.} -\item{launch_max}{Positive integer of length 1, maximum allowed -consecutive launch attempts which do not complete any tasks. -Enforced on a worker-by-worker basis. -The futile launch count resets to back 0 -for each worker that completes a task. -It is recommended to set \code{launch_max} above 0 -because sometimes workers are unproductive under perfectly ordinary -circumstances. But \code{launch_max} should still be small enough -to detect errors in the underlying platform.} +\item{crashes_error}{Positive integer scalar. If a worker exits +\code{crashes_error} times in a row without completing all its assigned +tasks, then the launcher throws an informative error. +The reason for \code{crashes_error} +is to avoid an infinite loop where a task crashes a worker +(through a segfault, maxing out memory, etc) but the worker always +relaunches. To monitor the resources of \code{crew} workers, +please see \url{https://wlandau.github.io/crew/articles/logging.html}.} \item{tls}{A TLS configuration object from \code{\link[crew:crew_tls]{crew_tls()}}.}