added batch fct for tsf

ComPlat · Sep 9, 2024 · a8aeb94 · a8aeb94
1 parent 6fa9b64
commit a8aeb94
Show file tree

Hide file tree

Showing 16 changed files with 545 additions and 407 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,34 +1,34 @@
 FROM rocker/shiny:4.3.1
 
 RUN apt-get update && apt-get install -y \
-    --no-install-recommends \
-    git-core \
-    libssl-dev \
-    curl \
-    libcurl4-gnutls-dev \
-    libsodium-dev \
-    libxml2-dev \
-    libicu-dev \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+  --no-install-recommends \
+  git-core \
+  libssl-dev \
+  curl \
+  libcurl4-gnutls-dev \
+  libsodium-dev \
+  libxml2-dev \
+  libicu-dev \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/*
 
 ENV _R_SHLIB_STRIP_=true
 ENV SHINY_LOG_STDERR=1
 
 RUN install2.r --error --skipinstalled \
-    shiny \
-    DT \
-    shinydashboard \
-    shinyWidgets \
-    shinyjs \
-    rootSolve \
-    ggplot2 \
-    patchwork \
-    R6 \
-    sensitivity \
-    openxlsx \
-    future \
-    promises
+  shiny \
+  DT \
+  shinydashboard \
+  shinyWidgets \
+  shinyjs \
+  rootSolve \
+  ggplot2 \
+  patchwork \
+  R6 \
+  sensitivity \
+  openxlsx \
+  future \
+  promises
 
 
 COPY ./tsf/ /home/tsf

diff --git a/Tests/IDA/Rplots.pdf b/Tests/IDA/Rplots.pdf
diff --git a/Tests/IDA/TestBatch.R b/Tests/IDA/TestBatch.R
@@ -0,0 +1,29 @@
+setwd("/home/konrad/Documents/Thermosimfit/Tests/IDA")
+library(tsf)
+
+# Test several opti calls in parallel
+path <- "idaBatch.csv"
+lowerBounds <- c(
+  kG = 1000,
+  I0 = 0,
+  IHD = 0,
+  ID = 0
+)
+upperBounds <- c(
+  kG = 10^8,
+  I0 = 100,
+  IHD = 10^7,
+  ID = 10^7
+)
+additionalParameters <- c(
+  host = 1e-6,
+  dye = 1e-6,
+  kHD = 3e6
+)
+
+tsf::batch(
+  "ida",
+  lowerBounds, upperBounds,
+  path, additionalParameters,
+  ngen = 20
+)
diff --git a/tsf/DESCRIPTION b/tsf/DESCRIPTION
@@ -27,7 +27,7 @@ Imports:
   plotly
 Suggests: knitr, rmarkdown, tinytest
 VignetteBuilder: knitr
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 URL: https://complat.github.io/Thermosimfit/
diff --git a/tsf/NAMESPACE b/tsf/NAMESPACE
@@ -1,7 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
-export(Communicator)
 export(ErrorClass)
+export(batch)
 export(convertToNum)
 export(createPolynom)
 export(opti)
@@ -10,15 +10,14 @@ export(runApp)
 export(sensitivity)
 import(DT)
 import(R6)
-import(future)
+import(callr)
 import(ggplot2)
 import(openxlsx)
 import(patchwork)
-import(promises)
+import(plotly, except=c(last_plot))
 import(rootSolve)
-import(sensitivity)
+import(sensitivity, except=c(print.src))
 import(shiny, except=c(dataTableOutput, renderDataTable, runExample))
 import(shinyWidgets, except=c(alert))
-import(plotly, except=c(last_plot))
 import(shinydashboard)
 import(shinyjs)
diff --git a/tsf/R/Batch.R b/tsf/R/Batch.R
@@ -100,72 +100,10 @@ seperate_batch_results <- function(list) {
   )
 }
 
-batch <- function(case,
-                  lowerBounds, upperBounds,
-                  path,
-                  additionalParameters,
-                  seed = NULL, npop = 40, ngen = 200, Topology = "random",
-                  errorThreshold = -Inf, num_rep = 1) {
-  # import data
-  list_df <- importDataBatch(path)
-  if (!is.list(list_df)) {
-    return(ErrorClass$new("Could not import data"))
-  }
-  for (i in seq_along(list_df)) {
-    if (!is.data.frame(list_df[[i]])) {
-      return(list_df[[i]])
-    }
-  }
-  # seed case
-  seed_original <- seed
-  seed_from <- 1:1e6
-  # size calculations
-  num_data_sets <- length(list_df)
-  result <- vector("list", length = num_data_sets * num_rep)
-  seeds <- numeric(length = num_data_sets * num_rep)
-  counter <- 1
-
-  # run optimization
-  for (i in seq_len(num_data_sets)) {
-    if (is.null(seed)) {
-      seed <- sample(seed_from, 1)
-    } else {
-      seed <- seed_original
-    }
-    # seeds[counter] <- seed
-    result[[counter]] <- opti(
-      case = case, lowerBounds = lowerBounds, upperBounds = upperBounds,
-      list_df[[i]], additionalParameters, seed = seed, npop = npop, ngen = ngen,
-      Topology = Topology, errorThreshold = errorThreshold
-    )
-    counter <- counter + 1
-    if (num_rep > 1) {
-      for (j in seq_len(num_rep - 1)) {
-        seed <- sample(seed_from, 1)
-        # seeds[counter] <- seed
-        result[[counter]] <- opti(
-          case = case, lowerBounds = lowerBounds, upperBounds = upperBounds,
-          list_df[[i]], additionalParameters, seed = seed, npop = npop, ngen = ngen,
-          Topology = Topology, errorThreshold = errorThreshold
-        )
-        counter <- counter + 1
-      }
-    }
-  }
-
-  list <- seperate_batch_results(result)
-  list(
-    list,
-    plotStates(list),
-    plotParams(list),
-    plotMetrices(list)
-  )
-}
-
 call_several_opti <- function(case, lb, ub,
-                                    df_list, ap, seed_list,
-                                    npop, ngen, topo,
-                                    et, messages, env) {
+                              df_list, ap, seed_list,
+                              npop, ngen, topo,
+                              et, messages, env) {
   env <- new.env()
   env$intermediate_results <- vector("list", length(df_list))
   tryCatch(
@@ -199,8 +137,10 @@ call_several_opti_in_bg <- function(case, lb, ub, df_list, ap,
              seed_list, npop, ngen, topo,
              et, messages) {
       env <- new.env()
-      env$intermediate_results <- lapply(seq_len(length((df_list))),
-        function(x) x)
+      env$intermediate_results <- lapply(
+        seq_len(length((df_list))),
+        function(x) x
+      )
 
       for (i in seq_len(length(df_list))) {
         tryCatch(

diff --git a/tsf/R/RunBatch.R b/tsf/R/RunBatch.R
@@ -0,0 +1,134 @@
+#' Runs a batch of optimization tasks
+#'
+#' @export
+#' @param case is a character argument which specifies the optimization case.
+#' Either "dba_dye_const", "dba_host_const", "ida" or "gda"
+#' @param lowerBounds is a numeric vector with the lower bounds for the optimization
+#' @param upperBounds is a numeric vector with the upper bounds for the optimization
+#' @param path is a character argument which specifies the path to the data
+#' @param additionalParameters is a numeric vector with additional parameters
+#'        In case of *dba_dye_const* or *dba_host_const the order of the parameters is: *khd*, *I0*, *IHD* and *ID*
+#'        In case of *ida* and *ga* the order of the parameters is: *kg*, *I0*, *IHD* and *ID*.
+#' @param seed is an optional integer argument defining the seed which is set directly for the optimization. In case the argument is not set the current time is used as seed.
+#' @param npop is an optional integer argument defining the number of particles during optimization. The default value is set to 40.
+#' @param ngen is an optional integer argument defining the number of generations of the particle swarm optimization. The default value is set to 200.
+#' @param Topology is an optional character argument defining which topology should be used by the particle swarm algorithm. The options are "star" and "random". The default topology is the "random" topology.
+#' @param errorThreshold is an optional numeric argument defining a sufficient small error which acts as a stop signal for the particle swarm algorithm. The default value is set to -Inf.
+#' @param num_rep is an optional integer argument defining the number of replicates for each dataset
+#' @param num_cores is an optional integer argument defining the maximum number of cores which should be used for the optimization
+#' @examples
+#' path <- paste0(system.file("examples", package = "tsf"), "/IDABatch.csv")
+#' lowerBounds <- c(
+#'   kG = 1000,
+#'   I0 = 0,
+#'   IHD = 0,
+#'   ID = 0
+#' )
+#' upperBounds <- c(
+#'   kG = 10^8,
+#'   I0 = 100,
+#'   IHD = 10^7,
+#'   ID = 10^7
+#' )
+#' additionalParameters <- c(
+#'   host = 1e-6,
+#'   dye = 1e-6,
+#'   kHD = 3e6
+#' )
+#' tsf::batch(
+#'   "ida",
+#'   lowerBounds, upperBounds,
+#'   path, additionalParameters,
+#'   ngen = 20
+#' )
+batch <- function(case,
+                  lowerBounds, upperBounds,
+                  path,
+                  additionalParameters,
+                  seed = NA, npop = 40, ngen = 200, Topology = "random",
+                  errorThreshold = -Inf, num_rep = 1, num_cores = 1) {
+  # import data
+  list_df <- importDataBatch(path)
+  if (!is.list(list_df)) {
+    return(ErrorClass$new("Could not import data"))
+  }
+  for (i in seq_along(list_df)) {
+    if (!is.data.frame(list_df[[i]])) {
+      return(ErrorClass$new("Found non data.frame entry"))
+    }
+  }
+
+  # num cores calculation
+  size <- length(list_df) * num_rep
+  if (num_cores > size) {
+    num_cores <- size
+  }
+
+  # seed case and generation of seeds
+  seed_case <- determine_seed_case(seed, num_rep)
+  seed_origin <- NULL
+  if (seed_case == 3) {
+    seed_origin <- seed
+  }
+  seeds <- numeric(size)
+  seeds_from <- 1:1e6
+  for (i in seq_len(size)) {
+    if (seed_case == 1) {
+      seed <- sample(seeds_from, 1)
+    } else if (seed_case == 3) {
+      if (i %in% seq(1, size, num_rep)) {
+        seed <- seed_origin
+      } else {
+        seed <- sample(seeds_from, 1)
+      }
+    } else if (seed_case == 2) {
+      seed <- seed # TODO: check is this correct
+    }
+    seeds[i] <- seed
+  }
+
+  # create message for each optimization
+  messages <- character(size)
+  counter_messages <- 1
+  for (i in seq_len(length(list_df))) {
+    for (j in seq_len(num_rep)) {
+      messages[counter_messages] <-
+        paste0("Dataset = ", i, "; Replicate = ", j)
+      counter_messages <- counter_messages + 1
+    }
+  }
+
+  # 3. Fill task queue
+  dfs <- rep(list_df, each = num_rep)
+  tq <- TaskQueue$new(
+    case,
+    lowerBounds, upperBounds, dfs,
+    additionalParameters, seeds,
+    npop, ngen, Topology, errorThreshold,
+    messages, num_cores
+  )
+
+  # 4. assign tasks
+  tq$assign()
+  old_status <- ""
+
+  while (TRUE) {
+    new_status <- tq$get_status(old_status)
+    cat(new_status)
+    cat("\n")
+    old_status <- new_status
+    if (!tq$queue_empty() && tq$check()) {
+      tq$assign()
+    }
+    if (tq$queue_empty()) break
+    Sys.sleep(1)
+  }
+
+  list <- tq$seperate_results()
+  list(
+    list,
+    plotStates(list),
+    plotParams(list),
+    plotMetrices(list)
+  )
+}