From d2ed35e34bf48e217d8d131ded4ffbff01051d74 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Mon, 2 Sep 2024 23:33:24 +0200
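Subject: [PATCH] run_benchmark: partially update benchmark workflow

Register the remaining control methods, methods and the f1 metric as
workflow dependencies, move the methods/metrics lists to the top of
main.nf, replace the temporary findStatesTemp workaround with findStates,
and point test.sh at resources_test/label_projection.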

---
 src/workflows/run_benchmark/config.vsh.yaml |  11 ++
 src/workflows/run_benchmark/main.nf         | 149 +++-----------------
 src/workflows/run_benchmark/test.sh         |   2 +-
 3 files changed, 32 insertions(+), 130 deletions(-)

diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 86765e8..4e0687c 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -66,9 +66,20 @@ dependencies:
     repository: openproblems-v2
   - name: common/extract_metadata
     repository: openproblems-v2
+  - name: control_methods/majority_vote
+  - name: control_methods/random_labels
   - name: control_methods/true_labels
+  - name: methods/knn
   - name: methods/logistic_regression
+  - name: methods/mlp
+  - name: methods/naive_bayes
+  - name: methods/scanvi
+  - name: methods/scanvi_scarches
+  - name: methods/seurat_transferdata
+  - name: methods/xgboost
   - name: metrics/accuracy
+  - name: metrics/f1
+
 
 runners:
   - type: nextflow
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 68e5ecd..9c02bf8 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -1,27 +1,35 @@
 workflow auto {
-  findStatesTemp(params, meta.config)
+  findStates(params, meta.config)
     | meta.workflow.run(
       auto: [publish: "state"]
     )
 }
 
+methods = [
+  majority_vote,
+  random_labels,
+  true_labels,
+  knn,
+  logistic_regression,
+  mlp,
+  naive_bayes,
+  scanvi,
+  scanvi_scarches,
+  seurat_transferdata,
+  xgboost
+]
+
+metrics = [
+  accuracy,
+  f1
+]
+
 workflow run_wf {
   take:
   input_ch
 
   main:
 
-  // construct list of methods
-  methods = [
-    true_labels,
-    logistic_regression
-  ]
-
-  // construct list of metrics
-  metrics = [
-    accuracy
-  ]
-
   /****************************
    * EXTRACT DATASET METADATA *
    ****************************/
@@ -192,120 +200,3 @@ workflow run_wf {
   emit:
   output_ch
 }
-
-// temp fix for rename_keys typo
-
-def findStatesTemp(Map params, Map config) {
-  def auto_config = deepClone(config)
-  def auto_params = deepClone(params)
-
-  auto_config = auto_config.clone()
-  // override arguments
-  auto_config.argument_groups = []
-  auto_config.arguments = [
-    [
-      type: "string",
-      name: "--id",
-      description: "A dummy identifier",
-      required: false
-    ],
-    [
-      type: "file",
-      name: "--input_states",
-      example: "/path/to/input/directory/**/state.yaml",
-      description: "Path to input directory containing the datasets to be integrated.",
-      required: true,
-      multiple: true,
-      multiple_sep: ";"
-    ],
-    [
-      type: "string",
-      name: "--filter",
-      example: "foo/.*/state.yaml",
-      description: "Regex to filter state files by path.",
-      required: false
-    ],
-    // to do: make this a yaml blob?
-    [
-      type: "string",
-      name: "--rename_keys",
-      example: ["newKey1:oldKey1", "newKey2:oldKey2"],
-      description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
-      required: false,
-      multiple: true,
-      multiple_sep: ";"
-    ],
-    [
-      type: "string",
-      name: "--settings",
-      example: '{"output_dataset": "dataset.h5ad", "k": 10}',
-      description: "Global arguments as a JSON glob to be passed to all components.",
-      required: false
-    ]
-  ]
-  if (!(auto_params.containsKey("id"))) {
-    auto_params["id"] = "auto"
-  }
-
-  // run auto config through processConfig once more
-  auto_config = processConfig(auto_config)
-
-  workflow findStatesTempWf {
-    helpMessage(auto_config)
-
-    output_ch = 
-      channelFromParams(auto_params, auto_config)
-        | flatMap { autoId, args ->
-
-          def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]
-
-          // look for state files in input dir
-          def stateFiles = args.input_states
-
-          // filter state files by regex
-          if (args.filter) {
-            stateFiles = stateFiles.findAll{ stateFile ->
-              def stateFileStr = stateFile.toString()
-              def matcher = stateFileStr =~ args.filter
-              matcher.matches()}
-          }
-
-          // read in states
-          def states = stateFiles.collect { stateFile ->
-            def state_ = readTaggedYaml(stateFile)
-            [state_.id, state_]
-          }
-
-          // construct renameMap
-          if (args.rename_keys) {
-            def renameMap = args.rename_keys.collectEntries{renameString ->
-              def split = renameString.split(":")
-              assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'"
-              split
-            }
-
-            // rename keys in state, only let states through which have all keys
-            // also add global settings
-            states = states.collectMany{id, state ->
-              def newState = [:]
-
-              for (key in renameMap.keySet()) {
-                def origKey = renameMap[key]
-                if (!(state.containsKey(origKey))) {
-                  return []
-                }
-                newState[key] = state[origKey]
-              }
-
-              [[id, globalSettings + newState]]
-            }
-          }
-
-          states
-        }
-    emit:
-    output_ch
-  }
-
-  return findStatesTempWf
-}
\ No newline at end of file
diff --git a/src/workflows/run_benchmark/test.sh b/src/workflows/run_benchmark/test.sh
index b0dbc24..a13dac9 100755
--- a/src/workflows/run_benchmark/test.sh
+++ b/src/workflows/run_benchmark/test.sh
@@ -10,7 +10,7 @@ set -e
 
 # export TOWER_WORKSPACE_ID=53907369739130
 
-DATASETS_DIR="resources_test/task_template"
+DATASETS_DIR="resources_test/label_projection"
 OUTPUT_DIR="output/temp"
 
 if [ ! -d "$OUTPUT_DIR" ]; then