From d2ed35e34bf48e217d8d131ded4ffbff01051d74 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 2 Sep 2024 23:33:24 +0200 Subject: [PATCH] partially update benchmark --- src/workflows/run_benchmark/config.vsh.yaml | 11 ++ src/workflows/run_benchmark/main.nf | 149 +++----------------- src/workflows/run_benchmark/test.sh | 2 +- 3 files changed, 32 insertions(+), 130 deletions(-) diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 86765e8..4e0687c 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -66,9 +66,20 @@ dependencies: repository: openproblems-v2 - name: common/extract_metadata repository: openproblems-v2 + - name: control_methods/majority_vote + - name: control_methods/random_labels - name: control_methods/true_labels + - name: methods/knn - name: methods/logistic_regression + - name: methods/mlp + - name: methods/naive_bayes + - name: methods/scanvi + - name: methods/scanvi_scarches + - name: methods/seurat_transferdata + - name: methods/xgboost - name: metrics/accuracy + - name: metrics/f1 + runners: - type: nextflow diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 68e5ecd..9c02bf8 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -1,27 +1,35 @@ workflow auto { - findStatesTemp(params, meta.config) + findStates(params, meta.config) | meta.workflow.run( auto: [publish: "state"] ) } +methods = [ + majority_vote, + random_labels, + true_labels, + knn, + logistic_regression, + mlp, + naive_bayes, + scanvi, + scanvi_scarches, + seurat_transferdata, + xgboost +] + +metrics = [ + accuracy, + f1 +] + workflow run_wf { take: input_ch main: - // construct list of methods - methods = [ - true_labels, - logistic_regression - ] - - // construct list of metrics - metrics = [ - accuracy - ] - /**************************** * EXTRACT DATASET METADATA * ****************************/ @@ -192,120 +200,3 @@ workflow run_wf { emit: output_ch } - -// temp fix for rename_keys typo - -def findStatesTemp(Map params, Map config) { - def auto_config = deepClone(config) - def auto_params = deepClone(params) - - auto_config = auto_config.clone() - // override arguments - auto_config.argument_groups = [] - auto_config.arguments = [ - [ - type: "string", - name: "--id", - description: "A dummy identifier", - required: false - ], - [ - type: "file", - name: "--input_states", - example: "/path/to/input/directory/**/state.yaml", - description: "Path to input directory containing the datasets to be integrated.", - required: true, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--filter", - example: "foo/.*/state.yaml", - description: "Regex to filter state files by path.", - required: false - ], - // to do: make this a yaml blob? - [ - type: "string", - name: "--rename_keys", - example: ["newKey1:oldKey1", "newKey2:oldKey2"], - description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", - required: false, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--settings", - example: '{"output_dataset": "dataset.h5ad", "k": 10}', - description: "Global arguments as a JSON glob to be passed to all components.", - required: false - ] - ] - if (!(auto_params.containsKey("id"))) { - auto_params["id"] = "auto" - } - - // run auto config through processConfig once more - auto_config = processConfig(auto_config) - - workflow findStatesTempWf { - helpMessage(auto_config) - - output_ch = - channelFromParams(auto_params, auto_config) - | flatMap { autoId, args -> - - def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] - - // look for state files in input dir - def stateFiles = args.input_states - - // filter state files by regex - if (args.filter) { - stateFiles = stateFiles.findAll{ stateFile -> - def stateFileStr = stateFile.toString() - def matcher = stateFileStr =~ args.filter - matcher.matches()} - } - - // read in states - def states = stateFiles.collect { stateFile -> - def state_ = readTaggedYaml(stateFile) - [state_.id, state_] - } - - // construct renameMap - if (args.rename_keys) { - def renameMap = args.rename_keys.collectEntries{renameString -> - def split = renameString.split(":") - assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'" - split - } - - // rename keys in state, only let states through which have all keys - // also add global settings - states = states.collectMany{id, state -> - def newState = [:] - - for (key in renameMap.keySet()) { - def origKey = renameMap[key] - if (!(state.containsKey(origKey))) { - return [] - } - newState[key] = state[origKey] - } - - [[id, globalSettings + newState]] - } - } - - states - } - emit: - output_ch - } - - return findStatesTempWf -} \ No newline at end of file diff --git a/src/workflows/run_benchmark/test.sh b/src/workflows/run_benchmark/test.sh index b0dbc24..a13dac9 100755 --- a/src/workflows/run_benchmark/test.sh +++ b/src/workflows/run_benchmark/test.sh @@ -10,7 +10,7 @@ set -e # export TOWER_WORKSPACE_ID=53907369739130 -DATASETS_DIR="resources_test/task_template" +DATASETS_DIR="resources_test/label_projection" OUTPUT_DIR="output/temp" if [ ! -d "$OUTPUT_DIR" ]; then