From 3ec53e7537d8eb419d5cf6d9b98279d51d628ccc Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:32:31 +0000 Subject: [PATCH 1/3] Deprecate publish component --- .../cellranger_postprocessing/main.nf | 33 ++++++++++++------- .../integration/totalvi_leiden/main.nf | 17 ++++------ .../multiomics/process_batches/main.nf | 21 ++++++------ src/workflows/qc/qc/main.nf | 13 ++------ 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/workflows/ingestion/cellranger_postprocessing/main.nf b/src/workflows/ingestion/cellranger_postprocessing/main.nf index be180049a11..d7736914051 100644 --- a/src/workflows/ingestion/cellranger_postprocessing/main.nf +++ b/src/workflows/ingestion/cellranger_postprocessing/main.nf @@ -3,9 +3,19 @@ workflow run_wf { input_ch main: - // perform correction if so desired - output_ch = input_ch + | map{id, state -> + assert (state.perform_correction || state.min_genes != null || state.min_counts != null): + "Either perform_correct, min_genes or min_counts should be specified!" + [id, state] + } + // Make sure there is not conflict between the output from this workflow + // And the output from any of the components + | map {id, state -> + def new_state = state + ["workflow_output": state.output] + [id, new_state] + } + // perform correction if so desired | cellbender_remove_background.run( runIf: {id, state -> state.perform_correction}, fromState: { id, state -> @@ -13,7 +23,8 @@ workflow run_wf { input: state.input, epochs: state.cellbender_epochs, output_layer: "cellbender_corrected", - output_compression: "gzip" + output_compression: "gzip", + output: state.workflow_output, ] }, toState: { id, output, state -> @@ -31,19 +42,19 @@ workflow run_wf { min_counts: state.min_counts, layer: state.layer, output_compression: "gzip", - do_subset: true + do_subset: true, + output: state.workflow_output, ] }, toState: [input: "output"] ) // Make sure to use the correct ouput file names, - // irrespective wether or not any of the above - // components were run - | publish.run( - fromState: [ input: "input", output: "output" ], - toState: ["output": "output"] - ) - | setState(["output"]) + // irrespective of which component(s) (or combinations of then) + // were run. The above components + // should put their output into 'input' + | map {id, state -> + [id, ["output": state.input]] + } emit: output_ch diff --git a/src/workflows/integration/totalvi_leiden/main.nf b/src/workflows/integration/totalvi_leiden/main.nf index 8595426399f..0684c45ce27 100644 --- a/src/workflows/integration/totalvi_leiden/main.nf +++ b/src/workflows/integration/totalvi_leiden/main.nf @@ -44,10 +44,12 @@ workflow neighbors_leiden_umap { | umap.run( fromState: [ "input": "input", + "output": "workflow_output", "uns_neighbors": "uns_neighbors", "obsm_output": "obsm_umap", "query_modality": "modality", ], + args: ["output_compression": "gzip"] toState: ["output": "output"] ) @@ -110,7 +112,9 @@ workflow run_wf { } [id, new_state, state] } - | neighbors_leiden_umap + // neighbors_leiden_umap is not a subworkflow or module, but a + // workflow defined in this script, so not .run functionality is available + | neighbors_leiden_umap | map { id, state, orig_state -> // for ADT stateMapping = [ "uns_neighbors": "prot_uns_neighbors", @@ -131,16 +135,9 @@ workflow run_wf { } [id, new_state + ["input": state.output]] } + // neighbors_leiden_umap is not a subworkflow or module, but a + // workflow defined in this script, so not .run functionality is available | neighbors_leiden_umap - | publish.run( - fromState: { id, state -> [ - "input": state.output, - "output": state.workflow_output, - "compression": "gzip" - ] - }, - toState: ["output", "output"] - ) | setState(["output", "reference_model_path", "query_model_path"]) emit: output_ch diff --git a/src/workflows/multiomics/process_batches/main.nf b/src/workflows/multiomics/process_batches/main.nf index 437403ed851..49a7486b802 100644 --- a/src/workflows/multiomics/process_batches/main.nf +++ b/src/workflows/multiomics/process_batches/main.nf @@ -176,6 +176,7 @@ workflow run_wf { "modality": "rna", "var_pca_feature_selection": state.highly_variable_features_var_output, // run PCA on highly variable genes only "pca_overwrite": state.pca_overwrite, + "output": state.workflow_output, ], "dimensionality_reduction_scaling_rna": [ @@ -194,6 +195,7 @@ workflow run_wf { "uns_neighbors": "neighbors_scaled", "obsp_neighbor_connectivities": "connectivities_scaled", "obsp_neighbor_distances": "distances_scaled", + "output": state.workflow_output, ], "dimensionality_reduction_prot": [ @@ -201,7 +203,8 @@ workflow run_wf { "input": state.input, "layer": "clr", "modality": "prot", - "pca_overwrite": state.pca_overwrite + "pca_overwrite": state.pca_overwrite, + "output": state.workflow_output, ] ] return stateMappings[component.name] @@ -209,16 +212,12 @@ workflow run_wf { toState: ["input": "output"] ) } - | publish.run( - fromState: { id, state -> [ - "input": state.input, - "output": state.workflow_output, - ] - }, - toState: ["output": "output"] - ) - | setState(["output"]) - + // At the end of the reduce statement, + // the `toState` closure put the output back into + // into the 'input' slot + | map {id, state -> + [id, ["output": state.input]] + } emit: output_ch diff --git a/src/workflows/qc/qc/main.nf b/src/workflows/qc/qc/main.nf index fa22f332263..87f4c12c2a7 100644 --- a/src/workflows/qc/qc/main.nf +++ b/src/workflows/qc/qc/main.nf @@ -63,7 +63,9 @@ workflow run_wf { "output_var_num_nonzero_obs": state.output_var_num_nonzero_obs, "output_var_total_counts_obs": state.output_var_total_counts_obs, "output_var_obs_mean": state.output_var_obs_mean, - "output_var_pct_dropout": state.output_var_pct_dropout + "output_var_pct_dropout": state.output_var_pct_dropout, + "output": state.workflow_output, + "compression": "gzip" ] if (state.var_qc_metrics) { newState += ["var_qc_metrics": state.var_qc_metrics] @@ -72,15 +74,6 @@ workflow run_wf { }, // use map when viash 0.7.6 is released // related to https://github.com/viash-io/viash/pull/515 - toState: ["input": "output"] - ) - | publish.run( - fromState: { id, state -> [ - "input": state.input, - "output": state.workflow_output, - "compression": "gzip" - ] - }, toState: ["output": "output"] ) | setState(["output"]) From 24cfc7b51621832743261b9f499a5fe3eef5e8a1 Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:40:56 +0000 Subject: [PATCH 2/3] Update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5ee7977e17..36eacd7d5d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# openpipelines 2.1.0 + +# MAJOR CHANGES + +* The `transfer/publish` component is deprecated and will be removed in a future major release (PR #941). + # openpipelines 2.0.0 ## BREAKING CHANGES From dac4375382bb9171755789187d14c32712bb858f Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:41:51 +0000 Subject: [PATCH 3/3] Update configs --- .../ingestion/cellranger_postprocessing/config.vsh.yaml | 1 - src/workflows/integration/totalvi_leiden/config.vsh.yaml | 1 - src/workflows/multiomics/process_batches/config.vsh.yaml | 1 - src/workflows/multiomics/process_samples/config.vsh.yaml | 1 - src/workflows/qc/qc/config.vsh.yaml | 1 - 5 files changed, 5 deletions(-) diff --git a/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml b/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml index 75de01529e4..2c865dbe7ff 100644 --- a/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml +++ b/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml @@ -61,7 +61,6 @@ dependencies: - name: correction/cellbender_remove_background - name: filter/filter_with_counts - name: filter/subset_h5mu - - name: transfer/publish # test_dependencies: # - name: convert/from_10xh5_to_h5mu test_resources: diff --git a/src/workflows/integration/totalvi_leiden/config.vsh.yaml b/src/workflows/integration/totalvi_leiden/config.vsh.yaml index 38add7094e8..381c7a1c029 100644 --- a/src/workflows/integration/totalvi_leiden/config.vsh.yaml +++ b/src/workflows/integration/totalvi_leiden/config.vsh.yaml @@ -181,7 +181,6 @@ dependencies: - name: integrate/totalvi - name: dimred/umap - name: neighbors/find_neighbors - - name: transfer/publish resources: - type: nextflow_script path: main.nf diff --git a/src/workflows/multiomics/process_batches/config.vsh.yaml b/src/workflows/multiomics/process_batches/config.vsh.yaml index fe76018effe..dc6319b90c4 100644 --- a/src/workflows/multiomics/process_batches/config.vsh.yaml +++ b/src/workflows/multiomics/process_batches/config.vsh.yaml @@ -152,7 +152,6 @@ dependencies: alias: dimensionality_reduction_rna - name: workflows/multiomics/dimensionality_reduction alias: dimensionality_reduction_prot - - name: transfer/publish - name: workflows/multiomics/dimensionality_reduction alias: dimensionality_reduction_scaling_rna resources: diff --git a/src/workflows/multiomics/process_samples/config.vsh.yaml b/src/workflows/multiomics/process_samples/config.vsh.yaml index 30d18209206..f1390225bd6 100644 --- a/src/workflows/multiomics/process_samples/config.vsh.yaml +++ b/src/workflows/multiomics/process_samples/config.vsh.yaml @@ -277,7 +277,6 @@ dependencies: alias: split_modalities_workflow - name: dataflow/merge - name: dataflow/concatenate_h5mu - - name: transfer/publish - name: workflows/rna/rna_singlesample - name: workflows/prot/prot_singlesample - name: workflows/gdo/gdo_singlesample diff --git a/src/workflows/qc/qc/config.vsh.yaml b/src/workflows/qc/qc/config.vsh.yaml index 2d61f206da8..ef4fc6779c2 100644 --- a/src/workflows/qc/qc/config.vsh.yaml +++ b/src/workflows/qc/qc/config.vsh.yaml @@ -134,7 +134,6 @@ argument_groups: description: Destination path to the output. example: output.h5mu dependencies: - - name: transfer/publish - name: metadata/grep_annotation_column - name: qc/calculate_qc_metrics resources: