From 3ec53e7537d8eb419d5cf6d9b98279d51d628ccc Mon Sep 17 00:00:00 2001
From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Thu, 26 Dec 2024 09:32:31 +0000
Subject: [PATCH 1/3] Deprecate publish component

---
 .../cellranger_postprocessing/main.nf         | 33 ++++++++++++-------
 .../integration/totalvi_leiden/main.nf        | 17 ++++------
 .../multiomics/process_batches/main.nf        | 21 ++++++------
 src/workflows/qc/qc/main.nf                   | 13 ++------
 4 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/src/workflows/ingestion/cellranger_postprocessing/main.nf b/src/workflows/ingestion/cellranger_postprocessing/main.nf
index be180049a11..d7736914051 100644
--- a/src/workflows/ingestion/cellranger_postprocessing/main.nf
+++ b/src/workflows/ingestion/cellranger_postprocessing/main.nf
@@ -3,9 +3,19 @@ workflow run_wf {
   input_ch
 
   main:
-  // perform correction if so desired
-
   output_ch = input_ch
+    | map{id, state ->
+      assert (state.perform_correction || state.min_genes != null || state.min_counts != null):
+        "Either perform_correct, min_genes or min_counts should be specified!"
+      [id, state]
+    }
+    // Make sure there is not conflict between the output from this workflow
+    // And the output from any of the components
+    | map {id, state ->
+      def new_state = state + ["workflow_output": state.output]
+      [id, new_state]
+    }
+    // perform correction if so desired
     | cellbender_remove_background.run(
       runIf: {id, state -> state.perform_correction},
       fromState: { id, state ->
@@ -13,7 +23,8 @@ workflow run_wf {
           input: state.input,
           epochs: state.cellbender_epochs,
           output_layer: "cellbender_corrected",
-          output_compression: "gzip"
+          output_compression: "gzip",
+          output: state.workflow_output,
         ]
       },
       toState: { id, output, state -> 
@@ -31,19 +42,19 @@ workflow run_wf {
           min_counts: state.min_counts,
           layer: state.layer,
           output_compression: "gzip",
-          do_subset: true
+          do_subset: true,
+          output: state.workflow_output,
         ]
       },
       toState: [input: "output"]
     )
     // Make sure to use the correct ouput file names, 
-    // irrespective wether or not any of the above 
-    // components were run
-    | publish.run(
-      fromState: [ input: "input", output: "output" ],
-      toState: ["output": "output"]
-    )
-    | setState(["output"])
+    // irrespective of which component(s) (or combinations of then)
+    // were run. The above components
+    // should put their output into 'input'
+    | map {id, state -> 
+      [id, ["output": state.input]]
+    }
 
   emit:
   output_ch
diff --git a/src/workflows/integration/totalvi_leiden/main.nf b/src/workflows/integration/totalvi_leiden/main.nf
index 8595426399f..0684c45ce27 100644
--- a/src/workflows/integration/totalvi_leiden/main.nf
+++ b/src/workflows/integration/totalvi_leiden/main.nf
@@ -44,10 +44,12 @@ workflow neighbors_leiden_umap {
     | umap.run(
       fromState: [
           "input": "input",
+          "output": "workflow_output",
           "uns_neighbors": "uns_neighbors",
           "obsm_output": "obsm_umap",
           "query_modality": "modality",
         ],
+      args: ["output_compression": "gzip"]
       toState: ["output": "output"]
     )
 
@@ -110,7 +112,9 @@ workflow run_wf {
       }
       [id, new_state, state]
     }
-    | neighbors_leiden_umap
+    // neighbors_leiden_umap is not a subworkflow or module, but a 
+    // workflow defined in this script, so not .run functionality is available
+    | neighbors_leiden_umap 
     | map { id, state, orig_state -> // for ADT
       stateMapping = [
         "uns_neighbors": "prot_uns_neighbors",
@@ -131,16 +135,9 @@ workflow run_wf {
       }
       [id, new_state + ["input": state.output]]
     }
+    // neighbors_leiden_umap is not a subworkflow or module, but a 
+    // workflow defined in this script, so not .run functionality is available
     | neighbors_leiden_umap
-    | publish.run(
-      fromState: { id, state -> [
-          "input": state.output,
-          "output": state.workflow_output,
-          "compression": "gzip"
-        ]
-      },
-      toState: ["output", "output"]
-    )
     | setState(["output", "reference_model_path", "query_model_path"])
   emit:
   output_ch
diff --git a/src/workflows/multiomics/process_batches/main.nf b/src/workflows/multiomics/process_batches/main.nf
index 437403ed851..49a7486b802 100644
--- a/src/workflows/multiomics/process_batches/main.nf
+++ b/src/workflows/multiomics/process_batches/main.nf
@@ -176,6 +176,7 @@ workflow run_wf {
                     "modality": "rna",
                     "var_pca_feature_selection": state.highly_variable_features_var_output, // run PCA on highly variable genes only
                     "pca_overwrite": state.pca_overwrite,
+                    "output": state.workflow_output,
                   ],
                 "dimensionality_reduction_scaling_rna":
                   [
@@ -194,6 +195,7 @@ workflow run_wf {
                     "uns_neighbors": "neighbors_scaled",
                     "obsp_neighbor_connectivities": "connectivities_scaled",
                     "obsp_neighbor_distances": "distances_scaled",
+                    "output": state.workflow_output,
                   ],
                 "dimensionality_reduction_prot":
                   [
@@ -201,7 +203,8 @@ workflow run_wf {
                     "input": state.input,
                     "layer": "clr",
                     "modality": "prot",
-                    "pca_overwrite": state.pca_overwrite
+                    "pca_overwrite": state.pca_overwrite,
+                    "output": state.workflow_output,
                   ]
               ]
               return stateMappings[component.name]
@@ -209,16 +212,12 @@ workflow run_wf {
             toState: ["input": "output"]
           )
       }
-      | publish.run(
-        fromState: { id, state -> [
-            "input": state.input,
-            "output": state.workflow_output,
-          ]
-        },
-        toState: ["output": "output"]
-      )
-      | setState(["output"])
-
+      // At the end of the reduce statement,
+      // the `toState` closure put the output back into
+      // into the 'input' slot
+      | map {id, state ->
+        [id, ["output": state.input]]
+      }
 
   emit:
   output_ch
diff --git a/src/workflows/qc/qc/main.nf b/src/workflows/qc/qc/main.nf
index fa22f332263..87f4c12c2a7 100644
--- a/src/workflows/qc/qc/main.nf
+++ b/src/workflows/qc/qc/main.nf
@@ -63,7 +63,9 @@ workflow run_wf {
             "output_var_num_nonzero_obs": state.output_var_num_nonzero_obs,
             "output_var_total_counts_obs": state.output_var_total_counts_obs,
             "output_var_obs_mean": state.output_var_obs_mean,
-            "output_var_pct_dropout": state.output_var_pct_dropout
+            "output_var_pct_dropout": state.output_var_pct_dropout,
+            "output": state.workflow_output,
+            "compression": "gzip"
           ]
           if (state.var_qc_metrics) {
             newState += ["var_qc_metrics": state.var_qc_metrics]
@@ -72,15 +74,6 @@ workflow run_wf {
         },
         // use map when viash 0.7.6 is released
         // related to https://github.com/viash-io/viash/pull/515
-        toState: ["input": "output"]
-      )
-      | publish.run(
-        fromState: { id, state -> [
-            "input": state.input,
-            "output": state.workflow_output,
-            "compression": "gzip"
-          ]
-        },
         toState: ["output": "output"]
       )
       | setState(["output"]) 

From 24cfc7b51621832743261b9f499a5fe3eef5e8a1 Mon Sep 17 00:00:00 2001
From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Thu, 26 Dec 2024 09:40:56 +0000
Subject: [PATCH 2/3] Update CHANGELOG

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5ee7977e17..36eacd7d5d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# openpipelines 2.1.0
+
+# MAJOR CHANGES
+
+* The `transfer/publish` component is deprecated and will be removed in a future major release (PR #941).
+
 # openpipelines 2.0.0
 
 ## BREAKING CHANGES

From dac4375382bb9171755789187d14c32712bb858f Mon Sep 17 00:00:00 2001
From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Thu, 26 Dec 2024 09:41:51 +0000
Subject: [PATCH 3/3] Update configs

---
 .../ingestion/cellranger_postprocessing/config.vsh.yaml          | 1 -
 src/workflows/integration/totalvi_leiden/config.vsh.yaml         | 1 -
 src/workflows/multiomics/process_batches/config.vsh.yaml         | 1 -
 src/workflows/multiomics/process_samples/config.vsh.yaml         | 1 -
 src/workflows/qc/qc/config.vsh.yaml                              | 1 -
 5 files changed, 5 deletions(-)

diff --git a/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml b/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml
index 75de01529e4..2c865dbe7ff 100644
--- a/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml
+++ b/src/workflows/ingestion/cellranger_postprocessing/config.vsh.yaml
@@ -61,7 +61,6 @@ dependencies:
   - name: correction/cellbender_remove_background
   - name: filter/filter_with_counts
   - name: filter/subset_h5mu
-  - name: transfer/publish
 # test_dependencies:
 #   - name: convert/from_10xh5_to_h5mu
 test_resources:
diff --git a/src/workflows/integration/totalvi_leiden/config.vsh.yaml b/src/workflows/integration/totalvi_leiden/config.vsh.yaml
index 38add7094e8..381c7a1c029 100644
--- a/src/workflows/integration/totalvi_leiden/config.vsh.yaml
+++ b/src/workflows/integration/totalvi_leiden/config.vsh.yaml
@@ -181,7 +181,6 @@ dependencies:
   - name: integrate/totalvi
   - name: dimred/umap
   - name: neighbors/find_neighbors
-  - name: transfer/publish
 resources:
   - type: nextflow_script
     path: main.nf
diff --git a/src/workflows/multiomics/process_batches/config.vsh.yaml b/src/workflows/multiomics/process_batches/config.vsh.yaml
index fe76018effe..dc6319b90c4 100644
--- a/src/workflows/multiomics/process_batches/config.vsh.yaml
+++ b/src/workflows/multiomics/process_batches/config.vsh.yaml
@@ -152,7 +152,6 @@ dependencies:
     alias: dimensionality_reduction_rna
   - name: workflows/multiomics/dimensionality_reduction
     alias: dimensionality_reduction_prot
-  - name: transfer/publish
   - name: workflows/multiomics/dimensionality_reduction
     alias: dimensionality_reduction_scaling_rna 
 resources:
diff --git a/src/workflows/multiomics/process_samples/config.vsh.yaml b/src/workflows/multiomics/process_samples/config.vsh.yaml
index 30d18209206..f1390225bd6 100644
--- a/src/workflows/multiomics/process_samples/config.vsh.yaml
+++ b/src/workflows/multiomics/process_samples/config.vsh.yaml
@@ -277,7 +277,6 @@ dependencies:
     alias: split_modalities_workflow
   - name: dataflow/merge
   - name: dataflow/concatenate_h5mu
-  - name: transfer/publish
   - name: workflows/rna/rna_singlesample
   - name: workflows/prot/prot_singlesample
   - name: workflows/gdo/gdo_singlesample
diff --git a/src/workflows/qc/qc/config.vsh.yaml b/src/workflows/qc/qc/config.vsh.yaml
index 2d61f206da8..ef4fc6779c2 100644
--- a/src/workflows/qc/qc/config.vsh.yaml
+++ b/src/workflows/qc/qc/config.vsh.yaml
@@ -134,7 +134,6 @@ argument_groups:
         description: Destination path to the output.
         example: output.h5mu
 dependencies:
-  - name: transfer/publish
   - name: metadata/grep_annotation_column
   - name: qc/calculate_qc_metrics
 resources: