From 346a9c54a754946ed2fcc3ef050de147c50dd52b Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Mon, 9 Dec 2024 17:08:02 -0500 Subject: [PATCH] Removed everything after profile_dist --- modules/local/appendmetadata/main.nf | 56 ------ modules/local/arborview.nf | 32 --- modules/local/gas/mcluster/main.nf | 37 ---- tests/pipelines/main.nf.test | 201 ------------------- tests/pipelines/main_gm_thresholds.nf.test | 39 +--- tests/pipelines/main_missing_alleles.nf.test | 38 ---- workflows/fastmatchirida.nf | 12 -- 7 files changed, 1 insertion(+), 414 deletions(-) delete mode 100644 modules/local/appendmetadata/main.nf delete mode 100644 modules/local/arborview.nf delete mode 100644 modules/local/gas/mcluster/main.nf diff --git a/modules/local/appendmetadata/main.nf b/modules/local/appendmetadata/main.nf deleted file mode 100644 index aa88a6c..0000000 --- a/modules/local/appendmetadata/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process APPEND_METADATA { - tag "append_metadata" - label 'process_single' - - input: - val clusters_path // cluster data as a TSV path - // this needs to be "val", because "path" - // won't stage the file correctly for exec - val metadata_rows // metadata rows (no headers) to be appened, list of lists - val metadata_headers // headers to name the metadata columns - - output: - path("clusters_and_metadata.tsv"), emit: clusters - - exec: - def clusters_rows // has a header row - def clusters_rows_map = [:] - def metadata_rows_map = [:] - def merged = [] - - clusters_path.withReader { reader -> - clusters_rows = reader.readLines()*.split('\t') - } - - // Create a map of the cluster rows: - // Start on i = 1 because we don't want the headers. - for(int i = 1; i < clusters_rows.size(); i++) - { - // "sample" -> ["sample", 1, 2, 3, ...] - clusters_rows_map[clusters_rows[i][0]] = clusters_rows[i] - } - - // Create a map of the metadata rows: - // Start on i = 0 because there are no headers included. - for(int i = 0; i < metadata_rows.size(); i++) - { - // "sample" -> ["sample", meta1, meta2, meta3, ...] - metadata_rows_map[metadata_rows[i][0]] = metadata_rows[i] - } - - // Merge the headers - merged.add(clusters_rows[0] + metadata_headers) - - // Merge the remain rows in original order: - // Start on i = 1 because we don't want the headers. - for(int i = 1; i < clusters_rows.size(); i++) - { - def sample_key = clusters_rows[i][0] - merged.add(clusters_rows_map[sample_key] + metadata_rows_map[sample_key][1..-1]) - } - - task.workDir.resolve("clusters_and_metadata.tsv").withWriter { writer -> - merged.each { writer.writeLine it.join("\t") } - } - -} diff --git a/modules/local/arborview.nf b/modules/local/arborview.nf deleted file mode 100644 index 407468a..0000000 --- a/modules/local/arborview.nf +++ /dev/null @@ -1,32 +0,0 @@ -// Inline output tree into ArborView.html -// TODO include versions for python and arbor view when available - - - -process ARBOR_VIEW { - label "process_low" - tag "Inlining Tree Data" - stageInMode 'copy' // Need to copy in arbor view html - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python%3A3.12' : - 'biocontainers/python:3.12' }" - - input: - tuple path(tree), path(contextual_data) - path(arbor_view) // need to make sure this is copied - - - output: - path(output_value), emit: html - - - script: - output_value = "clustered_data_arborview.html" - """ - inline_arborview.py -d ${contextual_data} -n ${tree} -o ${output_value} -t ${arbor_view} - """ - - - -} diff --git a/modules/local/gas/mcluster/main.nf b/modules/local/gas/mcluster/main.nf deleted file mode 100644 index 7d832bb..0000000 --- a/modules/local/gas/mcluster/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -// Denovo clustering module for GAS - -process GAS_MCLUSTER{ - label "process_high" - tag "Denovo Clustering" - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' : - 'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }" - - input: - path(dist_matrix) - - output: - path("${prefix}/distances.{text,parquet}"), emit: distances, optional: true - path("${prefix}/thresholds.json"), emit: thresholds - path("${prefix}/clusters.{text,parquet}"), emit: clusters - path("${prefix}/tree.nwk"), emit: tree - path("${prefix}/run.json"), emit: run - path "versions.yml", emit: versions - - script: - prefix = "clusters" - """ - gas mcluster --matrix $dist_matrix \\ - --outdir $prefix \\ - --method '${params.gm_method}' \\ - --threshold ${params.gm_thresholds} \\ - --delimeter '${params.gm_delimiter}' - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - genomic_address_service: \$( gas mcluster -V | sed -e "s/gas//g" ) - END_VERSIONS - """ - -} diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index 2d984e6..14665d5 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -48,46 +48,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - assert actual_tree.exists() - assert actual_clusters.exists() - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - assert actual_metadata.exists() - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.exists() - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmyheader_1\\tmyheader_2\\tmyheader_3\\tmyheader_4\\tmyheader_5\\tmyheader_6\\tmyheader_7\\tmyheader_8\\nsample1\\t1.1.1\\t1\\t1\\t1\\tsample1\\t1.1\\t1.2\\t1.3\\t1.4\\t1.5\\t1.6\\t1.7\\t1.8\\nsample2\\t1.1.1\\t1\\t1\\t1\\tsample2\\t2.1\\t2.2\\t2.3\\t2.4\\t2.5\\t2.6\\t2.7\\t2.8\\nsample3\\t1.2.2\\t1\\t2\\t2\\tsample3\\t3.1\\t3.2\\t3.3\\t3.4\\t3.5\\t3.6\\t3.7\\t3.8\\n") - // compare IRIDA Next JSON output - def iridanext_json = path("$launchDir/results/iridanext.output.json").json - def iridanext_global = iridanext_json.files.global - def iridanext_samples = iridanext_json.files.samples - def iridanext_metadata = iridanext_json.metadata.samples - - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "distances/profile_dists.query_profile.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "distances/profile_dists.allele_map.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "merged/locidex.merge.profile.tsv" }.size() == 1 - - assert iridanext_samples.isEmpty() - assert iridanext_metadata.isEmpty() } } @@ -117,37 +77,12 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hamming.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Make sure average-linkage (default) parameter was used - assert "average" == path("$launchDir/results/clusters/gas.mcluster.run.json").json.parameters.method - - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata_hamming.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmetadata_1\\tmetadata_2\\tmetadata_3\\tmetadata_4\\tmetadata_5\\tmetadata_6\\tmetadata_7\\tmetadata_8\\nsample1\\t1.1.1\\t1\\t1\\t1\\tsample1\\t\\t\\t\\t\\t\\t\\t\\t\\nsample2\\t1.1.2\\t1\\t1\\t2\\tsample2\\t\\t\\t\\t\\t\\t\\t\\t\\nsample3\\t2.2.3\\t2\\t2\\t3\\tsample3\\t\\t\\t\\t\\t\\t\\t\\t\\n") - // compare IRIDA Next JSON output def iridanext_json = path("$launchDir/results/iridanext.output.json").json def iridanext_global = iridanext_json.files.global def iridanext_samples = iridanext_json.files.samples def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 @@ -187,16 +122,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hamming_single_linkage.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_single_linkage.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Make sure single-linkage parameter was used - assert "single" == path("$launchDir/results/clusters/gas.mcluster.run.json").json.parameters.method } } @@ -227,16 +152,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hamming_complete_linkage.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_complete_linkage.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Make sure complete-linkage parameter was used - assert "complete" == path("$launchDir/results/clusters/gas.mcluster.run.json").json.parameters.method } } @@ -319,38 +234,12 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - assert actual_tree.exists() - assert actual_clusters.exists() - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - assert actual_metadata.exists() - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata_no_metadata.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.exists() - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmetadata_1\\tmetadata_2\\tmetadata_3\\tmetadata_4\\tmetadata_5\\tmetadata_6\\tmetadata_7\\tmetadata_8\\nsample1\\t1.1.1\\t1\\t1\\t1\\tsample1\\t\\t\\t\\t\\t\\t\\t\\t\\nsample2\\t1.1.1\\t1\\t1\\t1\\tsample2\\t\\t\\t\\t\\t\\t\\t\\t\\nsample3\\t1.2.2\\t1\\t2\\t2\\tsample3\\t\\t\\t\\t\\t\\t\\t\\t\\n") - // compare IRIDA Next JSON output def iridanext_json = path("$launchDir/results/iridanext.output.json").json def iridanext_global = iridanext_json.files.global def iridanext_samples = iridanext_json.files.samples def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 @@ -391,26 +280,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - assert actual_tree.exists() - assert actual_clusters.exists() - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - assert actual_metadata.exists() - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata_little_metadata.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.exists() - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmetadata_1\\tmetadata_2\\tmetadata_3\\tmetadata_4\\tmetadata_5\\tmetadata_6\\tmetadata_7\\tmetadata_8\\nsample1\\t1.1.1\\t1\\t1\\t1\\tsample1\\t\\t\\t\\t1.4\\t\\t\\t\\t\\nsample2\\t1.1.1\\t1\\t1\\t1\\tsample2\\t\\t\\t\\t\\t\\t\\t\\t\\nsample3\\t1.2.2\\t1\\t2\\t2\\tsample3\\t3.1\\t3.2\\t\\t\\t\\t\\t\\t3.8\\n") // compare IRIDA Next JSON output def iridanext_json = path("$launchDir/results/iridanext.output.json").json @@ -418,11 +287,6 @@ nextflow_pipeline { def iridanext_samples = iridanext_json.files.samples def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 @@ -477,26 +341,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-mismatched-ids.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - assert actual_tree.exists() - assert actual_clusters.exists() - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree-mismatched-ids.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_dists-mismatched-ids.tsv") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - assert actual_metadata.exists() - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata-mismatched-ids.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.exists() - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmetadata_1\\tmetadata_2\\tmetadata_3\\tmetadata_4\\tmetadata_5\\tmetadata_6\\tmetadata_7\\tmetadata_8\\nsampleA\\t1.1.1\\t1\\t1\\t1\\tsampleA\\t1.1\\t1.2\\t1.3\\t1.4\\t1.5\\t1.6\\t1.7\\t1.8\\nsampleB\\t1.1.1\\t1\\t1\\t1\\tsampleB\\t2.1\\t2.2\\t2.3\\t2.4\\t2.5\\t2.6\\t2.7\\t2.8\\nsampleC\\t1.2.2\\t1\\t2\\t2\\tsampleC\\t3.1\\t3.2\\t3.3\\t3.4\\t3.5\\t3.6\\t3.7\\t3.8\\n") // compare IRIDA Next JSON output def iridanext_json = path("$launchDir/results/iridanext.output.json").json @@ -504,11 +348,6 @@ nextflow_pipeline { def iridanext_samples = iridanext_json.files.samples def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 @@ -560,26 +399,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-partial-mismatched-ids.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - assert actual_tree.exists() - assert actual_clusters.exists() - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree-partial-mismatched-ids.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_dists-partial-mismatched-ids.tsv") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text - - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - assert actual_metadata.exists() - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata-partial-mismatched-ids.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.exists() - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmetadata_1\\tmetadata_2\\tmetadata_3\\tmetadata_4\\tmetadata_5\\tmetadata_6\\tmetadata_7\\tmetadata_8\\nsampleA\\t1.1.1\\t1\\t1\\t1\\tsampleA\\t1.1\\t1.2\\t1.3\\t1.4\\t1.5\\t1.6\\t1.7\\t1.8\\nsampleB\\t1.1.1\\t1\\t1\\t1\\tsampleB\\t2.1\\t2.2\\t2.3\\t2.4\\t2.5\\t2.6\\t2.7\\t2.8\\nsample3\\t1.2.2\\t1\\t2\\t2\\tsample3\\t3.1\\t3.2\\t3.3\\t3.4\\t3.5\\t3.6\\t3.7\\t3.8\\n") // compare IRIDA Next JSON output def iridanext_json = path("$launchDir/results/iridanext.output.json").json @@ -587,11 +406,6 @@ nextflow_pipeline { def iridanext_samples = iridanext_json.files.samples def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 @@ -638,27 +452,12 @@ nextflow_pipeline { assert sample2_report.exists() == true assert sample3_report.exists() == true - // Check appended metadata is correct: - def actual_metadata = path("$launchDir/results/append/clusters_and_metadata.tsv") - assert actual_metadata.exists() - def expected_metadata = path("$baseDir/tests/data/append/expected_clusters_and_metadata_addsamplename.tsv") - assert actual_metadata.text == expected_metadata.text - - // Check that the ArborView output is created - def actual_arborview = path("$launchDir/results/ArborView/arborview.clustered_data_arborview.html") - assert actual_arborview.exists() - assert actual_arborview.text.contains("id\\taddress\\tlevel_1\\tlevel_2\\tlevel_3\\tsample\\tmetadata_1\\tmetadata_2\\tmetadata_3\\tmetadata_4\\tmetadata_5\\tmetadata_6\\tmetadata_7\\tmetadata_8\\nS_1\\t1.1.1\\t1\\t1\\t1\\tsample1\\t1.1\\t1.2\\t1.3\\t1.4\\t1.5\\t1.6\\t1.7\\t1.8\\nS2_\\t1.1.1\\t1\\t1\\t1\\tsample2\\t2.1\\t2.2\\t2.3\\t2.4\\t2.5\\t2.6\\t2.7\\t2.8\\nS2__sample3\\t1.2.2\\t1\\t2\\t2\\tsample3\\t3.1\\t3.2\\t3.3\\t3.4\\t3.5\\t3.6\\t3.7\\t3.8\\n") // compare IRIDA Next JSON output (should not be changed by adding sample_name column) def iridanext_json = path("$launchDir/results/iridanext.output.json").json def iridanext_global = iridanext_json.files.global def iridanext_samples = iridanext_json.files.samples def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_global.findAll { it.path == "ArborView/arborview.clustered_data_arborview.html" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.run.json" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.tree.nwk" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.clusters.text" }.size() == 1 - assert iridanext_global.findAll { it.path == "clusters/gas.mcluster.thresholds.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.run.json" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.results.text" }.size() == 1 assert iridanext_global.findAll { it.path == "distances/profile_dists.ref_profile.text" }.size() == 1 diff --git a/tests/pipelines/main_gm_thresholds.nf.test b/tests/pipelines/main_gm_thresholds.nf.test index ee857cd..f72b365 100644 --- a/tests/pipelines/main_gm_thresholds.nf.test +++ b/tests/pipelines/main_gm_thresholds.nf.test @@ -123,13 +123,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hamming.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_0.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text } } @@ -159,13 +152,7 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hamming.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_1.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text + } } @@ -195,10 +182,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_2.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -228,10 +211,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_3.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -261,10 +240,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_0.0.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -294,10 +269,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_1.0.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -327,10 +298,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_3.2.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -360,10 +327,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hamming.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hamming_thresh_3.3.txt") - assert actual_clusters.text == expected_clusters.text } } } diff --git a/tests/pipelines/main_missing_alleles.nf.test b/tests/pipelines/main_missing_alleles.nf.test index 3637c86..5030e1f 100644 --- a/tests/pipelines/main_missing_alleles.nf.test +++ b/tests/pipelines/main_missing_alleles.nf.test @@ -29,13 +29,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-missing.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hash_missing.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_missing.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text } } @@ -66,13 +59,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-missing-count-missing.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_tree = path("$launchDir/results/clusters/gas.mcluster.tree.nwk") - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_tree = path("$baseDir/tests/data/clusters/expected_tree_hash_missing_count_missing.nwk") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_missing_count_missing.txt") - assert actual_tree.text == expected_tree.text - assert actual_clusters.text == expected_clusters.text } } @@ -104,10 +90,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-remove-missing-loci.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_remove_missing_loci.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -138,10 +120,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-more-missing.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_more_missing.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -173,10 +151,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-more-missing-remove-sample.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_more_missing_remove_sample.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -228,15 +202,7 @@ nextflow_pipeline { assert path("$launchDir/results/distances/profile_dists.query_profile.text") .readLines()[0] == "sample_id\tl2\tl3" - // Check computed distance matrix is correct and that the file exists - def actual_distances = path("$launchDir/results/distances/profile_dists.results.text") - def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-keep-two-loci.tsv") - assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_keep_two_loci.txt") - assert actual_clusters.text == expected_clusters.text } } @@ -275,10 +241,6 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_dists-hash-keep-one-loci.tsv") assert actual_distances.text == expected_distances.text - // Check computed clusters are correct and exist - def actual_clusters = path("$launchDir/results/clusters/gas.mcluster.clusters.text") - def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters_hash_keep_one_loci.txt") - assert actual_clusters.text == expected_clusters.text } } diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf index 726030f..82c3a14 100644 --- a/workflows/fastmatchirida.nf +++ b/workflows/fastmatchirida.nf @@ -28,9 +28,6 @@ Workflowfastmatchirida.initialise(params, log) */ include { LOCIDEX_MERGE } from '../modules/local/locidex/merge/main' include { PROFILE_DISTS } from '../modules/local/profile_dists/main' -include { GAS_MCLUSTER } from '../modules/local/gas/mcluster/main' -include { APPEND_METADATA } from '../modules/local/appendmetadata/main' -include { ARBOR_VIEW } from '../modules/local/arborview.nf' include { INPUT_ASSURE } from "../modules/local/input_assure/main" /* @@ -155,15 +152,6 @@ workflow FASTMATCH { distances = PROFILE_DISTS(merged.combined_profiles, mapping_format, mapping_file, columns_file) ch_versions = ch_versions.mix(distances.versions) - clustered_data = GAS_MCLUSTER(distances.results) - ch_versions = ch_versions.mix(clustered_data.versions) - - data_and_metadata = APPEND_METADATA(clustered_data.clusters, metadata_rows, metadata_headers) - tree_data = clustered_data.tree.merge(data_and_metadata) // mergeing as no key to join on - - tree_html = file("$projectDir/assets/ArborView.html") - ARBOR_VIEW(tree_data, tree_html) - CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') )