From cc5616d8a559d588fdd85cd60f1c84a0e7cac827 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Tue, 26 Mar 2024 17:46:11 -0500 Subject: [PATCH 1/7] test: Add split_fastq test --- tests/main.nf.test | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 1d9cd2e..fe4d62a 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -21,4 +21,22 @@ nextflow_pipeline { // ) } } + + test("Should split fastqs") { + + when { + params { + outdir = "$outputDir" + split_fastq = true + } + } + + then { + assert workflow.success + + // TODO + // assertAll( + // ) + } + } } From 32fb23a1ae1306256ac339abe603c29aab40dc88 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Tue, 26 Mar 2024 19:31:44 -0500 Subject: [PATCH 2/7] test: Add snapshots for everything --- tests/lib/UTILS.groovy | 11 +++ tests/main.nf.test | 38 ++++++++--- tests/main.nf.test.snap | 146 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 8 deletions(-) create mode 100644 tests/lib/UTILS.groovy create mode 100644 tests/main.nf.test.snap diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy new file mode 100644 index 0000000..1bd58a4 --- /dev/null +++ b/tests/lib/UTILS.groovy @@ -0,0 +1,11 @@ +// Function to remove Nextflow version from pipeline_software_mqc_versions.yml + +class UTILS { + public static String removeNextflowVersion(pipeline_software_mqc_versions) { + def softwareVersions = path(pipeline_software_mqc_versions).yaml + if (softwareVersions.containsKey("Workflow")) { + softwareVersions.Workflow.remove("Nextflow") + } + return softwareVersions + } +} diff --git a/tests/main.nf.test b/tests/main.nf.test index fe4d62a..35847e3 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -14,11 +14,22 @@ nextflow_pipeline { } then { - assert workflow.success + assertAll( + { assert workflow.success }, + { assert 
snapshot(UTILS.removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml")).match("software_versions") }, + { assert snapshot( + workflow.trace.tasks().size(), + path("$outputDir/compartments").list(), + path("$outputDir/distance_decay").list(), + path("$outputDir/hicpro").list(), + path("$outputDir/multiqc/multiqc_data").list(), + path("$outputDir/tads").list(), + ).match()}, - // TODO - // assertAll( - // ) + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_2_fastqc.html").exists() }, + ) } } @@ -32,11 +43,22 @@ nextflow_pipeline { } then { - assert workflow.success + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml")).match("split_software_versions") }, + { assert snapshot( + workflow.trace.tasks().size(), + path("$outputDir/compartments").list(), + path("$outputDir/distance_decay").list(), + path("$outputDir/hicpro").list(), + path("$outputDir/multiqc/multiqc_data").list(), + path("$outputDir/tads").list(), + ).match()}, - // TODO - // assertAll( - // ) + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_2_fastqc.html").exists() }, + ) } } } diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 0000000..b148ef4 --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,146 @@ +{ + "split_software_versions": { + "content": [ + "{BOWTIE2_ALIGN={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRIMMED={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_BUILD={bowtie2=2.5.2}, COMBINE_MATES={python=3.9.12}, COOLER_BALANCE={cooler=0.9.2}, COOLER_CLOAD={cooler=0.9.2}, COOLER_DUMP={cooler=0.9.2}, 
COOLER_MAKEBINS={cooler=0.9.2}, COOLER_ZOOMIFY={cooler=0.9.2}, COOLTOOLS_EIGSCIS={cooltools=0.5.1}, COOLTOOLS_INSULATION={cooltools=0.5.1}, CUSTOM_GETCHROMSIZES={getchromsizes=1.16.1}, FASTQC={fastqc=0.12.1}, GET_RESTRICTION_FRAGMENTS={python=3.9.12}, GET_VALID_INTERACTION={python=3.9.12}, HICPRO2PAIRS={pairix=0.3.7}, HIC_FIND_TADS={hicexplorer=3.7.2}, HIC_PLOT_DIST_VS_COUNTS={hicexplorer=3.7.2}, MERGE_BOWTIE2={samtools=1.15.1}, MERGE_STATS={python=3.9.12}, MERGE_VALID_INTERACTION={sort=8.3}, SPLIT_COOLER_DUMP={cooler=null}, TRIM_READS={gzip=1.1}, Workflow={nf-core/hic=v2.2.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-26T19:31:20.226377845" + }, + "software_versions": { + "content": [ + "{BOWTIE2_ALIGN={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRIMMED={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_BUILD={bowtie2=2.5.2}, COMBINE_MATES={python=3.9.12}, COOLER_BALANCE={cooler=0.9.2}, COOLER_CLOAD={cooler=0.9.2}, COOLER_DUMP={cooler=0.9.2}, COOLER_MAKEBINS={cooler=0.9.2}, COOLER_ZOOMIFY={cooler=0.9.2}, COOLTOOLS_EIGSCIS={cooltools=0.5.1}, COOLTOOLS_INSULATION={cooltools=0.5.1}, CUSTOM_GETCHROMSIZES={getchromsizes=1.16.1}, FASTQC={fastqc=0.12.1}, GET_RESTRICTION_FRAGMENTS={python=3.9.12}, GET_VALID_INTERACTION={python=3.9.12}, HICPRO2PAIRS={pairix=0.3.7}, HIC_FIND_TADS={hicexplorer=3.7.2}, HIC_PLOT_DIST_VS_COUNTS={hicexplorer=3.7.2}, MERGE_BOWTIE2={samtools=1.15.1}, MERGE_STATS={python=3.9.12}, MERGE_VALID_INTERACTION={sort=8.3}, SPLIT_COOLER_DUMP={cooler=null}, TRIM_READS={gzip=1.1}, Workflow={nf-core/hic=v2.2.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-26T19:28:03.999972948" + }, + "Run with profile test": { + "content": [ + 37, + [ + "SRR4292758.2000_compartments.cis.bw:md5,d7b3015f4856ef058a870056f9c3bdf1", + "SRR4292758.2000_compartments.cis.lam.txt:md5,e05a7cc3c0af218a5cf6f3ad6ffa8da5", + 
"SRR4292758.2000_compartments.cis.vecs.tsv:md5,0d1d4c5fd45d2400a781069bb93d2b76" + ], + [ + "SRR4292758_distcount.png:md5,5314c9ad16a1e857682fde4e584103bc", + "SRR4292758_distcount.txt:md5,bac196d56bcb489c2ef6fe2ea26e07c9" + ], + [ + [ + "SRR4292758_0_bwt2pairs.bam:md5,dc752ae9feb57f0ff2daa3a215956915" + ], + [ + [ + "SRR4292758.mRSstat:md5,99af5a2e6136a5e003d667afbf438707", + "SRR4292758.mpairstat:md5,281aa85283e57308187c33a5bad7dc82", + "SRR4292758.null.mmapstat:md5,33dff67a7684ffe44d5dfaccf60d6aad", + "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8" + ] + ], + [ + "SRR4292758.allValidPairs:md5,55e00a772766779f5d5c2bf2b6fcef6d", + [ + "SRR4292758_contacts.pairs.gz:md5,201d59212cf443fe5da09dbddeef8677", + "SRR4292758_contacts.pairs.gz.px2:md5,6275247d930753121aa026870864aa25" + ] + ] + ], + [ + "multiqc.log:md5,90a7ccd82c900d4a5aa08d3d3da772c5", + "multiqc_citations.txt:md5,76bcbb4e4103209b523ed9da8fef74bc", + "multiqc_data.json:md5,c71ad3b9813fcd2c301eb95bc850e568", + "multiqc_fastqc.txt:md5,64b89edc94607362b7591cb821b28b93", + "multiqc_general_stats.txt:md5,892258a0f91b4d21d95e2c5e1ff1cc08", + "multiqc_hicpro.txt:md5,67513d15d5a00c08d93fd6a7fbd189fa", + "multiqc_software_versions.txt:md5,8700e2b122c2107aff2a88dbd18933ec", + "multiqc_sources.txt:md5,de6736507ee9f445e58287ed10090aba" + ], + [ + [ + "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", + "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", + "SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool:md5,3203ee7258462d0c2891e7b62c0bca04" + ], + [ + "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" + ] 
+ ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-26T19:28:04.028638511" + }, + "Should split fastqs": { + "content": [ + 37, + [ + "SRR4292758.2000_compartments.cis.bw:md5,d7b3015f4856ef058a870056f9c3bdf1", + "SRR4292758.2000_compartments.cis.lam.txt:md5,e05a7cc3c0af218a5cf6f3ad6ffa8da5", + "SRR4292758.2000_compartments.cis.vecs.tsv:md5,0d1d4c5fd45d2400a781069bb93d2b76" + ], + [ + "SRR4292758_distcount.png:md5,5314c9ad16a1e857682fde4e584103bc", + "SRR4292758_distcount.txt:md5,bac196d56bcb489c2ef6fe2ea26e07c9" + ], + [ + [ + "SRR4292758_0_bwt2pairs.bam:md5,676047426d024788971139d92d2ab097" + ], + [ + [ + "SRR4292758.mRSstat:md5,17bfb0c8be6c0940c2863d147c1ea824", + "SRR4292758.mpairstat:md5,281aa85283e57308187c33a5bad7dc82", + "SRR4292758.null.mmapstat:md5,5a72e08a7b8098eeb6e1331c8b866b15", + "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8" + ] + ], + [ + "SRR4292758.allValidPairs:md5,55e00a772766779f5d5c2bf2b6fcef6d", + [ + "SRR4292758_contacts.pairs.gz:md5,201d59212cf443fe5da09dbddeef8677", + "SRR4292758_contacts.pairs.gz.px2:md5,6275247d930753121aa026870864aa25" + ] + ] + ], + [ + "multiqc.log:md5,5e26b89dad43c71657280738996dbd0d", + "multiqc_citations.txt:md5,76bcbb4e4103209b523ed9da8fef74bc", + "multiqc_data.json:md5,41cf9afa708b9a7cdaf827f28f38b096", + "multiqc_fastqc.txt:md5,64b89edc94607362b7591cb821b28b93", + "multiqc_general_stats.txt:md5,892258a0f91b4d21d95e2c5e1ff1cc08", + "multiqc_hicpro.txt:md5,b97b7b08c5075b76c99a902bd268095c", + "multiqc_software_versions.txt:md5,8700e2b122c2107aff2a88dbd18933ec", + "multiqc_sources.txt:md5,d79093d1f6997a31509a14695dd635ea" + ], + [ + [ + "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", + "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", + "SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool:md5,3cef5afd1125b90bb73dcce77d987129" + ], + [ + "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-26T19:31:20.698967004" + } +} \ No newline at end of file From 3672949c7c654e05a756679f0d3d519b30500f2d Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 27 Mar 2024 14:15:36 -0500 Subject: [PATCH 3/7] test: Actually split up the fastq --- tests/main.nf.test | 1 + tests/main.nf.test.snap | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index 35847e3..804defa 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -39,6 +39,7 @@ nextflow_pipeline { params { outdir = "$outputDir" split_fastq = true + fastq_chunks_size = 125000 } } diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap index b148ef4..1638abc 100644 --- a/tests/main.nf.test.snap +++ b/tests/main.nf.test.snap @@ -83,7 +83,7 @@ }, "Should split fastqs": { "content": [ - 37, + 70, [ "SRR4292758.2000_compartments.cis.bw:md5,d7b3015f4856ef058a870056f9c3bdf1", "SRR4292758.2000_compartments.cis.lam.txt:md5,e05a7cc3c0af218a5cf6f3ad6ffa8da5", @@ -95,13 +95,16 @@ ], [ [ - "SRR4292758_0_bwt2pairs.bam:md5,676047426d024788971139d92d2ab097" + "SRR4292758_0_bwt2pairs.bam:md5,22ddfe3478dce932d7ac675347713ff2", + "SRR4292758_1_bwt2pairs.bam:md5,3d6aecbf178c60f1569884c022954549", + "SRR4292758_2_bwt2pairs.bam:md5,11dc852af492a171a80584d8289cf57a", + "SRR4292758_3_bwt2pairs.bam:md5,4b0297fbf1f6058e8072a3dcd85c0af0" ], [ [ "SRR4292758.mRSstat:md5,17bfb0c8be6c0940c2863d147c1ea824", - "SRR4292758.mpairstat:md5,281aa85283e57308187c33a5bad7dc82", - 
"SRR4292758.null.mmapstat:md5,5a72e08a7b8098eeb6e1331c8b866b15", + "SRR4292758.mpairstat:md5,a11582165e0b7c4dcb92a4ec1e386004", + "SRR4292758.null.mmapstat:md5,acd2498b689e3c3f1075a2b2350d2147", "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8" ] ], @@ -114,14 +117,14 @@ ] ], [ - "multiqc.log:md5,5e26b89dad43c71657280738996dbd0d", + "multiqc.log:md5,5511aba692e73cda6d79d4829307ebc7", "multiqc_citations.txt:md5,76bcbb4e4103209b523ed9da8fef74bc", - "multiqc_data.json:md5,41cf9afa708b9a7cdaf827f28f38b096", - "multiqc_fastqc.txt:md5,64b89edc94607362b7591cb821b28b93", - "multiqc_general_stats.txt:md5,892258a0f91b4d21d95e2c5e1ff1cc08", - "multiqc_hicpro.txt:md5,b97b7b08c5075b76c99a902bd268095c", + "multiqc_data.json:md5,b74316fef34083bd9067bac294fa8b9e", + "multiqc_fastqc.txt:md5,48bb574f77c2075cf8e0d4ad601dbfc3", + "multiqc_general_stats.txt:md5,a5349bbfb06594658b9b46faefc74298", + "multiqc_hicpro.txt:md5,dc06d0ca4000f957f87336caee63de66", "multiqc_software_versions.txt:md5,8700e2b122c2107aff2a88dbd18933ec", - "multiqc_sources.txt:md5,d79093d1f6997a31509a14695dd635ea" + "multiqc_sources.txt:md5,664b1c154779e917886b3f180c656613" ], [ [ @@ -130,7 +133,7 @@ "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", - "SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool:md5,3cef5afd1125b90bb73dcce77d987129" + "SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool:md5,09813863808bbbf10fede3547007f131" ], [ "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" @@ -141,6 +144,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-26T19:31:20.698967004" + "timestamp": "2024-03-27T14:12:00.152845997" } } \ No newline at end of file From 5d18b8fd20f5a35d4a1d885bf125c5f13d7255ae Mon Sep 17 00:00:00 
2001 From: Edmund Miller Date: Wed, 27 Mar 2024 14:42:22 -0500 Subject: [PATCH 4/7] chore: Swap test files to raw.githubusercontent.com Idk why --- assets/samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index e699919..69f5f75 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,2 +1,2 @@ sample,fastq_1,fastq_2 -SRR4292758,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz +SRR4292758,https://raw.githubusercontent.com/nf-core/test-datasets/hic/data/SRR4292758_00_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/hic/data/SRR4292758_00_R2.fastq.gz From d0ac3442bca209a9bf6d191edd78dcea087f9c0b Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 27 Mar 2024 19:12:55 -0500 Subject: [PATCH 5/7] chore: Remove local MultiQC --- modules/local/multiqc.nf | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 modules/local/multiqc.nf diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf deleted file mode 100644 index f5587d0..0000000 --- a/modules/local/multiqc.nf +++ /dev/null @@ -1,35 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda "bioconda::multiqc=1.19" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : - 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" - - input: - path multiqc_config - path (mqc_custom_config) - path workflow_summary - path ('fastqc/*') - path ('input_*/*') - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} From 6d0473e3065517af78fff2683cd17b77c1450e7d Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 27 Mar 2024 19:19:30 -0500 Subject: [PATCH 6/7] refactor: Use fromSamplesheet from nf-validation --- assets/schema_input.json | 2 +- bin/check_samplesheet.py | 258 ------------------ modules/local/samplesheet_check.nf | 31 --- subworkflows/local/input_check.nf | 68 ----- .../local/utils_nfcore_hic_pipeline/main.nf | 69 +++-- tests/main.nf.test | 50 +++- tests/main.nf.test.snap | 121 ++------ 7 files changed, 108 insertions(+), 491 deletions(-) delete mode 100755 bin/check_samplesheet.py delete mode 100644 modules/local/samplesheet_check.nf delete mode 100644 subworkflows/local/input_check.nf diff --git a/assets/schema_input.json b/assets/schema_input.json index a4fb5de..2819403 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -28,6 +28,6 @@ "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1", "fastq_2"] } } diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index dde3baa..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python - -"""Provide a command 
line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). 
- - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. 
- - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - ##row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. 
- - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 096c80b..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/hic/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 79b7954..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - if (params.split_fastq){ - - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } - .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) - .map { it -> [it[0], [it[1], it[2]]]} - .groupTuple(by: [0]) - .flatMap { it -> setMetaChunk(it) } - .collate(2) - .set { reads } - - } else { - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } - .map { it -> [it[0], [it[1], it[2]]]} - .groupTuple(by: [0]) - .flatMap { it -> setMetaChunk(it) } - .collate(2) - .set { reads } - } - - emit: - reads // channel: [ val(meta), [ reads ] ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channels(LinkedHashMap row) { - def meta = [:] - meta.id = row.sample - meta.single_end = false - - def array = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if 
(!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - array = [ meta, file(row.fastq_1), file(row.fastq_2) ] - return array -} - -// Set the meta.chunk value in case of technical replicates -def setMetaChunk(row){ - def map = [] - row[1].eachWithIndex() { file,i -> - meta = row[0].clone() - meta.chunk = i - meta.part = row[1].size() - map += [meta, file] - } - return map -} diff --git a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf index 1c1d7cb..fa90d66 100644 --- a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf @@ -20,8 +20,6 @@ include { imNotification } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' -include { INPUT_CHECK } from '../input_check' - /* ======================================================================================== SUBWORKFLOW TO INITIALISE PIPELINE @@ -80,34 +78,36 @@ workflow PIPELINE_INITIALISATION { validateInputParameters() // - // TODO nf-core: Create channel from input file provided through params.input + // Create channel from input file provided through params.input // - // Channel - // .fromSamplesheet("input") - // .map { - // meta, fastq_1, fastq_2 -> - // if (!fastq_2) { - // return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - // } else { - // return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - // } - // } - // .groupTuple() - // .map { - // validateInputSamplesheet(it) - // } - // .map { - // meta, fastqs -> - // return [ meta, fastqs.flatten() ] - // } + Channel + .fromSamplesheet("input") + .set { ch_input } + if (params.split_fastq) { + ch_input + .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) + .set { 
ch_input } + } - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - input - ) - .reads + ch_input + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .flatMap { it -> setMetaChunk(it) } + .collate(2) + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } .set { ch_samplesheet } emit: @@ -259,3 +259,16 @@ def methodsDescriptionText(mqc_methods_yaml) { return description_html.toString() } + +// Set the meta.chunk value in case of technical replicates +def setMetaChunk(row){ + def map = [] + row[1].eachWithIndex() { file, i -> + // Work on a copy so every chunk carries its own meta map + def meta = row[0].clone() + meta.chunk = i + meta.part = row[1].size() + map += [meta, file] + } + return map +} diff --git a/tests/main.nf.test b/tests/main.nf.test index 804defa..5f229ce 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -19,16 +19,31 @@ nextflow_pipeline { { assert snapshot(UTILS.removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml")).match("software_versions") }, { assert snapshot( workflow.trace.tasks().size(), - path("$outputDir/compartments").list(), + // FIXME Not deterministic on CI but works locally + // path("$outputDir/compartments").list(), path("$outputDir/distance_decay").list(), - path("$outputDir/hicpro").list(), - path("$outputDir/multiqc/multiqc_data").list(), - path("$outputDir/tads").list(), + path("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mpairstat"), + path("$outputDir/hicpro/stats/SRR4292758/SRR4292758_allValidPairs.mergestat"), + path("$outputDir/hicpro/valid_pairs").list(), + // path("$outputDir/multiqc/multiqc_data").list(), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.bed"), + 
path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.gff"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_domains.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_score.bedgraph"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_tad_score.bm"), + path("$outputDir/tads/insulation").list(), ).match()}, { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.bw").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.lam.txt").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.vecs.tsv").exists() }, { assert new File("$outputDir/fastqc/SRR4292758_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/SRR4292758_2_fastqc.html").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_0_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mRSstat").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.null.mmapstat").exists() }, + { assert new File("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool").exists() }, ) } } @@ -49,16 +64,35 @@ nextflow_pipeline { { assert snapshot(UTILS.removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml")).match("split_software_versions") }, { assert snapshot( workflow.trace.tasks().size(), - path("$outputDir/compartments").list(), + // FIXME Not deterministic on CI but works locally + // path("$outputDir/compartments").list(), path("$outputDir/distance_decay").list(), - path("$outputDir/hicpro").list(), - path("$outputDir/multiqc/multiqc_data").list(), - path("$outputDir/tads").list(), + path("$outputDir/hicpro/stats/SRR4292758/SRR4292758_allValidPairs.mergestat"), + // FIXME 
path("$outputDir/hicpro/valid_pairs").list(), + // path("$outputDir/multiqc/multiqc_data").list(), + // path("$outputDir/multiqc/multiqc_data").list(), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.gff"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_domains.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_score.bedgraph"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_tad_score.bm"), + path("$outputDir/tads/insulation").list(), ).match()}, { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.bw").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.lam.txt").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.vecs.tsv").exists() }, { assert new File("$outputDir/fastqc/SRR4292758_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/SRR4292758_2_fastqc.html").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_0_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_1_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_2_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_3_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mpairstat").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mRSstat").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.null.mmapstat").exists() }, + { assert new File("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool").exists() }, ) } } diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap index 
1638abc..3e7c079 100644 --- a/tests/main.nf.test.snap +++ b/tests/main.nf.test.snap @@ -7,7 +7,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-26T19:31:20.226377845" + "timestamp": "2024-04-09T18:39:34.552071308" }, "software_versions": { "content": [ @@ -17,133 +17,60 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-26T19:28:03.999972948" + "timestamp": "2024-04-09T18:36:56.07112172" }, "Run with profile test": { "content": [ - 37, - [ - "SRR4292758.2000_compartments.cis.bw:md5,d7b3015f4856ef058a870056f9c3bdf1", - "SRR4292758.2000_compartments.cis.lam.txt:md5,e05a7cc3c0af218a5cf6f3ad6ffa8da5", - "SRR4292758.2000_compartments.cis.vecs.tsv:md5,0d1d4c5fd45d2400a781069bb93d2b76" - ], + 36, [ "SRR4292758_distcount.png:md5,5314c9ad16a1e857682fde4e584103bc", "SRR4292758_distcount.txt:md5,bac196d56bcb489c2ef6fe2ea26e07c9" ], + "SRR4292758.mpairstat:md5,281aa85283e57308187c33a5bad7dc82", + "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8", [ + "SRR4292758.allValidPairs:md5,55e00a772766779f5d5c2bf2b6fcef6d", [ - "SRR4292758_0_bwt2pairs.bam:md5,dc752ae9feb57f0ff2daa3a215956915" - ], - [ - [ - "SRR4292758.mRSstat:md5,99af5a2e6136a5e003d667afbf438707", - "SRR4292758.mpairstat:md5,281aa85283e57308187c33a5bad7dc82", - "SRR4292758.null.mmapstat:md5,33dff67a7684ffe44d5dfaccf60d6aad", - "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8" - ] - ], - [ - "SRR4292758.allValidPairs:md5,55e00a772766779f5d5c2bf2b6fcef6d", - [ - "SRR4292758_contacts.pairs.gz:md5,201d59212cf443fe5da09dbddeef8677", - "SRR4292758_contacts.pairs.gz.px2:md5,6275247d930753121aa026870864aa25" - ] + "SRR4292758_contacts.pairs.gz:md5,201d59212cf443fe5da09dbddeef8677", + "SRR4292758_contacts.pairs.gz.px2:md5,6275247d930753121aa026870864aa25" ] ], + "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", + "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", [ - "multiqc.log:md5,90a7ccd82c900d4a5aa08d3d3da772c5", - "multiqc_citations.txt:md5,76bcbb4e4103209b523ed9da8fef74bc", - "multiqc_data.json:md5,c71ad3b9813fcd2c301eb95bc850e568", - "multiqc_fastqc.txt:md5,64b89edc94607362b7591cb821b28b93", - "multiqc_general_stats.txt:md5,892258a0f91b4d21d95e2c5e1ff1cc08", - "multiqc_hicpro.txt:md5,67513d15d5a00c08d93fd6a7fbd189fa", - "multiqc_software_versions.txt:md5,8700e2b122c2107aff2a88dbd18933ec", - "multiqc_sources.txt:md5,de6736507ee9f445e58287ed10090aba" - ], - [ - [ - "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", - "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", - "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", - "SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool:md5,3203ee7258462d0c2891e7b62c0bca04" - ], - [ - "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" - ] + "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-26T19:28:04.028638511" + "timestamp": "2024-04-09T18:36:56.085219308" }, "Should split fastqs": { "content": [ - 70, - [ - "SRR4292758.2000_compartments.cis.bw:md5,d7b3015f4856ef058a870056f9c3bdf1", - "SRR4292758.2000_compartments.cis.lam.txt:md5,e05a7cc3c0af218a5cf6f3ad6ffa8da5", - 
"SRR4292758.2000_compartments.cis.vecs.tsv:md5,0d1d4c5fd45d2400a781069bb93d2b76" - ], + 69, [ "SRR4292758_distcount.png:md5,5314c9ad16a1e857682fde4e584103bc", "SRR4292758_distcount.txt:md5,bac196d56bcb489c2ef6fe2ea26e07c9" ], + "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8", + "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", + "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", [ - [ - "SRR4292758_0_bwt2pairs.bam:md5,22ddfe3478dce932d7ac675347713ff2", - "SRR4292758_1_bwt2pairs.bam:md5,3d6aecbf178c60f1569884c022954549", - "SRR4292758_2_bwt2pairs.bam:md5,11dc852af492a171a80584d8289cf57a", - "SRR4292758_3_bwt2pairs.bam:md5,4b0297fbf1f6058e8072a3dcd85c0af0" - ], - [ - [ - "SRR4292758.mRSstat:md5,17bfb0c8be6c0940c2863d147c1ea824", - "SRR4292758.mpairstat:md5,a11582165e0b7c4dcb92a4ec1e386004", - "SRR4292758.null.mmapstat:md5,acd2498b689e3c3f1075a2b2350d2147", - "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8" - ] - ], - [ - "SRR4292758.allValidPairs:md5,55e00a772766779f5d5c2bf2b6fcef6d", - [ - "SRR4292758_contacts.pairs.gz:md5,201d59212cf443fe5da09dbddeef8677", - "SRR4292758_contacts.pairs.gz.px2:md5,6275247d930753121aa026870864aa25" - ] - ] - ], - [ - "multiqc.log:md5,5511aba692e73cda6d79d4829307ebc7", - "multiqc_citations.txt:md5,76bcbb4e4103209b523ed9da8fef74bc", - "multiqc_data.json:md5,b74316fef34083bd9067bac294fa8b9e", - "multiqc_fastqc.txt:md5,48bb574f77c2075cf8e0d4ad601dbfc3", - "multiqc_general_stats.txt:md5,a5349bbfb06594658b9b46faefc74298", - "multiqc_hicpro.txt:md5,dc06d0ca4000f957f87336caee63de66", - 
"multiqc_software_versions.txt:md5,8700e2b122c2107aff2a88dbd18933ec", - "multiqc_sources.txt:md5,664b1c154779e917886b3f180c656613" - ], - [ - [ - "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", - "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", - "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", - "SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool:md5,09813863808bbbf10fede3547007f131" - ], - [ - "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" - ] + "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-27T14:12:00.152845997" + "timestamp": "2024-04-09T18:39:34.716731734" } } \ No newline at end of file From 4b288e080bbbc6a78fe0d084e07ee051e30e6b80 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Tue, 9 Apr 2024 15:23:22 -0500 Subject: [PATCH 7/7] ci: Use pdiff in testing --- .github/workflows/ci.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8cb450..f973e31 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,9 +73,7 @@ jobs: architecture: "x64" - name: Install pdiff to see diff between nf-test snapshots - run: | - python -m pip install --upgrade pip - pip install pdiff + run: python -m pip install --upgrade pip pdiff cryptography - uses: nf-core/setup-nf-test@v1 @@ -84,6 +82,9 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - name: Run nf-test + env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" run: | nf-test test --verbose --tag 
PIPELINE --profile "+${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap