Add MSIsensor-pro tumor-only analysis (MSISENSORPRO_MSITUMORONLY) and make the
MSIsensor-pro reference inputs configurable.

Summary of what the hunks below do:
  * add params `msisensorpro_scan` and `msisensorpro_baseline` (both default to null);
  * move the MSISENSORPRO_SCAN call out of PREPARE_GENOME into main.nf, where it is
    only invoked when 'msisensorpro' is in params.tools and params.msisensorpro_scan
    is not set;
  * build `msisensorpro_baseline` from params.msisensorpro_baseline, falling back to
    Channel.empty() when the param is unset;
  * add the msisensorpro/msitumoronly module, its config selector and its call in
    BAM_VARIANT_CALLING_TUMOR_ONLY_ALL;
  * drop 'msisensorpro' from tools_requiring_normal_samples and remove the
    out_msisensorpro emit from BAM_VARIANT_CALLING_SOMATIC_ALL.

NOTE(review): the new `&& msisensorpro_scan` / `&& msisensorpro_baseline` guards are
fed channels from main.nf - confirm whether a channel object can ever be falsy
there, or whether the Channel.empty() fallback is what actually gates the tool.
NOTE(review): a few -/+ pairs in later hunks read identically; presumably they differ
only in alignment whitespace - verify against the original patch.

diff --git a/conf/modules/msisensorpro.config b/conf/modules/msisensorpro.config
index 8253cccc50..1526a61f13 100644
--- a/conf/modules/msisensorpro.config
+++ b/conf/modules/msisensorpro.config
@@ -22,4 +22,14 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
+    withName: 'MSISENSORPRO_MSITUMORONLY' {
+        ext.args = { params.wes ? '-c 20' : '-c 15' } // default values by MSIsensorpro
+        ext.prefix = { "${meta.id}.tumor_only" }
+        publishDir = [
+            mode: params.publish_dir_mode,
+            path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" },
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
 }
diff --git a/main.nf b/main.nf
index 92bcc47b2b..1830805b3f 100755
--- a/main.nf
+++ b/main.nf
@@ -77,6 +77,7 @@ include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nf
 include { PREPARE_GENOME } from './subworkflows/local/prepare_genome'
 include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals'
 include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit'
+include { MSISENSORPRO_SCAN } from './modules/nf-core/msisensorpro/scan/main'
 
 // Initialize fasta file with meta map:
 fasta = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty()
@@ -91,6 +92,7 @@ germline_resource = params.germline_resource ? Channel.fromPath(para
 known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
 known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([])
 mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([])
+msisensorpro_baseline = params.msisensorpro_baseline ? Channel.fromPath(params.msisensorpro_baseline).collect() : Channel.empty()
 pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
 sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([])
 
@@ -166,8 +168,18 @@ workflow NFCORE_SAREK {
         aligner == "bwa-mem2" ? bwamem2 :
         dragmap
 
-    // TODO: add a params for msisensorpro_scan
-    msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan
+    // Reference msi list for MSIsensorpro
+    if (params.tools && params.tools.split(',').contains('msisensorpro')) {
+        if (params.msisensorpro_scan) {
+            msisensorpro_scan = Channel.fromPath(params.msisensorpro_scan).collect()
+        } else {
+            MSISENSORPRO_SCAN(fasta)
+            msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] }
+            versions = versions.mix(MSISENSORPRO_SCAN.out.versions)
+        }
+    } else {
+        msisensorpro_scan = Channel.empty()
+    }
 
     // For ASCAT, extracted from zip or tar.gz files
     allele_files = PREPARE_GENOME.out.allele_files
@@ -177,7 +189,7 @@ workflow NFCORE_SAREK {
     rt_file = PREPARE_GENOME.out.rt_file
 
     // Tabix indexed vcf files
-    bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([])
+    bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.value([])
     dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([])
     germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries
     known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([])
@@ -296,6 +308,7 @@ workflow NFCORE_SAREK {
         loci_files,
         mappability,
         msisensorpro_scan,
+        msisensorpro_baseline,
         ngscheckmate_bed,
         pon,
         pon_tbi,
diff --git a/modules/nf-core/msisensorpro/msisomatic/main.nf b/modules/nf-core/msisensorpro/msisomatic/main.nf
index 9b0084d949..52a33705a3 100644
--- a/modules/nf-core/msisensorpro/msisomatic/main.nf
+++ b/modules/nf-core/msisensorpro/msisomatic/main.nf
@@ -26,7 +26,7 @@ process MSISENSORPRO_MSISOMATIC {
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
     def fasta = fasta ? "-g ${fasta}" : ""
-    def intervals = intervals ? " -e ${intervals} " : ""
+    def intervals = intervals ? "-e ${intervals} " : ""
     """
     msisensor-pro \\
         msi \\
diff --git a/modules/nf-core/msisensorpro/msisomatic/meta.yml b/modules/nf-core/msisensorpro/msisomatic/meta.yml
index a6dda66ff2..7f7f4281e3 100644
--- a/modules/nf-core/msisensorpro/msisomatic/meta.yml
+++ b/modules/nf-core/msisensorpro/msisomatic/meta.yml
@@ -45,7 +45,7 @@ input:
       pattern: "*.{fasta}"
   - msisensor_scan:
       type: file
-      description: Output from msisensor-pro/scan, conaining list of msi regions
+      description: Output from msisensor-pro/scan, containing list of msi regions
       pattern: "*.list"
 output:
   - meta:
@@ -69,10 +69,6 @@ output:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-  - list:
-      type: file
-      description: File containing microsatellite list
-      pattern: "*.{list}"
 authors:
   - "@FriederikeHanssen"
 maintainers:
diff --git a/modules/nf-core/msisensorpro/msitumoronly/environment.yml b/modules/nf-core/msisensorpro/msitumoronly/environment.yml
new file mode 100644
index 0000000000..47842c75b3
--- /dev/null
+++ b/modules/nf-core/msisensorpro/msitumoronly/environment.yml
@@ -0,0 +1,7 @@
+name: msisensorpro_msitumoronly
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::msisensor-pro=1.2.0
diff --git a/modules/nf-core/msisensorpro/msitumoronly/main.nf b/modules/nf-core/msisensorpro/msitumoronly/main.nf
new file mode 100644
index 0000000000..92dc3b6b56
--- /dev/null
+++ b/modules/nf-core/msisensorpro/msitumoronly/main.nf
@@ -0,0 +1,46 @@
+process MSISENSORPRO_MSITUMORONLY {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' :
+        'biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }"
+
+    input:
+    tuple val(meta), path(tumor), path(tumor_index), path(intervals)
+    path (fasta)
+    path (msisensor_baseline)
+
+    output:
+    tuple val(meta), path("${prefix}") , emit: output_report
+    tuple val(meta), path("${prefix}_dis") , emit: output_dis
+    tuple val(meta), path("${prefix}_all") , emit: output_all
+    tuple val(meta), path("${prefix}_unstable"), emit: output_unstable
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def fasta = fasta ? "-g ${fasta}" : ""
+    def intervals = intervals ? "-e ${intervals} " : ""
+    """
+    msisensor-pro \\
+        pro \\
+        -d ${msisensor_baseline} \\
+        -t ${tumor} \\
+        ${fasta} \\
+        -o $prefix \\
+        -b ${task.cpus} \\
+        ${intervals} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/msisensorpro/msitumoronly/meta.yml b/modules/nf-core/msisensorpro/msitumoronly/meta.yml
new file mode 100644
index 0000000000..d6b81a5913
--- /dev/null
+++ b/modules/nf-core/msisensorpro/msitumoronly/meta.yml
@@ -0,0 +1,68 @@
+name: msisensorpro_msitumoronly
+description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients with next generation sequencing data. It accepts the whole genome sequencing, whole exome sequencing and target region (panel) sequencing data as input
+keywords:
+  - micro-satellite-scan
+  - msisensor-pro
+  - msi
+  - somatic
+  - tumor-only
+tools:
+  - msisensorpro:
+      description: Microsatellite Instability (MSI) detection using high-throughput sequencing data.
+      homepage: https://github.com/xjtu-omics/msisensor-pro
+      documentation: https://github.com/xjtu-omics/msisensor-pro/wiki
+      tool_dev_url: https://github.com/xjtu-omics/msisensor-pro
+      doi: "10.1016/j.gpb.2020.02.001"
+      licence: ["Custom Licence"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - tumor:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - tumor_index:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - intervals:
+      type: file
+      description: bed file containing interval information, optional
+      pattern: "*.{bed}"
+  - fasta:
+      type: file
+      description: Reference genome
+      pattern: "*.{fasta}"
+  - msisensor_baseline:
+      type: file
+      description: File containing custom list of msi regions from a panel of normals to be used as a baseline
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - output_report:
+      type: file
+      description: File containing final report with all detected microsatellites, unstable somatic microsatellites, msi score
+  - output_dis:
+      type: file
+      description: File containing distribution results
+  - output_all:
+      type: file
+      description: File containing all sites
+  - output_unstable:
+      type: file
+      description: File containing unstable sites
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@FriederikeHanssen"
+  - "@bounlu"
+maintainers:
+  - "@FriederikeHanssen"
diff --git a/nextflow.config b/nextflow.config
index de95ae8c86..6a3d9552e3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,6 +71,8 @@ params {
     ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
     joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
     joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling
+    msisensorpro_scan = null // by default the reference is built from the fasta file
+    msisensorpro_baseline = null // by default tumor-only mode is not used in MSIsensorpro
     only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample
     sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope
     sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1611d58f40..bd10acb70f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -733,6 +733,16 @@
                     "hidden": true,
                     "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
                 },
+                "msisensorpro_scan": {
+                    "type": "string",
+                    "fa_icon": "fas fa-file-alt",
+                    "description": "Path to MSIsensorpro reference genome microsatellites information file."
+                },
+                "msisensorpro_baseline": {
+                    "type": "string",
+                    "fa_icon": "fas fa-file-alt",
+                    "description": "Path to MSIsensorpro custom baseline file for tumor-only analysis."
+                },
                 "ngscheckmate_bed": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf
index cdfabfc3ac..21a7e0ba34 100644
--- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf
@@ -44,13 +44,12 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
     wes // boolean: [mandatory] [default: false] whether targeted data is processed
 
     main:
-    versions = Channel.empty()
+    versions = Channel.empty()
 
     //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
     vcf_freebayes = Channel.empty()
     vcf_manta = Channel.empty()
     vcf_strelka = Channel.empty()
-    out_msisensorpro = Channel.empty()
     vcf_mutect2 = Channel.empty()
     vcf_tiddit = Channel.empty()
 
@@ -175,11 +174,10 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
     }
 
     // MSISENSOR
-    if (tools.split(',').contains('msisensorpro')) {
+    if (tools.split(',').contains('msisensorpro') && msisensorpro_scan) {
         MSISENSORPRO_MSISOMATIC(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_scan)
 
         versions = versions.mix(MSISENSORPRO_MSISOMATIC.out.versions)
-        out_msisensorpro = out_msisensorpro.mix(MSISENSORPRO_MSISOMATIC.out.output_report)
     }
 
     // MUTECT2
@@ -190,7 +188,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
             // joint_mutect2 mode needs different meta.map than regular mode
             cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ->
                 joint_mutect2 ?
-                //we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step
+                // we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step
                 [ meta + [ id:meta.patient ], [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] :
                 [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ]
             },
@@ -232,7 +230,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
     )
 
     emit:
-    out_msisensorpro
     vcf_all
     vcf_freebayes
     vcf_manta
diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
index 59b14ed898..97b4e6f54d 100644
--- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
@@ -11,6 +11,7 @@ include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_cal
 include { BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC } from '../bam_variant_calling_tumor_only_controlfreec/main'
 include { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA } from '../bam_variant_calling_tumor_only_manta/main'
 include { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 } from '../bam_variant_calling_tumor_only_mutect2/main'
+include { MSISENSORPRO_MSITUMORONLY } from '../../../modules/nf-core/msisensorpro/msitumoronly/main'
 
 workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
     take:
@@ -32,21 +33,22 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
     intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped
     intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped
     mappability
+    msisensorpro_baseline // channel: [optional] msisensorpro_baseline
     panel_of_normals // channel: [optional] panel_of_normals
     panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi
     joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode
     wes // boolean: [mandatory] [default: false] whether targeted data is processed
 
     main:
-    versions = Channel.empty()
+    versions = Channel.empty()
 
     //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
-    vcf_freebayes = Channel.empty()
-    vcf_manta = Channel.empty()
-    vcf_mpileup = Channel.empty()
-    vcf_mutect2 = Channel.empty()
-    vcf_strelka = Channel.empty()
-    vcf_tiddit = Channel.empty()
+    vcf_freebayes = Channel.empty()
+    vcf_manta = Channel.empty()
+    vcf_mpileup = Channel.empty()
+    vcf_mutect2 = Channel.empty()
+    vcf_strelka = Channel.empty()
+    vcf_tiddit = Channel.empty()
 
     // MPILEUP
     if (tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec')) {
@@ -109,6 +111,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
         versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions)
     }
 
+    // MSISENSOR
+    if (tools.split(',').contains('msisensorpro') && msisensorpro_baseline) {
+        MSISENSORPRO_MSITUMORONLY(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_baseline)
+
+        versions = versions.mix(MSISENSORPRO_MSITUMORONLY.out.versions)
+    }
+
     // MUTECT2
     if (tools.split(',').contains('mutect2')) {
         BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2(
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
index 772af47b37..a7e7614310 100644
--- a/subworkflows/local/prepare_genome/main.nf
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -12,7 +12,6 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf-
 include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main'
 include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable/main'
 include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main'
-include { MSISENSORPRO_SCAN } from '../../../modules/nf-core/msisensorpro/scan/main'
 include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main'
 include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main'
@@ -50,7 +49,6 @@ workflow PREPARE_GENOME {
     DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap
 
     GATK4_CREATESEQUENCEDICTIONARY(fasta)
-    MSISENSORPRO_SCAN(fasta)
     SAMTOOLS_FAIDX(fasta, [ [ id:'no_fai' ], [] ] )
 
     // the following are flattened and mapped in case the user supplies more than one value for the param
@@ -105,7 +103,6 @@ workflow PREPARE_GENOME {
     versions = versions.mix(BWAMEM2_INDEX.out.versions)
     versions = versions.mix(DRAGMAP_HASHTABLE.out.versions)
     versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
-    versions = versions.mix(MSISENSORPRO_SCAN.out.versions)
     versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
     versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions)
     versions = versions.mix(TABIX_DBSNP.out.versions)
@@ -125,7 +122,6 @@ workflow PREPARE_GENOME {
     germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi
     known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
     known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
-    msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list
     pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi
 
     allele_files // path: allele_files
diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf
index 245bfaec1a..1ca34be787 100644
--- a/subworkflows/local/samplesheet_to_channel/main.nf
+++ b/subworkflows/local/samplesheet_to_channel/main.nf
@@ -153,7 +153,7 @@ workflow SAMPLESHEET_TO_CHANNEL{
         }
 
         input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples
-            def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro']
+            def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller']
             def requested_tools_requiring_normal_samples = []
             tools_requiring_normal_samples.each{ tool_requiring_normal_samples ->
                 if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples)
@@ -164,7 +164,7 @@ workflow SAMPLESHEET_TO_CHANNEL{
         }
     }
 
-    // Fails when wrongfull extension for intervals file
+    // Fails when wrongful extension for intervals file
     if (wes && !step == 'annotate') {
         if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data")
         else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.")
diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
index 23415aed48..32f95c5524 100644
--- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
@@ -109,6 +109,8 @@ workflow PIPELINE_INITIALISATION {
         params.known_snps,
         params.known_snps_tbi,
         params.mappability,
+        params.msisensorpro_scan,
+        params.msisensorpro_baseline,
         params.multiqc_config,
         params.ngscheckmate_bed,
         params.pon,
diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf
index 90307f19c2..09c639d4e9 100644
--- a/workflows/sarek/main.nf
+++ b/workflows/sarek/main.nf
@@ -127,6 +127,7 @@ workflow SAREK {
     loci_files
     mappability
     msisensorpro_scan
+    msisensorpro_baseline
     ngscheckmate_bed
     pon
     pon_tbi
@@ -792,6 +793,7 @@ workflow SAREK {
                 intervals_bed_combined,
                 intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi
                 mappability,
+                msisensorpro_baseline,
                 pon,
                 pon_tbi,
                 params.joint_mutect2,