diff --git a/LICENSE b/LICENSE
index a0f0ead..ca8d2b2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -23,4 +23,4 @@ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
\ No newline at end of file
+SUCH DAMAGE.
diff --git a/asm_annot.nf b/asm_annot.nf
index 2a8d0ba..2118828 100644
--- a/asm_annot.nf
+++ b/asm_annot.nf
@@ -25,12 +25,6 @@ log.info "Results can be found in : ${params.out_dir}"
 log.info "================================================="
 log.info ""
 
-// Needed to run on the Abel cluster
-preCmd = """
-if [ -f /cluster/bin/jobsetup ];
-then set +u; source /cluster/bin/jobsetup; set -u; fi
-"""
-
 // First, define the input data that go into input channels
 Channel
     .fromFilePairs( params.reads, size:params.setsize )
@@ -50,7 +44,6 @@ process run_fastqc {
     file "$pair_id" into fastqc_results
 
     """
-    ${preCmd}
     mkdir ${pair_id}
     fastqc -q ${reads} -o ${pair_id} -t $task.cpus
     """
@@ -68,7 +61,6 @@ process run_multiqc {
     file "multiqc_report.html" into multiqc_report
 
     """
-    ${preCmd}
     multiqc fastqc_output
     """
 }
@@ -88,7 +80,6 @@ process collate_data {
     set pair_id, file("${pair_id}*_concat.fq.gz") into (reads, pilon_reads)
 
     """
-    ${preCmd}
     cat ${pair_id}*R1* > ${pair_id}_R1_concat.fq.gz
     cat ${pair_id}*R2* > ${pair_id}_R2_concat.fq.gz
     """
@@ -111,7 +102,6 @@ process run_strip {
     file "${pair_id}_bbduk_output.log"
 
     """
-    ${preCmd}
     bbduk.sh threads=$task.cpus ref=${params.stripgenome} \
     in1=${pair_id}_R1_concat.fq.gz \
     in2=${pair_id}_R2_concat.fq.gz \
@@ -138,7 +128,6 @@ process run_trim {
     file "${pair_id}_concat_stripped_trimmed.log"
 
     """
-    ${preCmd}
     trimmomatic PE -threads $task.cpus -trimlog ${pair_id}_concat_stripped_trimmed.log ${pair_id}*_concat_stripped.fq.gz \
     -baseout ${pair_id}_trimmed.fq.gz ILLUMINACLIP:${params.adapter_dir}/${params.adapters}:${params.illuminaClipOptions} \
     SLIDINGWINDOW:${params.slidingwindow} \
@@ -169,7 +158,6 @@ process run_spadesasm {
     file "${pair_id}_spades.log"
 
     """
-    ${preCmd}
     spades.py ${params.careful} --cov-cutoff=${params.cov_cutoff} \
     -1 ${pair_id}_R1_concat_stripped_trimmed.fq.gz \
     -2 ${pair_id}_R2_concat_stripped_trimmed.fq.gz \
@@ -202,7 +190,6 @@ process run_bwamem {
     file("${pair_id}_mapped_sorted.bam.bai") into bwamem_results
 
     """
-    ${preCmd}
     bwa index ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta
     bwa mem -t $task.cpus ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \
     *.fq.gz | samtools sort -o ${pair_id}_mapped_sorted.bam -
@@ -230,7 +217,6 @@ process run_pilon {
     file "${pair_id}_pilon_spades.fasta" into asms_for_quast
 
     """
-    ${preCmd}
     export _JAVA_OPTIONS=$task.javaopts
     pilon --threads $task.cpus --genome ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \
     --bam ${pair_id}_mapped_sorted.bam --output ${pair_id}_pilon_spades \
@@ -252,7 +238,6 @@ process run_prokka {
     set pair_id, file("${pair_id}.*") into annotation_results
 
     """
-    ${preCmd}
     prokka --compliant --force --usegenus --cpus $task.cpus \
     --centre ${params.centre} --prefix ${pair_id} --locustag ${params.locustag} \
     --genus ${params.genus} --species ${params.species} \
@@ -278,9 +263,7 @@ process quast_eval {
     file quast_evaluation_all into quast_evaluation_all
 
     """
-    ${preCmd}
     quast --threads $task.cpus -o quast_evaluation_all \
-    -G ${params.quast_genes} -R ${params.quast_ref} \
-    --scaffolds ${asm_list}
+    -g ${params.quast_genes} -R ${params.quast_ref} ${asm_list}
     """
 }
diff --git a/bin/filetypes.py b/bin/filetypes.py
index c35764a..2bfff37 100644
--- a/bin/filetypes.py
+++ b/bin/filetypes.py
@@ -26,5 +26,3 @@ def lane(filename):
         if field.startswith("L00"):
             lane = field
     return lane
-
-
diff --git a/conf/asm_annot_template.config b/conf/asm_annot_template.config
index 4a497bc..89bcc47 100644
--- a/conf/asm_annot_template.config
+++ b/conf/asm_annot_template.config
@@ -25,8 +25,8 @@ params.help = false
 params.savemode = "copy"
 
 // BBDuk params, has to be absolute paths
-params.stripgenome = "/work/projects/nn9305k/genome_references/genomes/PhiX/PhiX.fasta"
-params.stripdir = "/work/projects/nn9305k/genome_references/bbmap_refs"
+params.stripgenome = "/cluster/projects/nn9305k/genome_references/genomes/PhiX/PhiX.fasta"
+params.stripdir = "/cluster/projects/nn9305k/genome_references/bbmap_refs"
 
 
 // Trimmomatic configuration variables
@@ -37,7 +37,7 @@ params.leading = 3
 params.trailing = 3
 params.minlen = 36
 params.adapters = "TruSeq3-PE.fa"
-params.adapter_dir = "/home/karinlag/bin/Trimmomatic-0.36/adapters"
+params.adapter_dir = "/cluster/projects/nn9305k/db_flatfiles/trimmomatic_adapters"
 
 
 // SPAdes configuration variables
@@ -58,6 +58,6 @@ params.centre = "NVI"
 
 
 // QUAST variables
-params.genome_directory = "/work/projects/nn9305k/genome_references/genomes/"
+params.genome_directory = "/cluster/projects/nn9305k/genome_references/genomes/"
 params.quast_ref = "${params.genome_directory}ecoli/GCF_000005845.2_ASM584v2_genomic.fna"
 params.quast_genes = "${params.genome_directory}ecoli/GCF_000005845.2_ASM584v2_genomic.gff"
diff --git a/conf/condaslurm.config b/conf/condaslurm.config
index fdedc3c..dce1f40 100644
--- a/conf/condaslurm.config
+++ b/conf/condaslurm.config
@@ -5,5 +5,5 @@
  */
 
 process {
-    conda = '/work/projects/nn9305k/src/anaconda3/envs/bifrost'
+    conda = '/cluster/projects/nn9305k/src/miniconda/envs/bifrost'
 }
diff --git a/conf/condastandard.config b/conf/condastandard.config
index 44a97da..e2d8e5a 100644
--- a/conf/condastandard.config
+++ b/conf/condastandard.config
@@ -6,5 +6,5 @@
  */
 
 process {
-    conda = '/home/karinlag/anaconda3/envs/bifrost'
+    conda = '/home/karinlag/src/anaconda3/envs/bifrost'
 }
diff --git a/conf/qc_track_template.config b/conf/qc_track_template.config
index 2a5c547..d0ec741 100644
--- a/conf/qc_track_template.config
+++ b/conf/qc_track_template.config
@@ -21,7 +21,7 @@ params.out_dir = "track_one"
 // General configuration variables
 params.pwd = "$PWD"
 params.help = false
-params.savemode = "link"
+params.savemode = "copy"
 
 // Raw fastqc results
 params.fastqc = "fastqc"
diff --git a/conf/slurm.config b/conf/slurm.config
index 0a3d20e..649ac51 100644
--- a/conf/slurm.config
+++ b/conf/slurm.config
@@ -19,12 +19,12 @@
 
 process {
     executor = 'slurm'
-    clusterOptions = '--job-name=nxf_test --account=nn9305k --mem-per-cpu=3140'
+    clusterOptions = '--job-name=nxf_test --account=nn9305k --mem-per-cpu=4700M'
     queueSize = 24
     maxRetries = 3
     errorStrategy='retry'
 
-    cpus = 16
+    cpus = 20
     withLabel: one {cpus = 1}
     //this is mostly for pilon, but can be used elsewhere too
 
diff --git a/conf/third_party_software.md b/conf/third_party_software.md
index 0d75c0e..46793dd 100644
--- a/conf/third_party_software.md
+++ b/conf/third_party_software.md
@@ -1,11 +1,11 @@
 # Third party software
 
-The Bifrost pipeline depends on several third party packages. 
+The Bifrost pipeline depends on several third party packages.
 These have to be made available to the pipeline in some way.
 The way that these are made available to the pipeline depends on
 which system the pipeline is being run on.
 
-Please note: not all of the software is used for all tracks. 
+Please note: not all of the software is used for all tracks.
 The track(s) that each software is used in is noted below.
 
 
@@ -27,7 +27,7 @@ There are currently three tracks:
 
 * Track Three: Trimming with trimmomatic followed by assembly with SPAdes.
 Trimming results are evaluated with MultiQC, and assemblies with QUAST
- 
+
 ## Profiles
 
 We currently have two profiles set up, standard and slurm.
@@ -36,16 +36,11 @@ We currently have two profiles set up, standard and slurm.
 This profile is used when running on a normal stand-alone computer.
 This assumes that all software is available on the command line,
 unless otherwise noted with a full path in
-the standard.config file. 
+the standard.config file.
 
 ### slurm.config
 This profile is used when running on a system that uses the slurm
 queue management system. At present, this also depends heavily on the module
 system. Any software not in the module
-system needs to either be available on the command line, or 
-should be specified using the full path. 
-
-
-
-
-
+system needs to either be available on the command line, or
+should be specified using the full path.
diff --git a/qc_track.nf b/qc_track.nf
index 1a09a16..24143c6 100644
--- a/qc_track.nf
+++ b/qc_track.nf
@@ -22,12 +22,6 @@ log.info "Results can be found in : ${params.out_dir}"
 log.info "================================================="
 log.info ""
 
-// Needed to run on the Abel cluster
-preCmd = """
-if [ -f /cluster/bin/jobsetup ];
-then set +u; source /cluster/bin/jobsetup; set -u; fi
-"""
-
 // First, define the input data that go into input channels
 Channel
     .fromFilePairs( params.reads, size:params.setsize )
@@ -48,7 +42,6 @@ process run_fastqc {
     file "$pair_id" into fastqc_results
 
     """
-    ${preCmd}
     mkdir ${pair_id}
     fastqc -q ${reads} -o ${pair_id} -t $task.cpus
     """
@@ -66,7 +59,6 @@ process run_multiqc {
     file "multiqc_report.html" into multiqc_report
 
     """
-    ${preCmd}
     multiqc fastqc_output
     """
 }
diff --git a/specific_genes.nf b/specific_genes.nf
index b3c84ff..b29b835 100644
--- a/specific_genes.nf
+++ b/specific_genes.nf
@@ -24,11 +24,6 @@ log.info "Results can be found in : ${params.out_dir}"
 log.info "================================================="
 log.info ""
 
-preCmd = """
-if [ -f /cluster/bin/jobsetup ];
-then set +u; source /cluster/bin/jobsetup; set -u; fi
-"""
-
 // First, define the input data that go into input channels
 Channel
     .fromFilePairs( params.reads, size:params.setsize )
@@ -55,7 +50,6 @@ process collate_data {
         (read_pairs_mlst, read_pairs_amr, read_pairs_vir)
 
     """
-    ${preCmd}
     cat ${pair_id}*R1* > ${pair_id}_R1_concat.fq.gz
     cat ${pair_id}*R2* > ${pair_id}_R2_concat.fq.gz
     """
@@ -77,7 +71,6 @@ process run_ariba_mlst_prep {
     params.do_mlst == "yes"
 
     """
-    ${preCmd}
     ariba pubmlstget "${params.mlst_scheme}" mlst_db
     """
 }
@@ -100,7 +93,6 @@ process run_ariba_mlst_pred {
     params.do_mlst == "yes"
 
    """
-    ${preCmd}
     ariba run --threads $task.cpus mlst_db/ref_db ${pair_id}_R*_concat.fq.gz ${pair_id}_ariba &> ariba.out
     echo -e "header\t" \$(head -1 ${pair_id}_ariba/mlst_report.tsv) > ${pair_id}_mlst_report.tsv
     echo -e "${pair_id}\t" \$(tail -1 ${pair_id}_ariba/mlst_report.tsv) >> ${pair_id}_mlst_report.tsv
@@ -123,7 +115,6 @@ process run_ariba_mlst_summarize {
     params.do_mlst == "yes"
 
     """
-    ${preCmd}
     cat ${pair_id_mlst_tsv} >> mlst_summarized_results_tmp.tsv
     head -1 mlst_summarized_results_tmp.tsv > mlst_summarized_results.tsv
     cat mlst_summarized_results_tmp.tsv | grep -v "ST" >> mlst_summarized_results.tsv
@@ -145,7 +136,6 @@ process run_ariba_amr_prep {
     params.do_amr == "yes"
 
     """
-    ${preCmd}
     ariba getref ${params.amr_db} amr_db
     ariba prepareref -f amr_db.fa -m amr_db.tsv db_amr_prepareref
     """
@@ -168,7 +158,6 @@ process run_ariba_amr_pred {
 
 
     """
-    ${preCmd}
     ariba run --threads $task.cpus db_amr_prepareref ${pair_id}_R*_concat.fq.gz ${pair_id}_ariba &> ariba.out
     cp ${pair_id}_ariba/report.tsv ${pair_id}_amr_report.tsv
 
@@ -191,7 +180,6 @@ process run_ariba_amr_summarize {
     params.do_amr == "yes"
 
     """
-    ${preCmd}
     ariba summary amr_summarized ${pair_id_amr_tsv}
     """
 }
@@ -209,7 +197,6 @@ process run_ariba_vir_prep {
     params.do_vir == "yes"
 
     """
-    ${preCmd}
     ariba getref ${params.vir_db} vir_db
     ariba prepareref -f vir_db.fa -m vir_db.tsv db_vir_prepareref
     """
@@ -231,7 +218,6 @@ process run_ariba_vir_pred {
     params.do_vir == "yes"
 
     """
-    ${preCmd}
     ariba run --threads $task.cpus db_vir_prepareref ${pair_id}_R*_concat.fq.gz \
     ${pair_id}_ariba &> ariba.out
     cp ${pair_id}_ariba/report.tsv ${pair_id}_vir_report.tsv
@@ -255,7 +241,6 @@ process run_ariba_vir_summarize {
     params.do_vir == "yes"
 
     """
-    ${preCmd}
     ariba summary vir_summarized ${pair_id_vir_tsv}
     """
 }
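
Usage sketch (illustrative, not part of the patch): with the jobsetup preamble removed and the reference and conda paths moved from /work/projects to /cluster/projects, the workflows rely entirely on the Nextflow profiles for their environment. Assuming the slurm profile named in conf/third_party_software.md is wired up in the repository's nextflow.config, a run against the updated configuration could look roughly like the command below; the config file, reads glob and output directory are placeholders, not values taken from this change.

    nextflow run asm_annot.nf -c conf/asm_annot_template.config -profile slurm \
        --reads 'raw_data/*_R{1,2}_001.fastq.gz' --out_dir asm_results

Here --reads and --out_dir override params.reads and params.out_dir used by the workflow; any local config still pointing at /work/projects paths would need the same /cluster update as the files touched in this diff.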