plots fixed

linsalrob · Aug 12, 2024 · e19b616 · e19b616
1 parent 006353f
commit e19b616
Show file tree

Hide file tree

Showing 8 changed files with 29 additions and 14 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,8 +6,6 @@ sphae-Bu*
 sphae-Bf*
 
 sphae/workflow/conda/
-sphae.sh
-sphae.out
 .snakemake/
 sphae/.snakemake
 sphae/workflow/databases/

diff --git a/Changes.md b/Changes.md
@@ -2,6 +2,7 @@
 
 ## v1.4.4
 - adding the option to run pharokka with --pyrodigal-gv to test for alternate coding genes
+- sphae plots fix
 
 ## v1.4.3
 - Summary file update

diff --git a/README.md b/README.md
@@ -25,7 +25,6 @@
 The steps that sphae takes are shown here:
 <p align="center">
   <img src="img/sphae_steps.png#gh-light-mode-only" width="300">
-  <img src="img/sphae_stepsdark.png#gh-dark-mode-only" width="300">
 </p>
 
 This Snakemake workflow was built using Snaketool [https://doi.org/10.1371/journal.pcbi.1010705] to assemble and annotate phage sequences. Currently, this tool is being developed for phage genomes. The steps include:
@@ -55,7 +54,7 @@ pip install sphae
 conda create -y -n sphae 
 conda activate sphae
 #install sphae
-conda install sphae
+mamba install sphae
 ```
 **Source Install**
 
@@ -124,9 +123,6 @@ sphae run --input tests/data/nanopore-subset --sequencing longread --output exam
 #For newer ONT sequencing data where polishing is not required, run the command
 sphae run --input tests/data/nanopore-subset --sequencing longread --output example -k --no_medaka
 
-#For PacBio sequencing data, run the longread sequencing with no polishing
-sphae run --input <pacbio sequencing>  --sequencing longread --output example -k --no_medaka
-
 #To run either of the commands on the cluster, add --executor slurm to the command. There is a little bit of setup to do here.
 #Set up a ~/.config/snakemake/slurm/config.yaml file - https://snakemake.github.io/snakemake-plugin-catalog/plugins/executor/slurm.html#advanced-resource-specifications
 #I may have set this workflow to run only on slurm right now; I will make it more generic soon.
@@ -210,8 +206,7 @@ Genome summary file includes the following information to help,
 
     Note: Currently, Sphae runs Phold in CPU mode, but efforts are underway to support Phold GPU mode for faster processing of this step.
 
-8. Adding new tools to workflow
-9. How to change the number of base pairs to subsample for a sample?
+8. How to change the number of base pairs to subsample for a sample?
     Run the command `sphae config`
     This copies the config file within the workflow to the current directory. Open this file and update the line `bases: 10000000` to, for instance, `bases: 300000`
     Then run sphae with the command `sphae run --input tests/data/illumina-subset --output example -k --config <path to the config file with the change>`

diff --git a/sphae.sh b/sphae.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+#SBATCH --job-name=sphae-medaka
+#SBATCH --mail-type=ALL
+#SBATCH --output=%x-%j.out.txt
+#SBATCH --error=%x-%j.err.txt
+#SBATCH --time=1-0
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=64
+#SBATCH --mem=500G
+#SBATCH --partition=high-capacity
+#SBATCH --qos=hc-concurrent-jobs
+
+#sphae install
+#sphae run --input tests/data/illumina-subset --threads 64 -k 
+#sphae run --input tests/data/nanopore-subset --sequencing longread --threads 64 -k
+#sphae run --input tests/data/nanopore-subset --sequencing longread --threads 64 -k --no_medaka 
+#sphae annotate --genome tests/data/genome --threads 64
+
diff --git a/sphae/workflow/envs/phynteny.yaml b/sphae/workflow/envs/phynteny.yaml
@@ -5,6 +5,7 @@ channels:
     - defaults
 dependencies:
     - python==3.10.0
+    - wget >=1.21.4
     - pip 
     - pip:
         - numpy==1.26.4

diff --git a/sphae/workflow/envs/qc.yaml b/sphae/workflow/envs/qc.yaml
@@ -5,5 +5,6 @@ channels:
 dependencies:
     - fastp>=0.23.4
     - filtlong>=0.2.1
+    - pigz>=2.8
     - rasusa>=2.0.0
     - seqkit >=2.6.1
diff --git a/sphae/workflow/rules/789.annot.smk b/sphae/workflow/rules/789.annot.smk
@@ -121,7 +121,7 @@ rule phynteny_plotter:
         fasta=os.path.join(input_dir, PATTERN_LONG)
     params:
         gff3=os.path.join(dir_annot, "{sample}-phynteny", "phynteny.gff3"),
-        prefix="phynteny",
+        prefix="{sample}",
         output=os.path.join(dir_annot, "{sample}-phynteny", "plots")
     output:
         plot=os.path.join(dir_annot, "{sample}-phynteny", "plots", "{sample}.png")
@@ -134,10 +134,10 @@ rule phynteny_plotter:
         """
         if [[ -s {input.gbk} ]] ; then
             genbank_to -g {input.gbk} --gff3 {params.gff3}
-            phold plot -i {input.gbk} -f -p {wildcards.sample} -o {params.output}
-            touch {output.plot}
+            phold plot -i {input.gbk} -f -p {params.prefix} -o {params.output}
+            mv {params.output}/*.png {output.plot}
         else
-            touch {output.plot}
+            mv {params.output}/*.png {output.plot}
         fi
         """
 

diff --git a/sphae/workflow/scripts/summary.py b/sphae/workflow/scripts/summary.py
@@ -39,7 +39,7 @@ def copy_multiple_files(params):
         shutil.copy(gbk, new_gbk_path)
 
     # Find and copy _plot.png files with renaming
-    plot_files = glob.glob(f"{params['annot']}/phynteny-*/{params['sample']}_*_phynteny/*_plot.png")
+    plot_files = glob.glob(f"{params['annot']}/phynteny-*/{params['sample']}_*_phynteny/plots/*.png")
     for i, plt in enumerate(plot_files, start=1):
         samplenames = f"{params['sample']}_{i}"
         new_png_path = os.path.join(outdir, f"{samplenames}.png")