Skip to content

Commit

Permalink
Merge branch 'main' of github.com:linsalrob/sphae
Browse files: browse the repository at this point in the history
  • Loading branch information
linsalrob committed Nov 30, 2023
2 parents 5ce0e93 + f1d50e9 commit ec85f0a
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 9 deletions.
2 changes: 2 additions & 0 deletions sphae/workflow/rules/2.targets.smk
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ if config.args.sequencing == 'paired':
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-pr", "{sample}_top_hits_mash_inphared.tsv"),sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.final, "{sample}-pr", "{sample}_summary.txt"), sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-pr", "{sample}_length_gc_cds_density.tsv"), sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-pr", "{sample}_cds_functions.tsv"), sample=samples.names))
elif config.args.sequencing == 'longread':
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-sr", "{sample}.gbk"), sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-sr", "{sample}_pharokka_plot.png"), sample=samples.names))
Expand All @@ -52,3 +53,4 @@ elif config.args.sequencing == 'longread':
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-sr", "{sample}_top_hits_mash_inphared.tsv"),sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.final, "{sample}-sr", "{sample}_summary.txt"), sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-sr", "{sample}_length_gc_cds_density.tsv"), sample=samples.names))
targets.annotate.append(expand(os.path.join(dir.pharokka, "{sample}-sr", "{sample}_cds_functions.tsv"), sample=samples.names))
2 changes: 2 additions & 0 deletions sphae/workflow/rules/3.qc_qa.smk
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,6 @@ rule trimnami:
{params.host} \
{params.profile} \
--log {log}
touch {output}
"""
10 changes: 8 additions & 2 deletions sphae/workflow/rules/7.pharokka.smk
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ rule pharokka_megahit:
vfdb=os.path.join(dir.pharokka, "{sample}-pr", "top_hits_vfdb.tsv"),
spacers=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_minced_spacers.txt"),
taxa=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_length_gc_cds_density.tsv")
cdden=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_length_gc_cds_density.tsv"),
cds=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_cds_functions.tsv")
conda:
os.path.join(dir.env, "pharokka.yaml")
threads:
Expand Down Expand Up @@ -59,6 +60,7 @@ rule pharokka_megahit:
touch {output.spacers}
touch {output.taxa}
touch {output.cdden}
touch {output.cds}
else
touch {output.gbk}
touch {output.plot}
Expand All @@ -67,6 +69,7 @@ rule pharokka_megahit:
touch {output.spacers}
touch {output.taxa}
touch {output.cdden}
touch {output.cds}
fi
"""

Expand Down Expand Up @@ -99,7 +102,8 @@ rule pharokka_flye:
vfdb=os.path.join(dir.pharokka, "{sample}-sr", "top_hits_vfdb.tsv"),
spacers=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_minced_spacers.txt"),
taxa=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_length_gc_cds_density.tsv")
cdden=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_length_gc_cds_density.tsv"),
cds=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_cds_functions.tsv")
conda:
os.path.join(dir.env, "pharokka.yaml")
threads:
Expand Down Expand Up @@ -128,6 +132,7 @@ rule pharokka_flye:
touch {output.spacers}
touch {output.taxa}
touch {output.cdden}
touch {output.cds}
else
touch {output.gbk}
touch {output.plot}
Expand All @@ -136,5 +141,6 @@ rule pharokka_flye:
touch {output.spacers}
touch {output.taxa}
touch {output.cdden}
touch {output.cds}
fi
"""
2 changes: 2 additions & 0 deletions sphae/workflow/rules/8.final-reporting.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ rule summarize_paired:
plot=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_pharokka_plot.png"),
ph_taxa =os.path.join(dir.pharokka, "{sample}-pr", "{sample}_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_length_gc_cds_density.tsv"),
cds=os.path.join(dir.pharokka, "{sample}-pr", "{sample}_cds_functions.tsv")
output:
summary=os.path.join(dir.final, "{sample}-pr", "{sample}_summary.txt")
params:
Expand All @@ -39,6 +40,7 @@ rule summarize_longread:
spacers=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_minced_spacers.txt"),
ph_taxa =os.path.join(dir.pharokka, "{sample}-sr", "{sample}_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_length_gc_cds_density.tsv"),
cds=os.path.join(dir.pharokka, "{sample}-sr", "{sample}_cds_functions.tsv")
output:
summary=os.path.join(dir.final, "{sample}-sr", "{sample}_summary.txt")
params:
Expand Down
48 changes: 41 additions & 7 deletions sphae/workflow/scripts/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pandas as pd

def copy_files(input_files, params):
shutil.copy(input_files['genome'], 'genomes')
shutil.copy(input_files['gbk'], 'gbks')
shutil.copy(input_files['plot'], 'plots')
shutil.copy(input_files['genome'], params['genomes'])
shutil.copy(input_files['gbk'], params['gbks'])
shutil.copy(input_files['plot'], params['plots'])

def generate_summary(input_files, output_summary, params):
with open(output_summary, 'w') as summary:
Expand All @@ -15,9 +15,10 @@ def generate_summary(input_files, output_summary, params):
if line_count > 1:
with open(input_files['table'], 'r') as table_file:
lines = table_file.readlines()
summary.write(f"Length: {lines[1].split(',')[12]}\n")
summary.write(f"Coding density: {lines[1].split(',')[4]}\n")
summary.write(f"Circular: {lines[1].split(',')[13]}\n")
#print (lines[1])
summary.write(f"Length: {lines[1].split(',')[2]}\n")
summary.write(f"Circular: {lines[1].split(',')[3]}\n")
summary.write(f"Graph connections: {lines[1].split(',')[4]}\n")
summary.write(f"Completeness: {lines[1].split(',')[20]}\n")
summary.write(f"Contamination: {lines[1].split(',')[22]}\n")

Expand All @@ -27,6 +28,19 @@ def generate_summary(input_files, output_summary, params):
for index, row in tax.iterrows():
summary.write(f"{row['Description']}\t{row['mash_matching_hashes']}\n")

with open(input_files['cds'], 'r') as cds:
cds_df=pd.read_csv(cds, sep='\t')
cds_data = cds_df[cds_df['Description'] == 'CDS']
count_value = cds_data['Count'].values[0]
summary.write(f"Number of CDS: {count_value}\n")

with open(input_files['cdden'], 'r') as cdden:
cdn=pd.read_csv(cdden, sep='\t')
gc_percent = cdn['gc_perc'].values[0]
coding_density = cdn['cds_coding_density'].values[0]
summary.write(f"GC percent: {gc_percent}%\n")
summary.write(f"Coding density: {coding_density}\n")

if 'integra' in open(input_files['gbk']).read():
summary.write("Integrase found, below is the gene name found\n")
with open(input_files['gbk'], 'r') as gbk_file:
Expand All @@ -35,6 +49,25 @@ def generate_summary(input_files, output_summary, params):
summary.write(line)
else:
summary.write("No integrase\n")

if 'recombinase' in open(input_files['gbk']).read():
summary.write("Recombinase found, below is the gene name found\n")
with open(input_files['gbk'], 'r') as gbk_file:
for line in gbk_file:
if 'recombinase' in line:
summary.write(line)
else:
summary.write("No Recombinase\n")

if 'transposase' in open(input_files['gbk']).read():
summary.write("Transposase found, below is the gene name found\n")
with open(input_files['gbk'], 'r') as gbk_file:
for line in gbk_file:
if 'transposase' in line:
summary.write(line)
else:
summary.write("No Transposases\n")


if len(open(input_files['amr']).readlines()) == 1:
summary.write("No AMR genes\n")
Expand Down Expand Up @@ -88,7 +121,8 @@ def analyze_assembly(input_files, output_summary, params):
'vfdb': snakemake.input.vfdb,
'spacers': snakemake.input.spacers,
'taxa': snakemake.input.ph_taxa,
'cdden': snakemake.input.cdden
'cdden': snakemake.input.cdden,
'cds': snakemake.input.cds
}

output_summary = snakemake.output.summary
Expand Down

0 comments on commit ec85f0a

Please sign in to comment.