diff --git a/phables/workflow/rules/postprocess.smk b/phables/workflow/rules/postprocess.smk index 5b46457..feee9db 100644 --- a/phables/workflow/rules/postprocess.smk +++ b/phables/workflow/rules/postprocess.smk @@ -43,12 +43,15 @@ rule koverage_postprocess: """Format TSV of samples and reads from Koverage""" input: koverage_tsv = os.path.join(OUTDIR, "results", "sample_coverage.tsv"), - samples_file = os.path.join(OUTDIR, "phables.samples.tsv") + samples_file = os.path.join(OUTDIR, "phables.samples.tsv"), + seq_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges.fasta") output: os.path.join(OUTDIR, "sample_genome_read_counts.tsv") params: koverage_tsv = os.path.join(OUTDIR, "results", "sample_coverage.tsv"), samples_file = os.path.join(OUTDIR, "phables.samples.tsv"), + seq_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges.fasta"), + info_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges_info.tsv"), output_path = OUTDIR, log = os.path.join(LOGSDIR, "format_koverage_results_output.log") log: diff --git a/phables/workflow/scripts/format_koverage_results.py b/phables/workflow/scripts/format_koverage_results.py index cbfd8cf..d3944ca 100644 --- a/phables/workflow/scripts/format_koverage_results.py +++ b/phables/workflow/scripts/format_koverage_results.py @@ -7,6 +7,7 @@ import logging import os import subprocess +from Bio import SeqIO from collections import defaultdict import pandas as pd @@ -25,6 +26,8 @@ def main(): samples_file = snakemake.params.samples_file koverage_tsv = snakemake.params.koverage_tsv + seq_file = snakemake.params.seq_file + info_file = snakemake.params.info_file output_path = snakemake.params.output_path log = snakemake.params.log @@ -132,6 +135,18 @@ def main(): f"Estimated mean read depth of resolved genomes can be found in {output_path}sample_genome_mean_coverage.tsv" ) + + # Make sequence information file + with open(info_file, "w") as myfile: + myfile.write(f"contig_phables_name\tlength\tcontig_or_phables\n") + for index, record in enumerate(SeqIO.parse(seq_file, "fasta")): + if "phage_comp" in record.id: + myfile.write(f"{record.id}\t{len(record.seq)}\tphables\n") + else: + myfile.write(f"{record.id}\t{len(record.seq)}\tcontig\n") + + logger.info(f"Sequence information file can be found in {info_file}") + # Exit program # --------------