From 9657e533663d8c24faaff63d60b2425cca202117 Mon Sep 17 00:00:00 2001 From: Vijini Mallawaarachchi Date: Wed, 9 Aug 2023 22:20:02 +0930 Subject: [PATCH] DEV: update postprocessing steps --- phables/workflow/rules/postprocess.smk | 5 ++++- .../workflow/scripts/format_koverage_results.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/phables/workflow/rules/postprocess.smk b/phables/workflow/rules/postprocess.smk index 5b46457..feee9db 100644 --- a/phables/workflow/rules/postprocess.smk +++ b/phables/workflow/rules/postprocess.smk @@ -43,12 +43,15 @@ rule koverage_postprocess: """Format TSV of samples and reads from Koverage""" input: koverage_tsv = os.path.join(OUTDIR, "results", "sample_coverage.tsv"), - samples_file = os.path.join(OUTDIR, "phables.samples.tsv") + samples_file = os.path.join(OUTDIR, "phables.samples.tsv"), + seq_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges.fasta") output: os.path.join(OUTDIR, "sample_genome_read_counts.tsv") params: koverage_tsv = os.path.join(OUTDIR, "results", "sample_coverage.tsv"), samples_file = os.path.join(OUTDIR, "phables.samples.tsv"), + seq_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges.fasta"), + info_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges_info.tsv"), output_path = OUTDIR, log = os.path.join(LOGSDIR, "format_koverage_results_output.log") log: diff --git a/phables/workflow/scripts/format_koverage_results.py b/phables/workflow/scripts/format_koverage_results.py index cbfd8cf..d3944ca 100644 --- a/phables/workflow/scripts/format_koverage_results.py +++ b/phables/workflow/scripts/format_koverage_results.py @@ -7,6 +7,7 @@ import logging import os import subprocess +from Bio import SeqIO from collections import defaultdict import pandas as pd @@ -25,6 +26,8 @@ def main(): samples_file = snakemake.params.samples_file koverage_tsv = snakemake.params.koverage_tsv + seq_file = snakemake.params.seq_file + info_file = snakemake.params.info_file output_path = snakemake.params.output_path log = snakemake.params.log @@ -132,6 +135,18 @@ def main(): f"Estimated mean read depth of resolved genomes can be found in {output_path}sample_genome_mean_coverage.tsv" ) + + # Make sequence information file + with open(info_file, "w") as myfile: + myfile.write(f"contig_phables_name\tlength\tcontig_or_phables\n") + for index, record in enumerate(SeqIO.parse(seq_file, "fasta")): + if "phage_comp" in record.id: + myfile.write(f"{record.id}\t{len(record.seq)}\tphables\n") + else: + myfile.write(f"{record.id}\t{len(record.seq)}\tcontig\n") + + logger.info(f"Sequence information file can be found in {info_file}") + # Exit program # --------------