#! /usr/bin/env python
"""Compute non-overlapping windowed mean depth from ``samtools depth -a`` output.

Rows are read from stdin as whitespace-separated ``contig position depth``
records and one tab-separated ``contig<TAB>window_end<TAB>mean_depth`` line is
written to stdout per window.
"""
# Provenance: introduced as src/harpy/scripts/depthWindows.py in commit
# 1fb94a3ca2169d343e071d558b70dbfcb47bd192 ("working on a better coverage calc").

import argparse
import sys


def windowed_means(lines, windowsize):
    """Yield ``(contig, end_position, mean_depth)`` for each depth window.

    Windows are non-overlapping, start at position 1 of every contig, and span
    ``windowsize`` positions. The last window of a contig may be shorter; its
    mean is taken over the positions it actually spans and its reported end is
    the last position seen on that contig.

    Args:
        lines: Iterable of text rows, each ``contig position depth`` separated
            by whitespace. Blank rows are skipped.
        windowsize: Number of positions per window; must be >= 1.

    Yields:
        Tuples ``(contig, end_position, mean_depth)`` in input order.

    Raises:
        ValueError: If ``windowsize`` is smaller than 1, or a position/depth
            field is not an integer.
        IndexError: If a non-blank row has fewer than three fields.

    Note:
        Window boundaries are detected by ``position == end``, so the input is
        assumed to list every position of a contig consecutively starting at 1
        (the help text names ``samtools depth -a`` as the producer).
    """
    if windowsize < 1:
        raise ValueError("windowsize must be a positive integer")

    depth_sum = 0
    start = 1
    end = windowsize
    position = 0
    lastcontig = None

    for line in lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline) instead of crashing.
            continue
        contig = fields[0]

        if lastcontig is not None and contig != lastcontig:
            # The previous contig ended; emit its trailing partial window, but
            # only if it is non-empty. When the contig length is an exact
            # multiple of the window size, start == position + 1 and there is
            # nothing left to report (and nothing to divide by).
            covered = position + 1 - start
            if covered > 0:
                yield lastcontig, position, depth_sum / covered
            depth_sum = 0
            start = 1
            end = windowsize

        position = int(fields[1])
        depth_sum += int(fields[2])

        if position == end:
            yield contig, end, depth_sum / windowsize
            # Start the next window from a clean sum so means do not accumulate
            # across windows.
            depth_sum = 0
            start = end + 1
            end = start + windowsize - 1
        lastcontig = contig

    # Emit the trailing partial window of the final contig, if any.
    if lastcontig is not None:
        covered = position + 1 - start
        if covered > 0:
            yield lastcontig, position, depth_sum / covered


def main():
    """Parse the command line, then stream windowed means from stdin to stdout."""
    parser = argparse.ArgumentParser(
        prog='depthWindows.py',
        description='Reads the output of samtools depth -a from stdin and calculates a windowed mean.',
    )
    parser.add_argument(
        'windowsize',
        type=int,
        help="The window size to use to calculate mean depth over (non-overlapping)",
    )
    args = parser.parse_args()
    if args.windowsize < 1:
        parser.error("windowsize must be a positive integer")

    for contig, end, mean in windowed_means(sys.stdin, args.windowsize):
        print(f"{contig}\t{end}\t{mean}", file=sys.stdout)


if __name__ == "__main__":
    main()