Skip to content

Commit

Permalink
Add a check for presence of data and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
verku committed Nov 14, 2024
1 parent 2f33c8f commit 683d5d3
Showing 1 changed file with 21 additions and 5 deletions.
26 changes: 21 additions & 5 deletions workflow/scripts/filter_mpile.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,34 @@
#!/usr/bin/python3

# Author: Tom van der Valk
# Author: Tom van der Valk (modified by Diana Robledo-Ruiz and Verena Kutschera)

# Filters a mpileup file for further processing in the GERP pipeline, produced with
# `samtools mpileup -aa -r contigname --no-output-ends sample.bam`

import sys
import operator

for line in sys.stdin:
splitted = line.strip().split("\t")

# Check if at least one sample is present in the mpileup
if len(splitted) < 5:
print("Error: Invalid input line:", line)
sys.stderr.write("Error: Invalid input line:" + line + '\n')
break # Stop the loop if invalid line

# Process column 5 containing the mapped read bases
nucleotide = splitted[4].upper()
A,C,T,G = nucleotide.count("A"), nucleotide.count("C"), nucleotide.count("T"), nucleotide.count("G")

# Print the nucleotide if it is A, C, T, or G, and if only one read mapped at the position
if A+C+T+G == 1 and len(nucleotide) == 1:
print (nucleotide)
print(nucleotide)

# Remove any positions other than one of the four bases, e.g. deletions indicated by "*"
elif A+C+T+G == 0:
print ("N")
print("N")

# Remove any other read bases, including those followed by indels (indicated by "+" and an integer and sequence, or "-" and an integer)
else:
print ("N")
print("N")

0 comments on commit 683d5d3

Please sign in to comment.