-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreference.fa.filter.py
executable file
·49 lines (40 loc) · 1.28 KB
/
reference.fa.filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python
########################################################
########################################################
########################################################
#Stephen A. Sefick
#July 15, 2016
#Use this script to remove sequences from a
#fasta file that match a supplied pattern
#example usage:
#./reference.fa.filter.py ">Unknown" dp4.fa.masked out
#">Unknown" is the pattern (a regular expression, anchored at the start of the line)
#dp4.fa.masked is the reference genome
#out is the outfile
########################################################
########################################################
########################################################
#import regular expression and system modules
import re
import sys
def filter_fasta(remove_pattern, in_path, out_path):
    """Copy a FASTA file, dropping every record whose header matches a pattern.

    A record is a header line (a line starting with ">") together with all
    lines that follow it up to, but not including, the next header line.
    The decision to keep or drop is made once per header and then applied to
    the whole record, so the sequence lines of a removed record are removed
    too instead of being left behind under the previous record.

    Args:
        remove_pattern: regular expression source string. It is tested with
            ``match``, i.e. anchored at the start of the header line, so it
            normally begins with ">" (for example ">Unknown").
        in_path: path of the FASTA file to read.
        out_path: path of the filtered FASTA file to write (overwritten).

    Returns:
        The number of records that were removed.

    Raises:
        re.error: if ``remove_pattern`` is not a valid regular expression.
        IOError/OSError: if either file cannot be opened.
    """
    # Compile before opening anything so an invalid pattern does not leave
    # an empty output file behind.
    pattern = re.compile(remove_pattern)

    # Lines that appear before the first header belong to no record and are
    # passed through unchanged.
    keep_record = True
    removed = 0

    with open(in_path, "r") as infile, open(out_path, "w") as outfile:
        for line in infile:
            if line.startswith(">"):
                # A header opens a new record: decide its fate here, once.
                keep_record = pattern.match(line) is None
                if not keep_record:
                    removed += 1
            if keep_record:
                outfile.write(line)

    return removed


def main(argv=None):
    """Command-line entry point: PATTERN INPUT_FASTA OUTPUT_FASTA.

    Extra trailing arguments are ignored, as before; missing arguments now
    produce a usage message instead of a bare IndexError.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit("usage: reference.fa.filter.py PATTERN INPUT_FASTA OUTPUT_FASTA")
    filter_fasta(argv[1], argv[2], argv[3])


if __name__ == "__main__":
    main()