-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge_blocks.py
107 lines (88 loc) · 4.38 KB
/
merge_blocks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import argparse
import numpy as np
import csv
import os
import sys
def par():
parser = argparse.ArgumentParser(prog="merge_blocks.py")
parser.add_argument("-t","--txt",dest="txt",help="TSV file containing blocks generated by allele_blocks.py",default=None)
parser.add_argument("-n","--numsnps",dest="numsnps",help="Maximum distance for merging two blocks (SNPs). Default = 50.",default=50,type=int,choices=range(2,10000),metavar="{2..10000}")
parser.add_argument("-l","--length",dest="length",help="Maximum distance for merging two blocks (bp). Default = 2000000.",default=2000000,type=int,choices=range(0,20000000),metavar="{0..20000000}")
parser.add_argument("-o","--outfolder",dest="outfolder",help="Folder name for output",default='recomboutput')
args = parser.parse_args()
if args.txt is not None:
intxt = args.txt
elif args.txt is None:
dir = os.path.join(args.outfolder,'1.allele_blocks')
for file in os.listdir(dir):
if file.endswith('_blocks.txt'):
intxt = os.path.join(dir,file)
elif intxt is None:
print("Error: no blocks file found.")
sys.exit(1)
outfolder = args.outfolder
outfolder = outfolder+"/2.merge_blocks/"
if not os.path.isdir(outfolder):
os.makedirs(outfolder)
print("\n\tInput: ",intxt)
print("\tOutput: ",outfolder)
print("\n\tMerging blocks within {} bp and {} SNPs".format(args.length,args.numsnps))
recombdata = csv.reader(open(intxt), delimiter='\t')
next(recombdata)
recd2 = list(recombdata)
x=0
for i in recd2:
del i[9]
x+=1
chroms = set([row[0] for row in recd2])
illegal=["Pan_BP_CH","chrUn"]
chr2=[]
for chr in chroms:
if chr in illegal:
continue
else:
chr2.append(chr)
return recd2, intxt, chr2, outfolder, args.length, args.numsnps
def reduce(recd2, intxt, chr2, outfolder, length, numsnps): #need to add check for -1 or 2 allele calls and skip these or adjust in recombplotter. Better to do here so frequency of COs is adjusted
outfile = os.path.join(outfolder,os.path.basename(intxt)[:-4]+"_merged.txt")
count = 0
i=0
print("\n\tInitial number of blocks: ",len(recd2),"\n")
while i < len(recd2)-1:
if recd2[i][0] in chr2: #check chromosome name, if not in list then delete row
if recd2[i][0] == recd2[i+1][0]: #compare row and next row chromosome, if same then procede
if recd2[i][1] == recd2[i+1][1]: #compare row and next row individual, if same then procede
if recd2[i][2] == recd2[i+1][2]: #compare row and next row allele, if same then procede
if int(recd2[i][6]) + int(length) >= int(recd2[i+1][5]):
if int(recd2[i][4]) + int(numsnps) >= int(recd2[i+1][3]):
recd2[i][6] = recd2[i+1][6] #row end pos set to next rows end pos
recd2[i][7] = int(recd2[i][7])+int(recd2[i+1][7]) #row length set to sum of row and next row length
recd2[i][4] = recd2[i+1][4] #row block end set to next row block end
recd2[i][8] = int(recd2[i][8])+int(recd2[i+1][8]) #row number of SNPs set to sum of row and next row Num SNPs
del recd2[i+1] #next row is deleted
count+=1
i-=1
else:
print("\t",recd2[i][0],"done")
else:
del recd2[i]
i-=1
i+=1
print("\n\tNew number of blocks: ",len(recd2))
print("\tRemoved {} blocks".format(count))
head = ["Chr","Sample","Allele","Block start","Block end","Start pos","End pos","Length","Num SNPs"]
with open(outfile,'w+') as out_rc:
writer = csv.writer(out_rc, delimiter="\t")
writer.writerow(head)
for row in recd2:
writer.writerow(row)
out_rc.close()
print("\tSaved as", outfile)
return recd2
def main():
print("merge_blocks.py:")
a,b,c,d,e,f = par()
reduce(a,b,c,d,e,f)
print("\nmerge_blocks.py finished.\n")
if __name__ == '__main__':
main()