forked from stevemussmann/admixturePipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadmixture.py
100 lines (84 loc) · 2.67 KB
/
admixture.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import print_function
from syscall import SysCall
from DefaultListOrderedDict import DefaultListOrderedDict
import argparse
import csv
import json
import os
import os.path
import subprocess
import sys
import numpy as np
import zipfile
class Admixture():
'Class for executing Admixture commands'
def __init__(self, prefix, NP, minK, maxK, rep, cv):
self.prefix = prefix
self.NP = NP
self.minK = minK
self.maxK = maxK
self.rep = rep
self.cv = cv
self.qfiles = DefaultListOrderedDict()
def admix(self):
ks = range(self.minK, self.maxK+1)
#print(ks)
#for each k value
for i in ks:
for j in range(self.rep):
command_string = "admixture -j" + str(self.NP) + " -s " + str(np.random.randint(1000000)) + " --cv=" + str(self.cv) + " " + self.prefix + ".ped " + str(i)
#call Admixture
admixtureCall = SysCall(command_string)
admixtureCall.run_admixture(self.prefix,i,j)
#Manually re-name output files to include _j rep number
for filename in os.listdir("."):
fn = self.prefix + "." + str(i) + "."
if fn in filename:
oldname, extension = os.path.splitext(filename)
newname = oldname + "_" + str(j) + extension
if(extension.endswith("Q")):
self.qfiles[str(i)].append(newname)
os.rename(filename, newname)
# write dict of .Q files
jsonFile=self.prefix + ".qfiles.json"
with open(jsonFile, 'w') as json_file:
json.dump(self.qfiles, json_file)
def zipdir(self,path,ziph):
files = [f for f in os.listdir('.') if os.path.isfile(f)]
for root,dirs,files in os.walk(path, topdown=True):
[dirs.remove(d) for d in list(dirs)]
for f in files:
if f.endswith('.Q'):
ziph.write(os.path.join(root,f))
def create_zip(self):
zipf = zipfile.ZipFile('results.zip', 'w', zipfile.ZIP_DEFLATED)
self.zipdir('./', zipf)
zipf.close()
def print_cv(self):
print("Printing CV values...")
command="grep -h CV " + self.prefix + "*.stdout > " + self.prefix + "_cv_summary.txt"
grepCall = SysCall(command)
grepCall.run_program()
def loglik(self):
fh = open("loglik.txt", 'wb')
for fn in os.listdir("."):
if fn.endswith("stdout"):
temp = open(fn, 'r')
fnlist = fn.split("_")
fnlist2 = fnlist[-2].split(".")
kval = fnlist2[-1]
print(fnlist2)
for line in temp.readlines():
if line.startswith("Loglikelihood:"):
mylist = line.split()
#print(mylist)
fh.write(kval.encode())
fh.write("\t".encode())
fh.write(mylist[-1].encode())
fh.write("\n".encode())
temp.close()
fh.close()
print("Sorting log(likelihood) values...")
command="sort -n -k1 -o loglik.txt loglik.txt"
sortCall = SysCall(command)
sortCall.run_program()