-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathcv.py
55 lines (47 loc) · 1.46 KB
/
cv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from __future__ import print_function
from collections import defaultdict
from decimal import *
import os
import re
class CV(object):
    """Class for operating on cross-validation (CV) values from admixture.

    Parsed values are accumulated in ``self.d``, a ``defaultdict(list)`` that
    maps a key string to the list of ``Decimal`` CV values found for it.
    ``parseText`` keys values by the digits of the ``K=<n>`` tag on each line;
    ``readMinor`` keys them by a label derived from the file name.
    """

    # Matches the "K=<digits>" tag carried by each CV line; compiled once
    # instead of on every line.
    _KVAL_RE = re.compile(r'(?P<kval>(K=\d+))')

    def __init__(self, f):
        """Store the input path ``f``; nothing is read until ``readText``."""
        self.infile = f
        self.d = defaultdict(list)

    @staticmethod
    def _cv_value(line):
        """Return the last whitespace-separated token of ``line`` as a Decimal.

        Returns ``None`` when the line is blank (silently) or when the token
        is "-nan" (after printing a warning). Any other token that is not a
        valid number makes ``Decimal`` raise, exactly as before.
        """
        fields = line.split()
        if not fields:
            # Blank / whitespace-only line: nothing to parse. Previously this
            # crashed with IndexError on the pop() of an empty list.
            return None
        token = fields[-1]
        if token == "-nan":
            print("Warning: -NaN value found for a CV value.")
            return None
        return Decimal(token)

    def readText(self):
        """Read ``self.infile`` and feed its lines to ``parseText``."""
        with open(self.infile, 'r') as fh:
            content = fh.read().splitlines()
        self.parseText(content)

    def parseText(self, lines):
        """Parse CV lines and append each value to ``self.d`` under its K.

        For every line the last token is taken as the CV value and the digits
        following ``K=`` are used as the dictionary key. Blank lines are
        skipped, "-nan" values are reported and skipped, and lines with a
        value but no ``K=<n>`` tag are reported and skipped instead of
        raising ``AttributeError``.
        """
        for line in lines:
            val = self._cv_value(line)
            if val is None:
                continue
            match = self._KVAL_RE.search(line)
            if match is None:
                print("Warning: no K value found on CV line: " + line)
                continue
            knum = match.group('kval').split('=')[-1]
            self.d[knum].append(val)

    def readMinor(self):
        """Read every ``cv_file.MinClust.K*`` file in the working directory.

        Each file's values are stored in ``self.d`` under the key
        ``<third name part>.MinClust.<fourth name part>`` with every "K"
        removed (e.g. ``cv_file.MinClust.K2.3`` -> ``"2.MinClust.3"``).
        Files whose names have fewer than four dot-separated parts cannot
        yield such a key; they are reported and skipped rather than raising
        ``IndexError``.
        """
        path = os.getcwd()
        # Sorted so that the order of appended values does not depend on the
        # arbitrary order returned by os.listdir().
        for name in sorted(os.listdir(path)):
            if not name.startswith("cv_file.MinClust.K"):
                continue
            parts = name.split(".")
            if len(parts) < 4:
                print("Warning: skipping unexpectedly named CV file: " + name)
                continue
            knum = (parts[2] + ".MinClust." + parts[3]).replace("K", "")
            with open(os.path.join(path, name), 'r') as fh:
                content = fh.read().splitlines()
            for line in content:
                val = self._cv_value(line)
                if val is not None:
                    self.d[knum].append(val)

    def printText(self):
        """Print the accumulated key -> CV values mapping."""
        print(self.d)