-
Notifications
You must be signed in to change notification settings - Fork 1
/
linear-reg.py
87 lines (66 loc) · 2.27 KB
/
linear-reg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import json
import csv
import numpy as np
import os
import pandas as pd
import random
import tensorflow as tf
from functions import quan_detector, most_repeared_promoter,dataset
from sklearn.metrics import confusion_matrix
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
# Seed every random number generator this script touches so that runs are
# repeatable.
np.random.seed(42)
tf.set_random_seed(42)
random.seed(42)

# Label table: the code below reads its `FID` column (individual ids) and its
# `Pheno` column (phenotype codes).
labels_file = 'labes.csv'
labels_df = pd.read_csv(labels_file, index_col=0)
ids_csv = labels_df.FID.tolist()

# One-hot encode the phenotype codes. The labels do not depend on the promoter
# being read, so they are built once here rather than once per promoter file.
# A code other than 1 or 2 raises KeyError.
lab_num = {1: [1, 0],  # positive
           2: [0, 1]}  # negative
pheno_new = [lab_num[code] for code in labels_df.Pheno.tolist()]
dataset_Y = np.array(pheno_new)

# Read one JSON file per promoter. Each file maps an individual id (as a
# string) to a sequence of values that are converted to int.
promoters_list = range(1, 2484)
dataset_X = []
for promoter_num in promoters_list:
    promoter_file = 'promoters/chr22_' + str(promoter_num) + '.json'
    with open(promoter_file) as json_data:
        ind_var = json.load(json_data)
    # Follow the id order of the label table so that row i of the features
    # and row i of dataset_Y describe the same individual. The explicit list
    # comprehension yields real lists on Python 2 and Python 3 alike.
    var_num = [[int(value) for value in ind_var[str(fid)]] for fid in ids_csv]
    dataset_X.append(var_num)

# NOTE(review): at this point dataset_X is indexed
# [promoter][individual][position]. A plain reshape does not move the
# individual axis to the front -- confirm whether a transpose(1, 0, 2) is
# needed before reshaping, and that the hard-coded sizes (11908, 64*2580)
# match the promoter range read above.
dataset_X = np.array(dataset_X).reshape(11908, 64*2580, 1)
N = len(dataset_X)
# Baseline 1: ordinary least squares fitted on the one-hot labels. The
# predicted class is the output column with the larger fitted value.
# `dataset` is the project helper imported from `functions`; it is called with
# test_ratio=0.1 and its result is unpacked in the order shown.
x_train, y_train, x_test, y_test = dataset(dataset_X, dataset_Y, test_ratio=0.1)

# dataset_X is reshaped to three dimensions upstream, while scikit-learn
# estimators accept at most 2-D (n_samples, n_features) input. Flatten
# everything after the sample axis; this is a no-op when the helper already
# returns 2-D arrays.
x_train = np.asarray(x_train).reshape(len(x_train), -1)
x_test = np.asarray(x_test).reshape(len(x_test), -1)

# Create linear regression object and train it on the training split
regr = linear_model.LinearRegression()
regr.fit(x_train, y_train)

# Turn the two-column predictions and the one-hot test labels into class
# indices (0 or 1).
y_pred = np.argmax(regr.predict(x_test), axis=1)
y_test_num = np.argmax(y_test, axis=1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpack cannot fail when
# only one class shows up in the test labels and predictions.
tn, fp, fn, tp = confusion_matrix(y_test_num, y_pred, labels=[0, 1]).ravel()
acc = (tp + tn) * 1. / (tp + fp + tn + fn)
print(acc)
# Baseline 2: logistic regression. Collapse the one-hot label rows into class
# indices (0 or 1), since the classifier is trained on a 1-D target.
dataset_Y = np.argmax(dataset_Y, axis=-1)
x_train, y_train, x_test, y_test = dataset(dataset_X, dataset_Y, test_ratio=0.1)

# dataset_X is reshaped to three dimensions upstream, while scikit-learn
# estimators accept at most 2-D (n_samples, n_features) input. Flatten
# everything after the sample axis; this is a no-op when the helper already
# returns 2-D arrays.
x_train = np.asarray(x_train).reshape(len(x_train), -1)
x_test = np.asarray(x_test).reshape(len(x_test), -1)

# Fit and predict with the logistic model itself. The earlier code created
# `logisticRegr` but then reused `regr`, the linear-regression object, so the
# reported number never came from logistic regression.
logisticRegr = linear_model.LogisticRegression()
logisticRegr.fit(x_train, y_train)

# Make predictions using the testing set
y_pred = logisticRegr.predict(x_test)

# The labels were already reduced to class indices above, so no argmax is
# applied here.
# NOTE(review): assumes `dataset` returns label slices in the shape it was
# given (1-D here) -- confirm against the helper.
y_test_num = np.asarray(y_test)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpack cannot fail when
# only one class shows up in the test labels and predictions.
tn, fp, fn, tp = confusion_matrix(y_test_num, y_pred, labels=[0, 1]).ravel()
acc = (tp + tn) * 1. / (tp + fp + tn + fn)
# A single formatted string prints the same text on Python 2 and Python 3.
print("LogisticRegression acc is: %s" % acc)