-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspectralE.py
36 lines (28 loc) · 964 Bytes
/
spectralE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
import numpy as np
import sys
import os
# Command-line arguments: the input table path, then the output directory.
filename, outputdir = sys.argv[1], sys.argv[2]
# Echo both arguments, one per line.
print(filename, outputdir, sep="\n")
def spectral_entropy(x, noise_scale=1e-05, rng=None):
    """Return the Shannon entropy (natural log) of *x* normalised to sum to 1.

    A small positive jitter is added to every element before normalising so
    that zero entries do not hit ``log(0)``.

    Args:
        x: Sequence of numbers (list, array, Series, ...). The jitter vector
            has ``len(x)`` elements, so one-dimensional input is expected.
        noise_scale: Mean and standard deviation of the normal distribution
            the jitter is drawn from; the absolute value of each draw is
            used. The default matches the previously hard-coded 1e-05.
            Passing 0 disables the jitter, which is only safe when every
            element of *x* is strictly positive.
        rng: Optional object exposing ``normal(loc, scale, size=...)``, such
            as ``numpy.random.default_rng(seed)``, for reproducible results.
            When ``None`` the global ``numpy.random`` state is used.

    Returns:
        ``-sum(p * log(p))`` where ``p`` is the jittered, normalised input.
    """
    x = np.array(x)
    # Fall back to the module-level generator so existing callers (and any
    # np.random.seed() they rely on) see exactly the same draws as before.
    sampler = np.random if rng is None else rng
    noise = np.absolute(sampler.normal(noise_scale, noise_scale, size=len(x)))
    x = x + noise
    # Normalise to a probability distribution before taking the entropy.
    x = x / np.sum(x)
    return -np.sum(np.log(x) * x)
# Load the tab-separated, header-less table. '.' is read as a missing value
# and every missing value is then replaced by 0.
df = pd.read_csv(filename, sep="\t", header=None, na_values=".").fillna(0)
# With header=None the column labels are integers; convert them to strings so
# the group-by below can refer to them as '0', '1', '2'.
df.columns = df.columns.astype(str)

# The last column is used as a per-row weight (labelled "#bp overlap" in the
# original comment). Kept as an (n_rows, 1) array so it broadcasts over columns.
num_overlap = np.array(df.iloc[:, -1:].astype(float))
# Columns from position 6 up to, but not including, the last one, each scaled
# by the row weight.
values = df.iloc[:, 6:-1].astype(float) * num_overlap
coordinates = df.iloc[:, 0:3]
df_c = pd.concat([coordinates.reset_index(drop=True), values], axis=1)
# Sum the weighted values over rows that share the same first three columns.
i = df_c.groupby(["0", "1", "2"]).sum()
# One entropy value per value column, computed across the grouped rows.
entropy = i.apply(spectral_entropy, axis=0)

# Build the output path with os.path rather than string concatenation so an
# empty output directory or a non-'/' path separator is handled correctly.
output_path = os.path.join(outputdir, os.path.basename(filename) + ".txt")
pd.DataFrame({
    "sample_index": values.columns,
    "entropy": entropy,
}).to_csv(output_path, index=False)