-
Notifications
You must be signed in to change notification settings - Fork 1
/
hydrophobic_moment.py
175 lines (165 loc) · 6.85 KB
/
hydrophobic_moment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Calculates a set of properties from a protein sequence:
- hydrophobicity (according to a particular scale)
- mean hydrophobic dipole moment assuming it is an alpha-helix.
- total charge (at pH 7.4)
- amino acid composition
- discimination factor according to Rob Keller (IJMS, 2011)
Essentially the same as HeliQuest (reproduces the same values).
Author:
Joao Rodrigues
"""
from __future__ import print_function
import argparse
import csv
import math
import os
import time
#
# Definitions
#
scales = {'Fauchere-Pliska': {'A': 0.31, 'R': -1.01, 'N': -0.60,
'D': -0.77, 'C': 1.54, 'Q': -0.22,
'E': -0.64, 'G': 0.00, 'H': 0.13,
'I': 1.80, 'L': 1.70, 'K': -0.99,
'M': 1.23, 'F': 1.79, 'P': 0.72,
'S': -0.04, 'T': 0.26, 'W': 2.25,
'Y': 0.96, 'V': 1.22},
'Eisenberg': {'A': 0.25, 'R': -1.80, 'N': -0.64,
'D': -0.72, 'C': 0.04, 'Q': -0.69,
'E': -0.62, 'G': 0.16, 'H': -0.40,
'I': 0.73, 'L': 0.53, 'K': -1.10,
'M': 0.26, 'F': 0.61, 'P': -0.07,
'S': -0.26, 'T': -0.18, 'W': 0.37,
'Y': 0.02, 'V': 0.54},
}
_supported_scales = list(scales.keys())
aa_charge = {'E': -1, 'D': -1, 'K': 1, 'R': 1}
#
# Functions
#
def assign_hydrophobicity(sequence, scale='Fauchere-Pliska'): # noqa: E302
"""Assigns a hydrophobicity value to each amino acid in the sequence"""
hscale = scales.get(scale, None)
if not hscale:
raise KeyError('{} is not a supported scale. '.format(scale))
hvalues = []
for aa in sequence:
sc_hydrophobicity = hscale.get(aa, None)
if sc_hydrophobicity is None:
raise KeyError('Amino acid not defined in scale: {}'.format(aa))
hvalues.append(sc_hydrophobicity)
return hvalues
def calculate_moment(array, angle=100):
"""Calculates the hydrophobic dipole moment from an array of hydrophobicity
values. Formula defined by Eisenberg, 1982 (Nature). Returns the average
moment (normalized by sequence length)
uH = sqrt(sum(Hi cos(i*d))**2 + sum(Hi sin(i*d))**2),
where i is the amino acid index and d (delta) is an angular value in
degrees (100 for alpha-helix, 180 for beta-sheet).
"""
sum_cos, sum_sin = 0.0, 0.0
for i, hv in enumerate(array):
rad_inc = ((i*angle)*math.pi)/180.0
sum_cos += hv * math.cos(rad_inc)
sum_sin += hv * math.sin(rad_inc)
if len(array) != 0:
return math.sqrt(sum_cos**2 + sum_sin**2) / len(array)
else:
print(array)
return 0
def calculate_charge(sequence, charge_dict=aa_charge):
"""Calculates the charge of the peptide sequence at pH 7.4
"""
sc_charges = [charge_dict.get(aa, 0) for aa in sequence]
return sum(sc_charges)
def calculate_discrimination(mean_uH, total_charge):
"""Returns a discrimination factor according to Rob Keller (IJMS, 2011)
A sequence with d>0.68 can be considered a potential lipid-binding region.
"""
d = 0.944*mean_uH + 0.33*total_charge
return d
def calculate_composition(sequence):
"""Returns a dictionary with percentages per classes"""
# Residue character table
polar_aa = set(('S', 'T', 'N', 'H', 'Q', 'G'))
speci_aa = set(('P', 'C'))
apolar_aa = set(('A', 'L', 'V', 'I', 'M'))
charged_aa = set(('E', 'D', 'K', 'R'))
aromatic_aa = set(('W', 'Y', 'F'))
n_p, n_s, n_a, n_ar, n_c = 0, 0, 0, 0, 0
for aa in sequence:
if aa in polar_aa:
n_p += 1
elif aa in speci_aa:
n_s += 1
elif aa in apolar_aa:
n_a += 1
elif aa in charged_aa:
n_c += 1
elif aa in aromatic_aa:
n_ar += 1
return {'polar': n_p, 'special': n_s,
'apolar': n_a, 'charged': n_c, 'aromatic': n_ar}
def analyze_sequence(name=None, sequence=None, window=18, verbose=False):
"""Runs all the above on a sequence. Pretty prints the results"""
w = window
outdata = [] # for csv writing
# Processing...
seq_len = len(sequence)
print('[+] Analysing sequence {} ({} aa.)'.format(name, seq_len))
print('[+] Using a window of {} aa.'.format(w))
for seq_range in range(0, seq_len):
seq_w = sequence[seq_range:seq_range+w]
if seq_range and len(seq_w) < w:
break
# Numerical values
z = calculate_charge(seq_w)
seq_h = assign_hydrophobicity(seq_w)
av_h = sum(seq_h)/len(seq_h)
av_uH = calculate_moment(seq_h)
d = calculate_discrimination(av_uH, z)
# AA composition
aa_comp = calculate_composition(seq_w)
n_tot_pol = aa_comp['polar'] + aa_comp['charged']
n_tot_apol = aa_comp['apolar'] + aa_comp['aromatic'] + aa_comp['special'] # noqa: E501
n_charged = aa_comp['charged'] # noqa: E501
n_aromatic = aa_comp['aromatic'] # noqa: E501
_t = [name, sequence, seq_range+1, w, seq_w, z, av_h, av_uH, d,
n_tot_pol, n_tot_apol, n_charged, n_aromatic]
outdata.append(_t)
if verbose:
print(' Window {}: {}-{}-{}'.format(seq_range+1, seq_range,
seq_w, seq_range+w))
print(' z={:<3d} <H>={:4.3f} <uH>={:4.3f} D={:4.3f}'.format(z, av_h, # noqa: E501
av_uH, d)) # noqa: E501
print(' Amino acid composition')
print(' Polar : {:3d} / {:3.2f}%'.format(n_tot_pol, n_tot_pol*100/w)) # noqa: E501
print(' Non-Polar: {:3d} / {:3.2f}%'.format(n_tot_apol, n_tot_apol*100/w)) # noqa: E501
print(' Charged : {:3d} / {:3.2f}%'.format(n_charged, n_charged*100/w)) # noqa: E501
print(' Aromatic : {:3d} / {:3.2f}%'.format(n_aromatic, n_aromatic*100/w)) # noqa: E501
print()
return outdata
def read_fasta_file(afile):
"""Parses a file with FASTA formatted sequences"""
if not os.path.isfile(afile):
raise IOError('File not found/readable: {}'.format(afile))
sequences = []
seq_name, cur_seq = None, None
with open(afile) as handle:
for line in handle:
line = line.strip()
if line.startswith('>'):
if cur_seq:
sequences.append((seq_name, ''.join(cur_seq)))
seq_name = line[1:]
cur_seq = []
elif line:
cur_seq.append(line)
sequences.append((seq_name, ''.join(cur_seq))) # last seq
return sequences
def run_hydrophobic_moment_predictions(seq):
seq = seq.upper()
hdr = assign_hydrophobicity(seq,"Eisenberg")
return calculate_moment(hdr)