-
Notifications
You must be signed in to change notification settings - Fork 21
/
QueryChEMBL.py
202 lines (178 loc) · 8.72 KB
/
QueryChEMBL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
''' Queries the ChEMBL database to find target proteins for drugs.
'''
__author__ = 'Stephen Ramsey'
__copyright__ = 'Oregon State University'
__credits__ = ['Stephen Ramsey']
__license__ = 'MIT'
__version__ = '0.1.0'
__maintainer__ = ''
__email__ = ''
__status__ = 'Prototype'
import sys
import urllib
import urllib.parse

# import requests
# import requests_cache
from cache_control_helper import CacheControlHelper
from QueryUniprot import QueryUniprot
class QueryChEMBL:
    """Static helpers that query the ChEMBL REST API for drug/target data.

    Every method is a ``@staticmethod``; the class is only used as a
    namespace. Network or HTTP failures are reported on stderr and surface
    to callers as "no data" (``None``, an empty set, dict or list) rather
    than as exceptions.
    """

    # Root of the ChEMBL web-service endpoints used below.
    API_BASE_URL = 'https://www.ebi.ac.uk/chembl/api/data'
    # Per-request timeout, in seconds, passed to the HTTP client.
    TIMEOUT_SEC = 120

    @staticmethod
    def send_query_get(handler, url_suffix):
        """Issue a GET request against the ChEMBL API and decode the JSON body.

        Args:
            handler (str): endpoint name appended to ``API_BASE_URL``,
                e.g. ``'mechanism.json'``
            url_suffix (str): already URL-encoded query string (without the
                leading ``?``)

        Returns:
            The decoded JSON payload, or ``None`` if the request timed out,
            raised any other error, returned a non-200 status code, or
            returned a body that is not valid JSON.
        """
        # NOTE(review): CacheControlHelper is assumed to expose `.get()` and
        # an `.exceptions` namespace mirroring `requests` -- confirm against
        # cache_control_helper.py.
        session = CacheControlHelper()
        url = QueryChEMBL.API_BASE_URL + '/' + handler + '?' + url_suffix

        try:
            res = session.get(url, timeout=QueryChEMBL.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryChEMBL for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            # Ctrl-C during a long request ends the program quietly.
            sys.exit(0)
        except Exception as e:
            # Exception (not BaseException) so that SystemExit and
            # GeneratorExit are never swallowed here.
            print(url, file=sys.stderr)
            print('%s received in QueryChEMBL for URL: %s' % (e, url), file=sys.stderr)
            return None

        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr)
            return None

        try:
            return res.json()
        except ValueError as e:
            # A 200 response with a non-JSON body is treated like any other
            # failed request instead of crashing the caller.
            print(url, file=sys.stderr)
            print('%s received in QueryChEMBL for URL: %s' % (e, url), file=sys.stderr)
            return None

    @staticmethod
    def get_chembl_ids_for_drug(drug_name):
        """Look up the ChEMBL molecule ids recorded under a compound name.

        Args:
            drug_name (str): compound name; matched with the
                ``compound_name__iexact`` filter

        Returns:
            set: the ``molecule_chembl_id`` values found; empty if
            ``drug_name`` is not a string or nothing could be retrieved.
        """
        if not isinstance(drug_name, str):
            return set()

        drug_name_safe = urllib.parse.quote(drug_name, safe='')
        res = QueryChEMBL.send_query_get(handler='compound_record.json',
                                         url_suffix='compound_name__iexact=' + drug_name_safe)
        if res is None:
            return set()

        compound_records = res.get('compound_records') or []
        return {
            record['molecule_chembl_id']
            for record in compound_records
            if record.get('molecule_chembl_id') is not None
        }

    @staticmethod
    def get_target_uniprot_ids_for_chembl_id(chembl_id):
        """Collect UniProt accessions targeted by one ChEMBL molecule.

        Targets that come from the molecule's mechanism records get a score
        of 1.0. Targets that come from the ``target_prediction`` endpoint get
        the reported probability, and never override a mechanism-derived or
        earlier entry for the same accession.

        Args:
            chembl_id (str): a ChEMBL molecule id, e.g. "CHEMBL521"

        Returns:
            dict: UniProt accession -> float score; empty if ``chembl_id``
            is not a string or no data could be retrieved.
        """
        print(chembl_id, file=sys.stderr)
        if not isinstance(chembl_id, str):
            return dict()

        res_targets_dict = dict()

        # Pass 1: targets named in mechanism-of-action records.
        for target_mechanism in QueryChEMBL.get_mechanisms_for_chembl_id(chembl_id):
            target_chembl_id = target_mechanism.get("target_chembl_id", None)
            if target_chembl_id is None:
                continue
            for target_uniprot_id in QueryChEMBL.map_chembl_target_to_uniprot_ids(target_chembl_id):
                res_targets_dict[target_uniprot_id] = 1.0

        # Pass 2: predicted targets, restricted to human by the query itself.
        chembl_id_safe = urllib.parse.quote(chembl_id, safe='')
        res = QueryChEMBL.send_query_get(handler='target_prediction.json',
                                         url_suffix='molecule_chembl_id__exact=' + chembl_id_safe +
                                                    '&target_organism__exact=Homo%20sapiens')
        if res is None:
            return res_targets_dict

        for target_prediction in res.get('target_predictions') or []:
            target_uniprot_id = target_prediction.get('target_accession', None)
            if target_uniprot_id is None:
                continue
            # Explicit check instead of `assert`, which disappears under -O:
            # entries with a missing or non-human organism are skipped.
            if target_prediction.get('target_organism', None) != "Homo sapiens":
                continue
            if target_uniprot_id in res_targets_dict:
                continue
            try:
                probability = float(target_prediction.get('probability', None))
            except (TypeError, ValueError):
                # Missing or malformed probability: skip this prediction
                # rather than abort the whole lookup.
                continue
            res_targets_dict[target_uniprot_id] = probability

        return res_targets_dict

    @staticmethod
    def map_chembl_target_to_uniprot_ids(target_chembl_id):
        """Translate a ChEMBL target id into citeable UniProt accessions.

        Walks targets -> target_components -> target_component_xrefs in the
        ``target.json`` response and keeps the cross-references whose
        ``xref_src_db`` is "UniProt", each passed through
        ``QueryUniprot.get_citeable_accession_for_accession``.

        Args:
            target_chembl_id (str): a ChEMBL target id, e.g. "CHEMBL2094253"

        Returns:
            set: citeable UniProt accessions; empty if the id is not a
            string or nothing could be retrieved.
        """
        if not isinstance(target_chembl_id, str):
            return set()

        target_chembl_id_safe = urllib.parse.quote(target_chembl_id, safe='')
        res_json = QueryChEMBL.send_query_get(handler="target.json",
                                              url_suffix="target_chembl_id=" + target_chembl_id_safe)
        res_set = set()
        if res_json is None:
            return res_set

        for target in res_json.get("targets") or []:
            for component in target.get("target_components") or []:
                for xref in component.get("target_component_xrefs") or []:
                    if xref is None or xref.get("xref_src_db") != "UniProt":
                        continue
                    uniprot_id = xref.get("xref_id", None)
                    if uniprot_id is None:
                        continue
                    uniprot_id_citeable = QueryUniprot.get_citeable_accession_for_accession(uniprot_id)
                    if uniprot_id_citeable is not None:
                        res_set.add(uniprot_id_citeable)
        return res_set

    @staticmethod
    def get_target_uniprot_ids_for_drug(drug_name):
        """Collect UniProt target accessions for every ChEMBL id of a drug.

        Args:
            drug_name (str): compound name, resolved through
                ``get_chembl_ids_for_drug``

        Returns:
            dict: UniProt accession -> float score. When several ChEMBL ids
            report the same accession, the highest score is kept so the
            result does not depend on set iteration order. Empty if
            ``drug_name`` is not a string or nothing could be retrieved.
        """
        if not isinstance(drug_name, str):
            return dict()

        res_uniprot_ids = dict()
        for chembl_id in QueryChEMBL.get_chembl_ids_for_drug(drug_name):
            uniprot_ids_dict = QueryChEMBL.get_target_uniprot_ids_for_chembl_id(chembl_id)
            for uniprot_id, score in uniprot_ids_dict.items():
                previous = res_uniprot_ids.get(uniprot_id)
                if previous is None or score > previous:
                    res_uniprot_ids[uniprot_id] = score
        return res_uniprot_ids

    @staticmethod
    def get_mechanisms_for_chembl_id(chembl_id):
        """Retrieves mechanism of action and target of each drug.

        Args:
            chembl_id (str): a ChEMBL id, e.g., "CHEMBL521"

        Returns:
            array: an array of mechanism of actions, or [] if no mechanism data could be obtained for the given
                ChEMBL ID

            example:
                [
                    {"action_type": "INHIBITOR",
                     "binding_site_comment": null,
                     "direct_interaction": true,
                     "disease_efficacy": true,
                     "max_phase": 4,
                     "mec_id": 1180,
                     "mechanism_comment": null,
                     "mechanism_of_action": "Cyclooxygenase inhibitor",
                     "mechanism_refs": [
                         {"ref_id": "0443-059748 PP. 229",
                          "ref_type": "ISBN",
                          "ref_url": "http://www.isbnsearch.org/isbn/0443059748"
                         },
                         {"ref_id": "Ibuprofen",
                          "ref_type": "Wikipedia",
                          "ref_url": "http://en.wikipedia.org/wiki/Ibuprofen"}
                     ],
                     "molecular_mechanism": true,
                     "molecule_chembl_id": "CHEMBL521",
                     "record_id": 1343587,
                     "selectivity_comment": null,
                     "site_id": null,
                     "target_chembl_id": "CHEMBL2094253"}
                ]
        """
        if not isinstance(chembl_id, str):
            return []

        chembl_id_safe = urllib.parse.quote(chembl_id, safe='')
        res = QueryChEMBL.send_query_get(handler='mechanism.json',
                                         url_suffix='molecule_chembl_id=' + chembl_id_safe)
        if res is None:
            return []
        # A missing, null or empty "mechanisms" entry all collapse to [].
        return res.get('mechanisms') or []
if __name__ == '__main__':
    # Manual smoke test: print the target dictionary for two sample ChEMBL
    # molecule ids. Requires network access to the ChEMBL API.
    for sample_chembl_id in ('CHEMBL521', 'CHEMBL2364648'):
        print(QueryChEMBL.get_target_uniprot_ids_for_chembl_id(sample_chembl_id))
    # Further calls that can be enabled by hand when debugging:
    # print(QueryChEMBL.get_mechanisms_for_chembl_id("CHEMBL521"))
    # print(QueryChEMBL.map_chembl_target_to_uniprot_ids("CHEMBL2094253"))