Skip to content

Commit

Permalink
Display InterPro information in lift-over results
Browse files Browse the repository at this point in the history
  • Loading branch information
memgonzales committed Sep 9, 2023
1 parent 5d816ec commit 4b91c01
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 20 deletions.
2 changes: 2 additions & 0 deletions callbacks/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ class Constants(object):
GENE_DESCRIPTIONS = f'{APP_DATA}/gene_descriptions'
TEXT_MINING = f'{APP_DATA}/text_mining'
QTARO = f'{APP_DATA}/qtaro'
IRIC = f'{APP_DATA}/iric_data'

GENE_ID_MAPPING = f'{APP_DATA}/gene_id_mapping'
MSU_MAPPING = f'{GENE_ID_MAPPING}/msu_mapping'
OGI_MAPPING = f'{GENE_ID_MAPPING}/ogi_mapping'
NB_MAPPING = f'{GENE_ID_MAPPING}/nb_mapping'
IRIC_MAPPING = f'{GENE_ID_MAPPING}/iric_mapping'

GENOMES_NIPPONBARE = f'{APP_DATA}/genomes/Nipponbare'
ANNOTATIONS_NB = f'{ANNOTATIONS}/Nb'
Expand Down
43 changes: 24 additions & 19 deletions callbacks/lift_over/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
'IR64': 'indica IR64',
'CMeo': 'japonica CHAO MEO'}

NB_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot',
NB_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', 'InterPro',
'OGI', 'Chromosome', 'Start', 'End', 'Strand', 'QTL Analyses', 'PubMed Article IDs']
OTHER_REF_COLUMNS = ['OGI', 'Name', 'Chromosome', 'Start', 'End', 'Strand']
FRONT_FACING_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', 'OGI']
Expand Down Expand Up @@ -403,8 +403,11 @@ def get_qtaro_entry(mapping, gene):
return NULL_PLACEHOLDER


def get_qtaro_entries(mapping, genes):
return [get_qtaro_entry(mapping, gene) for gene in genes]
def get_qtaro_entries(genes):
    """Return the QTARO entry for every gene in ``genes``.

    The pickled dictionary at ``Constants.QTARO_DICTIONARY`` is loaded once
    per call and handed to ``get_qtaro_entry`` for each gene. The returned
    list has one element per gene, in the same order as ``genes``.
    """
    with open(Constants.QTARO_DICTIONARY, 'rb') as dictionary_file:
        mapping = pickle.load(dictionary_file)

    entries = []
    for gene in genes:
        entries.append(get_qtaro_entry(mapping, gene))

    return entries


def get_pubmed_entry(gene):
Expand All @@ -417,17 +420,19 @@ def get_pubmed_entry(gene):
except FileNotFoundError:
return NULL_PLACEHOLDER

pubmed_str = ''
for idx, pubmed in enumerate(pubmed_ids):
if idx % 2 == 0:
pubmed_str += f'{pubmed}   '
else:
pubmed_str += f'{pubmed}\n'
return '\n'.join(pubmed_ids)


if pubmed_str[-1] == '\n': # Ends in a newline
return pubmed_str[:-len('\n')]
def get_interpro_entry(gene):
with open(f'{Constants.IRIC}/interpro.pickle', 'rb') as interpro_f, open(f'{Constants.IRIC_MAPPING}/msu_to_iric.pickle', 'rb') as iric_mapping_f:
interpro_mapping = pickle.load(interpro_f)
iric_mapping = pickle.load(iric_mapping_f)

return pubmed_str[:-len('   ')]
try:
return '<br><br>'.join([get_interpro_link_single_str(entry[1], entry[0])
for entry in interpro_mapping[iric_mapping[gene]] if entry[1]])
except KeyError:
return NULL_PLACEHOLDER


def get_nb_ortholog(gene, ref):
Expand Down Expand Up @@ -472,13 +477,12 @@ def get_genes_in_Nb(nb_intervals):
ogi_list = get_ogi_list([sanitize_gene_id(gene.id)
for gene in genes_in_interval], ogi_mapping)

# Get QTARO annotations
with open(Constants.QTARO_DICTIONARY, 'rb') as f:
qtaro_dict = pickle.load(f)
qtaro_list = get_qtaro_entries(
qtaro_dict, [gene.id for gene in genes_in_interval])

qtaro_list = get_qtaro_entries([gene.id for gene in genes_in_interval])
pubmed_ids = [get_pubmed_entry(gene.id) for gene in genes_in_interval]
interpro_list = [get_interpro_entry(
gene.id) for gene in genes_in_interval]

# Get InterPro annotations

# Construct the data frame
df = pd.DataFrame({
Expand All @@ -489,7 +493,8 @@ def get_genes_in_Nb(nb_intervals):
'End': [gene.end for gene in genes_in_interval],
'Strand': [gene.strand for gene in genes_in_interval],
'QTL Analyses': qtaro_list,
'PubMed Article IDs': pubmed_ids
'PubMed Article IDs': pubmed_ids,
'InterPro': interpro_list
})

dfs.append(df)
Expand Down
7 changes: 6 additions & 1 deletion callbacks/links_util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
A_HREF = '<a style="white-space:nowrap" target = "_blank" href="'
A_HREF_WITH_WORD_WRAP = '<a target = "_blank" href="'
CLOSE_A_HREF = '">'
LINK_ICON = '&nbsp;&nbsp;<i class="fa-solid fa-up-right-from-square fa-2xs"></i></a>'
LINK_ICON = '<span style="white-space:nowrap">&nbsp;&nbsp;<i class="fa-solid fa-up-right-from-square fa-2xs"></i></span></a>'


def get_genes_from_kegg_link(link):
Expand Down Expand Up @@ -56,3 +57,7 @@ def get_rgi_genecard_link(result, id_col):

def get_rgi_orthogroup_link(result, id_col):
    """Build an HTML anchor for the orthogroup stored in ``result[id_col]``.

    The anchor points at the riceome.hzau.edu.cn orthogroup page for that
    identifier and uses the identifier itself as the link text, followed by
    ``LINK_ICON``.
    """
    orthogroup_id = result[id_col]
    orthogroup_url = 'https://riceome.hzau.edu.cn/orthogroup/' + orthogroup_id
    return A_HREF + orthogroup_url + CLOSE_A_HREF + orthogroup_id + LINK_ICON


def get_interpro_link_single_str(term, id):
    """Build an HTML anchor that displays ``term`` and links to an InterPro entry.

    ``id`` is appended to the InterPro entry URL; ``term`` becomes the link
    text, followed by ``LINK_ICON``. The word-wrapping anchor prefix
    (``A_HREF_WITH_WORD_WRAP``) is used for the opening tag.
    """
    entry_url = 'https://www.ebi.ac.uk/interpro/entry/InterPro/' + id
    opening_tag = A_HREF_WITH_WORD_WRAP + entry_url + CLOSE_A_HREF
    return opening_tag + term + LINK_ICON

0 comments on commit 4b91c01

Please sign in to comment.