Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/bioinfodlsu/rice-pilaf into…
Browse files Browse the repository at this point in the history
… input
  • Loading branch information
pbong committed Sep 11, 2023
2 parents 45c6b93 + b3d2a9d commit 2efdf27
Show file tree
Hide file tree
Showing 13 changed files with 226 additions and 75 deletions.
2 changes: 1 addition & 1 deletion callbacks/coexpression/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def convert_transcript_to_msu_id(transcript_ids_str, network):
Returns:
- Equivalent MSU accessions of the KEGG transcript IDs
"""
with open(f'{Constants.GENE_ID_MAPPING}/{network}/transcript-to-msu-id.pickle', 'rb') as f:
with open(f'{Constants.MSU_MAPPING}/{network}/transcript-to-msu-id.pickle', 'rb') as f:
mapping_dict = pickle.load(f)

output_str = ''
Expand Down
24 changes: 12 additions & 12 deletions callbacks/constants.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
class Constants(object):
LIFT_OVER = 'lift-over'
COEXPRESSION = 'co-expression'
TFBS = 'tf-enrichment'
IGV = 'browse-loci'
TEXT_MINING = 'text-mining'
LABEL_LIFT_OVER = 'lift-over'
LABEL_COEXPRESSION = 'co-expression'
LABEL_TFBS = 'tf-enrichment'
LABEL_IGV = 'browse-loci'
LABEL_TEXT_MINING = 'text-mining'

DATA = 'static'
APP_DATA = f'{DATA}/app_data'
RAW_DATA = f'{DATA}/raw_data'

ANNOTATIONS = f'{APP_DATA}/annotations'
ALIGNMENTS = f'{APP_DATA}/alignments'
OGI_MAPPING = f'{APP_DATA}/ogi_mapping'
NB_MAPPING = f'{APP_DATA}/nb_mapping'
GENE_DESCRIPTIONS = f'{APP_DATA}/gene_descriptions'
GENE_ID_MAPPING = f'{APP_DATA}/gene_id_mapping'
TEXT_MINING = f'{APP_DATA}/text_mining'
QTARO = f'{APP_DATA}/qtaro'
IRIC = f'{APP_DATA}/iric_data'

GENE_ID_MAPPING = f'{APP_DATA}/gene_id_mapping'
MSU_MAPPING = f'{GENE_ID_MAPPING}/msu_mapping'
OGI_MAPPING = f'{GENE_ID_MAPPING}/ogi_mapping'
NB_MAPPING = f'{GENE_ID_MAPPING}/nb_mapping'
IRIC_MAPPING = f'{GENE_ID_MAPPING}/iric_mapping'

GENOMES_NIPPONBARE = f'{APP_DATA}/genomes/Nipponbare'
ANNOTATIONS_NB = f'{ANNOTATIONS}/Nb'
Expand All @@ -39,12 +43,8 @@ class Constants(object):
GENOME_WIDE_BED = 'query_genomic_intervals'
TFBS_ANNOTATION = f'{TFBS_BEDS}/annotation'

DATA_PREPARATION_SCRIPTS = 'prepare_data/workflow/scripts'
ENRICHMENT_ANALYSIS_SCRIPTS = f'{DATA_PREPARATION_SCRIPTS}/enrichment_analysis'

ENRICHMENT_ANALYSIS = f'{APP_DATA}/enrichment_analysis'
ENRICHMENT_ANALYSIS_MAPPING = 'mapping'
ENRICHMENT_ANALYSIS_MODULES = 'modules'

KEGG_DOSA_GENESET = f'{ENRICHMENT_ANALYSIS_MAPPING}/kegg-dosa-geneset.pickle'
KEGG_DOSA_PATHWAY_NAMES = f'{ENRICHMENT_ANALYSIS_MAPPING}/kegg-dosa-pathway-names.tsv'
Expand Down
51 changes: 32 additions & 19 deletions callbacks/lift_over/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
'IR64': 'indica IR64',
'CMeo': 'japonica CHAO MEO'}

NB_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot',
NB_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', 'InterPro',
'OGI', 'Chromosome', 'Start', 'End', 'Strand', 'QTL Analyses', 'PubMed Article IDs']
OTHER_REF_COLUMNS = ['OGI', 'Name', 'Chromosome', 'Start', 'End', 'Strand']
FRONT_FACING_COLUMNS = ['Name', 'Description', 'UniProtKB/Swiss-Prot', 'OGI']
Expand Down Expand Up @@ -354,6 +354,11 @@ def get_ogi_other_ref(ref, nb_intervals):
for intersection in gff_intersections:
ref_interval = to_genomic_interval(
intersection.attributes['Name'][0])

# Skip if assembler does not know what to do with contig
if is_error(ref_interval):
continue

genes_in_interval = list(db_annotation.region(region=(ref_interval.chrom, ref_interval.start, ref_interval.stop),
completely_within=False, featuretype='gene'))

Expand Down Expand Up @@ -398,8 +403,11 @@ def get_qtaro_entry(mapping, gene):
return NULL_PLACEHOLDER


def get_qtaro_entries(mapping, genes):
return [get_qtaro_entry(mapping, gene) for gene in genes]
def get_qtaro_entries(genes):
    """
    Looks up the QTARO entry of every gene in the given iterable.

    The QTARO dictionary is unpickled once from Constants.QTARO_DICTIONARY
    and then shared across all the per-gene lookups.

    Parameters:
    - genes: Iterable of gene IDs, each passed as-is to get_qtaro_entry

    Returns:
    - List with the result of get_qtaro_entry for each gene, in input order
    """
    with open(Constants.QTARO_DICTIONARY, 'rb') as qtaro_file:
        qtaro_mapping = pickle.load(qtaro_file)

    entries = []
    for gene in genes:
        entries.append(get_qtaro_entry(qtaro_mapping, gene))

    return entries


def get_pubmed_entry(gene):
Expand All @@ -412,17 +420,19 @@ def get_pubmed_entry(gene):
except FileNotFoundError:
return NULL_PLACEHOLDER

pubmed_str = ''
for idx, pubmed in enumerate(pubmed_ids):
if idx % 2 == 0:
pubmed_str += f'{pubmed}   '
else:
pubmed_str += f'{pubmed}\n'
return '\n'.join(pubmed_ids)

if pubmed_str[-1] == '\n': # Ends in a newline
return pubmed_str[:-len('\n')]

return pubmed_str[:-len('   ')]
def get_interpro_entry(gene):
    """
    Builds the InterPro links of a single gene.

    The gene is first translated through the mapping stored in
    msu_to_iric.pickle, and the translated ID is then used as the key
    into the mapping stored in interpro.pickle.

    Parameters:
    - gene: Gene ID used as the key into the msu_to_iric mapping
            (presumably an MSU accession -- confirm against the caller)

    Returns:
    - InterPro links of the gene, separated by '<br><br>'. Entries whose
      second element is falsy are left out. NULL_PLACEHOLDER is returned
      if a KeyError is raised while building the links (e.g., the gene is
      missing from either mapping)
    """
    # NOTE(review): both pickles are re-read on every call; callers that
    # process many genes pay this cost once per gene
    with open(f'{Constants.IRIC}/interpro.pickle', 'rb') as interpro_f, \
            open(f'{Constants.IRIC_MAPPING}/msu_to_iric.pickle', 'rb') as iric_mapping_f:
        interpro_mapping = pickle.load(interpro_f)
        iric_mapping = pickle.load(iric_mapping_f)

    try:
        links = []
        for entry in interpro_mapping[iric_mapping[gene]]:
            # entry[0] is passed as the InterPro ID, entry[1] as the displayed term
            if entry[1]:
                links.append(get_interpro_link_single_str(entry[1], entry[0]))

        return '<br><br>'.join(links)
    except KeyError:
        return NULL_PLACEHOLDER


def get_nb_ortholog(gene, ref):
Expand Down Expand Up @@ -467,13 +477,10 @@ def get_genes_in_Nb(nb_intervals):
ogi_list = get_ogi_list([sanitize_gene_id(gene.id)
for gene in genes_in_interval], ogi_mapping)

# Get QTARO annotations
with open(Constants.QTARO_DICTIONARY, 'rb') as f:
qtaro_dict = pickle.load(f)
qtaro_list = get_qtaro_entries(
qtaro_dict, [gene.id for gene in genes_in_interval])

qtaro_list = get_qtaro_entries([gene.id for gene in genes_in_interval])
pubmed_ids = [get_pubmed_entry(gene.id) for gene in genes_in_interval]
interpro_list = [get_interpro_entry(
gene.id) for gene in genes_in_interval]

# Construct the data frame
df = pd.DataFrame({
Expand All @@ -484,7 +491,8 @@ def get_genes_in_Nb(nb_intervals):
'End': [gene.end for gene in genes_in_interval],
'Strand': [gene.strand for gene in genes_in_interval],
'QTL Analyses': qtaro_list,
'PubMed Article IDs': pubmed_ids
'PubMed Article IDs': pubmed_ids,
'InterPro': interpro_list
})

dfs.append(df)
Expand Down Expand Up @@ -544,6 +552,11 @@ def get_genes_in_other_ref(ref, nb_intervals):
for intersection in gff_intersections:
ref_interval = to_genomic_interval(
intersection.attributes['Name'][0])

# Skip if assembler does not know what to do with contig
if is_error(ref_interval):
continue

genes_in_interval = list(db_annotation.region(region=(ref_interval.chrom, ref_interval.start, ref_interval.stop),
completely_within=False, featuretype='gene'))

Expand Down
7 changes: 6 additions & 1 deletion callbacks/links_util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
A_HREF = '<a style="white-space:nowrap" target = "_blank" href="'
A_HREF_WITH_WORD_WRAP = '<a target = "_blank" href="'
CLOSE_A_HREF = '">'
LINK_ICON = '&nbsp;&nbsp;<i class="fa-solid fa-up-right-from-square fa-2xs"></i></a>'
LINK_ICON = '<span style="white-space:nowrap">&nbsp;&nbsp;<i class="fa-solid fa-up-right-from-square fa-2xs"></i></span></a>'


def get_genes_from_kegg_link(link):
Expand Down Expand Up @@ -56,3 +57,7 @@ def get_rgi_genecard_link(result, id_col):

def get_rgi_orthogroup_link(result, id_col):
    """
    Builds the HTML anchor that links an orthogroup ID to its page
    on riceome.hzau.edu.cn.

    Parameters:
    - result: Object indexable by id_col; the value stored there must be
              a string since it is concatenated into the URL
    - id_col: Key (e.g., column name) under which the orthogroup ID is stored

    Returns:
    - HTML string of the anchor, with the orthogroup ID as the link text
      followed by the external-link icon
    """
    orthogroup_id = result[id_col]
    url = 'https://riceome.hzau.edu.cn/orthogroup/' + orthogroup_id

    return ''.join((A_HREF, url, CLOSE_A_HREF, orthogroup_id, LINK_ICON))


def get_interpro_link_single_str(term, id):
    """
    Builds the HTML anchor that links a single InterPro entry to its page
    on the EBI InterPro website. The anchor allows word wrapping.

    Parameters:
    - term: Text displayed as the link (must be a string)
    - id: InterPro entry ID appended to the URL (must be a string)

    Returns:
    - HTML string of the anchor, with the term as the link text
      followed by the external-link icon
    """
    # NOTE: `id` shadows the builtin of the same name; the parameter name is
    # kept since it is part of the function's interface
    entry_url = 'https://www.ebi.ac.uk/interpro/entry/InterPro/' + id

    return ''.join((A_HREF_WITH_WORD_WRAP, entry_url, CLOSE_A_HREF, term, LINK_ICON))
2 changes: 2 additions & 0 deletions callbacks/text_mining/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ def display_text_mining_results(text_mining_is_submitted, homepage_submitted, te

if num_unique_entries == 1:
stats = f'Found matches across {num_unique_entries} publication'
elif num_unique_entries == MAX_NUM_RESULTS:
stats = f'Found matches across over {num_unique_entries} publications. Consider making your search query more specific'
else:
stats = f'Found matches across {num_unique_entries} publications'

Expand Down
2 changes: 1 addition & 1 deletion pages/analysis/browse_loci.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
layout = html.Div(
id={
'type': 'analysis-layout',
'label': Constants.IGV
'label': Constants.LABEL_IGV
},
hidden=True,
children=[
Expand Down
2 changes: 1 addition & 1 deletion pages/analysis/co_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@
layout = html.Div(
id={
'type': 'analysis-layout',
'label': Constants.COEXPRESSION
'label': Constants.LABEL_COEXPRESSION
},
hidden=True,

Expand Down
26 changes: 13 additions & 13 deletions pages/analysis/lift_over.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@
layout = html.Div(
id={
'type': 'analysis-layout',
'label': Constants.LIFT_OVER
'label': Constants.LABEL_LIFT_OVER
},
hidden=True,
children=[
html.Div([
html.P(
['In this page, you can obtain the list of genes overlapping your input intervals. '
'Optionally, you can choose genomes to lift-over your Nipponbare coordinates to. Click ',
['In this page, you can obtain the list of genes overlapping your input intervals. '
'Optionally, you can choose genomes to lift-over your Nipponbare coordinates to. Click ',
dcc.Link(
['here ', html.I(
id='demo-link',
className='fa-solid fa-up-right-from-square fa-2xs'
)],
href='https://github.com/bioinfodlsu/rice-pilaf/wiki/2.1-Gene-List-and-Lift%E2%80%90over',
target='_blank',
className='top-navbar-item'
),
' for user guide.'
]
['here ', html.I(
id='demo-link',
className='fa-solid fa-up-right-from-square fa-2xs'
)],
href='https://github.com/bioinfodlsu/rice-pilaf/wiki/2.1-Gene-List-and-Lift%E2%80%90over',
target='_blank',
className='top-navbar-item'
),
' for user guide.'
]
)
], className='analysis-intro p-3'),

Expand Down
20 changes: 10 additions & 10 deletions pages/analysis/text_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
layout = html.Div(
id={
'type': 'analysis-layout',
'label': Constants.TEXT_MINING
'label': Constants.LABEL_TEXT_MINING
},
hidden=True,
children=[

html.Div([
html.P(
[
'In this page, you can retrieve gene names associated with traits, diseases, chemicals, etc. '
'from a database constructed from text-mined PubMed abstracts. Click ',
'In this page, you can retrieve gene names associated with traits, diseases, chemicals, etc. '
'from a database constructed from text-mined PubMed abstracts. Click ',
dcc.Link(
['here ', html.I(
id='demo-link',
className='fa-solid fa-up-right-from-square fa-2xs'
)],
href='https://github.com/bioinfodlsu/rice-pilaf/wiki/2.2-Gene-retrieval-by-text-mining',
target='_blank',
className='top-navbar-item'
['here ', html.I(
id='demo-link',
className='fa-solid fa-up-right-from-square fa-2xs'
)],
href='https://github.com/bioinfodlsu/rice-pilaf/wiki/2.2-Gene-retrieval-by-text-mining',
target='_blank',
className='top-navbar-item'
),
' for user guide.'
]
Expand Down
24 changes: 12 additions & 12 deletions pages/analysis/tf_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
layout = html.Div(
id={
'type': 'analysis-layout',
'label': Constants.TFBS
'label': Constants.LABEL_TFBS
},
hidden=True,
children=[
html.Div([
html.P(
['In this page, you can search for transcription factors whose binding sites overlap significantly with your intervals,'
'the idea being that your intervals might contain variants that affect the binding affinity of transcription factors. Click ',
dcc.Link(
['here ', html.I(
id='demo-link',
className='fa-solid fa-up-right-from-square fa-2xs'
)],
href='https://github.com/bioinfodlsu/rice-pilaf/wiki/2.4-Regulatory-Feature-Enrichment',
target='_blank',
className='top-navbar-item'
),
' for user guide.']
'the idea being that your intervals might contain variants that affect the binding affinity of transcription factors. Click ',
dcc.Link(
['here ', html.I(
id='demo-link',
className='fa-solid fa-up-right-from-square fa-2xs'
)],
href='https://github.com/bioinfodlsu/rice-pilaf/wiki/2.4-Regulatory-Feature-Enrichment',
target='_blank',
className='top-navbar-item'
),
' for user guide.']
)
], className='analysis-intro p-3'),

Expand Down
10 changes: 5 additions & 5 deletions pages/analysis_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@

def get_analaysis_layout_dictionary():
return OrderedDict({
Constants.LIFT_OVER: 'Gene List and Lift-Over',
Constants.TEXT_MINING: 'Gene Retrieval by Text Mining',
Constants.COEXPRESSION: 'Co-Expression Network Analysis',
Constants.TFBS: 'Regulatory Feature Enrichment',
Constants.IGV: 'Browse Loci'
Constants.LABEL_LIFT_OVER: 'Gene List and Lift-Over',
Constants.LABEL_TEXT_MINING: 'Gene Retrieval by Text Mining',
Constants.LABEL_COEXPRESSION: 'Co-Expression Network Analysis',
Constants.LABEL_TFBS: 'Regulatory Feature Enrichment',
Constants.LABEL_IGV: 'Browse Loci'
})


Expand Down
Loading

0 comments on commit 2efdf27

Please sign in to comment.