Skip to content

Commit

Permalink
Merge pull request #97 from bioinfodlsu/tfbs
Browse files Browse the repository at this point in the history
Tfbs
  • Loading branch information
memgonzales authored Sep 1, 2023
2 parents bfd9fc2 + 4386d7d commit 0d02672
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 32 deletions.
15 changes: 6 additions & 9 deletions callbacks/tf_enrich/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ..lift_over import util as lift_over_util

Tfbs_input = namedtuple(
'Tfbs_input', ['tfbs_set', 'tfbs_prediction_technique', 'tfbs_fdr'])
'Tfbs_input', ['tfbs_set', 'tfbs_prediction_technique'])


def init_callback(app):
Expand Down Expand Up @@ -35,13 +35,12 @@ def display_input(nb_intervals_str, homepage_is_submitted, *_):
State('tfbs-addl-genes', 'value'),
State('tfbs-set', 'value'),
State('tfbs-prediction-technique', 'value'),
State('tfbs-fdr', 'value'),
prevent_initial_call=True
)
def submit_tfbs_input(tfbs_submitted_n_clicks, homepage_is_submitted, addl_genes, tfbs_set, tfbs_prediction_technique, tfbs_fdr):
def submit_tfbs_input(tfbs_submitted_n_clicks, homepage_is_submitted, addl_genes, tfbs_set, tfbs_prediction_technique):
if homepage_is_submitted and tfbs_submitted_n_clicks >= 1:
submitted_input = Tfbs_input(
tfbs_set, tfbs_prediction_technique, tfbs_fdr)._asdict()
tfbs_set, tfbs_prediction_technique)._asdict()

return True, submitted_input

Expand Down Expand Up @@ -76,7 +75,6 @@ def display_enrichment_results(tfbs_is_submitted, lift_over_nb_entire_table, sub
if homepage_submitted and tfbs_is_submitted:
tfbs_set = tfbs_submitted_input['tfbs_set']
tfbs_prediction_technique = tfbs_submitted_input['tfbs_prediction_technique']
tfbs_fdr = tfbs_submitted_input['tfbs_fdr']

if submitted_addl_genes:
submitted_addl_genes = submitted_addl_genes.strip()
Expand All @@ -90,7 +88,7 @@ def display_enrichment_results(tfbs_is_submitted, lift_over_nb_entire_table, sub
get_annotations_addl_gene(list_addl_genes)

enrichment_results_df = perform_enrichment_all_tf(combined_genes, submitted_addl_genes,
tfbs_set, tfbs_prediction_technique, float(tfbs_fdr), nb_interval_str)
tfbs_set, tfbs_prediction_technique, nb_interval_str)

columns = [{'id': x, 'name': x, 'presentation': 'markdown'}
for x in enrichment_results_df.columns]
Expand All @@ -103,14 +101,13 @@ def display_enrichment_results(tfbs_is_submitted, lift_over_nb_entire_table, sub
Output('tfbs-saved-input', 'data', allow_duplicate=True),
Input('tfbs-set', 'value'),
Input('tfbs-prediction-technique', 'value'),
Input('tfbs-fdr', 'value'),
State('homepage-is-submitted', 'data'),
prevent_initial_call=True
)
def set_input_tfbs_session_state(tfbs_set, tfbs_prediction_technique, tfbs_fdr, homepage_is_submitted):
def set_input_tfbs_session_state(tfbs_set, tfbs_prediction_technique, homepage_is_submitted):
if homepage_is_submitted:
tfbs_saved_input = Tfbs_input(
tfbs_set, tfbs_prediction_technique, tfbs_fdr)._asdict()
tfbs_set, tfbs_prediction_technique)._asdict()

return tfbs_saved_input

Expand Down
48 changes: 30 additions & 18 deletions callbacks/tf_enrich/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
from ..general_util import *

import gffutils
import pybedtools

const = Constants()

COLUMNS = ['Transcription Factor', 'Family',
'p-value', 'Adj. p-value', 'Significant?']
'p-value', 'Adj. p-value']#, 'Significant?']


def create_empty_df():
Expand Down Expand Up @@ -66,13 +67,16 @@ def write_query_genome_intervals_to_file(nb_interval_str, addl_genes):
return filepath


def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes, tfbs_set, tfbs_prediction_technique, tfbs_fdr, nb_interval_str):
out_dir = get_path_to_temp(
nb_interval_str, const.TEMP_TFBS, addl_genes, tfbs_set, tfbs_prediction_technique)
def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes,
tfbs_set, tfbs_prediction_technique,
nb_interval_str):

out_dir = get_path_to_temp(nb_interval_str, const.TEMP_TFBS, addl_genes, tfbs_set, tfbs_prediction_technique)

# if previously computed
if path_exists(f'{out_dir}/BH_corrected_fdr_{tfbs_fdr}.csv'):
if path_exists(f'{out_dir}/BH_corrected.csv'):
results_df = pd.read_csv(
f'{out_dir}/BH_corrected_fdr_{tfbs_fdr}.csv', dtype=object)
f'{out_dir}/BH_corrected.csv', dtype=object)

results_df['Family'] = results_df['Transcription Factor'].apply(
get_family)
Expand All @@ -81,6 +85,7 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes, tfbs_set, t

return results_df

'''
# single-TF p-values already computed, but not BH_corrected, possibly FDR value changed
elif path_exists(f'{out_dir}/results_before_multiple_corrections.csv'):
results_before_multiple_corrections = pd.read_csv(
Expand All @@ -96,12 +101,11 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes, tfbs_set, t
results_df = results_df[COLUMNS]
return results_df

'''
make_dir(out_dir)

# construct query BED file
out_dir_tf_enrich = get_path_to_temp(
nb_interval_str, const.TEMP_TFBS, addl_genes)
#out_dir_tf_enrich = get_path_to_temp(nb_interval_str, const.TEMP_TFBS, addl_genes)
if tfbs_set == 'promoters':
query_bed = write_query_promoter_intervals_to_file(
lift_over_nb_entire_table, nb_interval_str, addl_genes)
Expand All @@ -111,6 +115,10 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes, tfbs_set, t
nb_interval_str, addl_genes)
sizes = f'{const.TFBS_BEDS}/sizes/{tfbs_set}'

#Construct a pybedtools BedTool object for the query. We use pybedtools to check whether the
#query overlaps a TF's intervals at all. If it does not, skip the significance test with mcdp2.
query_pybed = pybedtools.BedTool(query_bed)

TF_list = []
# Could these be kept together in a dict? BH correction needs a separate list of p-values, though.
pvalue_list = []
Expand All @@ -119,25 +127,29 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes, tfbs_set, t
for tf in os.listdir(os.path.join(const.TFBS_BEDS, tfbs_set, tfbs_prediction_technique, "intervals")):
# print("computing overlaps for: {}".format(tf))
ref_bed = f'{const.TFBS_BEDS}/{tfbs_set}/{tfbs_prediction_technique}/intervals/{tf}'
ref_pybed = pybedtools.BedTool(ref_bed)

out_dir_tf = f'{out_dir}/{tf}'
make_dir(out_dir_tf)

p_value = perform_enrichment_specific_tf(
ref_bed, query_bed, sizes, out_dir_tf)
if query_pybed.intersect(ref_pybed,nonamecheck=True).count() != 0 :

p_value = perform_enrichment_specific_tf(ref_bed, query_bed,
sizes, out_dir_tf)

TF_list.append(tf)
pvalue_list.append(p_value)
TF_list.append(tf)
pvalue_list.append(p_value)

results_no_adj_df = pd.DataFrame(list((zip(TF_list, pvalue_list))), columns=[
"Transcription Factor", "p-value"])
results_no_adj_df.to_csv(
f'{out_dir}/results_before_multiple_corrections.csv', index=False)

results_df = multiple_testing_correction(results_no_adj_df, tfbs_fdr)
results_df = multiple_testing_correction(results_no_adj_df)
display_cols_in_sci_notation(results_df, ['p-value', 'Adj. p-value'])

results_df.to_csv(
f'{out_dir}/BH_corrected_fdr_{tfbs_fdr}.csv', index=False)
f'{out_dir}/BH_corrected.csv', index=False)

results_df['Family'] = results_df['Transcription Factor'].apply(
get_family)
Expand All @@ -160,15 +172,15 @@ def perform_enrichment_specific_tf(ref_bed, query_bed, sizes, out_dir):
return p_value


def multiple_testing_correction(single_tf_results, fdr):
def multiple_testing_correction(single_tf_results):
pvalues = single_tf_results['p-value'].tolist()
sig, adj_pvalue, _, _ = sm.multipletests(
pvalues, alpha=fdr, method='fdr_bh', is_sorted=False, returnsorted=False)
pvalues, method='fdr_bh', is_sorted=False, returnsorted=False)
sig = sig.tolist()
sig = list(map(str, sig))
adj_pvalue = adj_pvalue.tolist()
single_tf_results['Adj. p-value'] = adj_pvalue
single_tf_results['Significant?'] = sig
#single_tf_results['Significant?'] = sig
single_tf_results.sort_values(by=['p-value'], inplace=True)
return single_tf_results

Expand Down
5 changes: 0 additions & 5 deletions pages/analysis/tf_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,6 @@

html.Br(),

dbc.Label("Input threshold for False-Discovery Rate:"),
dbc.Input(id='tfbs-fdr', type='number',
value=0.25, min=0, max=1, step=0.05),
html.Br(),

dbc.Button('Run Analysis',
id='tfbs-submit',
n_clicks=0,
Expand Down

0 comments on commit 0d02672

Please sign in to comment.