diff --git a/app.py b/app.py
index 93564ec3..c0919d2b 100644
--- a/app.py
+++ b/app.py
@@ -261,14 +261,14 @@
 callbacks.text_mining.callbacks.init_callback(app)
 
 # Create database table
-const = Constants()
-make_dir(const.TEMP)
+
+make_dir(Constants.TEMP)
 
 try:
-    connection = sqlite3.connect(const.FILE_STATUS_DB)
+    connection = sqlite3.connect(Constants.FILE_STATUS_DB)
     cursor = connection.cursor()
 
-    query = f'CREATE TABLE IF NOT EXISTS {const.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));'
+    query = f'CREATE TABLE IF NOT EXISTS {Constants.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));'
     cursor.execute(query)
 
     connection.commit()
diff --git a/callbacks/browse_loci/callbacks.py b/callbacks/browse_loci/callbacks.py
index b1190e0a..fe931fff 100644
--- a/callbacks/browse_loci/callbacks.py
+++ b/callbacks/browse_loci/callbacks.py
@@ -11,7 +11,6 @@
 from ..file_util import *
 from ..constants import Constants
 
-const = Constants()
 
 
 def init_callback(app):
@@ -73,7 +72,7 @@ def handle_exception(e):
     @app.server.route('/genomes_nipponbare/<path:filename>')
     def send_genomes_nipponbare_url(filename):
         try:
-            return send_from_directory(const.GENOMES_NIPPONBARE, filename)
+            return send_from_directory(Constants.GENOMES_NIPPONBARE, filename)
         except FileNotFoundError:
             abort(404)
 
@@ -81,7 +80,7 @@ def send_genomes_nipponbare_url(filename):
     def send_annotations_nb_url(nb_intervals_str, foldername, selected_interval_str, file_format):
         try:
             temp_output_folder_dir = get_path_to_temp(
-                nb_intervals_str, const.TEMP_IGV, foldername)
+                nb_intervals_str, Constants.TEMP_IGV, foldername)
 
             selected_interval_str_filename = convert_text_to_path(
                 selected_interval_str)
@@ -96,7 +95,7 @@ def send_annotations_nb_url(nb_intervals_str, foldername, selected_interval_str,
     @app.server.route('/open_chromatin_panicle/<path:filename>')
     def send_open_chromatin_panicle_url(filename):
         try:
-            return send_from_directory(const.OPEN_CHROMATIN_PANICLE, filename)
+            return send_from_directory(Constants.OPEN_CHROMATIN_PANICLE, filename)
         except FileNotFoundError:
             abort(404)
 
diff --git a/callbacks/browse_loci/util.py b/callbacks/browse_loci/util.py
index 10c1a661..ed420357 100644
--- a/callbacks/browse_loci/util.py
+++ b/callbacks/browse_loci/util.py
@@ -1,17 +1,13 @@
 from ..lift_over import util
 import gffutils
-import pandas as pd
-import os
 from ..file_util import *
 from ..constants import Constants
 
-const = Constants()
-
 
 def write_igv_tracks_to_file(nb_intervals_str):
     # tracks found in igv
-    track_db = [[const.ANNOTATIONS_NB, 'IRGSPMSU.gff.db', 'gff'],
-                [const.OPEN_CHROMATIN_PANICLE, 'SRR7126116_ATAC-Seq_Panicles.bed', 'bed']]
+    track_db = [[Constants.ANNOTATIONS_NB, 'IRGSPMSU.gff.db', 'gff'],
+                [Constants.OPEN_CHROMATIN_PANICLE, 'SRR7126116_ATAC-Seq_Panicles.bed', 'bed']]
 
     # write to file the data for igv
     for db in track_db:
@@ -32,7 +28,7 @@ def write_gff_igv_track_to_file(source_dir, source_file, nb_intervals_str):
         nb_intervals_str)
 
     temp_folder = get_path_to_temp(
-        nb_intervals_str, const.TEMP_IGV, source_file)
+        nb_intervals_str, Constants.TEMP_IGV, source_file)
     make_dir(temp_folder)
 
     for i in range(len(loci_list)):
diff --git a/callbacks/coexpression/util.py b/callbacks/coexpression/util.py
index 83f94608..31855130 100644
--- a/callbacks/coexpression/util.py
+++ b/callbacks/coexpression/util.py
@@ -11,7 +11,6 @@
 
 from collections import namedtuple
 
-const = Constants()
 
 # Settings for the module detection algorithms:
 # - multiplier: Value multiplied to the parameter to get the name of the directory
@@ -60,7 +59,7 @@ def get_user_facing_parameter(algo, parameter, network='OS-CX'):
 
     parameters = sorted(
-        map(int, os.listdir(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}')))
+        map(int, os.listdir(f'{Constants.NETWORK_MODULES}/{network}/MSU/{algo}')))
 
     return parameters.index(parameter) + 1
 
 
@@ -90,7 +89,7 @@ def get_parameters_for_algo(algo, network='OS-CX'):
     """
     param_dict = {}
     parameters = sorted(
-        map(int, os.listdir(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}')))
+        map(int, os.listdir(f'{Constants.NETWORK_MODULES}/{network}/MSU/{algo}')))
 
     # Display the user-facing parameters for the module detection algorithms
     for idx, parameter in enumerate(parameters):
@@ -124,10 +123,10 @@ def create_module_enrichment_results_dir(genomic_intervals, addl_genes, network,
     """
     if addl_genes:
         temp_output_folder_dir = get_path_to_temp(
-            genomic_intervals, const.TEMP_COEXPRESSION, f'{shorten_name(addl_genes)}/{network}/{algo}/{parameters}')
+            genomic_intervals, Constants.TEMP_COEXPRESSION, f'{shorten_name(addl_genes)}/{network}/{algo}/{parameters}')
     else:
         temp_output_folder_dir = get_path_to_temp(
-            genomic_intervals, const.TEMP_COEXPRESSION, f'{network}/{algo}/{parameters}')
+            genomic_intervals, Constants.TEMP_COEXPRESSION, f'{network}/{algo}/{parameters}')
 
     if not path_exists(temp_output_folder_dir):
         make_dir(temp_output_folder_dir)
@@ -180,7 +179,7 @@ def do_module_enrichment_analysis(implicated_gene_ids, genomic_intervals, addl_g
     if not path_exists(ENRICHED_MODULES_PATH):
         ENRICHED_MODULES_PATH_WITH_TIMESTAMP = append_timestamp_to_filename(
             ENRICHED_MODULES_PATH)
-        MODULES_PATH = f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv'
+        MODULES_PATH = f'{Constants.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv'
 
         # ====================================================================================
         # This replicates the logic of running the universal enrichment function `enricher()`
@@ -214,7 +213,7 @@ def do_module_enrichment_analysis(implicated_gene_ids, genomic_intervals, addl_g
 
         adj_p_values = false_discovery_control(p_values, method='bh')
         significant_adj_p_values = [(p_values_indices[idx], adj_p_value) for idx, adj_p_value in enumerate(
-            adj_p_values) if adj_p_value < const.P_VALUE_CUTOFF]
+            adj_p_values) if adj_p_value < Constants.P_VALUE_CUTOFF]
         significant_adj_p_values.sort(key=lambda x: x[1])
         significant_adj_p_values = [
             f'{ID}\t{adj_p_value}' for ID, adj_p_value in significant_adj_p_values]
@@ -265,7 +264,7 @@ def convert_transcript_to_msu_id(transcript_ids_str, network):
     Returns:
     - Equivalent MSU accessions of the KEGG transcript IDs
     """
-    with open(f'{const.GENE_ID_MAPPING}/{network}/transcript-to-msu-id.pickle', 'rb') as f:
+    with open(f'{Constants.GENE_ID_MAPPING}/{network}/transcript-to-msu-id.pickle', 'rb') as f:
         mapping_dict = pickle.load(f)
 
     output_str = ''
@@ -279,14 +278,14 @@
 
 
 def get_genes_in_module(module_idx, network, algo, parameters):
-    with open(f'{const.NETWORK_MODULES}/{network}/transcript/{algo}/{parameters}/{algo}-module-list.tsv') as f:
+    with open(f'{Constants.NETWORK_MODULES}/{network}/transcript/{algo}/{parameters}/{algo}-module-list.tsv') as f:
         for idx, module in enumerate(f):
             if idx + 1 == int(module_idx):
                 return set(module.split('\t'))
 
 
 def get_genes_in_pathway(pathway_id, network):
-    with open(f'{const.ENRICHMENT_ANALYSIS}/{network}/{const.KEGG_DOSA_GENESET}', 'rb') as f:
+    with open(f'{Constants.ENRICHMENT_ANALYSIS}/{network}/{Constants.KEGG_DOSA_GENESET}', 'rb') as f:
         genes_in_pathway = pickle.load(f)
 
     return genes_in_pathway[pathway_id]
@@ -298,7 +297,7 @@ def get_genes_in_module_and_pathway(pathway_id, module_idx, network, algo, param
 
 
 def get_kegg_pathway_name(pathway_id, network):
-    with open(f'{const.ENRICHMENT_ANALYSIS}/{network}/{const.KEGG_DOSA_PATHWAY_NAMES}') as pathways:
+    with open(f'{Constants.ENRICHMENT_ANALYSIS}/{network}/{Constants.KEGG_DOSA_PATHWAY_NAMES}') as pathways:
         for line in pathways:
             line = line.split('\t')
             if line[0].rstrip() == pathway_id:
@@ -408,7 +407,8 @@ def convert_to_df_pe(result, module_idx, network, algo, parameters):
 
     if result.empty:
         return create_empty_df_with_cols(cols)
 
-    result = result.loc[result['Adj. Combined p-value'] < const.P_VALUE_CUTOFF]
+    result = result.loc[result['Adj. Combined p-value']
+                        < Constants.P_VALUE_CUTOFF]
 
     # IMPORTANT: Do not change ordering of instructions
@@ -444,7 +444,8 @@ def convert_to_df_spia(result, network):
     if result.empty:
         return create_empty_df_with_cols(cols)
 
-    result = result.loc[result['Adj. Combined p-value'] < const.P_VALUE_CUTOFF]
+    result = result.loc[result['Adj. Combined p-value']
+                        < Constants.P_VALUE_CUTOFF]
 
     # Prettify display of ID
     result['ID'] = 'dosa' + result['ID']
@@ -486,7 +487,7 @@ def convert_to_df(active_tab, module_idx, network, algo, parameters):
     dir = enrichment_tabs[get_tab_index(active_tab)].path
     enrichment_type = dir.split('/')[-1]
 
-    file = f'{const.ENRICHMENT_ANALYSIS}/{network}/output/{algo}/{parameters}/{dir}/results/{enrichment_type}-df-{module_idx}.tsv'
+    file = f'{Constants.ENRICHMENT_ANALYSIS}/{network}/output/{algo}/{parameters}/{dir}/results/{enrichment_type}-df-{module_idx}.tsv'
 
     columns = {'go': ['ID', 'Gene Ontology Term', 'Gene Ratio', 'BG Ratio',
                       'p-value', 'Adj. p-value', 'q-value', 'Genes', 'Count'],
@@ -591,12 +592,12 @@ def load_module_graph(implicated_gene_ids, module, network, algo, parameters, la
     try:
         # Ignore the word "Module" at the start
         module_idx = int(module.split(' ')[1])
-        OUTPUT_DIR = f'{const.TEMP}/{network}/{algo}/modules/{parameters}'
+        OUTPUT_DIR = f'{Constants.TEMP}/{network}/{algo}/modules/{parameters}'
         coexpress_nw = f'{OUTPUT_DIR}/module-{module_idx}.tsv'
 
         if not path_exists(coexpress_nw):
-            NETWORK_FILE = f'{const.NETWORKS}/{network}.txt'
-            MODULE_FILE = f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv'
+            NETWORK_FILE = f'{Constants.NETWORKS}/{network}.txt'
+            MODULE_FILE = f'{Constants.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv'
 
             convert_modules_to_edgelist(
                 NETWORK_FILE, MODULE_FILE, module_idx, OUTPUT_DIR)
@@ -621,7 +622,7 @@
 
 
 def count_modules(network, algo, parameters):
-    with open(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as f:
+    with open(f'{Constants.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as f:
         return len(f.readlines())
 
 
@@ -637,7 +638,7 @@ def get_noun_for_active_tab(active_tab):
 
 
 def count_genes_in_module(implicated_genes, module_idx, network, algo, parameters):
-    with open(f'{const.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as modules:
+    with open(f'{Constants.NETWORK_MODULES}/{network}/MSU/{algo}/{parameters}/{algo}-module-list.tsv') as modules:
         for idx, module in enumerate(modules):
             if idx == module_idx - 1:
                 module_genes = module.strip().split('\t')
diff --git a/callbacks/file_util.py b/callbacks/file_util.py
index 03834e14..f2098334 100644
--- a/callbacks/file_util.py
+++ b/callbacks/file_util.py
@@ -5,8 +5,6 @@
 import time
 import sqlite3
 
-const = Constants()
-
 
 def path_exists(path):
     """
@@ -64,7 +62,7 @@ def get_path_to_temp(genomic_interval, analysis_type, *args):
 
     analysis_type = convert_text_to_path(analysis_type)
 
-    temp_dir = f'{const.TEMP}/{genomic_interval_foldername}/{analysis_type}'
+    temp_dir = f'{Constants.TEMP}/{genomic_interval_foldername}/{analysis_type}'
     for folder in args:
         temp_dir += f'/{convert_text_to_path(folder)}'
 
@@ -76,20 +74,21 @@
 def get_path_to_text_mining_temp(analysis_type, *args):
     analysis_type = convert_text_to_path(analysis_type)
 
-    temp_dir = f'{const.TEMP}/{analysis_type}'
+    temp_dir = f'{Constants.TEMP}/{analysis_type}'
     for folder in args:
         temp_dir += f'/{convert_text_to_path(folder)}'
 
     temp_dir = re.sub(r'/+', '/', temp_dir)
-    
+
     return temp_dir
-    
+
+
 def shorten_name(name):
     try:
-        connection = sqlite3.connect(const.FILE_STATUS_DB)
+        connection = sqlite3.connect(Constants.FILE_STATUS_DB)
         cursor = connection.cursor()
 
-        query = f'INSERT OR IGNORE INTO {const.FILE_STATUS_TABLE}(name) VALUES("{name}")'
+        query = f'INSERT OR IGNORE INTO {Constants.FILE_STATUS_TABLE}(name) VALUES("{name}")'
         cursor.execute(query)
 
         connection.commit()
@@ -100,10 +99,10 @@ def shorten_name(name):
         pass
 
     try:
-        connection = sqlite3.connect(const.FILE_STATUS_DB)
+        connection = sqlite3.connect(Constants.FILE_STATUS_DB)
         cursor = connection.cursor()
 
-        query = f'SELECT rowid FROM {const.FILE_STATUS_TABLE} WHERE name = "{name}"'
+        query = f'SELECT rowid FROM {Constants.FILE_STATUS_TABLE} WHERE name = "{name}"'
         cursor.execute(query)
 
         row_id = cursor.fetchall()[0][0]
diff --git a/callbacks/homepage/callbacks.py b/callbacks/homepage/callbacks.py
index 8ac2e9da..4cedfc46 100644
--- a/callbacks/homepage/callbacks.py
+++ b/callbacks/homepage/callbacks.py
@@ -8,8 +8,6 @@
 
 from ..style_util import *
 
-const = Constants()
-
 
 
 def init_callback(app):
@@ -72,7 +70,8 @@ def parse_input(nb_intervals_str, n_clicks, n_submit, dccStore_children, *_):
 
         if 'homepage-reset' == ctx.triggered_id:
             # clear data for items in dcc.Store found in session-container
-            dccStore_children = get_cleared_dccStore_data_excluding_some_data(dccStore_children)
+            dccStore_children = get_cleared_dccStore_data_excluding_some_data(
+                dccStore_children)
 
             return dccStore_children, None, {'display': 'none'}, False, ''
 
@@ -130,8 +129,7 @@ def set_input_fields_with_preset_input(example_genomic_interval_n_clicks):
 
            return get_example_genomic_interval(ctx.triggered_id['description'])
 
        raise PreventUpdate
-    
-
+
    @app.callback(
        Output('homepage-genomic-intervals-saved-input', 'data', allow_duplicate=True),
@@ -141,7 +139,6 @@ def set_input_fields(genomic_intervals):
 
        return genomic_intervals
 
-
    @app.callback(
        Output('homepage-results-container', 'style'),
        Input('homepage-is-submitted', 'data'),
diff --git a/callbacks/homepage/util.py b/callbacks/homepage/util.py
index 0c69112d..264dd4d2 100644
--- a/callbacks/homepage/util.py
+++ b/callbacks/homepage/util.py
@@ -6,7 +6,6 @@
 
 import sqlite3
 
-const = Constants()
 
 example_genomic_intervals = {
     'pre-harvest': 'Chr01:1523625-1770814;Chr04:4662701-4670717',
@@ -14,15 +13,15 @@
 
 
 def clear_cache_folder():
-    if os.path.exists(const.TEMP):
-        shutil.rmtree(const.TEMP, ignore_errors=True)
+    if os.path.exists(Constants.TEMP):
+        shutil.rmtree(Constants.TEMP, ignore_errors=True)
 
     # Drop the table
     try:
-        connection = sqlite3.connect(const.FILE_STATUS_DB)
+        connection = sqlite3.connect(Constants.FILE_STATUS_DB)
         cursor = connection.cursor()
 
-        query = f'DROP TABLE {const.FILE_STATUS_TABLE}'
+        query = f'DROP TABLE {Constants.FILE_STATUS_TABLE}'
         cursor.execute(query)
 
         connection.commit()
@@ -33,13 +32,13 @@ def clear_cache_folder():
         pass
 
     # Recreate the database
-    make_dir(const.TEMP)
+    make_dir(Constants.TEMP)
 
     try:
-        connection = sqlite3.connect(const.FILE_STATUS_DB)
+        connection = sqlite3.connect(Constants.FILE_STATUS_DB)
         cursor = connection.cursor()
 
-        query = f'CREATE TABLE IF NOT EXISTS {const.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));'
+        query = f'CREATE TABLE IF NOT EXISTS {Constants.FILE_STATUS_TABLE} (name TEXT, UNIQUE(name));'
         cursor.execute(query)
 
         connection.commit()
@@ -59,10 +58,10 @@ def get_cleared_dccStore_data_excluding_some_data(dccStore_children, *args):
         for arg in args:
             if arg in dccStore_ID:
                 flag = True
-                
+
         if not flag:
             dccStore_children[i]['props']['data'] = ''
-        
+
         else:
             dccStore_children[i]['props']['data'] = ''
 
diff --git a/callbacks/lift_over/callbacks.py b/callbacks/lift_over/callbacks.py
index dad5627a..2d293c06 100644
--- a/callbacks/lift_over/callbacks.py
+++ b/callbacks/lift_over/callbacks.py
@@ -4,7 +4,6 @@
 from .util import *
 from ..constants import Constants
 from ..general_util import *
-const = Constants()
 
 
 def init_callback(app):
diff --git a/callbacks/lift_over/util.py b/callbacks/lift_over/util.py
index b2e4252b..1c0f48b4 100644
--- a/callbacks/lift_over/util.py
+++ b/callbacks/lift_over/util.py
@@ -9,7 +9,6 @@
 from ..links_util import *
 
-const = Constants()
 
 Genomic_interval = namedtuple('Genomic_interval', ['chrom', 'start', 'stop'])
 
 # Error codes and messages triggered by a malformed genomic interval entered by the user
@@ -315,12 +314,12 @@ def get_ogi_nb(nb_intervals):
     for nb_interval in nb_intervals:
         # Load and search GFF_DB of Nipponbare
         db = gffutils.FeatureDB(
-            f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True)
+            f'{Constants.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True)
         genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop),
                                            completely_within=False, featuretype='gene'))
 
         # Map Nipponbare accessions to OGIs
-        ogi_mapping_path = f'{const.OGI_MAPPING}/Nb_to_ogi.pickle'
+        ogi_mapping_path = f'{Constants.OGI_MAPPING}/Nb_to_ogi.pickle'
         with open(ogi_mapping_path, 'rb') as f:
             ogi_mapping = pickle.load(f)
             for gene in genes_in_interval:
@@ -356,11 +355,11 @@ def get_ogi_other_ref(ref, nb_intervals):
 
     # Get intervals from other refs that align to (parts) of the input loci
     db_align = gffutils.FeatureDB(
-        f'{const.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db')
+        f'{Constants.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db')
 
     # Get corresponding intervals on ref
     db_annotation = gffutils.FeatureDB(
-        f"{const.ANNOTATIONS}/{ref}/{ref}.gff.db".format(ref))
+        f"{Constants.ANNOTATIONS}/{ref}/{ref}.gff.db")
 
     for nb_interval in nb_intervals:
         gff_intersections = list(db_align.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop),
@@ -372,7 +371,7 @@ def get_ogi_other_ref(ref, nb_intervals):
                                                  completely_within=False, featuretype='gene'))
 
         # Map reference-specific accessions to OGIs
-        ogi_mapping_path = f'{const.OGI_MAPPING}/{ref}_to_ogi.pickle'
+        ogi_mapping_path = f'{Constants.OGI_MAPPING}/{ref}_to_ogi.pickle'
         with open(ogi_mapping_path, 'rb') as f:
             ogi_mapping = pickle.load(f)
             for gene in genes_in_interval:
@@ -418,7 +417,7 @@ def get_qtaro_entries(mapping, genes):
 
 def get_pubmed_entry(gene):
     try:
-        with open(f'{const.TEXT_MINING_PUBMED}/{gene}.pickle', 'rb') as f:
+        with open(f'{Constants.TEXT_MINING_PUBMED}/{gene}.pickle', 'rb') as f:
             mapping = pickle.load(f)
 
         pubmed_ids = [get_pubmed_link_single_str(pubmed_id[0]) for pubmed_id in sorted(
@@ -459,12 +458,12 @@ def get_genes_in_Nb(nb_intervals):
     for nb_interval in nb_intervals:
         # Load and search GFF_DB of Nipponbare
         db = gffutils.FeatureDB(
-            f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True)
+            f'{Constants.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True)
         genes_in_interval = list(db.region(region=(nb_interval.chrom, nb_interval.start, nb_interval.stop),
                                            completely_within=False, featuretype='gene'))
 
         # Map accessions to their respective OGIs
-        ogi_mapping_path = f'{const.OGI_MAPPING}/Nb_to_ogi.pickle'
+        ogi_mapping_path = f'{Constants.OGI_MAPPING}/Nb_to_ogi.pickle'
         ogi_list = []
         with open(ogi_mapping_path, 'rb') as f:
             ogi_mapping = pickle.load(f)
@@ -472,7 +471,7 @@ def get_genes_in_Nb(nb_intervals):
                                          for gene in genes_in_interval], ogi_mapping)
 
         # Get QTARO annotations
-        with open(const.QTARO_DICTIONARY, 'rb') as f:
+        with open(Constants.QTARO_DICTIONARY, 'rb') as f:
             qtaro_dict = pickle.load(f)
         qtaro_list = get_qtaro_entries(
             qtaro_dict, [gene.id for gene in genes_in_interval])
@@ -497,7 +496,7 @@ def get_genes_in_Nb(nb_intervals):
     table_gene_ids = pd.concat(dfs, ignore_index=True)
     # Read in dataframe containing gene descriptions
     gene_description_df = pd.read_csv(
-        f'{const.GENE_DESCRIPTIONS}/Nb/Nb_gene_descriptions.csv')
+        f'{Constants.GENE_DESCRIPTIONS}/Nb/Nb_gene_descriptions.csv')
     # Right merge because some genes do not have descriptions or UniProtKB/Swiss-Prot IDs
     table = pd.merge(gene_description_df, table_gene_ids,
                      left_on='Gene_ID', right_on='Name', how='right')
@@ -534,11 +533,11 @@ def get_genes_in_other_ref(ref, nb_intervals):
 
     # Get intervals from other refs that align to (parts) of the input loci
     db_align = gffutils.FeatureDB(
-        f'{const.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db')
+        f'{Constants.ALIGNMENTS}/{"Nb_"+str(ref)}/{"Nb_"+str(ref)}.gff.db')
 
     # Get corresponding intervals on ref
     db_annotation = gffutils.FeatureDB(
-        f"{const.ANNOTATIONS}/{ref}/{ref}.gff.db")
+        f"{Constants.ANNOTATIONS}/{ref}/{ref}.gff.db")
 
     dfs = []
 
@@ -552,7 +551,7 @@ def get_genes_in_other_ref(ref, nb_intervals):
                                                  completely_within=False, featuretype='gene'))
 
         # Map accessions to their respective OGIs
-        ogi_mapping_path = f'{const.OGI_MAPPING}/{ref}_to_ogi.pickle'
+        ogi_mapping_path = f'{Constants.OGI_MAPPING}/{ref}_to_ogi.pickle'
         ogi_list = []
         with open(ogi_mapping_path, 'rb') as f:
             ogi_mapping = pickle.load(f)
@@ -674,7 +673,7 @@ def get_unique_genes_in_other_ref(ref, nb_intervals):
         subset=['OGI'], keep=False)
 
     gene_description_df = pd.read_csv(
-        f'{const.GENE_DESCRIPTIONS}/{ref}/{ref}_gene_descriptions.csv')
+        f'{Constants.GENE_DESCRIPTIONS}/{ref}/{ref}_gene_descriptions.csv')
     # Right merge because some genes do not have descriptions or UniProtKB/Swiss-Prot IDs
     unique_genes = pd.merge(gene_description_df, unique_genes,
                             left_on='Gene_ID', right_on='Name', how='right')
diff --git a/callbacks/text_mining/util.py b/callbacks/text_mining/util.py
index cbfdb487..d1d53c81 100644
--- a/callbacks/text_mining/util.py
+++ b/callbacks/text_mining/util.py
@@ -6,7 +6,7 @@
 import ftfy
 
 from ..file_util import *
-const = Constants()
+
 
 COLNAMES = ['Gene', 'PMID', 'Title', 'Sentence', 'Score']
 
@@ -44,7 +44,7 @@ def addl_sanitize_gene(text):
 
 
 def text_mining_query_search(query_string):
-    text_mining_path = get_path_to_text_mining_temp(const.TEMP_TEXT_MINING)
+    text_mining_path = get_path_to_text_mining_temp(Constants.TEMP_TEXT_MINING)
     make_dir(text_mining_path)
 
     text_mining_path = f'{text_mining_path}/{query_string}.csv'
@@ -74,10 +74,10 @@ def text_mining_query_search(query_string):
     df = df.sort_values('Score', ascending=False)
 
     display_cols_in_fixed_dec_places(df, ['Score'])
-    
+
     if len(df.index) == 0:
         df = create_empty_df_with_cols(COLNAMES)
-    
+
     df.to_csv(f'{text_mining_path}', index=False)
 
     return df
@@ -91,4 +91,3 @@ def is_error(input):
         pass
 
     return False, None
-
diff --git a/callbacks/tf_enrich/util.py b/callbacks/tf_enrich/util.py
index 0d51ecad..38af7e08 100644
--- a/callbacks/tf_enrich/util.py
+++ b/callbacks/tf_enrich/util.py
@@ -10,10 +10,9 @@
 import gffutils
 import pybedtools
 
-const = Constants()
 
 COLUMNS = ['Transcription Factor', 'Family',
-           'p-value', 'Adj. p-value']#, 'Significant?']
+           'p-value', 'Adj. p-value']  # , 'Significant?']
 
 
 def create_empty_df():
@@ -22,7 +21,7 @@
 
 def get_annotations_addl_gene(addl_genes):
     db = gffutils.FeatureDB(
-        f'{const.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True)
+        f'{Constants.ANNOTATIONS}/Nb/IRGSPMSU.gff.db', keep_order=True)
 
     return [{'ogi': None,
              'name': addl_gene,
@@ -35,9 +34,9 @@ def write_query_promoter_intervals_to_file(gene_table, nb_interval_str, addl_gen
                                            upstream_win_len=500, downstream_win_len=100):
 
-    make_dir(get_path_to_temp(nb_interval_str, const.TEMP_TFBS))
+    make_dir(get_path_to_temp(nb_interval_str, Constants.TEMP_TFBS))
     filepath = get_path_to_temp(
-        nb_interval_str, const.TEMP_TFBS, addl_genes, const.PROMOTER_BED)
+        nb_interval_str, Constants.TEMP_TFBS, addl_genes, Constants.PROMOTER_BED)
 
     with open(filepath, "w") as f:
         for gene in gene_table:
             if gene['Strand'] == '+':
@@ -56,9 +55,9 @@ def write_query_promoter_intervals_to_file(gene_table, nb_interval_str, addl_gen
 
 def write_query_genome_intervals_to_file(nb_interval_str, addl_genes):
-    make_dir(get_path_to_temp(nb_interval_str, const.TEMP_TFBS, addl_genes))
+    make_dir(get_path_to_temp(nb_interval_str, Constants.TEMP_TFBS, addl_genes))
     filepath = get_path_to_temp(
-        nb_interval_str, const.TEMP_TFBS, const.GENOME_WIDE_BED)
+        nb_interval_str, Constants.TEMP_TFBS, Constants.GENOME_WIDE_BED)
 
     with open(filepath, "w") as f:
         for interval in nb_interval_str.split(";"):
             chrom, range = interval.split(":")
@@ -71,7 +70,8 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes,
                               tfbs_set, tfbs_prediction_technique, nb_interval_str):
 
-    out_dir = get_path_to_temp(nb_interval_str, const.TEMP_TFBS, addl_genes, tfbs_set, tfbs_prediction_technique)
+    out_dir = get_path_to_temp(
+        nb_interval_str, Constants.TEMP_TFBS, addl_genes, tfbs_set, tfbs_prediction_technique)
 
 
     # if previously computed
     if path_exists(f'{out_dir}/BH_corrected.csv'):
@@ -105,18 +105,18 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes,
     make_dir(out_dir)
 
     # construct query BED file
-    #out_dir_tf_enrich = get_path_to_temp(nb_interval_str, const.TEMP_TFBS, addl_genes)
+    # out_dir_tf_enrich = get_path_to_temp(nb_interval_str, Constants.TEMP_TFBS, addl_genes)
     if tfbs_set == 'promoters':
         query_bed = write_query_promoter_intervals_to_file(
             lift_over_nb_entire_table, nb_interval_str, addl_genes)
-        sizes = f'{const.TFBS_BEDS}/sizes/{tfbs_set}'
+        sizes = f'{Constants.TFBS_BEDS}/sizes/{tfbs_set}'
     elif tfbs_set == 'genome':
         query_bed = write_query_genome_intervals_to_file(
             nb_interval_str, addl_genes)
-        sizes = f'{const.TFBS_BEDS}/sizes/{tfbs_set}'
+        sizes = f'{Constants.TFBS_BEDS}/sizes/{tfbs_set}'
 
-    #construct a pybedtool object. we will use pybedtools to compute if there
-    #is any overlap. If no, don't test for significance using mcdp2.
+    # construct a pybedtool object. we will use pybedtools to compute if there
+    # is any overlap. If no, don't test for significance using mcdp2.
     query_pybed = pybedtools.BedTool(query_bed)
 
     TF_list = []
@@ -124,15 +124,15 @@ def perform_enrichment_all_tf(lift_over_nb_entire_table, addl_genes,
     pvalue_list = []
 
     # perform annotation overlap statistical significance tests
-    for tf in os.listdir(os.path.join(const.TFBS_BEDS, tfbs_set, tfbs_prediction_technique, "intervals")):
+    for tf in os.listdir(os.path.join(Constants.TFBS_BEDS, tfbs_set, tfbs_prediction_technique, "intervals")):
         # print("computing overlaps for: {}".format(tf))
-        ref_bed = f'{const.TFBS_BEDS}/{tfbs_set}/{tfbs_prediction_technique}/intervals/{tf}'
+        ref_bed = f'{Constants.TFBS_BEDS}/{tfbs_set}/{tfbs_prediction_technique}/intervals/{tf}'
         ref_pybed = pybedtools.BedTool(ref_bed)
 
         out_dir_tf = f'{out_dir}/{tf}'
         make_dir(out_dir_tf)
 
-        if query_pybed.intersect(ref_pybed,nonamecheck=True).count() != 0 :
+        if query_pybed.intersect(ref_pybed, nonamecheck=True).count() != 0:
             p_value = perform_enrichment_specific_tf(ref_bed, query_bed,
                                                      sizes, out_dir_tf)
 
@@ -180,13 +180,13 @@ def multiple_testing_correction(single_tf_results):
     sig = list(map(str, sig))
     adj_pvalue = adj_pvalue.tolist()
     single_tf_results['Adj. p-value'] = adj_pvalue
-    #single_tf_results['Significant?'] = sig
+    # single_tf_results['Significant?'] = sig
     single_tf_results.sort_values(by=['p-value'], inplace=True)
 
     return single_tf_results
 
 def get_family(transcription_factor):
-    with open(f'{const.TFBS_ANNOTATION}/family_mapping.pickle', 'rb') as f:
+    with open(f'{Constants.TFBS_ANNOTATION}/family_mapping.pickle', 'rb') as f:
         mapping = pickle.load(f)
 
     return ', '.join(mapping[transcription_factor])
diff --git a/pages/analysis/browse_loci.py b/pages/analysis/browse_loci.py
index 64d3c4b4..fbd1963f 100644
--- a/pages/analysis/browse_loci.py
+++ b/pages/analysis/browse_loci.py
@@ -1,12 +1,12 @@
 from dash import dcc, html
 import dash_bootstrap_components as dbc
 from callbacks.constants import Constants
-const = Constants()
+
 
 layout = html.Div(
     id={
         'type': 'analysis-layout',
-        'label': const.IGV
+        'label': Constants.IGV
     },
     hidden=True,
     children=[
diff --git a/pages/analysis/co_expr.py b/pages/analysis/co_expr.py
index 150d1b67..7b3f41aa 100644
--- a/pages/analysis/co_expr.py
+++ b/pages/analysis/co_expr.py
@@ -5,8 +5,6 @@
 
 from callbacks.coexpression.util import *
 
-const = Constants()
-
 coach = html.Li(
     [html.B('COACH'),
      html.Span(
@@ -116,7 +114,7 @@
 layout = html.Div(
     id={
         'type': 'analysis-layout',
-        'label': const.COEXPRESSION
+        'label': Constants.COEXPRESSION
     },
 
     hidden=True,
diff --git a/pages/analysis/lift_over.py b/pages/analysis/lift_over.py
index a381ffbf..7352d528 100644
--- a/pages/analysis/lift_over.py
+++ b/pages/analysis/lift_over.py
@@ -2,13 +2,12 @@
 from dash import dash_table, dcc, html
 from callbacks.lift_over.util import *
 from callbacks.constants import Constants
-const = Constants()
 
 
 layout = html.Div(
     id={
         'type': 'analysis-layout',
-        'label': const.LIFT_OVER
+        'label': Constants.LIFT_OVER
     },
     hidden=True,
     children=[
diff --git a/pages/analysis/text_mining.py b/pages/analysis/text_mining.py
index 3bbd801e..9ac170ad 100644
--- a/pages/analysis/text_mining.py
+++ b/pages/analysis/text_mining.py
@@ -1,12 +1,12 @@
 import dash_bootstrap_components as dbc
 from dash import dash_table, dcc, html
 from callbacks.constants import Constants
-const = Constants()
+
 
 layout = html.Div(
     id={
         'type': 'analysis-layout',
-        'label': const.TEXT_MINING
+        'label': Constants.TEXT_MINING
     },
     hidden=True,
     children=[
diff --git a/pages/analysis/tf_enrich.py b/pages/analysis/tf_enrich.py
index 94459a61..f5fd21d7 100644
--- a/pages/analysis/tf_enrich.py
+++ b/pages/analysis/tf_enrich.py
@@ -1,12 +1,12 @@
 import dash_bootstrap_components as dbc
 from dash import dash_table, dcc, html
 from callbacks.constants import Constants
-const = Constants()
+
 
 layout = html.Div(
     id={
         'type': 'analysis-layout',
-        'label': const.TFBS
+        'label': Constants.TFBS
     },
     hidden=True,
     children=[
diff --git a/pages/analysis_layout.py b/pages/analysis_layout.py
index 0d80cba5..830b48c6 100644
--- a/pages/analysis_layout.py
+++ b/pages/analysis_layout.py
@@ -9,16 +9,15 @@
 from collections import OrderedDict
 
 from callbacks.constants import Constants
 
 
-const = Constants()
 
 
 def get_analaysis_layout_dictionary():
     return OrderedDict({
-        const.LIFT_OVER: 'Gene List and Lift-Over',
-        const.TEXT_MINING: 'Gene Retrieval by Text Mining',
-        const.COEXPRESSION: 'Co-Expression Network Analysis',
-        const.TFBS: 'Regulatory Feature Enrichment',
-        const.IGV: 'Browse Loci'
+        Constants.LIFT_OVER: 'Gene List and Lift-Over',
+        Constants.TEXT_MINING: 'Gene Retrieval by Text Mining',
+        Constants.COEXPRESSION: 'Co-Expression Network Analysis',
+        Constants.TFBS: 'Regulatory Feature Enrichment',
+        Constants.IGV: 'Browse Loci'
     })