Skip to content

Commit

Permalink
Optimize code for getting unique genes in other reference
Browse files (browse the repository at this point in the history)
  • Loading branch information
memgonzales committed Sep 12, 2023
1 parent 216c4d9 commit c3c9cab
Showing 1 changed file with 14 additions and 11 deletions.
25 changes: 14 additions & 11 deletions callbacks/lift_over/util.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -341,14 +341,11 @@ def get_interpro_entries(genes, interpro_mapping, iric_mapping):
return [get_interpro_entry(gene, interpro_mapping, iric_mapping) for gene in genes]


def get_nb_ortholog(gene, ref):
with open(f'{Constants.NB_MAPPING}/{ref}_to_Nb.pickle', 'rb') as f:
mapping = pickle.load(f)

if mapping[gene]:
return ', '.join(mapping[gene])
def get_nb_ortholog(gene, ref, nb_ortholog_mapping):
if nb_ortholog_mapping[gene]:
return ', '.join(nb_ortholog_mapping[gene])

return NULL_PLACEHOLDER
return NULL_PLACEHOLDER


# ========================
Expand Down Expand Up @@ -601,8 +598,11 @@ def get_unique_genes_in_other_ref(ref, nb_intervals):
genes_in_nb = genes_in_nb[['OGI']]

# Get set difference
unique_genes = pd.concat([genes_in_other_ref, genes_in_nb, genes_in_nb]).drop_duplicates(
subset=['OGI'], keep=False)
genes_in_nb_set = set(map(tuple, genes_in_nb.values))
genes_in_other_ref_set = set(map(tuple, genes_in_other_ref.values))

unique_genes = pd.DataFrame(
list(genes_in_other_ref_set.difference(genes_in_nb_set)))

gene_description_df = pd.read_csv(
f'{Constants.GENE_DESCRIPTIONS}/{ref}/{ref}_gene_descriptions.csv')
Expand All @@ -613,8 +613,11 @@ def get_unique_genes_in_other_ref(ref, nb_intervals):
unique_genes = gene_description_df.join(unique_genes, how='right')
unique_genes = unique_genes.reset_index()

unique_genes['Ortholog in Nipponbare'] = unique_genes.apply(
lambda x: get_nb_ortholog(x['Name'], ref), axis=1)
with open(f'{Constants.NB_MAPPING}/{ref}_to_Nb.pickle', 'rb') as f:
nb_ortholog_mapping = pickle.load(f)

unique_genes['Ortholog in Nipponbare'] = unique_genes.apply(
lambda x: get_nb_ortholog(x['Name'], ref, nb_ortholog_mapping), axis=1)

unique_genes = unique_genes[FRONT_FACING_COLUMNS +
['Ortholog in Nipponbare']]
Expand Down

0 comments on commit c3c9cab

Please sign in to comment.