Skip to content
This repository has been archived by the owner on Sep 24, 2019. It is now read-only.

Commit

Permalink
added script to write gene-orthology .bel file
Browse files Browse the repository at this point in the history
  • Loading branch information
Natalie Catlett committed Mar 3, 2014
1 parent af81c98 commit 99c20ba
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 5 deletions.
19 changes: 14 additions & 5 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,19 @@ class OrthologyData(DataSet):
def __init__(self, dictionary=None, prefix='use-index-term-prefix'):
    ''' Initialise the data set with a backing dictionary and a prefix.

    dictionary -- mapping of term ids to their data; a fresh empty dict is
                  created when omitted, so instances never share one
                  default dictionary object.
    prefix     -- passed through unchanged to the parent initialiser.
    '''
    if dictionary is None:
        dictionary = {}
    super().__init__(dictionary, prefix)

def get_values(self):
    ''' Yield every term id stored in the underlying dictionary. '''
    yield from self._dict

def get_orthologs(self, term_id):
    ''' Return the set of ortholog ids recorded for term_id.

    Ids from the pipe-delimited 'mouse_ortholog_id' field are returned
    verbatim; ids from the pipe-delimited 'human_ortholog_id' field are
    returned with an 'HGNC:' prefix. Empty or missing fields contribute
    nothing, and an unknown term_id yields an empty set.
    '''
    orthologs = set()
    mapping = self._dict.get(term_id)
    if mapping is None:
        return orthologs
    # ''.split('|') gives [''], so empty pieces are filtered out; otherwise
    # an empty field would add '' (mouse) or a bare 'HGNC:' (human).
    mouse_field = mapping.get('mouse_ortholog_id') or ''
    orthologs.update(o for o in mouse_field.split('|') if o)
    human_field = mapping.get('human_ortholog_id') or ''
    orthologs.update('HGNC:' + o for o in human_field.split('|') if o)
    return orthologs

def __str__(self):
Expand Down Expand Up @@ -293,8 +298,12 @@ def get_values(self):

def get_label(self, term_id):
    ''' Return preferred label associated with term id.

    The label is the 'Symbol' entry of the record stored for term_id.
    Returns None when term_id is not present in the data set.
    '''
    mapping = self._dict.get(term_id)
    if mapping is None:
        return None
    return mapping.get('Symbol')

def get_encoding(self, term_id):
mapping = self._dict.get(term_id)
Expand Down
83 changes: 83 additions & 0 deletions orthology.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# coding: utf-8

import argparse
import os
import pickle
import time
import datasets
import bel_functions
from collections import defaultdict

def _load_datasets(required):
    """Load the required pickled data sets from the current directory.

    Every file whose name ends in 'parsed_data.pickle' is unpickled; an
    object is kept when its str() is one of the names in `required`.
    Returns a dict mapping that name to the object.
    """
    loaded = {}
    for filename in os.listdir("."):
        if not filename.endswith("parsed_data.pickle"):
            continue
        # NOTE(review): pickle.load can execute arbitrary code - only run
        # this script against a directory of trusted pickles.
        with open(filename, 'rb') as f:
            d = pickle.load(f)
        name = str(d)
        if name in required:
            loaded[name] = d
    return loaded


def _split_ortholog_id(ortholog_id):
    """Return (prefix, value) for a 'PREFIX:value' id, else None."""
    parts = ortholog_id.split(':')
    if len(parts) != 2:
        return None
    return parts[0], parts[1]


def _build_statements(ortho_data, label_data, source_prefix, targets, warning):
    """Build the set of 'X orthologous Y' statements for one data source.

    ortho_data    -- object providing get_values() and get_orthologs()
    label_data    -- object providing get_label() for the source terms
    source_prefix -- namespace prefix of the source terms (e.g. 'HGNC')
    targets       -- dict mapping ortholog prefix to the data set used to
                     look up that ortholog's label
    warning       -- format string for a missing ortholog label; it is
                     formatted with (prefix, value, source_term)
    """
    statements = set()
    for term_id in ortho_data.get_values():
        term_label = label_data.get_label(term_id)
        if term_label is None:
            # Without a label no meaningful source term can be written.
            print('WARNING - missing label for {0}, {1}'.format(
                source_prefix, term_id))
            continue
        term = bel_functions.bel_term(term_label, source_prefix, 'g')
        orthos = ortho_data.get_orthologs(term_id)
        if orthos is None:
            continue
        for o in orthos:
            parsed = _split_ortholog_id(o)
            if parsed is None:
                # Not 'PREFIX:value' (e.g. an empty id): nothing to look up.
                continue
            prefix, value = parsed
            target = targets.get(prefix)
            if target is None:
                # Unexpected namespace: skip it rather than reuse a label
                # left over from a previous iteration.
                continue
            o_label = target.get_label(value)
            if o_label is None:
                print(warning.format(prefix, value, term))
                continue
            ortho_term = bel_functions.bel_term(o_label, prefix, 'g')
            statements.add('{0} orthologous {1}'.format(term, ortho_term))
    return statements


def main():
    """Generate gene-orthology.bel from the pickled data objects."""
    # command line arguments - directory for pickled data objects
    parser = argparse.ArgumentParser(
        description="""Generate BEL orthology file
        from pickled data objects.""")
    parser.add_argument("-n", required=True, metavar="DIRECTORY",
                        help="directory containing the pickled data objects; "
                             "gene-orthology.bel is written there")
    args = parser.parse_args()
    if not os.path.exists(args.n):
        # Stop here: continuing would read and write in the wrong directory.
        parser.error('data directory {0} not found!'.format(args.n))
    os.chdir(args.n)

    data_list = ['rgd', 'hgnc', 'rgd_ortho', 'mgi']
    data_dict = _load_datasets(data_list)
    missing = [name for name in data_list if name not in data_dict]
    if missing:
        for name in missing:
            print('missing required dependency {0}!'.format(name))
        raise SystemExit(1)

    hgnc = data_dict['hgnc']
    rgd = data_dict['rgd']
    mgi = data_dict['mgi']

    hgnc_ortho_statements = _build_statements(
        hgnc, hgnc, 'HGNC', {'RGD': rgd, 'MGI': mgi},
        'WARNING - missing label for {0}, {1}')
    rgd_ortho_statements = _build_statements(
        data_dict['rgd_ortho'], rgd, 'RGD', {'HGNC': hgnc, 'MGI': mgi},
        'WARNING - {2} missing label for {0}, {1}')

    with open('gene-orthology.bel', 'w') as ortho:
        ortho.write('SET Citation = {"Online Resource", "HUGO Gene Nomenclature Committee data download", "ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz"}\n')
        for s in sorted(hgnc_ortho_statements):
            ortho.write(s + '\n')
        ortho.write('\n')
        ortho.write('SET Citation = {"Online Resource","RGD Orthology FTP file", "ftp://rgd.mcw.edu/pub/data_release/RGD_ORTHOLOGS.txt"}\n')
        for s in sorted(rgd_ortho_statements):
            ortho.write(s + '\n')


if __name__ == '__main__':
    main()

0 comments on commit 99c20ba

Please sign in to comment.