Skip to content

Commit

Permalink
sourced genes_to_phenotype for HPO Subset
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanwilliammd committed Mar 31, 2024
1 parent 4097b73 commit 56df3c3
Show file tree
Hide file tree
Showing 3 changed files with 304,402 additions and 992,378 deletions.
41 changes: 15 additions & 26 deletions phenotype/Generate HPO Subset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: obonet in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (1.0.0)\n",
"Requirement already satisfied: networkx in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from obonet) (3.1)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
Expand All @@ -27,9 +19,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (1.2.3)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (2021.1)\n"
"Requirement already satisfied: obonet in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (1.0.0)\n",
"Requirement already satisfied: networkx in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from obonet) (3.1)\n",
"Requirement already satisfied: pandas in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (2.0.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (2021.1)\n",
"Requirement already satisfied: tzdata>=2022.1 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: numpy>=1.20.3 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (1.24.4)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.15.0)\n"
]
},
{
Expand All @@ -41,14 +38,6 @@
"[notice] A new release of pip is available: 23.2.1 -> 24.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: numpy>=1.16.5 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from pandas) (1.24.4)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\ivan-rtx2080\\.conda\\envs\\datascience\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n"
]
}
],
"source": [
Expand All @@ -69,7 +58,7 @@
"\n",
"folder_source = 'rawdl_20240310'\n",
"ontology_file = 'hp.obo'\n",
"hpo_file = 'phenotype_to_genes.txt'"
"hpo_file = 'genes_to_phenotype.txt'"
]
},
{
Expand Down Expand Up @@ -14959,16 +14948,16 @@
"metadata": {},
"outputs": [],
"source": [
"# Read the phenotype_to_genes file, split it to ORPHA and OMIM and save to files\n",
"phenotype_to_genes = pd.read_csv(os.path.join(folder_source, hpo_file), sep='\\t')\n",
"# Read the genes_to_phenotype file, split it to ORPHA and OMIM and save to files\n",
"genes_to_phenotype = pd.read_csv(os.path.join(folder_source, hpo_file), sep='\\t')\n",
"\n",
"# read phenotype_to_genes and filter the disease_id having prefix ORPHA:\n",
"orpha_to_hpo = phenotype_to_genes[phenotype_to_genes['disease_id'].str.contains('ORPHA:')]\n",
"# Keep the rows of genes_to_phenotype whose disease_id starts with the ORPHA: prefix.\n",
"# startswith does a literal prefix match; na=False treats a missing disease_id as a non-match\n",
"# so the boolean mask never contains NA.\n",
"orpha_to_hpo = genes_to_phenotype[genes_to_phenotype['disease_id'].str.startswith('ORPHA:', na=False)]\n",
"orpha_to_hpo = orpha_to_hpo.drop_duplicates()\n",
"# De-duplicate again after selecting the exported columns: rows that differ only in the\n",
"# columns not written out would otherwise appear as repeated lines in the TSV.\n",
"orpha_to_hpo[['disease_id', 'hpo_id', 'hpo_name']].drop_duplicates().to_csv(os.path.join('subset', 'orpha2hpo_subset.tsv'), sep='\\t', index=False)\n",
"\n",
"# read phenotype_to_genes and filter the disease_id having prefix OMIM:\n",
"omim_to_hpo = phenotype_to_genes[phenotype_to_genes['disease_id'].str.contains('OMIM:')]\n",
"# Keep the rows of genes_to_phenotype whose disease_id starts with the OMIM: prefix.\n",
"# startswith does a literal prefix match; na=False treats a missing disease_id as a non-match\n",
"# so the boolean mask never contains NA.\n",
"omim_to_hpo = genes_to_phenotype[genes_to_phenotype['disease_id'].str.startswith('OMIM:', na=False)]\n",
"omim_to_hpo = omim_to_hpo.drop_duplicates()\n",
"# De-duplicate again after selecting the exported columns: rows that differ only in the\n",
"# columns not written out would otherwise appear as repeated lines in the TSV.\n",
"omim_to_hpo[['disease_id', 'hpo_id', 'hpo_name']].drop_duplicates().to_csv(os.path.join('subset', 'omim2hpo_subset.tsv'), sep='\\t', index=False)"
]
Expand Down
Loading

0 comments on commit 56df3c3

Please sign in to comment.